Re: [ovs-dev] [PATCH ovn v9 7/8] ovn-controller: Use the tracked runtime data changes for flow calculation.

2020-06-02 Thread Numan Siddique
On Thu, May 28, 2020 at 4:35 PM  wrote:

> From: Venkata Anil 
>
> This patch processes the logical flows of tracked datapaths
> and tracked logical ports. To handle the tracked logical port
> changes, reference of logical flows to port bindings is maintained.
>
> Acked-by: Mark Michelson 
> Co-Authored-by: Numan Siddique 
> Signed-off-by: Venkata Anil 
> Signed-off-by: Numan Siddique 
> ---
>  controller/lflow.c  |  69 -
>  controller/lflow.h  |  13 +++-
>  controller/ovn-controller.c | 117 +++-
>  controller/physical.h   |   3 +-
>  tests/ovn-performance.at|  12 ++--
>  5 files changed, 151 insertions(+), 63 deletions(-)
>
> diff --git a/controller/lflow.c b/controller/lflow.c
> index 01214a3a6..8fd205c9c 100644
> --- a/controller/lflow.c
> +++ b/controller/lflow.c
> @@ -258,7 +258,7 @@ lflow_resource_destroy_lflow(struct lflow_resource_ref
> *lfrr,
>  /* Adds the logical flows from the Logical_Flow table to flow tables. */
>  static void
>  add_logical_flows(struct lflow_ctx_in *l_ctx_in,
> -  struct lflow_ctx_out *l_ctx_out)
> +struct lflow_ctx_out *l_ctx_out)
>  {
>  const struct sbrec_logical_flow *lflow;
>
> @@ -649,6 +649,8 @@ consider_logical_flow(const struct sbrec_logical_flow
> *lflow,
>  int64_t dp_id = lflow->logical_datapath->tunnel_key;
>  char buf[16];
>  snprintf(buf, sizeof(buf), "%"PRId64"_%"PRId64, dp_id,
> port_id);
> +lflow_resource_add(l_ctx_out->lfrr, REF_TYPE_PORTBINDING,
> buf,
> +   >header_.uuid);
>  if (!sset_contains(l_ctx_in->local_lport_ids, buf)) {
>  VLOG_DBG("lflow "UUID_FMT
>   " port %s in match is not local, skip",
> @@ -847,3 +849,68 @@ lflow_destroy(void)
>  expr_symtab_destroy();
>  shash_destroy();
>  }
> +
> +bool
> +lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *dp,
> + struct lflow_ctx_in *l_ctx_in,
> + struct lflow_ctx_out *l_ctx_out)
> +{
> +bool handled = true;
> +struct hmap dhcp_opts = HMAP_INITIALIZER(_opts);
> +struct hmap dhcpv6_opts = HMAP_INITIALIZER(_opts);
> +const struct sbrec_dhcp_options *dhcp_opt_row;
> +SBREC_DHCP_OPTIONS_TABLE_FOR_EACH (dhcp_opt_row,
> +   l_ctx_in->dhcp_options_table) {
> +dhcp_opt_add(_opts, dhcp_opt_row->name, dhcp_opt_row->code,
> + dhcp_opt_row->type);
> +}
> +
> +
> +const struct sbrec_dhcpv6_options *dhcpv6_opt_row;
> +SBREC_DHCPV6_OPTIONS_TABLE_FOR_EACH (dhcpv6_opt_row,
> + l_ctx_in->dhcpv6_options_table) {
> +   dhcp_opt_add(_opts, dhcpv6_opt_row->name,
> dhcpv6_opt_row->code,
> +dhcpv6_opt_row->type);
> +}
> +
> +struct hmap nd_ra_opts = HMAP_INITIALIZER(_ra_opts);
> +nd_ra_opts_init(_ra_opts);
> +
> +struct controller_event_options controller_event_opts;
> +controller_event_opts_init(_event_opts);
> +
> +struct sbrec_logical_flow *lf_row = sbrec_logical_flow_index_init_row(
> +l_ctx_in->sbrec_logical_flow_by_logical_datapath);
> +sbrec_logical_flow_index_set_logical_datapath(lf_row, dp);
> +
> +const struct sbrec_logical_flow *lflow;
> +SBREC_LOGICAL_FLOW_FOR_EACH_EQUAL (
> +lflow, lf_row, l_ctx_in->sbrec_logical_flow_by_logical_datapath) {
>

I found myself an issue here.
Before calling consider_logical_flow(), we need to delete the lflow by
calling
ofctrl_remove_flows().

I'll handle that in v10.



+if (!consider_logical_flow(lflow, _opts, _opts,
> +   _ra_opts, _event_opts,
> +   l_ctx_in, l_ctx_out)) {
> +handled = false;
> +break;
> +}
> +}
> +
> +dhcp_opts_destroy(_opts);
> +dhcp_opts_destroy(_opts);
> +nd_ra_opts_destroy(_ra_opts);
> +controller_event_opts_destroy(_event_opts);
> +return handled;
> +}
> +
> +bool
> +lflow_handle_flows_for_lport(const struct sbrec_port_binding *pb,
> + struct lflow_ctx_in *l_ctx_in,
> + struct lflow_ctx_out *l_ctx_out)
> +{
> +int64_t dp_id = pb->datapath->tunnel_key;
> +char pb_ref_name[16];
> +snprintf(pb_ref_name, sizeof(pb_ref_name), "%"PRId64"_%"PRId64,
> + dp_id, pb->tunnel_key);
> +bool changed = true;
> +return lflow_handle_changed_ref(REF_TYPE_PORTBINDING, pb_ref_name,
> +l_ctx_in, l_ctx_out, );
> +}
>

There is one more case I missed for handling lflows for lport.

If there is a flow like -- inport ==X && && is_chassis_resident(Y)
and if the port Yget updated, this flow gets missed because it is not
stored in then REF_TYPE_PORTBINDIN for Y.

I'll 

Re: [ovs-dev] [PATCH ovn v9 5/8] ovn-controller: I-P for ct zone and OVS interface changes in flow output stage.

2020-06-02 Thread Han Zhou
Thanks for the update. LGTM, except some minor changes related to the
comment in v9 4/8.

On Thu, May 28, 2020 at 4:05 AM  wrote:
>
> From: Numan Siddique 
>
> This patch handles ct zone changes and OVS interface changes incrementally
> in the flow output stage.
>
> Any changes to ct zone can be handled by running physical_run() instead
of running
> flow_output_run(). And any changes to OVS interfaces can be either handled
> incrementally (for OVS interfaces representing VIFs) or just running
> physical_run() (for tunnel and other types of interfaces).
>
> To better handle this, a new engine node 'physical_flow_changes' is added
which
> handles changes to ct zone and OVS interfaces.
>
> Acked-by: Mark Michelson 
> Signed-off-by: Numan Siddique 
> ---
>  controller/binding.c|  23 +-
>  controller/binding.h|  24 +-
>  controller/ovn-controller.c | 146 +++-
>  controller/physical.c   |  51 +
>  controller/physical.h   |   5 +-
>  5 files changed, 224 insertions(+), 25 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 74e0e0710..63c7203bb 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -503,7 +503,7 @@ remove_local_lport_ids(const struct
sbrec_port_binding *binding_rec,
>   * 'struct local_binding' is used. A shash of these local bindings is
>   * maintained with the 'external_ids:iface-id' as the key to the shash.
>   *
> - * struct local_binding has 3 main fields:
> + * struct local_binding (defined in binding.h) has 3 main fields:
>   *- type
>   *- OVS interface row object
>   *- Port_Binding row object
> @@ -554,21 +554,6 @@ remove_local_lport_ids(const struct
sbrec_port_binding *binding_rec,
>   *   - For each 'virtual' Port Binding (of type BT_VIRTUAL) provided its
parent
>   * is bound to this chassis.
>   */
> -enum local_binding_type {
> -BT_VIF,
> -BT_CONTAINER,
> -BT_VIRTUAL
> -};
> -
> -struct local_binding {
> -char *name;
> -enum local_binding_type type;
> -const struct ovsrec_interface *iface;
> -const struct sbrec_port_binding *pb;
> -
> -/* shash of 'struct local_binding' representing children. */
> -struct shash children;
> -};
>
>  static struct local_binding *
>  local_binding_create(const char *name, const struct ovsrec_interface
*iface,
> @@ -590,12 +575,6 @@ local_binding_add(struct shash *local_bindings,
struct local_binding *lbinding)
>  shash_add(local_bindings, lbinding->name, lbinding);
>  }
>
> -static struct local_binding *
> -local_binding_find(struct shash *local_bindings, const char *name)
> -{
> -return shash_find_data(local_bindings, name);
> -}
> -
>  static void
>  local_binding_destroy(struct local_binding *lbinding)
>  {
> diff --git a/controller/binding.h b/controller/binding.h
> index f7ace6faf..21118ecd4 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -18,6 +18,7 @@
>  #define OVN_BINDING_H 1
>
>  #include 
> +#include "openvswitch/shash.h"
>
>  struct hmap;
>  struct ovsdb_idl;
> @@ -32,7 +33,6 @@ struct sbrec_chassis;
>  struct sbrec_port_binding_table;
>  struct sset;
>  struct sbrec_port_binding;
> -struct shash;
>
>  struct binding_ctx_in {
>  struct ovsdb_idl_txn *ovnsb_idl_txn;
> @@ -60,6 +60,28 @@ struct binding_ctx_out {
>  struct smap *local_iface_ids;
>  };
>
> +enum local_binding_type {
> +BT_VIF,
> +BT_CONTAINER,
> +BT_VIRTUAL
> +};
> +
> +struct local_binding {
> +char *name;
> +enum local_binding_type type;
> +const struct ovsrec_interface *iface;
> +const struct sbrec_port_binding *pb;
> +
> +/* shash of 'struct local_binding' representing children. */
> +struct shash children;
> +};
> +
> +static inline struct local_binding *
> +local_binding_find(struct shash *local_bindings, const char *name)
> +{
> +return shash_find_data(local_bindings, name);
> +}
> +
>  void binding_register_ovs_idl(struct ovsdb_idl *);
>  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 89a911b36..6381e3856 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct engine_node
*node,
>
>  ovs_assert(br_int && chassis);
>
> +struct ovsrec_interface_table *iface_table =
> +(struct ovsrec_interface_table *)EN_OVSDB_GET(
> +engine_get_input("OVS_interface", node));
> +
>  struct ed_type_ct_zones *ct_zones_data =
>  engine_get_input_data("ct_zones", node);
> +
>  struct simap *ct_zones = _zones_data->current;
>
>  p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct engine_node
*node,
>  p_ctx->mc_group_table = multicast_group_table;
>  p_ctx->br_int = br_int;

Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Ben Pfaff
On Wed, Jun 03, 2020 at 01:22:52AM +, Linhaifeng wrote:
> 
> 
> -Original Message-
> From: Ben Pfaff [mailto:b...@ovn.org] 
> Sent: Wednesday, June 3, 2020 1:28 AM
> To: Linhaifeng 
> Cc: Yanqin Wei ; d...@openvswitch.org; nd ; 
> Lilijun (Jerry) ; chenchanghu 
> ; Lichunhe 
> Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> 
> On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> > We should update rcu pointer first then use ovsrcu_postpone to free 
> > otherwise maybe cause use-after-free.
> > e.g.,reader indicates momentary quiescent and access old pointer after 
> > writer postpone free old pointer and before setting new pointer.
> > 
> > Signed-off-by: Linhaifeng 
> 
> I don't see how that's possible, since the writer hasn't quiesced.
> 
> I think the logic is as follow, Could you help me find out where is incorrect?
> 
> 1.1 -> 1.2 -> 3.1 -> 3.2 -> 2.1 -> 2.2 -> 2.3 -> 2.1 -> 1.3 -> 1.4 -> 3.3 -> 
> 2.2(use after free)
> 
> wirter:
> 1.1 use postone to free old pointer
> 1.2 flush cbsets to flushed_cbsets
> 1.3 update new pointer
> 1.4 quiesced
> 
> Read:
> 2.1. read pointer
> 2.2. use pointer
> 2.3. quiesced
> 
> Rcu:
> 3.1 pop flushed_cbsets
> 3.2 ovsrcu_synchronize
> 3.3 call all cb to free

So you're saying this:

1.1 use postone to free old pointer (A)
1.2 flush cbsets to flushed_cbsets

3.1 pop flushed_cbsets
3.2 ovsrcu_synchronize

2.1. read pointer (A)
2.2. use pointer (A)
2.3. quiesced

2.1. read pointer (A)

1.3 update new pointer (B)
1.4 quiesced

3.3 call all cb to free (A)

2.2. use pointer (A)

Wow, you are absolutely right.  This had never occurred to me.  Thank
you!  I'll review your patch.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v9 2/8] ovn-controller: I-P for SB port binding and OVS interface in runtime_data.

2020-06-02 Thread Han Zhou
On Thu, May 28, 2020 at 4:05 AM  wrote:
>
> From: Numan Siddique 
>
> This patch handles SB port binding changes and OVS interface changes
> incrementally in the runtime_data stage which otherwise would have
> resulted in calls to binding_run().
>
> Prior to this patch, port binding changes were handled differently.
> The changes were only evaluated to see if they need full recompute
> or they can be ignored.
>
> With this patch, the runtime data is updated with the changes (both
> SB port binding and OVS interface) and ports are claimed/released in
> the handlers itself, avoiding the need to trigger recompute of runtime
> data stage.
>
> Acked-by: Mark Michelson 
> Signed-off-by: Numan Siddique 
> ---
>  controller/binding.c| 1002 ++-
>  controller/binding.h|9 +-
>  controller/ovn-controller.c |   61 ++-
>  tests/ovn-performance.at|   13 +
>  4 files changed, 953 insertions(+), 132 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 6a13d1a0e..74e0e0710 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -360,16 +360,6 @@ destroy_qos_map(struct hmap *qos_map)
>  hmap_destroy(qos_map);
>  }
>
> -static void
> -update_local_lport_ids(struct sset *local_lport_ids,
> -   const struct sbrec_port_binding *pb)
> -{
> -char buf[16];
> -snprintf(buf, sizeof(buf), "%"PRId64"_%"PRId64,
> - pb->datapath->tunnel_key, pb->tunnel_key);
> -sset_add(local_lport_ids, buf);
> -}
> -
>  /*
>   * Get the encap from the chassis for this port. The interface
>   * may have an external_ids:encap-ip= set; if so we
> @@ -448,10 +438,10 @@ is_network_plugged(const struct sbrec_port_binding
*binding_rec,
>  }
>
>  static void
> -consider_localnet_port(const struct sbrec_port_binding *binding_rec,
> -   struct shash *bridge_mappings,
> -   struct sset *egress_ifaces,
> -   struct hmap *local_datapaths)
> +update_ld_localnet_port(const struct sbrec_port_binding *binding_rec,
> +struct shash *bridge_mappings,
> +struct sset *egress_ifaces,
> +struct hmap *local_datapaths)
>  {
>  /* Ignore localnet ports for unplugged networks. */
>  if (!is_network_plugged(binding_rec, bridge_mappings)) {
> @@ -481,6 +471,27 @@ consider_localnet_port(const struct
sbrec_port_binding *binding_rec,
>  ld->localnet_port = binding_rec;
>  }
>
> +static void
> +update_local_lport_ids(struct sset *local_lport_ids,
> +   const struct sbrec_port_binding *pb)
> +{
> +char buf[16];
> +snprintf(buf, sizeof(buf), "%"PRId64"_%"PRId64,
> + pb->datapath->tunnel_key, pb->tunnel_key);
> +sset_add(local_lport_ids, buf);
> +}
> +
> +static void
> +remove_local_lport_ids(const struct sbrec_port_binding *binding_rec,
> +   struct sset *local_lport_ids)
> +{
> +char buf[16];
> +snprintf(buf, sizeof(buf), "%"PRId64"_%"PRId64,
> +binding_rec->datapath->tunnel_key,
> +binding_rec->tunnel_key);
> +sset_find_and_delete(local_lport_ids, buf);
> +}
> +
>  /* Local bindings. binding.c module binds the logical port (represented
by
>   * Port_Binding rows) and sets the 'chassis' column when it sees the
>   * OVS interface row (of type "" or "internal") with the
> @@ -520,6 +531,10 @@ consider_localnet_port(const struct
sbrec_port_binding *binding_rec,
>   *  BT_VIF. Its Port_Binding row's 'parent' column is
set to
>   *  its parent's Port_Binding. It shares the OVS
interface row
>   *  with the parent.
> + *  Each ovn-controller when it sees a container
Port_Binding,
> + *  it creates 'struct local_binding' for the parent
> + *  Port_Binding and for its even if the OVS interface
row for
> + *  the parent is not present.
>   *
>   *  BT_VIRTUAL: Represents a local binding which has a parent of type
BT_VIF.
>   *  Its Port_Binding type is "virtual" and it shares the OVS
> @@ -527,6 +542,17 @@ consider_localnet_port(const struct
sbrec_port_binding *binding_rec,
>   *  Port_Binding of type "virtual" is claimed by pinctrl
module
>   *  when it sees the ARP packet from the parent's VIF.
>   *
> + *
> + *  An object of 'struct local_binding' is created:
> + *- For each interface that has iface-id configured with the type -
BT_VIF.
> + *
> + *- For each container Port Binding (of type BT_CONTAINER) and its
> + *  parent Port_Binding (of type BT_VIF), no matter if
> + *  they are bound to this chassis i.e even if OVS interface row for
the
> + *  parent is not present.
> + *
> + *   - For each 'virtual' Port Binding (of type BT_VIRTUAL) provided its
parent
> + * is bound to this chassis.

Numan, thanks for more detailed 

Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Linhaifeng


> -Original Message-
> From: Ben Pfaff [mailto:b...@ovn.org]
> Sent: Wednesday, June 3, 2020 8:35 AM
> To: Yanqin Wei 
> Cc: Linhaifeng ; d...@openvswitch.org; nd
> ; Lilijun (Jerry) ; chenchanghu
> ; Lichunhe 
> Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> 
> This is not how RCU works in OVS.  Every thread is by default considered
> active.  They rarely quiesce except implicitly inside poll_block().
> Please read the large comment at the top of ovs-rcu.h.
> 
> Is your patch based on actual bugs that you have found, or is it just some 
> kind
> of precaution?  If it is the latter, then it is not needed.
> 
Is an actual bug for old version bug it's also suitable for the other codes in 
ovs.

Here is the debug info:
linux-mNuKFc:/Images/linhf/830/Euler_compile_env # gdb -p `pidof ovs-vswitchd`
GNU gdb (GDB) Red Hat Enterprise Linux 8.2-3.h2
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "aarch64-Huawei-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
.
Find the GDB manual and other documentation resources online at:
.

For help, type "help".
Type "apropos word" to search for commands related to "word".
Attaching to process 102706
[New LWP 109133]
[New LWP 109134]
[New LWP 109297]
[New LWP 109298]
[New LWP 109299]
[New LWP 109300]
[New LWP 109303]
[New LWP 109304]
[New LWP 109308]
[New LWP 109309]
[New LWP 109310]
[New LWP 109311]
[New LWP 109522]
[New LWP 109523]
[New LWP 109603]
[New LWP 109615]
[New LWP 109619]
[New LWP 109655]
[New LWP 109673]
[New LWP 109794]
[New LWP 109795]
[New LWP 113953]
[New LWP 114362]
[New LWP 114364]
[New LWP 114368]
[New LWP 114370]
[New LWP 114373]
[New LWP 114377]
[New LWP 115594]
[New LWP 115595]
[New LWP 115596]
[New LWP 115597]
[New LWP 115598]
[New LWP 115600]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
0x879981ac in poll () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install glib2-2.54.2-2.h1.aarch64 
glibc-2.28-9.h17.aarch64 keyutils-libs-1.5.8-3.aarch64 
krb5-libs-1.15.1-34.h2.aarch64 libcgroup-0.41-15.h3.aarch64 
libcom_err-1.44.3-1.h4.aarch64 libgcc-7.3.0-20190804.h18.aarch64 
libselinux-2.5-12.aarch64 numactl-libs-2.0.9-7.h1.aarch64 
openssl-libs-1.0.2k-16.h6.aarch64 pcre-8.32-17.h9.aarch64 
uvpkmc-1.0.1-807.aarch64 zlib-1.2.7-17.aarch64
(gdb) b dpcls_destroy_subtable
Breakpoint 1 at 0x508bcc: file lib/dpif-netdev.c, line 6919.
(gdb) b ovsrcu_call_postponed
Breakpoint 2 at 0x5b7d34: file lib/ovs-rcu.c, line 336.
(gdb) c
Continuing.
[Switching to Thread 0x83b97860 (LWP 109304)]

Thread 9 "urcu2" hit Breakpoint 2, ovsrcu_call_postponed () at lib/ovs-rcu.c:336
warning: Source file is more recent than executable.
336 {
(gdb) n
339 int wait_del = 0;
(gdb) 
340 while(wait_del);
(gdb) set wait_del = 1
(gdb) c
Continuing.
[Switching to Thread 0x51748860 (LWP 115598)]

Thread 34 "revalidator19" hit Breakpoint 1, dpcls_destroy_subtable 
(cls=0x1c00a420, subtable=0x3c009250) at lib/dpif-netdev.c:6919
6919int wait_get = 0;
(gdb) n
6920VLOG_DBG("Destroying subtable %p for in_port %d", subtable, 
cls->in_port);
(gdb) 
6921pvector_remove(>subtables, subtable);
(gdb) 
6922cmap_remove(>subtables_map, >cmap_node,
(gdb) set wait_get = 1
(gdb) n
6924cmap_destroy(>rules);
(gdb) p subtable->rules
$1 = {impl = {p = 0x30008940}}
(gdb) s
cmap_destroy (cmap=0x3c009258) at lib/cmap.c:288
288 if (cmap) {
(gdb) n
289 struct cmap_impl *impl = cmap_get_impl(cmap);
(gdb) 
290 if (impl != _cmap) {
(gdb) 
291 ovsrcu_postpone(free_cacheline, impl);
(gdb) s
ovsrcu_postpone__ (function=0x6029b0 , aux=0x30008940) at 
lib/ovs-rcu.c:315
315 struct ovsrcu_perthread *perthread = ovsrcu_perthread_get();
(gdb) n
318 int size = ARRAY_SIZE(cbset->cbs);
(gdb) 
319 cbset = perthread->cbset;
(gdb) 
320 if (!cbset) {
(gdb) 
325 cb = >cbs[cbset->n_cbs++];
(gdb) 
326 cb->function = function;
(gdb) 
327 cb->aux = aux;
(gdb) 
329 if (cbset->n_cbs >= size) {
(gdb) set size = cbset->n_cbs
(gdb) n
330 ovsrcu_flush_cbset(perthread);
(gdb) s
ovsrcu_flush_cbset (perthread=0x30001210) at lib/ovs-rcu.c:397
397 ovsrcu_flush_cbset__(perthread, false);
(gdb) s
ovsrcu_flush_cbset__ (perthread=0x30001210, protected=false) at 
lib/ovs-rcu.c:380
380 struct ovsrcu_cbset *cbset = perthread->cbset;
(gdb) n
382 if (cbset) {
(gdb) 
383 

Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Linhaifeng



-Original Message-
From: Yanqin Wei [mailto:yanqin@arm.com] 
Sent: Wednesday, June 3, 2020 7:23 AM
To: Ben Pfaff ; Linhaifeng 
Cc: d...@openvswitch.org; nd ; Lilijun (Jerry) 
; chenchanghu ; Lichunhe 

Subject: RE: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

Hi Ben,

If my understanding is correct, the writer could not be a rcu thread because it 
does not need report holding or not holding pointers.
So old memory will be freed after all rcu thread report quiesce.

The write also is rcu thread. If not first update pointer the reader can also 
get the old pointer after call ovsrcu_quiesced.

Best Regards,
Wei Yanqin

> -Original Message-
> From: Ben Pfaff 
> Sent: Wednesday, June 3, 2020 1:28 AM
> To: Linhaifeng 
> Cc: Yanqin Wei ; d...@openvswitch.org; nd 
> ; Lilijun (Jerry) ; chenchanghu 
> ; Lichunhe 
> Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> 
> On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> > We should update rcu pointer first then use ovsrcu_postpone to free 
> > otherwise maybe cause use-after-free.
> > e.g.,reader indicates momentary quiescent and access old pointer 
> > after writer postpone free old pointer and before setting new pointer.
> >
> > Signed-off-by: Linhaifeng 
> 
> I don't see how that's possible, since the writer hasn't quiesced.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Linhaifeng



-Original Message-
From: Ben Pfaff [mailto:b...@ovn.org] 
Sent: Wednesday, June 3, 2020 1:28 AM
To: Linhaifeng 
Cc: Yanqin Wei ; d...@openvswitch.org; nd ; 
Lilijun (Jerry) ; chenchanghu 
; Lichunhe 
Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> We should update rcu pointer first then use ovsrcu_postpone to free 
> otherwise maybe cause use-after-free.
> e.g.,reader indicates momentary quiescent and access old pointer after 
> writer postpone free old pointer and before setting new pointer.
> 
> Signed-off-by: Linhaifeng 

I don't see how that's possible, since the writer hasn't quiesced.

I think the logic is as follow, Could you help me find out where is incorrect?

1.1 -> 1.2 -> 3.1 -> 3.2 -> 2.1 -> 2.2 -> 2.3 -> 2.1 -> 1.3 -> 1.4 -> 3.3 -> 
2.2(use after free)

wirter:
1.1 use postone to free old pointer
1.2 flush cbsets to flushed_cbsets
1.3 update new pointer
1.4 quiesced

Read:
2.1. read pointer
2.2. use pointer
2.3. quiesced

Rcu:
3.1 pop flushed_cbsets
3.2 ovsrcu_synchronize
3.3 call all cb to free



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] test: add more tests to IP-buffering unit-test

2020-06-02 Thread Ankur Sharma
Hi Lorenzo,

Please find my comments inline.

Regards,
Ankur


From: dev  on behalf of Ankur Sharma 

Sent: Monday, June 1, 2020 9:05 PM
To: Lorenzo Bianconi ; ovs-dev@openvswitch.org 

Subject: Re: [ovs-dev] [PATCH ovn] test: add more tests to IP-buffering 
unit-test

Hi Lorenzo,

Thanks a lot for the patch.
I will get back on this by EOD PST tomorrow.

Regards,
Ankur

From: Lorenzo Bianconi 
Sent: Friday, May 29, 2020 7:01 AM
To: ovs-dev@openvswitch.org 
Cc: Ankur Sharma ; num...@ovn.org ; 
dce...@redhat.com 
Subject: [PATCH ovn] test: add more tests to IP-buffering unit-test

Check the hv is sending the ICMP request using the FIP as src IP in
a DVR scenario. Update previous tests introducing source NAT for
distributed traffic cases

Signed-off-by: Lorenzo Bianconi 
---
 tests/ovn.at | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tests/ovn.at b/tests/ovn.at
index 15b40ca1e..57f632c4e 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -14873,7 +14873,7 @@ ovn_start
 # Physical network:
 # Tw0 hypervisors hv[12].
 # hv1 hosts vif sw0-p0.
-# hv1 hosts vif sw1-p0.
+# hv2 hosts vif sw1-p0 and sw0-p1

 send_icmp_packet() {
 local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv4_src=$5 ipv4_dst=$6 
ip_chksum=$7 data=$8
@@ -14973,6 +14973,9 @@ ovn-nbctl lsp-add sw0 sw0-p1 \
 ovn-nbctl lsp-add sw1 sw1-p0 \
 -- lsp-set-addresses sw1-p0 unknown

+ovn-nbctl lr-nat-add lr0 snat 172.16.1.1 192.168.1.0/24
+ovn-nbctl lr-nat-add lr0 snat 2002::1 2001::/64
+
 OVN_POPULATE_ARP
 ovn-nbctl --wait=hv sync

@@ -14997,15 +15000,15 @@ dst_ip6=20020010

 data=0800bee4391a0001

-send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip  $data
+send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip 0d1c $data
 send_arp_reply 2 1 $dst_mac $router_mac1 $dst_ip $router_ip
 echo $(get_arp_req $router_mac1 $router_ip $dst_ip) > expected
-echo 
"${dst_mac}${router_mac1}0800451c4000fe010100${src_ip}${dst_ip}${data}" 
>> expected
+echo 
"${dst_mac}${router_mac1}0800451c4000fe0122b5${router_ip}${dst_ip}${data}"
 >> expected

 OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])

 nd_ip=ff020001ff10
-ip6_hdr=60083afe${src_ip6}${dst_ip6}
+ip6_hdr=60083afe${router_ip6}${dst_ip6}

 send_icmp6_packet 1 1 $src_mac $router_mac0 $src_ip6 $dst_ip6
 echo $(get_nd $router_mac1 $nd_src_ip6 $nd_ip $dst_ip6) >> expected
@@ -15016,15 +15019,19 @@ OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])

 # Create FIP on sw0-p0, add a route on logical router pipeline and
 # ARP request for a unkwon destination is sent using FIP MAC/IP
+ovn-nbctl lr-nat-del lr0 snat
 ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.1.2 192.168.1.3 sw0-p1 
f0:00:00:01:02:04
 ovn-nbctl lr-route-add lr0 172.16.2.0/24 172.16.1.11

 dst_ip=$(ip_to_hex 172 16 2 10)
 fip_ip=$(ip_to_hex 172 16 1 2)
 src_ip=$(ip_to_hex 192 168 1 3)
-gw_router=$(ip_to_hex 172 16 1 11)
-send_icmp_packet 2 2 f0110203 $router_mac0 $src_ip $dst_ip  $data
-echo $(get_arp_req f0010204 $fip_ip $gw_router) >> expected
+gw_router_ip=$(ip_to_hex 172 16 1 11)
+gw_router_mac=f0010a0a
+send_icmp_packet 2 2 f0110203 $router_mac0 $src_ip $dst_ip 0c1b $data
[ANKUR]: Confused by above line, ofport 2 on hv2 is sw1-p0, whereas i believe 
we wanted port to use sw0-p1, i.e ofport3
+echo $(get_arp_req f0010204 $fip_ip $gw_router_ip) >> expected
+send_arp_reply 2 1 $gw_router_mac f0010204 $gw_router_ip $fip_ip
+echo 
"${gw_router_mac}f00102040800451c4000fe0121b4${fip_ip}${dst_ip}${data}"
 >> expected

 OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])

[ANKUR]: May be i am missing something, but how are we validating buffering 
here. We should be validating the pcap on some external endpoint right,
For example, generate a icmp packet from internal port to an external endpoint, 
simulate an ARP reply and then look at tx pcap on destination.


--
2.26.2

___
dev mailing list
d...@openvswitch.org
https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwICAg=s883GpUCOChKOHiocYtGcg=mZwX9gFQgeJHzTg-68aCJgsODyUEVsHGFOfL90J6MJY=WlGoK4pEkybzFr5FvyLYn_x3hZuLSfQcHJVgmPbU548=UJvQpPo9180SVm6LdvHiFtsdKo6eGyatkCN8NxS4z4I=
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Ben Pfaff
Oh, I apologize that I made a mistake about the author.

I appreciate feedback from anyone.

On Wed, Jun 03, 2020 at 12:37:27AM +, Yanqin Wei wrote:
> Hi Ben,
> 
> This patch is from Linhai, but I have the same concern about this.  I will 
> read ovs-rcu comments and feedback.
> Thanks for your time.
> 
> Best Regards,
> Wei Yanqin
> 
> > -Original Message-
> > From: Ben Pfaff 
> > Sent: Wednesday, June 3, 2020 8:35 AM
> > To: Yanqin Wei 
> > Cc: Linhaifeng ; d...@openvswitch.org; nd
> > ; Lilijun (Jerry) ; chenchanghu
> > ; Lichunhe 
> > Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> > 
> > This is not how RCU works in OVS.  Every thread is by default considered 
> > active.
> > They rarely quiesce except implicitly inside poll_block().
> > Please read the large comment at the top of ovs-rcu.h.
> > 
> > Is your patch based on actual bugs that you have found, or is it just some 
> > kind
> > of precaution?  If it is the latter, then it is not needed.
> > 
> > On Tue, Jun 02, 2020 at 11:22:57PM +, Yanqin Wei wrote:
> > > Hi Ben,
> > >
> > > If my understanding is correct, the writer could not be a rcu thread 
> > > because it
> > does not need report holding or not holding pointers.
> > > So old memory will be freed after all rcu thread report quiesce.
> > >
> > > Best Regards,
> > > Wei Yanqin
> > >
> > > > -Original Message-
> > > > From: Ben Pfaff 
> > > > Sent: Wednesday, June 3, 2020 1:28 AM
> > > > To: Linhaifeng 
> > > > Cc: Yanqin Wei ; d...@openvswitch.org; nd
> > > > ; Lilijun (Jerry) ;
> > > > chenchanghu ; Lichunhe
> > 
> > > > Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> > > >
> > > > On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> > > > > We should update rcu pointer first then use ovsrcu_postpone to
> > > > > free otherwise maybe cause use-after-free.
> > > > > e.g.,reader indicates momentary quiescent and access old pointer
> > > > > after writer postpone free old pointer and before setting new pointer.
> > > > >
> > > > > Signed-off-by: Linhaifeng 
> > > >
> > > > I don't see how that's possible, since the writer hasn't quiesced.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Yanqin Wei
Hi Ben,

This patch is from Linhai, but I have the same concern about this.  I will read 
ovs-rcu comments and feedback.
Thanks for your time.

Best Regards,
Wei Yanqin

> -Original Message-
> From: Ben Pfaff 
> Sent: Wednesday, June 3, 2020 8:35 AM
> To: Yanqin Wei 
> Cc: Linhaifeng ; d...@openvswitch.org; nd
> ; Lilijun (Jerry) ; chenchanghu
> ; Lichunhe 
> Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> 
> This is not how RCU works in OVS.  Every thread is by default considered 
> active.
> They rarely quiesce except implicitly inside poll_block().
> Please read the large comment at the top of ovs-rcu.h.
> 
> Is your patch based on actual bugs that you have found, or is it just some 
> kind
> of precaution?  If it is the latter, then it is not needed.
> 
> On Tue, Jun 02, 2020 at 11:22:57PM +, Yanqin Wei wrote:
> > Hi Ben,
> >
> > If my understanding is correct, the writer could not be a rcu thread 
> > because it
> does not need report holding or not holding pointers.
> > So old memory will be freed after all rcu thread report quiesce.
> >
> > Best Regards,
> > Wei Yanqin
> >
> > > -Original Message-
> > > From: Ben Pfaff 
> > > Sent: Wednesday, June 3, 2020 1:28 AM
> > > To: Linhaifeng 
> > > Cc: Yanqin Wei ; d...@openvswitch.org; nd
> > > ; Lilijun (Jerry) ;
> > > chenchanghu ; Lichunhe
> 
> > > Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> > >
> > > On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> > > > We should update rcu pointer first then use ovsrcu_postpone to
> > > > free otherwise maybe cause use-after-free.
> > > > e.g.,reader indicates momentary quiescent and access old pointer
> > > > after writer postpone free old pointer and before setting new pointer.
> > > >
> > > > Signed-off-by: Linhaifeng 
> > >
> > > I don't see how that's possible, since the writer hasn't quiesced.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Ben Pfaff
This is not how RCU works in OVS.  Every thread is by default considered
active.  They rarely quiesce except implicitly inside poll_block().
Please read the large comment at the top of ovs-rcu.h.

Is your patch based on actual bugs that you have found, or is it just
some kind of precaution?  If it is the latter, then it is not needed.

On Tue, Jun 02, 2020 at 11:22:57PM +, Yanqin Wei wrote:
> Hi Ben,
> 
> If my understanding is correct, the writer could not be a rcu thread because 
> it does not need report holding or not holding pointers.
> So old memory will be freed after all rcu thread report quiesce.
> 
> Best Regards,
> Wei Yanqin
> 
> > -Original Message-
> > From: Ben Pfaff 
> > Sent: Wednesday, June 3, 2020 1:28 AM
> > To: Linhaifeng 
> > Cc: Yanqin Wei ; d...@openvswitch.org; nd
> > ; Lilijun (Jerry) ; chenchanghu
> > ; Lichunhe 
> > Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> > 
> > On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> > > We should update rcu pointer first then use ovsrcu_postpone to free
> > > otherwise maybe cause use-after-free.
> > > e.g.,reader indicates momentary quiescent and access old pointer after
> > > writer postpone free old pointer and before setting new pointer.
> > >
> > > Signed-off-by: Linhaifeng 
> > 
> > I don't see how that's possible, since the writer hasn't quiesced.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Taller de Contabilidad Electrónica 2020

2020-06-02 Thread Que se debe y que no se debe enviar al SAT
Buenos día 
Quise aprovechar la oportunidad de hacerte una invitación para tomar nuestro 
curso de 6 horas :
 
Nombre: Taller de Contabilidad Electrónica 2020
Fecha: Martes 23 de Junio
Horario: 10:00 am a 1:00 pm y 2:00 pm a 05:00 pm
Formato: En línea con interacción en vivo.
Lugar: En Vivo desde su computadora
Instructor: Socorro Ávila (Contadora Pública con Maestría en
Derecho y con amplia experiencia en temas fiscales)

Taller de Contabilidad Electrónica, para el estudio y análisis práctico de los 
registros contables de acuerdo
a las NIF, importancia de los CFDI, catálogo de cuentas, balanza de 
comprobación y las generalidades para depurar
 la contabilidad y reducción del tiempo del envío de información al SAT, además 
del análisis de la información que 
se debe y la que no se debe enviar al SAT.

Ejes Temáticos:

- Código Fiscal de la Federación.
- Reglamento del Código Fiscal de Federación.
- Ley del Impuesto Sobre la Renta.
- Resolución Miscelánea Fiscal.

Solicita información respondiendo a este correo con la palabra Contabilidad, 
junto con los siguientes datos:

Nombre:
Correo electrónico:
Número telefónico:
Email Alterno:

Números de Atención: 55 15 54 66 30 - 55 30 16 70 85  

Qué tengas un gran día.
Saludos.



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Yanqin Wei
Hi Ben,

If my understanding is correct, the writer could not be a rcu thread because it 
does not need report holding or not holding pointers.
So old memory will be freed after all rcu thread report quiesce.

Best Regards,
Wei Yanqin

> -Original Message-
> From: Ben Pfaff 
> Sent: Wednesday, June 3, 2020 1:28 AM
> To: Linhaifeng 
> Cc: Yanqin Wei ; d...@openvswitch.org; nd
> ; Lilijun (Jerry) ; chenchanghu
> ; Lichunhe 
> Subject: Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first
> 
> On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> > We should update rcu pointer first then use ovsrcu_postpone to free
> > otherwise maybe cause use-after-free.
> > e.g.,reader indicates momentary quiescent and access old pointer after
> > writer postpone free old pointer and before setting new pointer.
> >
> > Signed-off-by: Linhaifeng 
> 
> I don't see how that's possible, since the writer hasn't quiesced.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP (iperf3)

2020-06-02 Thread Vinay Gupta via dev
Hi Flavio,

Thanks for your reply.
I have captured the suggested information but do not see anything that
could cause the packet drops.
Can you please take a look at the below data and see if you can find
something unusual ?
The PMDs are running on CPU 1,2,3,4 and CPU 1-7 are isolated cores.

---
root@bcm958802a8046c:~# cstats ; sleep 10; cycles
pmd thread numa_id 0 core_id 1:
  idle cycles: 99140849 (7.93%)
  processing cycles: 1151423715 (92.07%)
  avg cycles per packet: 116.94 (1250564564/10693918)
  avg processing cycles per packet: 107.67 (1151423715/10693918)
pmd thread numa_id 0 core_id 2:
  idle cycles: 118373662 (9.47%)
  processing cycles: 1132193442 (90.53%)
  avg cycles per packet: 124.39 (1250567104/10053309)
  avg processing cycles per packet: 112.62 (1132193442/10053309)
pmd thread numa_id 0 core_id 3:
  idle cycles: 53805933 (4.30%)
  processing cycles: 1196762002 (95.70%)
  avg cycles per packet: 107.35 (1250567935/11649948)
  avg processing cycles per packet: 102.73 (1196762002/11649948)
pmd thread numa_id 0 core_id 4:
  idle cycles: 189102938 (15.12%)
  processing cycles: 1061463293 (84.88%)
  avg cycles per packet: 143.47 (1250566231/8716828)
  avg processing cycles per packet: 121.77 (1061463293/8716828)
pmd thread numa_id 0 core_id 5:
pmd thread numa_id 0 core_id 6:
pmd thread numa_id 0 core_id 7:


*Runtime summary*  comm  parent   sched-in
run-timemin-run avg-run max-run  stddev  migrations
  (count)   (msec) (msec)
   (msec)  (msec)   %
-
ksoftirqd/0[7]   2  10.079  0.079
0.079   0.0790.00   0
  rcu_sched[8]   2 140.067  0.002
0.004   0.0099.96   0
   rcuos/4[38]   2  60.027  0.002
0.004   0.008   20.97   0
   rcuos/5[45]   2  40.018  0.004
0.004   0.0056.63   0
   kworker/0:1[71]   2 120.156  0.008
0.013   0.0196.72   0
 mmcqd/0[1230]   2  30.054  0.001
0.018   0.031   47.29   0
kworker/0:1H[1248]   2  10.006  0.006
0.006   0.0060.00   0
   kworker/u16:2[1547]   2 160.045  0.001
0.002   0.012   26.19   0
ntpd[5282]   1  10.063  0.063
0.063   0.0630.00   0
watchdog[6988]   1  20.089  0.012
0.044   0.076   72.26   0
ovs-vswitchd[9239]   1  20.326  0.152
0.163   0.1736.45   0
   revalidator8[9309/9239]9239  21.260  0.607
0.630   0.6523.58   0
   perf[27150]   27140  10.000  0.000
0.000   0.0000.00   0

Terminated tasks:
  sleep[27151]   27150  41.002  0.015
0.250   0.677   58.22   0

Idle stats:
CPU  0 idle for999.814  msec  ( 99.84%)



*CPU  1 idle entire time windowCPU  2 idle entire time windowCPU  3
idle entire time windowCPU  4 idle entire time window*
CPU  5 idle for500.326  msec  ( 49.96%)
CPU  6 idle entire time window
CPU  7 idle entire time window

Total number of unique tasks: 14
Total number of context switches: 115
   Total run time (msec):  3.198
Total scheduling time (msec): 1001.425  (x 8)
(END)



*02:16:22  UID  TGID   TID%usr %system  %guest   %wait
 %CPU   CPU  Command *02:16:230  9239 -  100.000.00
   0.000.00  100.00 5  ovs-vswitchd
02:16:230 -  92392.000.000.000.00
 2.00 5  |__ovs-vswitchd
02:16:230 -  92400.000.000.000.00
 0.00 0  |__vfio-sync
02:16:230 -  92410.000.000.000.00
 0.00 5  |__eal-intr-thread
02:16:230 -  92420.000.000.000.00
 0.00 5  |__dpdk_watchdog1
02:16:230 -  92440.000.000.000.00
 0.00 5  |__urcu2
02:16:230 -  92790.000.000.000.00
 0.00 5  |__ct_clean3
02:16:230 -  93080.000.000.000.00
 0.00 5  |__handler9
02:16:230 -  93090.000.000.000.00
 0.00 5  |__revalidator8
02:16:230 -  93280.00   

Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP (iperf3)

2020-06-02 Thread Flavio Leitner
On Mon, Jun 01, 2020 at 07:27:09PM -0400, Shahaji Bhosle via dev wrote:
> Hi Ben/Ilya,
> Hope you guys are doing well and staying safe. I have been chasing a weird
> problem with small drops and I think that is causing lots of TCP
> retransmission.
> 
> Setup details
> iPerf3(1k-5K
> Servers)<--DPDK2:OvS+DPDK(VxLAN:BOND)[DPDK0+DPDK1)<2x25G<
> [DPDK0+DPDK1)(VxLAN:BOND)OVS+DPDKDPDK2<---iPerf3(Clients)
> 
> All the Drops are ring drops on BONDed functions on the server side.  I
> have 4 CPUs each with 3PMD threads, DPDK0, DPDK1 and DPDK2 all running with
> 4 Rx rings each.
> 
> What is interesting is when I give each Rx rings its own CPU the drops go
> away. Or if I set cother_config:emc-insert-inv-prob=1 the drops go away.
> But I need to scale up the number of flows so trying to run this with EMC
> disabled.
> 
> I can tell that the rings are not getting serviced for 30-40usec because of
> some kind context switch or interrupts on these cores. I have tried to do
> the usual isolation, nohz_full rcu_nocbs etc. Move all the interrupts away
> from these cores etc. But nothing helps. I mean it improves, but the drops
> still happen.

When you disable the EMC (or reduce its efficiency) the per packet cost
increases, then it becomes more sensitive to variations. If you share
a CPU with multiple queues, you decrease the amount of time available
to process the queue. In either case, there will be less room to tolerate
variations.

Well, you might want to use 'perf' and monitor for the scheduling events
and then based on the stack trace see what is causing it and try to
prevent it.

For example:
# perf record -e sched:sched_switch -a -g sleep 1

For instance, you might see that another NIC used for management has
IRQs assigned to one isolated CPU. You can move it to another CPU to
reduce the noise, etc...

Another suggestion is look at PMD thread idle statistics because it
will tell you how much "extra" room you have left. As it approaches
to 0, more fine tuned your setup needs to be to avoid drops.

HTH,
-- 
fbl
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4] Bareudp Tunnel Support

2020-06-02 Thread Gregory Rose



On 5/25/2020 8:31 PM, Martin Varghese wrote:

From: Martin Varghese 

There are various L3 encapsulation standards using UDP being discussed to
leverage the UDP based load balancing capability of different networks.
MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them.

The Bareudp tunnel provides a generic L3 encapsulation tunnelling support
for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP
tunnel.

An example to create bareudp device to tunnel MPLS traffic is
given

$ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \
  type=bareudp options:remote_ip=2.1.1.3
  options:local_ip=2.1.1.2 \
  options:payload_type=0x8847 options:dst_port=6635 \
  options:packet_type="legacy_l3" \
  ofport_request=$bareudp_egress_port

The bareudp device supports special handling for MPLS & IP as
they can have multiple ethertypes. MPLS procotcol can have ethertypes
ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have
ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6).

The bareudp device to tunnel L3 traffic with multiple ethertypes
(MPLS & IP) can be created by passing the L3 protocol name as string in
the field payload_type. An example to create bareudp device to tunnel
MPLS unicast & multicast traffic is given below.::

$ ovs-vsctl add-port  br_mpls udp_port -- set interface
 udp_port \
 type=bareudp options:remote_ip=2.1.1.3
 options:local_ip=2.1.1.2 \
 options:payload_type=mpls options:dst_port=6635 \
 options:packet_type="legacy_l3"

Signed-off-by: Martin Varghese 


Hi Martin,

I built and installed a 5.7 kernel with bareudp configured and
the system traffic test worked fine.  I did not see any regressions
in any other system traffic tests.

The documentation and code looks pretty good but I do have a few
comments below.

Thanks!

- Greg


---
  Documentation/automake.mk |  1 +
  Documentation/faq/bareudp.rst | 62 +++
  Documentation/faq/index.rst   |  1 +
  Documentation/faq/releases.rst|  1 +
  NEWS  |  3 ++
  datapath/linux/compat/include/linux/openvswitch.h | 10 
  lib/dpif-netlink-rtnl.c   | 53 +++
  lib/dpif-netlink.c|  7 ++-
  lib/netdev-vport.c| 24 -
  lib/netdev.h  |  1 +
  ofproto/ofproto-dpif-xlate.c  |  1 +
  tests/system-layer3-tunnels.at| 47 +
  12 files changed, 209 insertions(+), 2 deletions(-)
  create mode 100644 Documentation/faq/bareudp.rst

diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index f85c432..ea3475f 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -88,6 +88,7 @@ DOC_SOURCE = \
Documentation/faq/terminology.rst \
Documentation/faq/vlan.rst \
Documentation/faq/vxlan.rst \
+   Documentation/faq/bareudp.rst \
Documentation/internals/index.rst \
Documentation/internals/authors.rst \
Documentation/internals/bugs.rst \
diff --git a/Documentation/faq/bareudp.rst b/Documentation/faq/bareudp.rst
new file mode 100644
index 000..257f20e
--- /dev/null
+++ b/Documentation/faq/bareudp.rst
@@ -0,0 +1,62 @@
+..
+  Licensed under the Apache License, Version 2.0 (the "License"); you may
+  not use this file except in compliance with the License. You may obtain
+  a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  License for the specific language governing permissions and limitations
+  under the License.
+
+  Convention for heading levels in Open vSwitch documentation:
+
+  ===  Heading 0 (reserved for the title in a document)
+  ---  Heading 1
+  ~~~  Heading 2
+  +++  Heading 3
+  '''  Heading 4
+
+  Avoid deeper levels because they do not render well.
+
+===
+Bareudp
+===
+
+Q: What is Bareudp?
+
+A: There are various L3 encapsulation standards using UDP being discussed
+   to leverage the UDP based load balancing capability of different
+   networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among
+   them.
+
+   The Bareudp tunnel provides a generic L3 encapsulation tunnelling
+   support for tunnelling different L3 protocols like MPLS, IP, NSH etc.
+   inside a UDP tunnel.
+
+   An example to create bareudp device to tunnel MPLS traffic is given
+   below.::
+
+   $ 

Re: [ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Ben Pfaff
On Tue, Jun 02, 2020 at 07:27:59AM +, Linhaifeng wrote:
> We should update rcu pointer first then use ovsrcu_postpone to free
> otherwise maybe cause use-after-free.
> e.g.,reader indicates momentary quiescent and access old pointer after
> writer postpone free old pointer and before setting new pointer.
> 
> Signed-off-by: Linhaifeng 

I don't see how that's possible, since the writer hasn't quiesced.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] netdev-offload-dpdk: Support offload of VLAN PUSH/POP actions

2020-06-02 Thread Sriharsha Basavapatna via dev
If there are no other comments, can this be applied to master ?

Thanks,
-Harsha

On Mon, Jun 1, 2020 at 7:54 PM Eli Britstein  wrote:
>
> Acked-by: Eli Britstein 
>
> On 5/29/2020 9:33 AM, Sriharsha Basavapatna wrote:
> > Parse VLAN PUSH/POP OVS datapath actions and add respective RTE actions.
> >
> > Signed-off-by: Sriharsha Basavapatna 
> > ---
> > v1->v2:
> > * Updated dump_flow_action() to print VLAN Push/Pop actions
> > * Updated NEWS, Documentation/howto/dpdk.rst files
> > ---
> >
> >   Documentation/howto/dpdk.rst |  1 +
> >   NEWS |  1 +
> >   lib/netdev-offload-dpdk.c| 64 
> >   3 files changed, 66 insertions(+)
> >
> > diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
> > index be950d7ce..c40fcafcb 100644
> > --- a/Documentation/howto/dpdk.rst
> > +++ b/Documentation/howto/dpdk.rst
> > @@ -395,6 +395,7 @@ Supported actions for hardware offload are:
> >   - Modification of Ethernet (mod_dl_src/mod_dl_dst).
> >   - Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl).
> >   - Modification of TCP/UDP (mod_tp_src/mod_tp_dst).
> > +- VLAN Push/Pop (push_vlan/pop_vlan).
> >
> >   Further Reading
> >   ---
> > diff --git a/NEWS b/NEWS
> > index 3dbd8ec0e..c1311e366 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -9,6 +9,7 @@ Post-v2.13.0
> >  - DPDK:
> >* Deprecated DPDK pdump packet capture support removed.
> >* Deprecated DPDK ring ports (dpdkr) are no longer supported.
> > + * Add hardware offload support for VLAN Push/Pop actions 
> > (experimental).
> >  - Linux datapath:
> >* Support for kernel versions up to 5.5.x.
> >  - AF_XDP:
> > diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> > index f8c46bbaa..c57586a48 100644
> > --- a/lib/netdev-offload-dpdk.c
> > +++ b/lib/netdev-offload-dpdk.c
> > @@ -420,6 +420,36 @@ dump_flow_action(struct ds *s, const struct 
> > rte_flow_action *actions)
> >   } else {
> >   ds_put_format(s, "  Set-%s-tcp/udp-port = null\n", dirstr);
> >   }
> > +} else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) {
> > +const struct rte_flow_action_of_push_vlan *rte_push_vlan =
> > +actions->conf;
> > +ds_put_cstr(s, "rte flow push-vlan action:\n");
> > +if (rte_push_vlan) {
> > +ds_put_format(s, "  Push-vlan: 0x%"PRIx16"\n",
> > +  ntohs(rte_push_vlan->ethertype));
> > +} else {
> > +ds_put_format(s, "  Push-vlan = null\n");
> > +}
> > +} else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
> > +struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp = 
> > actions->conf;
> > +ds_put_cstr(s, "rte flow set-vlan-pcp action:\n");
> > +if (rte_vlan_pcp) {
> > +ds_put_format(s, "  Set-vlan-pcp: %"PRIu8"\n",
> > +  rte_vlan_pcp->vlan_pcp);
> > +} else {
> > +ds_put_format(s, "  Set-vlan-pcp = null\n");
> > +}
> > +} else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
> > +struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid = 
> > actions->conf;
> > +ds_put_cstr(s, "rte flow set-vlan-vid action:\n");
> > +if (rte_vlan_vid) {
> > +ds_put_format(s, "  Set-vlan-vid: %"PRIu16"\n",
> > +  ntohs(rte_vlan_vid->vlan_vid));
> > +} else {
> > +ds_put_format(s, "  Set-vlan-vid = null\n");
> > +}
> > +} else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) {
> > +ds_put_cstr(s, "rte flow pop-vlan action\n");
> >   } else {
> >   ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
> >   }
> > @@ -970,6 +1000,33 @@ parse_set_actions(struct flow_actions *actions,
> >   return 0;
> >   }
> >
> > +static int
> > +parse_vlan_push_action(struct flow_actions *actions,
> > +   const struct ovs_action_push_vlan *vlan_push)
> > +{
> > +struct rte_flow_action_of_push_vlan *rte_push_vlan;
> > +struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp;
> > +struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid;
> > +
> > +rte_push_vlan = xzalloc(sizeof *rte_push_vlan);
> > +rte_push_vlan->ethertype = vlan_push->vlan_tpid;
> > +add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN,
> > +rte_push_vlan);
> > +
> > +rte_vlan_pcp = xzalloc(sizeof *rte_vlan_pcp);
> > +rte_vlan_pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci);
> > +add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
> > +rte_vlan_pcp);
> > +
> > +rte_vlan_vid = xzalloc(sizeof *rte_vlan_vid);
> > +rte_vlan_vid->vlan_vid =
> > +rte_cpu_to_be_16(vlan_tci_to_vid(vlan_push->vlan_tci));
> > +add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
> > +   

Re: [ovs-dev] [PATCH v4] Bareudp Tunnel Support

2020-06-02 Thread Gregory Rose



On 6/1/2020 7:37 PM, Martin Varghese wrote:

Hi Greg,

I apologise for the confusion.
All the bareudp patches got integrated into the net-next tree during
5.6 -rcX time frame.

I was not careful enough while announcing the linux kernel version
bareudp is supported.

I checked the linux 5.7  kconfig and i could see bareudp support present there.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/Kconfig?h=v5.7

config BAREUDP
tristate "Bare UDP Encapsulation"
depends on INET
depends on IPV6 || !IPV6
select NET_UDP_TUNNEL
select GRO_CELLS
help
   This adds a bare UDP tunnel module for tunnelling different
   kinds of traffic like MPLS, IP, etc. inside a UDP tunnel.

   To compile this driver as a module, choose M here: the module
   will be called bareudp.

I could not find the default .config of 5.7 to see if the bareudp
support is enabled by default.

I apologise again for wasting your time

Thanks
Martin


Hi Martin,

First, let's not drop the list.  I've added it back to my response.

Then, don't worry about it.  These sorts of things are why we do
reviews.  You should see some of the screw ups I've done! Actually,
if you've kept up with this list you would have seen some of them.

;^)

So you're right, BAREUDP config is not present in 5.6 but I did
find it in the stable tree on branch linux-5.7.y.  I'll enable the
config, get a kernel built and retry.

Thanks,

- Greg





On 6/2/20, Gregory Rose  wrote:

On 5/25/2020 8:31 PM, Martin Varghese wrote:

From: Martin Varghese 

There are various L3 encapsulation standards using UDP being discussed to
leverage the UDP based load balancing capability of different networks.
MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them.

The Bareudp tunnel provides a generic L3 encapsulation tunnelling support
for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a
UDP
tunnel.

An example to create bareudp device to tunnel MPLS traffic is
given

$ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \
   type=bareudp options:remote_ip=2.1.1.3
   options:local_ip=2.1.1.2 \
   options:payload_type=0x8847 options:dst_port=6635 \
   options:packet_type="legacy_l3" \
   ofport_request=$bareudp_egress_port

The bareudp device supports special handling for MPLS & IP as
they can have multiple ethertypes. MPLS procotcol can have ethertypes
ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have
ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6).

The bareudp device to tunnel L3 traffic with multiple ethertypes
(MPLS & IP) can be created by passing the L3 protocol name as string in
the field payload_type. An example to create bareudp device to tunnel
MPLS unicast & multicast traffic is given below.::

$ ovs-vsctl add-port  br_mpls udp_port -- set interface
  udp_port \
  type=bareudp options:remote_ip=2.1.1.3
  options:local_ip=2.1.1.2 \
  options:payload_type=mpls options:dst_port=6635 \
  options:packet_type="legacy_l3"

Signed-off-by: Martin Varghese 


Hi Martin,

I've built OVS with your patch and it applies cleanly, builds without
errors and seems OK so far as that goes.

However, when running the system traffic test I get an error:
134: layer3 - ping over MPLS Bareudp FAILED
(system-layer3-tunnels.at:163)

If I look at the ovs-vswitchd.log I see the following errors:

2020-06-01T21:16:35.669Z|00035|dpif|WARN|system@ovs-system: failed to
add at_bareudp0 as port: Address family not supported by protocol
2020-06-01T21:16:35.669Z|00036|bridge|WARN|could not add network device
at_bareudp0 to ofproto (Address family not supported by protocol)
2020-06-01T21:16:35.691Z|00037|dpif|WARN|system@ovs-system: failed to
add at_bareudp0 as port: Address family not supported by protocol
2020-06-01T21:16:35.691Z|00038|bridge|WARN|could not add network device
at_bareudp0 to ofproto (Address family not supported by protocol)
2020-06-01T21:16:35.693Z|00039|ofproto|WARN|br1: cannot get STP status
on nonexistent port 1
2020-06-01T21:16:35.693Z|00040|ofproto|WARN|br1: cannot get RSTP status
on nonexistent port 1
2020-06-01T21:16:35.697Z|00041|bridge|INFO|bridge br1: deleted interface
ovs-p1 on port 1
2020-06-01T21:16:35.701Z|00042|dpif|WARN|system@ovs-system: failed to
add at_bareudp0 as port: Address family not supported by protocol
2020-06-01T21:16:35.701Z|00043|bridge|WARN|could not add network device
at_bareudp0 to ofproto (Address family not supported by protocol)
2020-06-01T21:16:35.704Z|00044|bridge|WARN|could not open network device
ovs-p1 (No such device)
2020-06-01T21:16:35.722Z|00045|bridge|WARN|could not add network device
at_bareudp0 to ofproto (Address family not supported by protocol)
2020-06-01T21:16:35.726Z|00046|bridge|WARN|could not open network device
ovs-p1 (No such device)

Re: [ovs-dev] Re: Re: Re: [PATCH v4 0/3] Add support for TSO with DPDK

2020-06-02 Thread 贺鹏
yes,check dpdk gso lib……

在 2020年3月10日星期二,txfh2007 via dev  写道:

> Hi Flavio and all:
>
>  Is there a way to support software TSO for DPDK tunnel network ? I
> have tried userspace TSO function, and running on tunnel network, I have
> got the following error:
>  "Tunneling packets with HW offload flags is not supported: packet
> dropped"
>  So is there a way to work around if we would support both vlan and
> tunnel network on the same compute node ?
>
> Thanks
> Timo
>
>
>
> --
>
>
>
>
> On Fri, Feb 28, 2020 at 9:56 AM Flavio Leitner  wrote:
> >
> >
> > Hi Yi Yang,
> >
> > This is the bug fix required to make veth TSO work in OvS:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=9d2f67e43b73e8af7438be219b66a5de0cfa8bd9
> >
> > commit 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9
> > Author: Jianfeng Tan 
> > Date:   Sat Sep 29 15:41:27 2018 +
> >
> > net/packet: fix packet drop as of virtio gso
> >
> > When we use raw socket as the vhost backend, a packet from virito
> with
> > gso offloading information, cannot be sent out in later validaton at
> > xmit path, as we did not set correct skb->protocol which is further
> used
> > for looking up the gso function.
> >
> > To fix this, we set this field according to virito hdr information.
> >
> > Fixes: e858fae2b0b8f4 ("virtio_net: use common code for
> virtio_net_hdr and skb GSO conversion")
> > Signed-off-by: Jianfeng Tan 
> > Signed-off-by: David S. Miller 
> >
> >
> > So, the minimum kernel version is 4.19.
> >
> Thanks,
> I sent a patch to update the documentation. Please take a look.
> William
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


-- 
hepeng
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel

2020-06-02 Thread Tonghao Zhang
On Tue, Jun 2, 2020 at 5:34 PM Simon Horman  wrote:
>
> On Tue, Jun 02, 2020 at 04:58:32PM +0800, Tonghao Zhang wrote:
> > On Tue, Jun 2, 2020 at 4:33 PM Simon Horman  
> > wrote:
> > >
> > > On Tue, Jun 02, 2020 at 04:17:21PM +0800, Tonghao Zhang wrote:
> > > > On Fri, May 29, 2020 at 5:06 PM Simon Horman 
> > > >  wrote:
> > > > >
> > > > > On Mon, May 18, 2020 at 09:44:42AM +0800, xiangxia.m@gmail.com 
> > > > > wrote:
> > > > > > From: Tonghao Zhang 
> > > > > >
> > > > > > This patch allows users to offload the TC flower rules with tunnel
> > > > > > mask. In some case, mask is useful as wildcards.
> > > > > >
> > > > > > For example:
> > > > > > $ ovs-appctl dpctl/add-flow \
> > > > > > "tunnel(dst=3.3.3.100,src=3.3.3.200/255.255.255.0,tp_dst=4789),\
> > > > > > recirc_id(0),in_port(3),eth(),eth_type(0x0800),ipv4()" "2"
> > > > >
> > > > > Hi,
> > > > >
> > > > > Sorry for the delay in responding.
> > > > >
> > > > > I think it would be useful to spell out more clearly in the changelog
> > > > > what this patch does. From my reading of the code it:
> > > > >
> > > > > Allows masked match of the following, where previously supported
> > > > > an exact match was supported:
> > > > > * Remote (dst) tunnel endpoint address
> > > > > * Local (src) tunnel endpoint address
> > > > > * Remote (dst) tunnel endpoint UDP port
> > > > >
> > > > > And also allows masked match of the following, where previously no
> > > > > match was supported;
> > > > > * Local (std) tunnel endpoint UDP port
> > > > Ok, Thanks, I will update it in NEWS.
> > >
> > > Thanks. Please also include this information in the changelog.
> > >
> > > > > I think it would also be useful to describe a use-case where this
> > > > > is used. The command line example (above) is a good start.
> > > > Yes, I will update the commit log and describe a use-case for it.
> > > > >
> > > > > Also, not strictly related to this patch.
> > > > > I think patch series that have more than one patch should
> > > > > have a cover letter, in this case [PATCH 0/3], describing
> > > > > the overall aim of the patchset.
> > > > >
> > > > >
> > > > > The other patches in this series seem fine to me.
> > > > > Please consider addressing the issues I have raised here
> > > > > and posting a v2, with all three patches and a cover letter.
> > > > >
> > > > > ...
> > > > >
> > > > > > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > > > > > index 875ebef71941..39cf25f63ce0 100644
> > > > > > --- a/lib/netdev-offload-tc.c
> > > > > > +++ b/lib/netdev-offload-tc.c
> > > > > > @@ -633,13 +633,22 @@ parse_tc_flower_to_match(struct tc_flower 
> > > > > > *flower,
> > > > > >  match_set_tun_id(match, flower->key.tunnel.id);
> > > > > >  match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
> > > > > >  }
> > > > > > -if (flower->key.tunnel.ipv4.ipv4_dst) {
> > > > > > -match_set_tun_src(match, 
> > > > > > flower->key.tunnel.ipv4.ipv4_src);
> > > > > > -match_set_tun_dst(match, 
> > > > > > flower->key.tunnel.ipv4.ipv4_dst);
> > > > > > -} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
> > > > > > -   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
> > > > > > -match_set_tun_ipv6_src(match, 
> > > > > > >key.tunnel.ipv6.ipv6_src);
> > > > > > -match_set_tun_ipv6_dst(match, 
> > > > > > >key.tunnel.ipv6.ipv6_dst);
> > > > > > +if (flower->mask.tunnel.ipv4.ipv4_dst ||
> > > > > > +flower->mask.tunnel.ipv4.ipv4_src) {
> > > > >
> > > > > The change to the if condition seems separate from the change
> > > > > described in the changelog. What is the use-case for this?
> > > > I think that may be used for matching the tunnel src packets only
> > > > which will be dropped.
> > > > because user may don't want that packets sent to the host.
> > >
> > > I think it would be best to break out this (and the corresponding IPv6
> > > change) into a separate patch with a changelog that describes why
> > > this is being done and, if appropriate, an update to NEWS.
> > Hi Simon
> > Did you mean that the two patches as below as a series.
> > [PATCH 1/3] dpif-netlink: Generate ufids for installing TC flowers
> > [PATCH 3/3] netdev-offload-tc: Use ipv6_addr_is_set instead of is_all_zeros
> >
> > and this patch as a separate patch(with changelog)
> > [PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel
>
> Sorry for not being clear.
>
> I meant expand the series to four patch.
> Where the new patch just includes the following changes:
>
> -if (flower->key.tunnel.ipv4.ipv4_dst) {
> +if (flower->mask.tunnel.ipv4.ipv4_dst ||
> +flower->mask.tunnel.ipv4.ipv4_src) {
>
> ...
>
> -} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
> -   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
> +} else if (ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_dst) ||
> +   

[ovs-dev] [PATCH v2 3/4] netdev-offload-tc: Allow to match the IP and port mask of tunnel

2020-06-02 Thread xiangxia . m . yue
From: Tonghao Zhang 

This patch allows users to offload the TC flower rules with
tunnel mask. This patch allows masked match of the following,
where previously supported an exact match was supported:
* Remote (dst) tunnel endpoint address
* Local (src) tunnel endpoint address
* Remote (dst) tunnel endpoint UDP port

And also allows masked match of the following, where previously
no match was supported:
* Local (src) tunnel endpoint UDP port

In some case, mask is useful as wildcards. For example, DDOS,
in that case, we don’t want to allow specified hosts IPs or
only source Ports to access the targeted host. For example:

$ ovs-appctl dpctl/add-flow 
"tunnel(dst=2.2.2.100,src=2.2.2.0/255.255.255.0,tp_dst=4789),\
  recirc_id(0),in_port(3),eth(),eth_type(0x0800),ipv4()" ""

$ tc filter show dev vxlan_sys_4789 ingress
  ...
  eth_type ipv4
  enc_dst_ip 2.2.2.100
  enc_src_ip 2.2.2.0/24
  enc_dst_port 4789
  enc_ttl 64
  in_hw in_hw_count 2
action order 1: gact action drop
...

Cc: Simon Horman 
Cc: Paul Blakey 
Cc: Roi Dayan 
Cc: Ben Pfaff 
Cc: William Tu 
Cc: Ilya Maximets 
Signed-off-by: Tonghao Zhang 
Acked-by: Roi Dayan 
---
 NEWS|  5 
 include/openvswitch/match.h |  3 ++
 lib/match.c | 13 +
 lib/netdev-offload-tc.c | 38 +++--
 lib/tc.c| 57 +
 tests/tunnel.at | 22 ++
 6 files changed, 123 insertions(+), 15 deletions(-)

diff --git a/NEWS b/NEWS
index 70bd17584594..ed6b4486430e 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,11 @@ Post-v2.13.0
  * Deprecated DPDK ring ports (dpdkr) are no longer supported.
- Linux datapath:
  * Support for kernel versions up to 5.5.x.
+   - Tunnels: TC Flower offload
+ * Tunnel Local endpoint address masked match are supported.
+ * Tunnel Romte endpoint address masked match are supported.
+ * Tunnel Local endpoint ports masked match are supported.
+ * Tunnel Romte endpoint ports masked match are supported.
 
 
 v2.13.0 - 14 Feb 2020
diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h
index 8af3b74ed3e0..3b196c7fa462 100644
--- a/include/openvswitch/match.h
+++ b/include/openvswitch/match.h
@@ -105,6 +105,9 @@ void match_set_tun_flags(struct match *match, uint16_t 
flags);
 void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t 
mask);
 void match_set_tun_tp_dst(struct match *match, ovs_be16 tp_dst);
 void match_set_tun_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 
mask);
+void match_set_tun_tp_src(struct match *match, ovs_be16 tp_src);
+void match_set_tun_tp_src_masked(struct match *match,
+ ovs_be16 port, ovs_be16 mask);
 void match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, 
ovs_be16 mask);
 void match_set_tun_gbp_id(struct match *match, ovs_be16 gbp_id);
 void match_set_tun_gbp_flags_masked(struct match *match, uint8_t flags, 
uint8_t mask);
diff --git a/lib/match.c b/lib/match.c
index 25c277cc670b..29b25a73bab4 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -293,6 +293,19 @@ match_set_tun_tp_dst_masked(struct match *match, ovs_be16 
port, ovs_be16 mask)
 match->flow.tunnel.tp_dst = port & mask;
 }
 
+void
+match_set_tun_tp_src(struct match *match, ovs_be16 tp_src)
+{
+match_set_tun_tp_src_masked(match, tp_src, OVS_BE16_MAX);
+}
+
+void
+match_set_tun_tp_src_masked(struct match *match, ovs_be16 port, ovs_be16 mask)
+{
+match->wc.masks.tunnel.tp_src = mask;
+match->flow.tunnel.tp_src = port & mask;
+}
+
 void
 match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, ovs_be16 
mask)
 {
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 9b7c74aae887..8ba22312ec00 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -633,13 +633,20 @@ parse_tc_flower_to_match(struct tc_flower *flower,
 match_set_tun_id(match, flower->key.tunnel.id);
 match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
 }
-if (flower->key.tunnel.ipv4.ipv4_dst) {
-match_set_tun_src(match, flower->key.tunnel.ipv4.ipv4_src);
-match_set_tun_dst(match, flower->key.tunnel.ipv4.ipv4_dst);
-} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
-   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
-match_set_tun_ipv6_src(match, >key.tunnel.ipv6.ipv6_src);
-match_set_tun_ipv6_dst(match, >key.tunnel.ipv6.ipv6_dst);
+if (flower->mask.tunnel.ipv4.ipv4_dst) {
+match_set_tun_dst_masked(match,
+ flower->key.tunnel.ipv4.ipv4_dst,
+ flower->mask.tunnel.ipv4.ipv4_dst);
+match_set_tun_src_masked(match,
+ flower->key.tunnel.ipv4.ipv4_src,
+ flower->mask.tunnel.ipv4.ipv4_src);
+} else if 

[ovs-dev] [PATCH v2 0/4] netdev-offload-tc: Allow to match the IP and port mask of tunnel

2020-06-02 Thread xiangxia . m . yue
From: Tonghao Zhang 

This series patch allows user to match tunnel src/dst
address and port with masked values. User can use that
to match specified IP address or port and then drop packets
(DDOS network attack from a gateway or others) or other actions.

patch [1]: allow user install TC Flowers with dpctl
patch [2]: make the codes more readable.
patch [3]: allow user to masked match
patch [4]: allow user only match tunnel src address

Tonghao Zhang (4):
  dpif-netlink: Generate ufids for installing TC flowers
  netdev-offload-tc: Use ipv6_addr_is_set instead of is_all_zeros
  netdev-offload-tc: Allow to match the IP and port mask of tunnel
  netdev-offload-tc: Expand tunnel source IPs masked match

 NEWS|  5 +++
 include/openvswitch/match.h |  3 ++
 lib/dpif-netlink.c  | 45 ++
 lib/match.c | 13 
 lib/netdev-offload-tc.c | 49 -
 lib/odp-util.c  |  3 +-
 lib/packets.h   |  6 
 lib/tc.c| 63 +++--
 tests/tunnel.at | 22 +
 9 files changed, 184 insertions(+), 25 deletions(-)

--
V2:
* Split: 
http://patchwork.ozlabs.org/project/openvswitch/patch/20200518014443.1529-2-xiangxia.m@gmail.com/
  to two patches, that easily to review the patch.
* Fix checkpatch warning
* update NEWS
* add more info in commit message
--
2.26.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 2/4] netdev-offload-tc: Use ipv6_addr_is_set instead of is_all_zeros

2020-06-02 Thread xiangxia . m . yue
From: Tonghao Zhang 

Not bugfix, make the codes more readable.

Cc: Simon Horman 
Cc: Paul Blakey 
Cc: Roi Dayan 
Cc: Ben Pfaff 
Cc: William Tu 
Cc: Ilya Maximets 
Signed-off-by: Tonghao Zhang 
Acked-by: Roi Dayan 
---
 lib/netdev-offload-tc.c | 6 ++
 lib/tc.c| 6 ++
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 875ebef71941..9b7c74aae887 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -734,13 +734,11 @@ parse_tc_flower_to_match(struct tc_flower *flower,
 nl_msg_put_be32(buf, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
 action->encap.ipv4.ipv4_dst);
 }
-if (!is_all_zeros(>encap.ipv6.ipv6_src,
-  sizeof action->encap.ipv6.ipv6_src)) {
+if (ipv6_addr_is_set(>encap.ipv6.ipv6_src)) {
 nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
 >encap.ipv6.ipv6_src);
 }
-if (!is_all_zeros(>encap.ipv6.ipv6_dst,
-  sizeof action->encap.ipv6.ipv6_dst)) {
+if (ipv6_addr_is_set(>encap.ipv6.ipv6_dst)) {
 nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
 >encap.ipv6.ipv6_dst);
 }
diff --git a/lib/tc.c b/lib/tc.c
index 12af0192b614..a6297445ca33 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -2038,7 +2038,7 @@ nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, 
bool id_present,
 if (ipv4_dst) {
 nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src);
 nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst);
-} else if (!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) {
+} else if (ipv6_addr_is_set(ipv6_dst)) {
 nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_DST,
 ipv6_dst);
 nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_SRC,
@@ -2135,12 +2135,10 @@ nl_msg_put_act_ct(struct ofpbuf *request, struct 
tc_action *action)
 action->ct.range.ipv4.max);
 }
 } else if (action->ct.range.ip_family == AF_INET6) {
-size_t ipv6_sz = sizeof(action->ct.range.ipv6.max);
 
 nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MIN,
 >ct.range.ipv6.min);
-if (!is_all_zeros(>ct.range.ipv6.max,
-  ipv6_sz)) {
+if (ipv6_addr_is_set(>ct.range.ipv6.max)) {
 nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MAX,
 >ct.range.ipv6.max);
 }
-- 
2.26.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 4/4] netdev-offload-tc: Expand tunnel source IPs masked match

2020-06-02 Thread xiangxia . m . yue
From: Tonghao Zhang 

To support more use case, for example, DDOS, which
packets should be dropped in hardware, this patch
allows users to match only the tunnel source IPs with
masked value.

$ ovs-appctl dpctl/add-flow 
"tunnel(src=2.2.2.0/255.255.255.0,tp_dst=4789,ttl=64),\
  recirc_id(2),in_port(3),eth(),eth_type(0x0800),ipv4()" ""

$ ovs-appctl dpctl/dump-flows
  tunnel(src=2.2.2.0/255.255.255.0,ttl=64,tp_dst=4789) ... actions:drop
$ tc filter show dev vxlan_sys_4789 ingress
  ...
  eth_type ipv4
  enc_src_ip 2.2.2.0/24
  enc_dst_port 4789
  enc_ttl 64
  in_hw in_hw_count 2
action order 1: gact action drop
...

Cc: Simon Horman 
Cc: Paul Blakey 
Cc: Roi Dayan 
Cc: Ben Pfaff 
Cc: William Tu 
Cc: Ilya Maximets 
Signed-off-by: Tonghao Zhang 
Acked-by: Roi Dayan 
---
 lib/netdev-offload-tc.c | 9 ++---
 lib/odp-util.c  | 3 ++-
 lib/packets.h   | 6 ++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 8ba22312ec00..f7f9c231e3cf 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -633,14 +633,16 @@ parse_tc_flower_to_match(struct tc_flower *flower,
 match_set_tun_id(match, flower->key.tunnel.id);
 match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
 }
-if (flower->mask.tunnel.ipv4.ipv4_dst) {
+if (flower->mask.tunnel.ipv4.ipv4_dst ||
+flower->mask.tunnel.ipv4.ipv4_src) {
 match_set_tun_dst_masked(match,
  flower->key.tunnel.ipv4.ipv4_dst,
  flower->mask.tunnel.ipv4.ipv4_dst);
 match_set_tun_src_masked(match,
  flower->key.tunnel.ipv4.ipv4_src,
  flower->mask.tunnel.ipv4.ipv4_src);
-} else if (ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_dst)) {
+} else if (ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_dst) ||
+   ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_src)) {
 match_set_tun_ipv6_dst_masked(match,
   >key.tunnel.ipv6.ipv6_dst,
   >mask.tunnel.ipv6.ipv6_dst);
@@ -1400,7 +1402,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
*match,
 chain = key->recirc_id;
 mask->recirc_id = 0;
 
-if (flow_tnl_dst_is_set(>tunnel)) {
+if (flow_tnl_dst_is_set(>tunnel) ||
+flow_tnl_src_is_set(>tunnel)) {
 VLOG_DBG_RL(,
 "tunnel: id %#" PRIx64 " src " IP_FMT
 " dst " IP_FMT " tp_src %d tp_dst %d",
diff --git a/lib/odp-util.c b/lib/odp-util.c
index b66d266cca1d..72601dc6ba2b 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -6125,7 +6125,8 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms 
*parms,
 
 nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority);
 
-if (flow_tnl_dst_is_set(>tunnel) || export_mask) {
+if (flow_tnl_dst_is_set(>tunnel) ||
+flow_tnl_src_is_set(>tunnel) || export_mask) {
 tun_key_to_attr(buf, >tunnel, >flow->tunnel,
 parms->key_buf, NULL);
 }
diff --git a/lib/packets.h b/lib/packets.h
index 447e6f6fafa5..395bc869eb00 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -52,6 +52,12 @@ flow_tnl_dst_is_set(const struct flow_tnl *tnl)
 return tnl->ip_dst || ipv6_addr_is_set(>ipv6_dst);
 }
 
+static inline bool
+flow_tnl_src_is_set(const struct flow_tnl *tnl)
+{
+return tnl->ip_src || ipv6_addr_is_set(>ipv6_src);
+}
+
 struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl);
 struct in6_addr flow_tnl_src(const struct flow_tnl *tnl);
 
-- 
2.26.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/4] dpif-netlink: Generate ufids for installing TC flowers

2020-06-02 Thread xiangxia . m . yue
From: Tonghao Zhang 

To support installing the TC flowers to HW, via "ovs-appctl dpctl/add-flow"
command, there should be an ufid. This patch will check whether ufid exists,
if not, generate an ufid. Should to know that when processing upcall packets,
ufid is generated in parse_odp_packet for kernel datapath.

Configuring the max-idle/max-revalidator, may help testing this patch.

Cc: Simon Horman 
Cc: Paul Blakey 
Cc: Roi Dayan 
Cc: Ben Pfaff 
Cc: William Tu 
Cc: Ilya Maximets 
Signed-off-by: Tonghao Zhang 
Acked-by: Roi Dayan 
---
 lib/dpif-netlink.c | 45 +
 1 file changed, 45 insertions(+)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index dc642100fc58..a19ed7e53566 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2231,12 +2231,55 @@ dpif_netlink_operate_chunks(struct dpif_netlink *dpif, 
struct dpif_op **ops,
 }
 }
 
+static void
+dpif_netlink_try_update_ufid__(struct dpif_op *op, ovs_u128 *ufid)
+{
+switch (op->type) {
+case DPIF_OP_FLOW_PUT:
+if (!op->flow_put.ufid) {
+odp_flow_key_hash(op->flow_put.key, op->flow_put.key_len,
+  ufid);
+op->flow_put.ufid = ufid;
+}
+break;
+case DPIF_OP_FLOW_DEL:
+if (!op->flow_del.ufid) {
+odp_flow_key_hash(op->flow_del.key, op->flow_del.key_len,
+  ufid);
+op->flow_del.ufid = ufid;
+}
+break;
+case DPIF_OP_FLOW_GET:
+if (!op->flow_get.ufid) {
+odp_flow_key_hash(op->flow_get.key, op->flow_get.key_len,
+  ufid);
+op->flow_get.ufid = ufid;
+}
+break;
+case DPIF_OP_EXECUTE:
+default:
+break;
+}
+}
+
+static void
+dpif_netlink_try_update_ufid(struct dpif_op **ops, ovs_u128 *ufid,
+ size_t n_ops)
+{
+int i;
+
+for (i = 0; i < n_ops; i++) {
+dpif_netlink_try_update_ufid__(ops[i], [i]);
+}
+}
+
 static void
 dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
  enum dpif_offload_type offload_type)
 {
 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
 struct dpif_op *new_ops[OPERATE_MAX_OPS];
+ovs_u128 ufids[OPERATE_MAX_OPS];
 int count = 0;
 int i = 0;
 int err = 0;
@@ -2246,6 +2289,8 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op 
**ops, size_t n_ops,
 return;
 }
 
+dpif_netlink_try_update_ufid(ops, ufids, n_ops);
+
 if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) {
 while (n_ops > 0) {
 count = 0;
-- 
2.26.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v7] AB bonding: Add "primary" interface concept

2020-06-02 Thread Jeff Squyres (jsquyres) via dev
Greetings.

Is there an estimate on when this patch can be merged?

Thanks!


> On May 22, 2020, at 5:12 PM, Jeff Squyres  wrote:
> 
> In AB bonding, if the current active slave becomes disabled, a
> replacement slave is arbitrarily picked from the remaining set of
> enabled slaves.  This commit adds the concept of a "primary" slave: an
> interface that will always be (or become) the current active slave if
> it is enabled.
> 
> The rationale for this functionality is to allow the designation of a
> preferred interface for a given bond.  For example:
> 
> 1. Bond is created with interfaces p1 (primary) and p2, both enabled.
> 2. p1 becomes the current active slave (because it was designated as
>   the primary).
> 3. Later, p1 fails/becomes disabled.
> 4. p2 is chosen to become the current active slave.
> 5. Later, p1 becomes re-enabled.
> 6. p1 is chosen to become the current active slave (because it was
>   designated as the primary)
> 
> Note that p1 becomes the active slave once it becomes re-enabled, even
> if nothing has happened to p2.
> 
> This "primary" concept exists in Linux kernel network interface
> bonding, but did not previously exist in OVS bonding.
> 
> Only one primary slave inteface is supported per bond, and is only
> supported for active/backup bonding.
> 
> The primary slave interface is designated via
> "other_config:bond-primary" when creating a bond.
> 
> Signed-off-by: Jeff Squyres 
> Reviewed-by: Aaron Conole 
> Tested-by: Greg Rose 
> Acked-by: Greg Rose 
> ---
> v7:
> - After rebasing patch to head of tree, adjust test output as result
>  of changes from 81f71381f.
> 
> v6:
> - Style: bleep bloop.  Fix use of {}.
> 
> v5:
> - Handle when bond is reconfigured to remove "bond-primary" config.
> - Fix potential flapping between remaining slaves.
> - Add a test to re-add a removed primary interface and make sure the
>  bond reflects that properly.
> - Add a test to remove "bond-primary" config and make sure the bond
>  reflects that properly.
> 
> v4:
> - Style: bleep bloop.  Trim line length below 79 characters.
> 
> v3:
> - Adjusted print logic for when the primary slave is not attached
> 
> v2:
> - Added "ovs-vsctl bond/show" label when primary interface is no
>  longer enslaved
> - Fixed strcmp() usage nits
> - Moved "other_config:bond-primary" docs to the "Bonding
>  Configuration" group
> - Expanded commit message
> - Added more test cases (including one for when the primary interface
>  is no longer enslaved)
> 
> ofproto/bond.c|  75 +++-
> ofproto/bond.h|   1 +
> tests/lacp.at |   1 +
> tests/ofproto-dpif.at | 199 +-
> vswitchd/bridge.c |   5 ++
> vswitchd/vswitch.xml  |   8 ++
> 6 files changed, 287 insertions(+), 2 deletions(-)
> 
> diff --git a/ofproto/bond.c b/ofproto/bond.c
> index 405202fb6..4ad1a20ae 100644
> --- a/ofproto/bond.c
> +++ b/ofproto/bond.c
> @@ -93,6 +93,7 @@ struct bond_slave {
> /* Link status. */
> bool enabled;   /* May be chosen for flows? */
> bool may_enable;/* Client considers this slave bondable. */
> +bool is_primary;/* This slave is preferred over others. */
> long long delay_expires;/* Time after which 'enabled' may change. */
> 
> /* Rebalancing info.  Used only by bond_rebalance(). */
> @@ -126,6 +127,7 @@ struct bond {
> enum lacp_status lacp_status; /* Status of LACP negotiations. */
> bool bond_revalidate;   /* True if flows need revalidation. */
> uint32_t basis; /* Basis for flow hash function. */
> +char *primary;  /* Name of the primary slave interface. */
> 
> /* SLB specific bonding info. */
> struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */
> @@ -241,6 +243,7 @@ bond_create(const struct bond_settings *s, struct 
> ofproto_dpif *ofproto)
> 
> bond->active_slave_mac = eth_addr_zero;
> bond->active_slave_changed = false;
> +bond->primary = NULL;
> 
> bond_reconfigure(bond, s);
> return bond;
> @@ -290,6 +293,7 @@ bond_unref(struct bond *bond)
> update_recirc_rules__(bond);
> 
> hmap_destroy(>pr_rule_ops);
> +free(bond->primary);
> free(bond->name);
> free(bond);
> }
> @@ -459,6 +463,39 @@ bond_reconfigure(struct bond *bond, const struct 
> bond_settings *s)
> bond->bond_revalidate = false;
> }
> 
> +/*
> + * If a primary interface is set on the new settings:
> + * 1. If the bond has no primary previously set, save it and
> + * revalidate.
> + * 2. If the bond has a different primary previously set, save the
> + * new one and revalidate.
> + * 3. If the bond has the same primary previously set, do nothing.
> + */
> +if (s->primary) {
> +bool changed = false;
> +if (bond->primary) {
> +if (strcmp(bond->primary, s->primary)) {
> +free(bond->primary);
> +changed = true;
> +   

Re: [ovs-dev] [PATCH 0/2] Implement terse dump for netdev-offload

2020-06-02 Thread Simon Horman
On Tue, Jun 02, 2020 at 01:21:16PM +0200, Simon Horman wrote:
> On Mon, Jun 01, 2020 at 03:54:54PM +0300, Roi Dayan wrote:
> > In order to improve revalidator performance, extend netdev-offload with 
> > terse
> > dump support. In terse dump mode modify code that parses netlink to flower 
> > and
> > flower to match to only provide the essential data for conversion instead of
> > parsing all filter and actions data.
> > 
> > Implement support for new TC TCA_DUMP_FLAGS_TERSE flag. With the flag set TC
> > kernel implementation skips output of all other data besides stats, cookie 
> > and
> > flags which allows to pack much more filters in single netlink packet and
> > reduces amount of syscalls required to execute filter dump.
> > 
> > The impact of the change is measured by benchmarking revalidator poll 
> > interval
> > time with 100k of simple L2 flows with two revalidator threads on setup 
> > with 2x
> > Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz, 32GB memory. For such 
> > configuration
> > poll interval is reduced from ~1020ms to ~220ms. 
> > 
> > Pointer to the kernel patches for the new TC flag:
> > https://patchwork.ozlabs.org/project/netdev/cover/20200515114014.3135-1-vladbu%40mellanox.com/
> > 
> > Vlad Buslov (2):
> >   netdev-offload: Implement terse dump support
> >   tc: Support new terse dump kernel API
> 
> Thanks Roi, Thanks Vlad,
> 
> these changes look good to me. I have put them into Travis-CI, as per my
> usual process, to see if it flags anything.
> 
> https://travis-ci.org/github/horms2/ovs/builds/693802765
> https://travis-ci.org/github/horms2/ovs/builds/693800395

Hi,

unfortunately Travis-CI has flaged a number of failures.
I am wondering if they are explained by this one:

https://travis-ci.org/github/horms2/ovs/jobs/693802778#L2874

...

ofproto/ofproto-dpif-upcall.c:2585:55: error: reading variable 'stats' requires
  holding any mutex [-Werror,-Wthread-safety-analysis]
  udpif->dpif->current_ms : ukey->stats.used;
  ^

ofproto/ofproto-dpif-upcall.c:2588:18: error: reading variable 'created'
  requires holding any mutex [-Werror,-Wthread-safety-analysis]
return ukey->created;
 ^

...

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] netdev-offload: Implement terse dump support

2020-06-02 Thread Aaron Conole
Roi Dayan  writes:

> On 2020-06-02 1:35 AM, 0-day Robot wrote:
>> Bleep bloop.  Greetings Roi Dayan, I am a robot and I have tried out your 
>> patch.
>> Thanks for your contribution.
>> 
>> I encountered some error that I wasn't expecting.  See the details below.
>> 
>> 
>> checkpatch:
>> WARNING: Line is 80 characters long (recommended limit is 79)
>> #252 FILE: lib/netdev-offload-tc.c:1824:
>> parse_tc_flower_to_match(, match, actions, stats, attrs, buf, 
>> false);
>> 
>> Lines checked: 368, Warnings: 1, Errors: 0
>> 
>> 
>> Please check this out.  If you feel there has been an error, please email 
>> acon...@redhat.com
>> 
>> Thanks,
>> 0-day Robot
>> 
>
> it is pretty short line so we didn't want to do redundant line breaks.
> is it important or can we skip this?

It's not that important, I think.

> should we maybe update ovs checkpatch to 100 chars to match
> linux kernel? i guess the current 79 is also started because
> the kernel was 79?

I don't have a strong opinion on it, but maybe someone else does.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 0/2] Implement terse dump for netdev-offload

2020-06-02 Thread Simon Horman
On Mon, Jun 01, 2020 at 03:54:54PM +0300, Roi Dayan wrote:
> In order to improve revalidator performance, extend netdev-offload with terse
> dump support. In terse dump mode modify code that parses netlink to flower and
> flower to match to only provide the essential data for conversion instead of
> parsing all filter and actions data.
> 
> Implement support for new TC TCA_DUMP_FLAGS_TERSE flag. With the flag set TC
> kernel implementation skips output of all other data besides stats, cookie and
> flags which allows to pack much more filters in single netlink packet and
> reduces amount of syscalls required to execute filter dump.
> 
> The impact of the change is measured by benchmarking revalidator poll interval
> time with 100k of simple L2 flows with two revalidator threads on setup with 
> 2x
> Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz, 32GB memory. For such configuration
> poll interval is reduced from ~1020ms to ~220ms. 
> 
> Pointer to the kernel patches for the new TC flag:
> https://patchwork.ozlabs.org/project/netdev/cover/20200515114014.3135-1-vladbu%40mellanox.com/
> 
> Vlad Buslov (2):
>   netdev-offload: Implement terse dump support
>   tc: Support new terse dump kernel API

Thanks Roi, Thanks Vlad,

these changes look good to me. I have put them into Travis-CI, as per my
usual process, to see if it flags anything.

https://travis-ci.org/github/horms2/ovs/builds/693802765
https://travis-ci.org/github/horms2/ovs/builds/693800395
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovs-ctl: Don't set hostname as external-id

2020-06-02 Thread Daniel Alvarez Sanchez
Thanks a lot!
I proposed the new approach at:

https://patchwork.ozlabs.org/project/openvswitch/patch/20200525152821.19838-1-dalva...@redhat.com/


On Sat, May 23, 2020 at 8:38 PM Han Zhou  wrote:
>
>
>
> On Sat, May 23, 2020 at 12:06 AM Daniel Alvarez  wrote:
> >
> >
> > Thanks a lot Terry!
> >
> >
> > > On 22 May 2020, at 23:28, Terry Wilson  wrote:
> > >
> > > 
> > >
> > >
> > >> On Wed, May 20, 2020 at 10:52 AM Daniel Alvarez  
> > >> wrote:
> > >> ovs-ctl started to add the hostname as external-id [0] at some point.
> > >>
> > >> However, this can be problematic as if it's already set by an external
> > >> entity it will get overwritten. In RHEL systems, systemd will invoke
> > >> ovs-ctl to start OVS and that will overwrite it to the hostname of the
> > >> machine.
> > > If the problem is just ovs-ctl *overwriting* an existing entity then can 
> > > we just change
> > >
> > >> -ovs_vsctl set Open_vSwitch . external-ids:hostname="$hn"
> > >
> > > to ovs_vsctl add Open_vSwitch . external_ids:hostname="$hn"
> > >
> > > since add doesn't overwrite existing values if the key is set[1].
> >
> > This sounds great to me!
> > Han, it looks like Terry’s suggestion would work for the HV onboarding 
> > scenario that you mentioned and also fit in the model where an external 
> > entity decides what name to give to a particular HV. What do you think?
>
> Yes, SGTM, too :)
>
> > >
> > > [1] From man ovs-vsctl
> > >   [--if-exists] add table record column [key=]value...
> > >   Adds the specified value or key-value pair to column in 
> > > record in table.  If column is a map, then key is required, otherwise it 
> > > is  prohib‐
> > >   ited.  If key already exists in a map column, then the 
> > > current value is not replaced (use the set command to replace an existing 
> > > value).
> > >
> > >   Without --if-exists, it is an error if record does not 
> > > exist.  With --if-exists, this command does nothing if record does not 
> > > exist.
> > >
> > > Terry
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 6/7] event: documentation notes on event library

2020-06-02 Thread Gowrishankar Muthukrishnan
Documentation notes on event library and its usage is included
in this patch.

Signed-off-by: Gowrishankar Muthukrishnan 
---
 Documentation/automake.mk|   1 +
 Documentation/topics/index.rst   |   1 +
 Documentation/topics/user-defined-events.rst | 306 +++
 3 files changed, 308 insertions(+)
 create mode 100644 Documentation/topics/user-defined-events.rst

diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index f85c432..6dafa58 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -55,6 +55,7 @@ DOC_SOURCE = \
Documentation/topics/ovsdb-replication.rst \
Documentation/topics/porting.rst \
Documentation/topics/tracing.rst \
+   Documentation/topics/user-defined-events.rst \
Documentation/topics/userspace-tso.rst \
Documentation/topics/windows.rst \
Documentation/howto/index.rst \
diff --git a/Documentation/topics/index.rst b/Documentation/topics/index.rst
index 08af3a2..9f53aed 100644
--- a/Documentation/topics/index.rst
+++ b/Documentation/topics/index.rst
@@ -53,3 +53,4 @@ OVS
userspace-tso
idl-compound-indexes
ovs-extensions
+   user-defined-events
diff --git a/Documentation/topics/user-defined-events.rst 
b/Documentation/topics/user-defined-events.rst
new file mode 100644
index 000..0c283c2
--- /dev/null
+++ b/Documentation/topics/user-defined-events.rst
@@ -0,0 +1,306 @@
+..
+  Copyright 2020, Red Hat, Inc.
+
+  Licensed under the Apache License, Version 2.0 (the "License"); you may
+  not use this file except in compliance with the License. You may obtain
+  a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  License for the specific language governing permissions and limitations
+  under the License.
+
+  Convention for heading levels in Open vSwitch documentation:
+
+  ===  Heading 0 (reserved for the title in a document)
+  ---  Heading 1
+  ~~~  Heading 2
+  +++  Heading 3
+  '''  Heading 4
+
+  Avoid deeper levels because they do not render well.
+
+===
+User defined events and notification
+
+
+**Note:** This feature is considered experimental.
+
+
+Events are objects or messages that are used to notify other software
+components for a state change. In openvswitch, multiple resources that
+are involved in processing packets, undergo various state changes.
+Many of these resources are monitored internally by Openvswitch in
+various statistical counters for an instance like coverage counters,
+perf metric counters etc. When there is poor performance in network I/O
+through openvswitch or even to check if any of the resources is not
+healthy, sometimes the communication exchange between system operator
+and developer is lengthy in terms of:
+
+  - having to rely on CLI but the information sometimes is too much
+to connect hotspot dots.
+  - Debugging tools like gdb is more developer friendly and can not
+be used when live traffic is performance oriented.
+  - Perf like tracing tool can help finding top consumers on system
+resources but it is from a system point of view.
+
+Hence, troubleshooting negotiations kick off between support and engineering
+teams by means of various patches and collated logs and processes can
+sometimes become tedious. Finally, we lose opportunity to quickly confirm
+the root cause for an issue.
+
+Event library in Openvswitch is to rescue system operator from this
+data pollution and provide an abstract view on existing resources
+useful for troubleshooting. Here are the advantages in using event
+library enable CLI for troubleshooting.
+
+  - Less intrusiveness in enabling event API in runtime across
+available resources.
+  - Operator could engage monitoring application for receiving
+event notifications asynchronously.
+  - It abstracts the way the processing resources could be
+diagnosed using readable definition file and it is the input
+to populate various events.
+  - Developer can apply its API in other Openvswitch components to
+create useful events for their troubleshooting. For an example,
+timer API can be used to measure time taken by one or more
+problematic functions.
+
+Event library is an independent library within Openvswitch which provides
+an abstract view of occurrence of an event in the data and control path
+to the system operator, in a very less intrusive and asynchronous way.
+Event creation and handling is implemented as shown in below figure.
+
+::
+
++--+
+|  ovs-appctl  |
+  

[ovs-dev] [PATCH 7/7] system-event: add event testsuite

2020-06-02 Thread Gowrishankar Muthukrishnan
Add iperf3 performance testsuite for checking events.

Signed-off-by: Gowrishankar Muthukrishnan 
---
This is not complete test suite as I would like to get first set of 
feedbacks on implementation and write unit tests accordingly. This
test is to check if event API works expectedly while running some
traffic profile.

Run ovs-testeventd before test (though not mandatory if "notify:"
is removed in json file as test generates).

$ sudo sh -c "while true;do ./utilities/ovs-testeventd /tmp/event.sock;done"
$ sudo -E PATH=$DPDK_BUILD/app:$PATH make check-event
$ for i in `seq 1 4`;do echo test $i; pushd tests/system-event-testsuite.dir/$i 
> /dev/null; grep 'sender' -A1 -B1 system-event-testsuite.log; cat 
event_list;popd > /dev/null; done

Sample results from my VM (not server :) ):

test 1
[ ID] Interval   Transfer Bandwidth   Retr
[  4]   0.00-10.00  sec   296 MBytes   249 Mbits/sec  837 sender
[  4]   0.00-10.00  sec   294 MBytes   246 Mbits/sec  receiver
List of events:
netdev_send:
  resource   : timer
  no_of_samples  : 50
  max duration   : 349 (us)
  min duration   : 3 (us)
  95% of times   : 169.623093 (us)
  hit count  : 10

rte_vhost_enqueue_burst:
  resource   : timer
  no_of_samples  : 50
  max duration   : 43 (us)
  min duration   : 1 (us)
  95% of times   : 3.891219 (us)
  hit count  : 10

List of events not yet added:
test 2
[ ID] Interval   Transfer Bandwidth   Retr
[  4]   0.00-10.01  sec   267 MBytes   224 Mbits/sec   31 sender
[  4]   0.00-10.01  sec   266 MBytes   223 Mbits/sec  receiver
List of events:
netdev_send:
  resource   : timer
  no_of_samples  : 50
  max duration   : 283 (us)
  min duration   : 2 (us)
  95% of times   : 150.735806 (us)
  hit count  : 10

rte_vhost_enqueue_burst:
  resource   : timer
  no_of_samples  : 50
  max duration   : 7 (us)
  min duration   : 1 (us)
  95% of times   : 3.955793 (us)
  hit count  : 10

List of events not yet added:
test 3
[ ID] Interval   Transfer Bandwidth   Retr
[  4]   0.00-10.00  sec   326 MBytes   273 Mbits/sec  599 sender
[  4]   0.00-10.00  sec   323 MBytes   270 Mbits/sec  receiver
List of events:
netdev_send:
  resource   : timer
  no_of_samples  : 50
  max duration   : 278 (us)
  min duration   : 6 (us)
  95% of times   : 179.230316 (us)
  hit count  : 10

rte_vhost_enqueue_burst:
  resource   : timer
  no_of_samples  : 50
  max duration   : 39 (us)
  min duration   : 1 (us)
  95% of times   : 4.549241 (us)
  hit count  : 10

List of events not yet added:
test 4
[ ID] Interval   Transfer Bandwidth   Retr
[  4]   0.00-10.01  sec   257 MBytes   216 Mbits/sec   75 sender
[  4]   0.00-10.01  sec   256 MBytes   214 Mbits/sec  receiver
List of events:
netdev_send:
  resource   : timer
  no_of_samples  : 50
  max duration   : 428 (us)
  min duration   : 4 (us)
  95% of times   : 129.270144 (us)
  hit count  : 11

rte_vhost_enqueue_burst:
  resource   : timer
  no_of_samples  : 50
  max duration   : 12 (us)
  min duration   : 1 (us)
  95% of times   : 3.113697 (us)
  hit count  : 11

List of events not yet added:

---
 tests/automake.mk |  18 +++
 tests/system-event.at | 351 ++
 2 files changed, 369 insertions(+)
 create mode 100644 tests/system-event.at

diff --git a/tests/automake.mk b/tests/automake.mk
index cbba5b1..ea8f318 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -8,6 +8,7 @@ EXTRA_DIST += \
$(SYSTEM_AFXDP_TESTSUITE_AT) \
$(SYSTEM_OFFLOADS_TESTSUITE_AT) \
$(SYSTEM_DPDK_TESTSUITE_AT) \
+   $(SYSTEM_EVENT_TESTSUITE_AT) \
$(OVSDB_CLUSTER_TESTSUITE_AT) \
$(TESTSUITE) \
$(SYSTEM_KMOD_TESTSUITE) \
@@ -16,6 +17,7 @@ EXTRA_DIST += \
$(SYSTEM_AFXDP_TESTSUITE) \
$(SYSTEM_OFFLOADS_TESTSUITE) \
$(SYSTEM_DPDK_TESTSUITE) \
+   $(SYSTEM_EVENT_TESTSUITE) \
$(OVSDB_CLUSTER_TESTSUITE) \
tests/atlocal.in \
$(srcdir)/package.m4 \
@@ -184,6 +186,12 @@ SYSTEM_DPDK_TESTSUITE_AT = \
tests/system-dpdk-testsuite.at \
tests/system-dpdk.at
 
+SYSTEM_EVENT_TESTSUITE_AT = \
+   tests/system-common-macros.at \
+   tests/system-dpdk-macros.at \
+   tests/system-dpdk-testsuite.at \
+   tests/system-event.at
+
 check_SCRIPTS += tests/atlocal
 
 TESTSUITE = $(srcdir)/tests/testsuite
@@ -194,6 +202,7 @@ SYSTEM_TSO_TESTSUITE = $(srcdir)/tests/system-tso-testsuite
 SYSTEM_AFXDP_TESTSUITE = $(srcdir)/tests/system-afxdp-testsuite
 SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite
 SYSTEM_DPDK_TESTSUITE = $(srcdir)/tests/system-dpdk-testsuite
+SYSTEM_EVENT_TESTSUITE = $(srcdir)/tests/system-event-testsuite
 OVSDB_CLUSTER_TESTSUITE = $(srcdir)/tests/ovsdb-cluster-testsuite
 DISTCLEANFILES += tests/atconfig 

[ovs-dev] [PATCH 3/7] dpif: hook timer event api in datapath functions

2020-06-02 Thread Gowrishankar Muthukrishnan
This patch injects timing some important functions as listed
below in datapath. This can be expanded but the list is short
for now.

Signed-off-by: Gowrishankar Muthukrishnan 
---
 lib/dpif-netdev.c | 5 -
 lib/netdev-dpdk.c | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 51c8885..4db07c4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -20,6 +20,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -4297,7 +4298,9 @@ dp_netdev_pmd_flush_output_on_port(struct 
dp_netdev_pmd_thread *pmd,
 output_cnt = dp_packet_batch_size(>output_pkts);
 ovs_assert(output_cnt > 0);
 
-netdev_send(p->port->netdev, tx_qid, >output_pkts, dynamic_txqs);
+EVENT_FUNC_TIMER_TRY(netdev_send,
+ p->port->netdev,
+ tx_qid, >output_pkts, dynamic_txqs);
 dp_packet_batch_init(>output_pkts);
 
 /* Update time of the next flush. */
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 44ebf96..3493b05 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -17,6 +17,7 @@
 #include 
 #include "netdev-dpdk.h"
 
+#include 
 #include 
 #include 
 #include 
@@ -2605,6 +2606,8 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 unsigned int tx_pkts;
 
 tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, cur_pkts, cnt);
+EVENT_RETFUNC_TIMER_TRY(tx_pkts, rte_vhost_enqueue_burst,
+vid, vhost_qid, cur_pkts, cnt);
 if (OVS_LIKELY(tx_pkts)) {
 /* Packets have been sent.*/
 cnt -= tx_pkts;
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 5/7] event: test daemon to receive notification from event api

2020-06-02 Thread Gowrishankar Muthukrishnan
This is sample tiny daemon which receives event notification
from the registered events and displays the message info in the
terminal.

Signed-off-by: Gowrishankar Muthukrishnan 
---
 utilities/automake.mk  |   6 +-
 utilities/ovs-testeventd.c | 156 +
 2 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 utilities/ovs-testeventd.c

diff --git a/utilities/automake.mk b/utilities/automake.mk
index e2e22c3..0d6a648 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -3,7 +3,8 @@ bin_PROGRAMS += \
utilities/ovs-testcontroller \
utilities/ovs-dpctl \
utilities/ovs-ofctl \
-   utilities/ovs-vsctl
+   utilities/ovs-vsctl \
+   utilities/ovs-testeventd
 bin_SCRIPTS += utilities/ovs-docker \
utilities/ovs-pki \
utilities/ovs-pcap \
@@ -118,6 +119,9 @@ utilities_ovs_ofctl_LDADD = \
 utilities_ovs_vsctl_SOURCES = utilities/ovs-vsctl.c
 utilities_ovs_vsctl_LDADD = lib/libopenvswitch.la
 
+utilities_ovs_testeventd_SOURCES = utilities/ovs-testeventd.c
+utilities_ovs_testeventd_LDADD = lib/libopenvswitch.la
+
 if LINUX
 noinst_PROGRAMS += utilities/nlmon
 utilities_nlmon_SOURCES = utilities/nlmon.c
diff --git a/utilities/ovs-testeventd.c b/utilities/ovs-testeventd.c
new file mode 100644
index 000..6d97e37
--- /dev/null
+++ b/utilities/ovs-testeventd.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "stream.h"
+#include "jsonrpc.h"
+#include "openvswitch/json.h"
+#include "openvswitch/poll-loop.h"
+#include "util.h"
+
+static int
+handle_rpc(struct jsonrpc *rpc, struct jsonrpc_msg *msg, bool *done)
+{
+if (msg->type == JSONRPC_REQUEST) {
+struct jsonrpc_msg *reply = NULL;
+
+if (!strcmp(msg->method, "echo")) {
+reply = jsonrpc_create_reply(json_clone(msg->params), msg->id);
+} else if (!strcmp(msg->method, "ovs_event")) {
+char *params_s = json_to_string(msg->params, 0);
+char *id_s = json_to_string(msg->id, 0);
+printf("received msg %s(%s), id=%s\n",
+ msg->method, params_s, id_s);
+
+struct json *object = json_object_create();
+json_object_put_string(object, "status", "ok");
+reply = jsonrpc_create_reply(object, msg->id);
+free(params_s);
+free(id_s);
+} else {
+struct json *error = json_object_create();
+json_object_put_string(error, "error", "unknown method");
+reply = jsonrpc_create_error(error, msg->id);
+ovs_error(0, "unknown msg %s", msg->method);
+}
+
+jsonrpc_send(rpc, reply);
+return 0;
+} else if (msg->type == JSONRPC_NOTIFY) {
+if (!strcmp(msg->method, "shutdown")) {
+*done = true;
+return 0;
+} else {
+ovs_error(0, "unknown notification %s", msg->method);
+return ENOTTY;
+}
+} else {
+ovs_error(0, "unsolicited JSON-RPC reply or error");
+return EPROTO;
+}
+}
+
+int main(int argc, char *argv[])
+{
+char *path;
+struct pstream *listen;
+struct jsonrpc **rpcs;
+size_t n_rpcs, allocated_rpcs;
+int error;
+bool done;
+
+if (argc != 2) {
+printf("USAGE: %s socket_file_to_create\n", argv[0]);
+return -1;
+}
+
+path = xasprintf("punix:%s", argv[1]);
+error = jsonrpc_pstream_open(path, , 0);
+free(path);
+
+if (error) {
+printf("unable to open listening socket");
+return -1;
+}
+
+rpcs = NULL;
+done = false;
+n_rpcs = allocated_rpcs = 0;
+for (;;) {
+struct stream *stream;
+size_t i;
+
+error = pstream_accept(listen, );
+if (!error) {
+if (n_rpcs >= allocated_rpcs) {
+rpcs = x2nrealloc(rpcs, _rpcs, sizeof *rpcs);
+}
+rpcs[n_rpcs++] = jsonrpc_open(stream);
+} else if (error != EAGAIN) {
+printf("pstream_accept failed");
+}
+
+for (i = 0; i < n_rpcs;) {
+struct jsonrpc *rpc = rpcs[i];
+struct jsonrpc_msg *msg;
+
+jsonrpc_run(rpc);
+if (!jsonrpc_get_backlog(rpc)) {
+error = jsonrpc_recv(rpc, );
+if (!error) {
+error 

[ovs-dev] [PATCH 4/7] coverage: support conditional notification from events

2020-06-02 Thread Gowrishankar Muthukrishnan
This patch enables monitoring coverage counters through event API.

Signed-off-by: Gowrishankar Muthukrishnan 
---
 lib/coverage.c | 61 ++
 lib/coverage.h |  4 
 lib/timeval.c  |  3 +++
 3 files changed, 68 insertions(+)

diff --git a/lib/coverage.c b/lib/coverage.c
index a95b6aa..f4c872e 100644
--- a/lib/coverage.c
+++ b/lib/coverage.c
@@ -25,6 +25,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "openvswitch/vlog.h"
+#include "event.h"
 
 VLOG_DEFINE_THIS_MODULE(coverage);
 
@@ -36,6 +37,7 @@ static size_t allocated_coverage_counters = 0;
 static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER;
 
 DEFINE_STATIC_PER_THREAD_DATA(long long int, coverage_clear_time, LLONG_MIN);
+DEFINE_STATIC_PER_THREAD_DATA(long long int, coverage_event_time, LLONG_MIN);
 static long long int coverage_run_time = LLONG_MIN;
 
 /* Index counter used to compute the moving average array's index. */
@@ -51,6 +53,7 @@ static bool coverage_read_counter(const char *name,
 void
 coverage_counter_register(struct coverage_counter* counter)
 {
+event_register(counter->name, EV_RESOURCE_COVERAGE);
 if (n_coverage_counters >= allocated_coverage_counters) {
 coverage_counters = x2nrealloc(coverage_counters,
_coverage_counters,
@@ -414,3 +417,61 @@ coverage_read_counter(const char *name, unsigned long long 
int *count)
 
 return false;
 }
+
+static void
+coverage_event__(bool trylock)
+{
+long long int now, *thread_time;
+
+now = time_msec();
+thread_time = coverage_event_time_get();
+
+/* Initialize the coverage_event_time. */
+if (*thread_time == LLONG_MIN) {
+*thread_time = now + COVERAGE_EVENT_INTERVAL;
+}
+
+if (now >= *thread_time) {
+size_t i;
+
+if (trylock) {
+/* Returns if cannot acquire lock. */
+if (event_try_lock()) {
+return;
+}
+if (ovs_mutex_trylock(_mutex)) {
+event_unlock();
+return;
+}
+} else {
+event_lock();
+ovs_mutex_lock(_mutex);
+}
+
+for (i = 0; i < n_coverage_counters && event_count(); i++) {
+struct coverage_counter *c = coverage_counters[i];
+struct event *ev;
+
+ev = event_get(c->name);
+if (!ev || ev->def.resource != EV_RESOURCE_COVERAGE) {
+continue;
+}
+
+ev->current = c->total;
+ovs_mutex_unlock(_mutex);
+ev->rate_min = coverage_array_sum(c->min, MIN_AVG_LEN) / 60.0;
+ev->rate_hour = coverage_array_sum(c->hr,  HR_AVG_LEN) / 3600.0;
+ovs_mutex_lock(_mutex);
+}
+
+ovs_mutex_unlock(_mutex);
+event_unlock();
+*thread_time = now + COVERAGE_EVENT_INTERVAL;
+}
+}
+
+void
+coverage_try_event(void)
+{
+coverage_event__(true);
+}
diff --git a/lib/coverage.h b/lib/coverage.h
index dea990e..4cdf956 100644
--- a/lib/coverage.h
+++ b/lib/coverage.h
@@ -39,6 +39,9 @@ BUILD_ASSERT_DECL(6 % COVERAGE_RUN_INTERVAL == 0);
 #define COVERAGE_CLEAR_INTERVAL  1000
 BUILD_ASSERT_DECL(COVERAGE_RUN_INTERVAL % COVERAGE_CLEAR_INTERVAL == 0);
 
+#define COVERAGE_EVENT_INTERVAL  1000
+BUILD_ASSERT_DECL(6 % COVERAGE_EVENT_INTERVAL == 0);
+
 /* Defines the moving average array length. */
 #define MIN_AVG_LEN (6/COVERAGE_RUN_INTERVAL)
 #define HR_AVG_LEN  60
@@ -90,5 +93,6 @@ void coverage_log(void);
 void coverage_clear(void);
 void coverage_try_clear(void);
 void coverage_run(void);
+void coverage_try_event(void);
 
 #endif /* coverage.h */
diff --git a/lib/timeval.c b/lib/timeval.c
index 193c7ba..3e3d86f 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -35,6 +35,7 @@
 #include "ovs-thread.h"
 #include "signals.h"
 #include "seq.h"
+#include "stopwatch.h"
 #include "unixctl.h"
 #include "util.h"
 #include "openvswitch/vlog.h"
@@ -294,6 +295,8 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
 time_init();
 coverage_clear();
 coverage_run();
+coverage_try_event();
+
 if (*last_wakeup && !thread_is_pmd()) {
 log_poll_interval(*last_wakeup);
 }
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/7] stopwatch: expose required api for event library

2020-06-02 Thread Gowrishankar Muthukrishnan
Timer notifications in event library require handling stopwatch
across one or more events, so share few required functions.

Signed-off-by: Gowrishankar Muthukrishnan 
---
 lib/stopwatch.c | 20 +++-
 lib/stopwatch.h |  9 +
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/stopwatch.c b/lib/stopwatch.c
index f560216..0858d72 100644
--- a/lib/stopwatch.c
+++ b/lib/stopwatch.c
@@ -463,7 +463,7 @@ stopwatch_thread(void *ign OVS_UNUSED)
 return NULL;
 }
 
-static void
+void
 stopwatch_exit(void)
 {
 struct shash_node *node, *node_next;
@@ -526,6 +526,24 @@ stopwatch_create(const char *name, enum stopwatch_units 
units)
 }
 
 void
+stopwatch_delete(const char *name)
+{
+ovs_mutex_lock(_lock);
+shash_find_and_delete(, name);
+ovs_mutex_unlock(_lock);
+}
+
+unsigned int
+stopwatch_count(void)
+{
+unsigned int n;
+ovs_mutex_lock(_lock);
+n = shash_count();
+ovs_mutex_unlock(_lock);
+return n;
+}
+
+void
 stopwatch_start(const char *name, unsigned long long ts)
 {
 struct stopwatch_packet *pkt = stopwatch_packet_create(OP_START_SAMPLE);
diff --git a/lib/stopwatch.h b/lib/stopwatch.h
index 91abd64..2f8e56c 100644
--- a/lib/stopwatch.h
+++ b/lib/stopwatch.h
@@ -56,4 +56,13 @@ bool stopwatch_get_stats(const char *name, struct 
stopwatch_stats *stats);
 /* Block until all enqueued samples have been processed. */
 void stopwatch_sync(void);
 
+/* Delete stopwatch */
+void stopwatch_delete(const char *name);
+
+/* Count stopwatches */
+unsigned int stopwatch_count(void);
+
+/* Exit stopwatch */
+void stopwatch_exit(void);
+
 #endif /* stopwatch.h */
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/7] event: add api to manage user defined events

2020-06-02 Thread Gowrishankar Muthukrishnan
User defined notification is helpful in troubleshooting OVS for
a particular event to appear as user will define. Based on the
type of event, event is handled by a dedicated thread and user
is notified in registered socket (jsonrpc) if provided.

This patch comprises of below:

1. event API which is central library to define event based on
   the input file from user (json format).
2. dedicated thread to monitor registered events and issue
   notification.
3. event macros to hook at counters and functions for evaluating
   events by above thread.

Signed-off-by: Gowrishankar Muthukrishnan 
---
 lib/automake.mk   |   2 +
 lib/event.c   | 764 ++
 lib/event.h   | 269 +++
 vswitchd/bridge.c |   2 +
 4 files changed, 1037 insertions(+)
 create mode 100644 lib/event.c
 create mode 100644 lib/event.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 86940cc..f022baa 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -90,6 +90,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/dpif-provider.h \
lib/dpif.c \
lib/dpif.h \
+   lib/event.h \
+   lib/event.c \
lib/heap.c \
lib/heap.h \
lib/dynamic-string.c \
diff --git a/lib/event.c b/lib/event.c
new file mode 100644
index 000..77f5396
--- /dev/null
+++ b/lib/event.c
@@ -0,0 +1,764 @@
+/*
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include "event.h"
+#include "jsonrpc.h"
+#include "openvswitch/poll-loop.h"
+#include "openvswitch/shash.h"
+#include "openvswitch/vlog.h"
+#include "ovs-thread.h"
+#include "smap.h"
+#include "stream.h"
+#include "timeval.h"
+#include "unixctl.h"
+#include "util.h"
+
+VLOG_DEFINE_THIS_MODULE(event);
+
+static bool user_defined_event;
+static struct shash events;
+static struct shash events_reg;
+
+static pthread_t event_thread_id;
+static struct ovs_mutex event_mutex = OVS_MUTEX_INITIALIZER;
+
+static bool
+handle_conditional_event(struct event *ev)
+OVS_REQUIRES(event_mutex)
+{
+bool ok;
+
+switch (ev->def.op) {
+case EV_OP_EQ:
+ok = (ev->current == ev->def.value);
+break;
+case EV_OP_NE:
+ok = (ev->current != ev->def.value);
+break;
+case EV_OP_GT:
+ok = (ev->current > ev->def.value);
+break;
+case EV_OP_GE:
+ok = (ev->current >= ev->def.value);
+break;
+case EV_OP_LT:
+ok = (ev->current < ev->def.value);
+break;
+case EV_OP_LE:
+ok = (ev->current <= ev->def.value);
+break;
+case EV_OP_NONE:
+default:
+ok = false;
+}
+
+if (ok) {
+ev->hit++;
+}
+
+return ok;
+}
+
+static bool
+handle_message_event(struct event *ev)
+OVS_REQUIRES(event_mutex)
+{
+if (ev->def.resource == EV_RESOURCE_TIMER) {
+if (ev->count >= ev->def.samples) {
+stopwatch_get_stats(ev->name, >stats);
+ev->count = 0;
+ev->hit++;
+}
+}
+return true;
+}
+
+static void *
+event_thread(void *args OVS_UNUSED)
+{
+for (;;) {
+long long int next_refresh;
+struct shash_node *node;
+struct event *ev;
+int error;
+bool ok;
+
+next_refresh = time_msec() + EVENT_POLL_INTERVAL;
+do {
+ovs_mutex_lock(_mutex);
+SHASH_FOR_EACH (node, ) {
+ev = (struct event *)node->data;
+
+if (ev->hit && ev->hit > ev->hit_prev) {
+continue;
+}
+
+if (ev->type == EV_CONDITIONAL) {
+ok = handle_conditional_event(ev);
+} else if (ev->type == EV_MESSAGE) {
+ok = handle_message_event(ev);
+} else {
+continue;
+}
+
+if (ok && ev->notify.cb) {
+ovs_mutex_unlock(_mutex);
+error = ev->notify.cb(ev);
+ev->hit_prev = (!error) ? ev->hit: ev->hit_prev;
+ovs_mutex_lock(_mutex);
+}
+
+}
+ovs_mutex_unlock(_mutex);
+
+poll_timer_wait_until(next_refresh);
+poll_block();
+} while (time_msec() < next_refresh);
+}
+
+return NULL;
+}
+
+int

[ovs-dev] [PATCH 0/7] user defined events and notification

2020-06-02 Thread Gowrishankar Muthukrishnan
This patchset enables new feature "user defined events and notification".
In short, by this feature, one can enable asynchronous and less intrusive
monitoring in Openvswitch data plane and control plane resources, by
defining one or more events to populate inside Openvswitch. Based on the
type of the event, their behaviour is defined (to wait for some condition
be met or to inform in work flow). More information on how to use this
feature is in Documentation/topics/user-defined-events.rst.

This feature is targeting troubleshooting and live debugging situations
in production deployment, hence more helpful for operators and developers
debugging functional and performance issues in Openvswitch. This proposal
is not extensively coveraing all traceable places in Openvswitch, but 
demonstrating how it can be helpful and be extended based on the usecases.
Based on how it is perceived, I can work on enhancing this for optimations
whereever needed.

Gowrishankar Muthukrishnan (7):
  event: add api to manage user defined events
  stopwatch: expose required api for event library
  dpif: hook timer event api in datapath functions
  coverage: support conditional notification from events
  event: test daemon to receive notification from event api
  event: documentation notes on event library
  system-event: add event testsuite

 Documentation/automake.mk|   1 +
 Documentation/topics/index.rst   |   1 +
 Documentation/topics/user-defined-events.rst | 306 +++
 lib/automake.mk  |   2 +
 lib/coverage.c   |  61 +++
 lib/coverage.h   |   4 +
 lib/dpif-netdev.c|   5 +-
 lib/event.c  | 764 +++
 lib/event.h  | 269 ++
 lib/netdev-dpdk.c|   3 +
 lib/stopwatch.c  |  20 +-
 lib/stopwatch.h  |   9 +
 lib/timeval.c|   3 +
 tests/automake.mk|  18 +
 tests/system-event.at| 351 
 utilities/automake.mk|   6 +-
 utilities/ovs-testeventd.c   | 156 ++
 vswitchd/bridge.c|   2 +
 18 files changed, 1978 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/topics/user-defined-events.rst
 create mode 100644 lib/event.c
 create mode 100644 lib/event.h
 create mode 100644 tests/system-event.at
 create mode 100644 utilities/ovs-testeventd.c

-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel

2020-06-02 Thread Simon Horman
On Tue, Jun 02, 2020 at 04:58:32PM +0800, Tonghao Zhang wrote:
> On Tue, Jun 2, 2020 at 4:33 PM Simon Horman  
> wrote:
> >
> > On Tue, Jun 02, 2020 at 04:17:21PM +0800, Tonghao Zhang wrote:
> > > On Fri, May 29, 2020 at 5:06 PM Simon Horman  
> > > wrote:
> > > >
> > > > On Mon, May 18, 2020 at 09:44:42AM +0800, xiangxia.m@gmail.com 
> > > > wrote:
> > > > > From: Tonghao Zhang 
> > > > >
> > > > > This patch allows users to offload the TC flower rules with tunnel
> > > > > mask. In some case, mask is useful as wildcards.
> > > > >
> > > > > For example:
> > > > > $ ovs-appctl dpctl/add-flow \
> > > > > "tunnel(dst=3.3.3.100,src=3.3.3.200/255.255.255.0,tp_dst=4789),\
> > > > > recirc_id(0),in_port(3),eth(),eth_type(0x0800),ipv4()" "2"
> > > >
> > > > Hi,
> > > >
> > > > Sorry for the delay in responding.
> > > >
> > > > I think it would be useful to spell out more clearly in the changelog
> > > > what this patch does. From my reading of the code it:
> > > >
> > > > Allows masked match of the following, where previously supported
> > > > an exact match was supported:
> > > > * Remote (dst) tunnel endpoint address
> > > > * Local (src) tunnel endpoint address
> > > > * Remote (dst) tunnel endpoint UDP port
> > > >
> > > > And also allows masked match of the following, where previously no
> > > > match was supported;
> > > > * Local (std) tunnel endpoint UDP port
> > > Ok, Thanks, I will update it in NEWS.
> >
> > Thanks. Please also include this information in the changelog.
> >
> > > > I think it would also be useful to describe a use-case where this
> > > > is used. The command line example (above) is a good start.
> > > Yes, I will update the commit log and describe a use-case for it.
> > > >
> > > > Also, not strictly related to this patch.
> > > > I think patch series that have more than one patch should
> > > > have a cover letter, in this case [PATCH 0/3], describing
> > > > the overall aim of the patchset.
> > > >
> > > >
> > > > The other patches in this series seem fine to me.
> > > > Please consider addressing the issues I have raised here
> > > > and posting a v2, with all three patches and a cover letter.
> > > >
> > > > ...
> > > >
> > > > > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > > > > index 875ebef71941..39cf25f63ce0 100644
> > > > > --- a/lib/netdev-offload-tc.c
> > > > > +++ b/lib/netdev-offload-tc.c
> > > > > @@ -633,13 +633,22 @@ parse_tc_flower_to_match(struct tc_flower 
> > > > > *flower,
> > > > >  match_set_tun_id(match, flower->key.tunnel.id);
> > > > >  match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
> > > > >  }
> > > > > -if (flower->key.tunnel.ipv4.ipv4_dst) {
> > > > > -match_set_tun_src(match, 
> > > > > flower->key.tunnel.ipv4.ipv4_src);
> > > > > -match_set_tun_dst(match, 
> > > > > flower->key.tunnel.ipv4.ipv4_dst);
> > > > > -} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
> > > > > -   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
> > > > > -match_set_tun_ipv6_src(match, 
> > > > > >key.tunnel.ipv6.ipv6_src);
> > > > > -match_set_tun_ipv6_dst(match, 
> > > > > >key.tunnel.ipv6.ipv6_dst);
> > > > > +if (flower->mask.tunnel.ipv4.ipv4_dst ||
> > > > > +flower->mask.tunnel.ipv4.ipv4_src) {
> > > >
> > > > The change to the if condition seems separate from the change
> > > > described in the changelog. What is the use-case for this?
> > > I think that may be used for matching the tunnel src packets only
> > > which will be dropped.
> > > because user may don't want that packets sent to the host.
> >
> > I think it would be best to break out this (and the corresponding IPv6
> > change) into a separate patch with a changelog that describes why
> > this is being done and, if appropriate, an update to NEWS.
> Hi Simon
> Did you mean that the two patches as below as a series.
> [PATCH 1/3] dpif-netlink: Generate ufids for installing TC flowers
> [PATCH 3/3] netdev-offload-tc: Use ipv6_addr_is_set instead of is_all_zeros
> 
> and this patch as a separate patch(with changelog)
> [PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel

Sorry for not being clear.

I meant expand the series to four patch.
Where the new patch just includes the following changes:

-if (flower->key.tunnel.ipv4.ipv4_dst) {
+if (flower->mask.tunnel.ipv4.ipv4_dst ||
+flower->mask.tunnel.ipv4.ipv4_src) {

...

-} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
-   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
+} else if (ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_dst) ||
+   ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_src)) {

> 
> and other question about changelog, which file I should update, or I just 
> update
> it in a commit message.

Sorry again for not being clear, I meant the commit message.

> 
> I found the changelog in: debian/changelog
> > 

[ovs-dev] [PATCH v3 5/5] classifier: fix conj_set use-after-free issue

2020-06-02 Thread Linhaifeng
use ovsrcu_set first then use ovsrcu_postpone

CC: Ben Pfaff 
Fixes: 18080541d276 (\classifier: Add support for conjunctive matches.\)

Acked-by: Yanqin Wei 
Signed-off-by: Linhaifeng 
---
 lib/classifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index f2c3497c2..6bff76e07 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -249,11 +249,11 @@ cls_rule_set_conjunctions(struct cls_rule *cr,
 unsigned int old_n = old ? old->n : 0;
 
 if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof *conj))) {
+ovsrcu_set(>conj_set,
+   cls_conjunction_set_alloc(match, conj, n));
 if (old) {
 ovsrcu_postpone(free, old);
 }
-ovsrcu_set(>conj_set,
-   cls_conjunction_set_alloc(match, conj, n));
 }
 }
 
-- 
2.21.0.windows.1
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 4/5] ofproto: fix actions use-after-free issue

2020-06-02 Thread Linhaifeng
use ovsrcu_set first then use ovsrcu_postpone

CC: Eelco Chaudron 
Fixes: f82b3b6a2f4d (\ofproto-dpif-upcall: Only call ovsrcu_postpone()
on active actions\)

Acked-by: Yanqin Wei 
Signed-off-by: Linhaifeng 
---
 ofproto/ofproto-dpif-upcall.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 5e08ef10d..be6dafb78 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1658,11 +1658,10 @@ ukey_set_actions(struct udpif_key *ukey, const struct 
ofpbuf *actions)
 struct ofpbuf *old_actions = ovsrcu_get_protected(struct ofpbuf *,
   >actions);
 
+ovsrcu_set(>actions, ofpbuf_clone(actions));
 if (old_actions) {
 ovsrcu_postpone(ofpbuf_delete, old_actions);
 }
-
-ovsrcu_set(>actions, ofpbuf_clone(actions));
 }
 
 static struct udpif_key *
-- 
2.21.0.windows.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 3/5] ofproto: fix vlans use-after-free issue

2020-06-02 Thread Linhaifeng
use ovsrcu_set first then use ovsrcu_postpone

CC: Jarno Rajahalme 
Fixes: 4f6780691653 (\mirror: Allow concurrent lookups.\)

Acked-by: Yanqin Wei 
Signed-off-by: Linhaifeng 
---
 ofproto/ofproto-dpif-mirror.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 343b75f0e..343100c08 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -276,9 +276,9 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 hmapx_destroy(_map);
 
 if (vlans || src_vlans) {
+unsigned long *new_vlans = vlan_bitmap_clone(src_vlans);
+ovsrcu_set(>vlans, new_vlans);
 ovsrcu_postpone(free, vlans);
-vlans = vlan_bitmap_clone(src_vlans);
-ovsrcu_set(>vlans, vlans);
 }
 
 mirror->out = out;
-- 
2.21.0.windows.1
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 2/5] pvector: fix pvector use-after-free issue

2020-06-02 Thread Linhaifeng
use ovsrcu_set first then use ovsrcu_postpone

CC: Jarno Rajahalme 
Fixes: da9cfca6e2d7 (\Revert "pvector: Expose non-concurrent priority
vector."\)

Acked-by: Yanqin Wei 
Signed-off-by: Linhaifeng 
---
 lib/pvector.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/pvector.c b/lib/pvector.c
index cc527fdc4..aa8c6cb24 100644
--- a/lib/pvector.c
+++ b/lib/pvector.c
@@ -67,10 +67,11 @@ pvector_init(struct pvector *pvec)
 void
 pvector_destroy(struct pvector *pvec)
 {
+struct pvector_impl *old = pvector_impl_get(pvec);
 free(pvec->temp);
 pvec->temp = NULL;
-ovsrcu_postpone(free, pvector_impl_get(pvec));
 ovsrcu_set(>impl, NULL); /* Poison. */
+ovsrcu_postpone(free, old);
 }
 
 /* Iterators for callers that need the 'index' afterward. */
@@ -205,11 +206,11 @@ pvector_change_priority(struct pvector *pvec, void *ptr, 
int priority)
 /* Make the modified pvector available for iteration. */
 void pvector_publish__(struct pvector *pvec)
 {
-struct pvector_impl *temp = pvec->temp;
-
+struct pvector_impl *new = pvec->temp;
+struct pvector_impl *old = ovsrcu_get_protected(struct pvector_impl *,
+   >impl);
 pvec->temp = NULL;
-pvector_impl_sort(temp); /* Also removes gaps. */
-ovsrcu_postpone(free, ovsrcu_get_protected(struct pvector_impl *,
-   >impl));
-ovsrcu_set(>impl, temp);
+pvector_impl_sort(new); /* Also removes gaps. */
+ovsrcu_set(>impl, new);
+ovsrcu_postpone(free, old);
 }
-- 
2.21.0.windows.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 1/5] ovs-rcu: fix rcu use-after-free issue

2020-06-02 Thread Linhaifeng
We should update rcu pointer first then use ovsrcu_postpone to free
otherwise maybe cause use-after-free.
e.g.,reader indicates momentary quiescent and access old pointer after
writer postpone free old pointer and before setting new pointer.

CC: Ben Pfaff 
Fixes: 0f2ea84841e1 (\ovs-rcu: New library.\)

Acked-by: Yanqin Wei 
Signed-off-by: Linhaifeng 
---
 lib/ovs-rcu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h
index ecc4c9201..98c238aea 100644
--- a/lib/ovs-rcu.h
+++ b/lib/ovs-rcu.h
@@ -118,10 +118,10 @@
  * void
  * change_flow(struct flow *new_flow)
  * {
+ * struct flow *old_flow = ovsrcu_get_protected(struct flow *, )
  * ovs_mutex_lock();
- * ovsrcu_postpone(free,
- * ovsrcu_get_protected(struct flow *, ));
  * ovsrcu_set(, new_flow);
+ * ovsrcu_postpone(free, old_flow);
  * ovs_mutex_unlock();
  * }
  *
-- 
2.21.0.windows.1
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel

2020-06-02 Thread Tonghao Zhang
On Tue, Jun 2, 2020 at 4:33 PM Simon Horman  wrote:
>
> On Tue, Jun 02, 2020 at 04:17:21PM +0800, Tonghao Zhang wrote:
> > On Fri, May 29, 2020 at 5:06 PM Simon Horman  
> > wrote:
> > >
> > > On Mon, May 18, 2020 at 09:44:42AM +0800, xiangxia.m@gmail.com wrote:
> > > > From: Tonghao Zhang 
> > > >
> > > > This patch allows users to offload the TC flower rules with tunnel
> > > > mask. In some case, mask is useful as wildcards.
> > > >
> > > > For example:
> > > > $ ovs-appctl dpctl/add-flow \
> > > > "tunnel(dst=3.3.3.100,src=3.3.3.200/255.255.255.0,tp_dst=4789),\
> > > > recirc_id(0),in_port(3),eth(),eth_type(0x0800),ipv4()" "2"
> > >
> > > Hi,
> > >
> > > Sorry for the delay in responding.
> > >
> > > I think it would be useful to spell out more clearly in the changelog
> > > what this patch does. From my reading of the code it:
> > >
> > > Allows masked match of the following, where previously supported
> > > an exact match was supported:
> > > * Remote (dst) tunnel endpoint address
> > > * Local (src) tunnel endpoint address
> > > * Remote (dst) tunnel endpoint UDP port
> > >
> > > And also allows masked match of the following, where previously no
> > > match was supported;
> > > * Local (std) tunnel endpoint UDP port
> > Ok, Thanks, I will update it in NEWS.
>
> Thanks. Please also include this information in the changelog.
>
> > > I think it would also be useful to describe a use-case where this
> > > is used. The command line example (above) is a good start.
> > Yes, I will update the commit log and describe a use-case for it.
> > >
> > > Also, not strictly related to this patch.
> > > I think patch series that have more than one patch should
> > > have a cover letter, in this case [PATCH 0/3], describing
> > > the overall aim of the patchset.
> > >
> > >
> > > The other patches in this series seem fine to me.
> > > Please consider addressing the issues I have raised here
> > > and posting a v2, with all three patches and a cover letter.
> > >
> > > ...
> > >
> > > > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > > > index 875ebef71941..39cf25f63ce0 100644
> > > > --- a/lib/netdev-offload-tc.c
> > > > +++ b/lib/netdev-offload-tc.c
> > > > @@ -633,13 +633,22 @@ parse_tc_flower_to_match(struct tc_flower *flower,
> > > >  match_set_tun_id(match, flower->key.tunnel.id);
> > > >  match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
> > > >  }
> > > > -if (flower->key.tunnel.ipv4.ipv4_dst) {
> > > > -match_set_tun_src(match, flower->key.tunnel.ipv4.ipv4_src);
> > > > -match_set_tun_dst(match, flower->key.tunnel.ipv4.ipv4_dst);
> > > > -} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
> > > > -   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
> > > > -match_set_tun_ipv6_src(match, 
> > > > >key.tunnel.ipv6.ipv6_src);
> > > > -match_set_tun_ipv6_dst(match, 
> > > > >key.tunnel.ipv6.ipv6_dst);
> > > > +if (flower->mask.tunnel.ipv4.ipv4_dst ||
> > > > +flower->mask.tunnel.ipv4.ipv4_src) {
> > >
> > > The change to the if condition seems separate from the change
> > > described in the changelog. What is the use-case for this?
> > I think that may be used for matching the tunnel src packets only
> > which will be dropped.
> > because user may don't want that packets sent to the host.
>
> I think it would be best to break out this (and the corresponding IPv6
> change) into a separate patch with a changelog that describes why
> this is being done and, if appropriate, an update to NEWS.
Hi Simon
Did you mean that the two patches as below as a series.
[PATCH 1/3] dpif-netlink: Generate ufids for installing TC flowers
[PATCH 3/3] netdev-offload-tc: Use ipv6_addr_is_set instead of is_all_zeros

and this patch as a separate patch(with changelog)
[PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel

and other question about changelog, which file I should update, or I just update
it in a commit message.

I found the changelog in: debian/changelog
> Thanks!
>
> > > > +match_set_tun_dst_masked(match,
> > > > + flower->key.tunnel.ipv4.ipv4_dst,
> > > > + 
> > > > flower->mask.tunnel.ipv4.ipv4_dst);
> > > > +match_set_tun_src_masked(match,
> > > > + flower->key.tunnel.ipv4.ipv4_src,
> > > > + 
> > > > flower->mask.tunnel.ipv4.ipv4_src);
> > > > +} else if 
> > > > (ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_dst) ||
> > > > +   
> > > > ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_src)) {
> > > > +match_set_tun_ipv6_dst_masked(match,
> > > > +  
> > > > >key.tunnel.ipv6.ipv6_dst,
> > > > +  
> > > > >mask.tunnel.ipv6.ipv6_dst);
> > > > +

Re: [ovs-dev] [PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel

2020-06-02 Thread Simon Horman
On Tue, Jun 02, 2020 at 04:17:21PM +0800, Tonghao Zhang wrote:
> On Fri, May 29, 2020 at 5:06 PM Simon Horman  
> wrote:
> >
> > On Mon, May 18, 2020 at 09:44:42AM +0800, xiangxia.m@gmail.com wrote:
> > > From: Tonghao Zhang 
> > >
> > > This patch allows users to offload the TC flower rules with tunnel
> > > mask. In some case, mask is useful as wildcards.
> > >
> > > For example:
> > > $ ovs-appctl dpctl/add-flow \
> > > "tunnel(dst=3.3.3.100,src=3.3.3.200/255.255.255.0,tp_dst=4789),\
> > > recirc_id(0),in_port(3),eth(),eth_type(0x0800),ipv4()" "2"
> >
> > Hi,
> >
> > Sorry for the delay in responding.
> >
> > I think it would be useful to spell out more clearly in the changelog
> > what this patch does. From my reading of the code it:
> >
> > Allows masked match of the following, where previously supported
> > an exact match was supported:
> > * Remote (dst) tunnel endpoint address
> > * Local (src) tunnel endpoint address
> > * Remote (dst) tunnel endpoint UDP port
> >
> > And also allows masked match of the following, where previously no
> > match was supported;
> > * Local (std) tunnel endpoint UDP port
> Ok, Thanks, I will update it in NEWS.

Thanks. Please also include this information in the changelog.

> > I think it would also be useful to describe a use-case where this
> > is used. The command line example (above) is a good start.
> Yes, I will update the commit log and describe a use-case for it.
> >
> > Also, not strictly related to this patch.
> > I think patch series that have more than one patch should
> > have a cover letter, in this case [PATCH 0/3], describing
> > the overall aim of the patchset.
> >
> >
> > The other patches in this series seem fine to me.
> > Please consider addressing the issues I have raised here
> > and posting a v2, with all three patches and a cover letter.
> >
> > ...
> >
> > > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > > index 875ebef71941..39cf25f63ce0 100644
> > > --- a/lib/netdev-offload-tc.c
> > > +++ b/lib/netdev-offload-tc.c
> > > @@ -633,13 +633,22 @@ parse_tc_flower_to_match(struct tc_flower *flower,
> > >  match_set_tun_id(match, flower->key.tunnel.id);
> > >  match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
> > >  }
> > > -if (flower->key.tunnel.ipv4.ipv4_dst) {
> > > -match_set_tun_src(match, flower->key.tunnel.ipv4.ipv4_src);
> > > -match_set_tun_dst(match, flower->key.tunnel.ipv4.ipv4_dst);
> > > -} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
> > > -   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
> > > -match_set_tun_ipv6_src(match, 
> > > >key.tunnel.ipv6.ipv6_src);
> > > -match_set_tun_ipv6_dst(match, 
> > > >key.tunnel.ipv6.ipv6_dst);
> > > +if (flower->mask.tunnel.ipv4.ipv4_dst ||
> > > +flower->mask.tunnel.ipv4.ipv4_src) {
> >
> > The change to the if condition seems separate from the change
> > described in the changelog. What is the use-case for this?
> I think that may be used for matching the tunnel src packets only
> which will be dropped.
> because user may don't want that packets sent to the host.

I think it would be best to break out this (and the corresponding IPv6
change) into a separate patch with a changelog that describes why
this is being done and, if appropriate, an update to NEWS.

Thanks!

> > > +match_set_tun_dst_masked(match,
> > > + flower->key.tunnel.ipv4.ipv4_dst,
> > > + flower->mask.tunnel.ipv4.ipv4_dst);
> > > +match_set_tun_src_masked(match,
> > > + flower->key.tunnel.ipv4.ipv4_src,
> > > + flower->mask.tunnel.ipv4.ipv4_src);
> > > +} else if (ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_dst) 
> > > ||
> > > +   ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_src)) 
> > > {
> > > +match_set_tun_ipv6_dst_masked(match,
> > > +  
> > > >key.tunnel.ipv6.ipv6_dst,
> > > +  
> > > >mask.tunnel.ipv6.ipv6_dst);
> > > +match_set_tun_ipv6_src_masked(match,
> > > +  
> > > >key.tunnel.ipv6.ipv6_src,
> > > +  
> > > >mask.tunnel.ipv6.ipv6_src);
> > >  }
> > >  if (flower->key.tunnel.tos) {
> > >  match_set_tun_tos_masked(match, flower->key.tunnel.tos,
> >
> > ...
> 
> 
> 
> -- 
> Best regards, Tonghao
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Australian supplier of Medical/Protective masks / N95 with CE, FDA certificate

2020-06-02 Thread Ryan + IP Supply
Good Tuesday,

Please check if you have any requirement for Disposable Medical/Protective
masks / N95 / Face Shields / Protective suits with CE and FDA certificate.

*Country of Origin: Vietnam *

We can ship from our US/Australia offices or directly from Vietnam.

Below is the price list in USD.

Best Regards,

Ryan

A: Unit 8 / 4A Bachell Ave | Lidcombe, NSW 2141

Ph: 02 80616886 | Website:
http://lists.ipsupply.com.au/lists/lt.php?tid=mJgAH0kM1wK4Kzwb4jHq4uIa0MpoYDfliwO9ntR6NokCSQ638w65MKypzohgbjiG

E: r...@ipsupply.com.au 




 To change your details and to choose which lists to be subscribed to,
visit your personal preferences page

.

 Or you can Unsubscribe

from all future mailings.

 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/3] netdev-offload-tc: Allow to match the IP and port mask of tunnel

2020-06-02 Thread Tonghao Zhang
On Fri, May 29, 2020 at 5:06 PM Simon Horman  wrote:
>
> On Mon, May 18, 2020 at 09:44:42AM +0800, xiangxia.m@gmail.com wrote:
> > From: Tonghao Zhang 
> >
> > This patch allows users to offload the TC flower rules with tunnel
> > mask. In some case, mask is useful as wildcards.
> >
> > For example:
> > $ ovs-appctl dpctl/add-flow \
> > "tunnel(dst=3.3.3.100,src=3.3.3.200/255.255.255.0,tp_dst=4789),\
> > recirc_id(0),in_port(3),eth(),eth_type(0x0800),ipv4()" "2"
>
> Hi,
>
> Sorry for the delay in responding.
>
> I think it would be useful to spell out more clearly in the changelog
> what this patch does. From my reading of the code it:
>
> Allows masked match of the following, where previously supported
> an exact match was supported:
> * Remote (dst) tunnel endpoint address
> * Local (src) tunnel endpoint address
> * Remote (dst) tunnel endpoint UDP port
>
> And also allows masked match of the following, where previously no
> match was supported;
> * Local (std) tunnel endpoint UDP port
Ok, Thanks, I will update it in NEWS.
> I think it would also be useful to describe a use-case where this
> is used. The command line example (above) is a good start.
Yes, I will update the commit log and describe a use-case for it.
>
> Also, not strictly related to this patch.
> I think patch series that have more than one patch should
> have a cover letter, in this case [PATCH 0/3], describing
> the overall aim of the patchset.
>
>
> The other patches in this series seem fine to me.
> Please consider addressing the issues I have raised here
> and posting a v2, with all three patches and a cover letter.
>
> ...
>
> > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > index 875ebef71941..39cf25f63ce0 100644
> > --- a/lib/netdev-offload-tc.c
> > +++ b/lib/netdev-offload-tc.c
> > @@ -633,13 +633,22 @@ parse_tc_flower_to_match(struct tc_flower *flower,
> >  match_set_tun_id(match, flower->key.tunnel.id);
> >  match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
> >  }
> > -if (flower->key.tunnel.ipv4.ipv4_dst) {
> > -match_set_tun_src(match, flower->key.tunnel.ipv4.ipv4_src);
> > -match_set_tun_dst(match, flower->key.tunnel.ipv4.ipv4_dst);
> > -} else if (!is_all_zeros(>key.tunnel.ipv6.ipv6_dst,
> > -   sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
> > -match_set_tun_ipv6_src(match, 
> > >key.tunnel.ipv6.ipv6_src);
> > -match_set_tun_ipv6_dst(match, 
> > >key.tunnel.ipv6.ipv6_dst);
> > +if (flower->mask.tunnel.ipv4.ipv4_dst ||
> > +flower->mask.tunnel.ipv4.ipv4_src) {
>
> The change to the if condition seems separate from the change
> described in the changelog. What is the use-case for this?
I think that may be used for matching the tunnel src packets only
which will be dropped.
because user may don't want that packets sent to the host.
> > +match_set_tun_dst_masked(match,
> > + flower->key.tunnel.ipv4.ipv4_dst,
> > + flower->mask.tunnel.ipv4.ipv4_dst);
> > +match_set_tun_src_masked(match,
> > + flower->key.tunnel.ipv4.ipv4_src,
> > + flower->mask.tunnel.ipv4.ipv4_src);
> > +} else if (ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_dst) ||
> > +   ipv6_addr_is_set(>mask.tunnel.ipv6.ipv6_src)) {
> > +match_set_tun_ipv6_dst_masked(match,
> > +  
> > >key.tunnel.ipv6.ipv6_dst,
> > +  
> > >mask.tunnel.ipv6.ipv6_dst);
> > +match_set_tun_ipv6_src_masked(match,
> > +  
> > >key.tunnel.ipv6.ipv6_src,
> > +  
> > >mask.tunnel.ipv6.ipv6_src);
> >  }
> >  if (flower->key.tunnel.tos) {
> >  match_set_tun_tos_masked(match, flower->key.tunnel.tos,
>
> ...



-- 
Best regards, Tonghao
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ofproto: Fix statistics of datapath operations.

2020-06-02 Thread zhaozhanxu
If 'stats->n_packets' is less than 'op->ukey->stats.n_packets',
the calculation result will be wrong.
'stats->n_bytes' is the same.

Signed-off-by: zhaozhanxu 
---
 ofproto/ofproto-dpif-upcall.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 5e08ef10d..80678f843 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -2385,8 +2385,12 @@ push_dp_ops(struct udpif *udpif, struct ukey_op *ops, 
size_t n_ops)
 transition_ukey(op->ukey, UKEY_EVICTED);
 push->used = MAX(stats->used, op->ukey->stats.used);
 push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags;
-push->n_packets = stats->n_packets - op->ukey->stats.n_packets;
-push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes;
+push->n_packets = (stats->n_packets > op->ukey->stats.n_packets
+   ? stats->n_packets - op->ukey->stats.n_packets
+   : 0);
+push->n_bytes = (stats->n_bytes > op->ukey->stats.n_bytes
+ ? stats->n_bytes - op->ukey->stats.n_bytes
+ : 0);
 ovs_mutex_unlock(>ukey->mutex);
 } else {
 push = stats;
-- 
2.20.1


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] netdev-offload: Implement terse dump support

2020-06-02 Thread Roi Dayan



On 2020-06-02 1:35 AM, 0-day Robot wrote:
> Bleep bloop.  Greetings Roi Dayan, I am a robot and I have tried out your 
> patch.
> Thanks for your contribution.
> 
> I encountered some error that I wasn't expecting.  See the details below.
> 
> 
> checkpatch:
> WARNING: Line is 80 characters long (recommended limit is 79)
> #252 FILE: lib/netdev-offload-tc.c:1824:
> parse_tc_flower_to_match(, match, actions, stats, attrs, buf, 
> false);
> 
> Lines checked: 368, Warnings: 1, Errors: 0
> 
> 
> Please check this out.  If you feel there has been an error, please email 
> acon...@redhat.com
> 
> Thanks,
> 0-day Robot
> 

it is pretty short line so we didn't want to do redundant line breaks.
is it important or can we skip this?

should we maybe update ovs checkpatch to 100 chars to match
linux kernel? i guess the current 79 is also started because
the kernel was 79?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] ovs rcu: update rcu pointer first

2020-06-02 Thread Linhaifeng
We should update rcu pointer first then use ovsrcu_postpone to free
otherwise maybe cause use-after-free.
e.g.,reader indicates momentary quiescent and access old pointer after
writer postpone free old pointer and before setting new pointer.

Signed-off-by: Linhaifeng 
---
 lib/classifier.c  |  4 ++--
 lib/ovs-rcu.h |  4 ++--
 lib/pvector.c | 15 ---
 ofproto/ofproto-dpif-mirror.c |  4 ++--
 ofproto/ofproto-dpif-upcall.c |  3 +--
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index f2c3497c2..6bff76e07 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -249,11 +249,11 @@ cls_rule_set_conjunctions(struct cls_rule *cr,
 unsigned int old_n = old ? old->n : 0;
 
 if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof *conj))) {
+ovsrcu_set(>conj_set,
+   cls_conjunction_set_alloc(match, conj, n));
 if (old) {
 ovsrcu_postpone(free, old);
 }
-ovsrcu_set(>conj_set,
-   cls_conjunction_set_alloc(match, conj, n));
 }
 }
 
diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h
index ecc4c9201..98c238aea 100644
--- a/lib/ovs-rcu.h
+++ b/lib/ovs-rcu.h
@@ -118,10 +118,10 @@
  * void
  * change_flow(struct flow *new_flow)
  * {
+ * struct flow *old_flow = ovsrcu_get_protected(struct flow *, )
  * ovs_mutex_lock();
- * ovsrcu_postpone(free,
- * ovsrcu_get_protected(struct flow *, ));
  * ovsrcu_set(, new_flow);
+ * ovsrcu_postpone(free, old_flow);
  * ovs_mutex_unlock();
  * }
  *
diff --git a/lib/pvector.c b/lib/pvector.c
index cc527fdc4..aa8c6cb24 100644
--- a/lib/pvector.c
+++ b/lib/pvector.c
@@ -67,10 +67,11 @@ pvector_init(struct pvector *pvec)
 void
 pvector_destroy(struct pvector *pvec)
 {
+struct pvector_impl *old = pvector_impl_get(pvec);
 free(pvec->temp);
 pvec->temp = NULL;
-ovsrcu_postpone(free, pvector_impl_get(pvec));
 ovsrcu_set(>impl, NULL); /* Poison. */
+ovsrcu_postpone(free, old);
 }
 
 /* Iterators for callers that need the 'index' afterward. */
@@ -205,11 +206,11 @@ pvector_change_priority(struct pvector *pvec, void *ptr, 
int priority)
 /* Make the modified pvector available for iteration. */
 void pvector_publish__(struct pvector *pvec)
 {
-struct pvector_impl *temp = pvec->temp;
-
+struct pvector_impl *new = pvec->temp;
+struct pvector_impl *old = ovsrcu_get_protected(struct pvector_impl *,
+   >impl);
 pvec->temp = NULL;
-pvector_impl_sort(temp); /* Also removes gaps. */
-ovsrcu_postpone(free, ovsrcu_get_protected(struct pvector_impl *,
-   >impl));
-ovsrcu_set(>impl, temp);
+pvector_impl_sort(new); /* Also removes gaps. */
+ovsrcu_set(>impl, new);
+ovsrcu_postpone(free, old);
 }
diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 343b75f0e..343100c08 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -276,9 +276,9 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 hmapx_destroy(_map);
 
 if (vlans || src_vlans) {
+unsigned long *new_vlans = vlan_bitmap_clone(src_vlans);
+ovsrcu_set(>vlans, new_vlans);
 ovsrcu_postpone(free, vlans);
-vlans = vlan_bitmap_clone(src_vlans);
-ovsrcu_set(>vlans, vlans);
 }
 
 mirror->out = out;
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 5e08ef10d..be6dafb78 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1658,11 +1658,10 @@ ukey_set_actions(struct udpif_key *ukey, const struct 
ofpbuf *actions)
 struct ofpbuf *old_actions = ovsrcu_get_protected(struct ofpbuf *,
   >actions);
 
+ovsrcu_set(>actions, ofpbuf_clone(actions));
 if (old_actions) {
 ovsrcu_postpone(ofpbuf_delete, old_actions);
 }
-
-ovsrcu_set(>actions, ofpbuf_clone(actions));
 }
 
 static struct udpif_key *
-- 
2.21.0.windows.1
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 4/5] Introduce async IO in JSONRPC

2020-06-02 Thread anton . ivanov
From: Anton Ivanov 

1. Pull out buffering and send/receive ops from json rpc

2. Make the SSL send zero copy (it was creating an ofpbuf
out of an existing ofpbuf data without necessity).

3. Add vector IO to stream-fd to make flushing more
efficient. Also makes queueing for stream-fd and stream-ssl
roughly identical.

4. Unify backlog management

5. Make use of the full capacity of the incoming buffer and
not only when it is empty.

6. Allow for IO to be run in worker threads

7. Various minor fixes to enable async io - make rx errors
visible to tx and vice versa. Make activity tracking for
reconnect async friendly, etc.

8. Enable Async IO in ovsdb

Signed-off-by: Anton Ivanov 
---
 lib/async-io.c | 523 +
 lib/async-io.h |  86 +++
 lib/automake.mk|   2 +
 lib/jsonrpc.c  | 151 +---
 lib/stream-fd.c|  82 +++
 lib/stream-provider.h  |  34 ++-
 lib/stream-ssl.c   |  64 -
 lib/stream-tcp.c   |   2 +
 lib/stream-unix.c  |   2 +
 lib/stream-windows.c   |   2 +
 ovsdb/jsonrpc-server.c |  33 ++-
 ovsdb/ovsdb-server.c   |   2 +
 12 files changed, 872 insertions(+), 111 deletions(-)
 create mode 100644 lib/async-io.c
 create mode 100644 lib/async-io.h

diff --git a/lib/async-io.c b/lib/async-io.c
new file mode 100644
index 0..62725a653
--- /dev/null
+++ b/lib/async-io.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2020 Red Hat Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include "stream-provider.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "coverage.h"
+#include "fatal-signal.h"
+#include "flow.h"
+#include "jsonrpc.h"
+#include "openflow/nicira-ext.h"
+#include "openflow/openflow.h"
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/ofp-print.h"
+#include "openvswitch/ofpbuf.h"
+#include "openvswitch/vlog.h"
+#include "ovs-thread.h"
+#include "ovs-atomic.h"
+#include "packets.h"
+#include "openvswitch/poll-loop.h"
+#include "random.h"
+#include "socket-util.h"
+#include "util.h"
+#include "timeval.h"
+#include "async-io.h"
+#include "ovs-numa.h"
+
+VLOG_DEFINE_THIS_MODULE(async_io);
+
+static bool allow_async_io = false;
+
+static bool async_io_setup = false;
+static bool kill_async_io = false;
+
+static struct ovs_mutex init_mutex = OVS_MUTEX_INITIALIZER;
+
+static struct ovs_list io_pools = OVS_LIST_INITIALIZER(_pools);
+
+static int pool_size;
+
+static struct async_io_pool *io_pool = NULL;
+
+static int do_async_recv(struct async_data *data);
+static int do_stream_flush(struct async_data *data);
+
+static inline bool not_in_error(struct async_data *data) {
+int rx_error, tx_error;
+
+if (!data->valid) {
+return false;
+}
+
+atomic_read_relaxed(>rx_error, _error);
+atomic_read_relaxed(>tx_error, _error);
+
+return (
+((rx_error > 0) || (rx_error == -EAGAIN)) &&
+((tx_error >= 0) || (tx_error == -EAGAIN))
+);
+}
+
+static inline bool in_error(struct async_data *data) {
+return ! not_in_error(data);
+}
+
+
+static void *default_async_io_helper(void *arg) {
+struct async_io_control *io_control =
+(struct async_io_control *) arg;
+struct async_data *data;
+int retval;
+
+do {
+ovs_mutex_lock(_control->mutex);
+latch_poll(_control->async_latch);
+LIST_FOR_EACH (data, list_node, _control->work_items) {
+long backlog, oldbacklog;
+ovs_mutex_lock(>mutex);
+retval = -EAGAIN;
+if (not_in_error(data)) {
+/*
+ * We stop reading if the input queue is full
+ */
+if (byteq_headroom(>input) != 0) {
+retval = do_async_recv(data);
+} else {
+poll_timer_wait(1);
+retval = 0;
+}
+}
+if (not_in_error(data) && (retval > 0 || retval == -EAGAIN)) {
+stream_recv_wait(data->stream);
+}
+atomic_read_relaxed(>backlog, );
+if (not_in_error(data)) {
+stream_run(data->stream);
+do_stream_flush(data);
+}
+atomic_read_relaxed(>backlog, );
+if (not_in_error(data)) {
+if (backlog) {
+/* upper layers will refuse to process rx
+ 

[ovs-dev] [PATCH v2 1/5] Enable kernel probes and map stream probes onto them

2020-06-02 Thread anton . ivanov
From: Anton Ivanov 

1. Fix probe logic. The stream_or_pstream_needs_probes
function returning a mix of integer and boolean. As a
result probes were NOT turned off in a number of cases on
unix domain sockets and other transports where there
should be no probing. It now returns -1 (do not know),
0 (no probe needed), 1 (definitely needs probes).

2. Allow delegating probing to keepalive facilities in the
stream layer if avaialable.

3. Provide TCP KEEPALIVE probing at stream layer on supported
platforms for stream-ssl and stream-tcp.

Signed-off-by: Anton Ivanov 
---
 lib/jsonrpc.c  | 36 --
 lib/rconn.c|  2 +-
 lib/socket-util.c  | 44 ++
 lib/socket-util.h  |  8 
 lib/stream-fd.c|  9 +
 lib/stream-provider.h  |  6 ++
 lib/stream-ssl.c   |  8 
 lib/stream-tcp.c   |  1 +
 lib/stream-unix.c  |  1 +
 lib/stream-windows.c   |  1 +
 lib/stream.c   | 17 ++--
 lib/stream.h   |  1 +
 ovsdb/jsonrpc-server.c |  2 +-
 13 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index ed748dbde..15e8d3527 100644
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -787,6 +787,7 @@ struct jsonrpc_session {
 int last_error;
 unsigned int seqno;
 uint8_t dscp;
+int probe_interval;
 };
 
 static void
@@ -839,6 +840,7 @@ jsonrpc_session_open_multiple(const struct svec *remotes, 
bool retry)
 s->seqno = 0;
 s->dscp = 0;
 s->last_error = 0;
+s->probe_interval = reconnect_get_probe_interval(s->reconnect);
 
 const char *name = reconnect_get_name(s->reconnect);
 if (!pstream_verify_name(name)) {
@@ -848,8 +850,9 @@ jsonrpc_session_open_multiple(const struct svec *remotes, 
bool retry)
 reconnect_set_backoff(s->reconnect, INT_MAX, INT_MAX);
 }
 
-if (!stream_or_pstream_needs_probes(name)) {
+if (stream_or_pstream_needs_probes(name) < 1) {
 reconnect_set_probe_interval(s->reconnect, 0);
+s->probe_interval = 0;
 }
 
 return s;
@@ -879,6 +882,12 @@ jsonrpc_session_open_unreliably(struct jsonrpc *jsonrpc, 
uint8_t dscp)
 s->stream = NULL;
 s->pstream = NULL;
 s->seqno = 1;
+s->probe_interval = reconnect_get_probe_interval(s->reconnect);
+
+if (stream_or_pstream_needs_probes(reconnect_get_name(s->reconnect)) < 1) {
+reconnect_set_probe_interval(s->reconnect, 0);
+s->probe_interval = 0;
+}
 
 return s;
 }
@@ -934,6 +943,12 @@ jsonrpc_session_connect(struct jsonrpc_session *s)
 error = jsonrpc_stream_open(name, >stream, s->dscp);
 if (!error) {
 reconnect_connecting(s->reconnect, time_msec());
+if (stream_set_probe_interval(s->stream, s->probe_interval)) {
+/* we have delegated probing to the stream layer */
+reconnect_set_probe_interval(s->reconnect, 0);
+} else {
+reconnect_set_probe_interval(s->reconnect, s->probe_interval);
+}
 } else {
 s->last_error = error;
 }
@@ -967,6 +982,12 @@ jsonrpc_session_run(struct jsonrpc_session *s)
 jsonrpc_session_disconnect(s);
 }
 reconnect_connected(s->reconnect, time_msec());
+if (stream_set_probe_interval(stream, s->probe_interval)) {
+/* we have delegated probing to the stream layer */
+reconnect_set_probe_interval(s->reconnect, 0);
+} else {
+reconnect_set_probe_interval(s->reconnect, s->probe_interval);
+}
 s->rpc = jsonrpc_open(stream);
 s->seqno++;
 } else if (error != EAGAIN) {
@@ -1008,6 +1029,12 @@ jsonrpc_session_run(struct jsonrpc_session *s)
 if (!error) {
 reconnect_connected(s->reconnect, time_msec());
 s->rpc = jsonrpc_open(s->stream);
+if (stream_set_probe_interval(s->stream, s->probe_interval)) {
+/* we have delegated probing to the stream layer */
+reconnect_set_probe_interval(s->reconnect, 0);
+} else {
+reconnect_set_probe_interval(s->reconnect, s->probe_interval);
+}
 s->stream = NULL;
 s->seqno++;
 } else if (error != EAGAIN) {
@@ -1231,7 +1258,12 @@ void
 jsonrpc_session_set_probe_interval(struct jsonrpc_session *s,
int probe_interval)
 {
-reconnect_set_probe_interval(s->reconnect, probe_interval);
+s->probe_interval = probe_interval;
+if (s->stream && s->probe_interval) {
+if (!stream_set_probe_interval(s->stream, probe_interval)) {
+   reconnect_set_probe_interval(s->reconnect, probe_interval);
+}
+}
 }
 
 /* Sets the DSCP value used for 's''s connection to 'dscp'.  If this is
diff --git a/lib/rconn.c b/lib/rconn.c
index 

[ovs-dev] [PATCH v2 2/5] Make ByteQ safe for simultaneous producer/consumer

2020-06-02 Thread anton . ivanov
From: Anton Ivanov 

A ByteQ with unlocked head and tail is unsafe for simultaneous
consume/produce.

If simultaneous use is desired, these either need to be locked
or there needs to be a third atomic or lock guarded variable
"used".

An atomic "used" allows the producer to enqueue safely because
it "owns" the head and even if the consumer changes the head
it will only increase the space available versus the value in
"used".

Once the data has been written and the enqueued should be
made visible it fenced and the used is updated.

Similar for "consumer" - it can safely consume now as it
"owns" tail and never reads beyond tail + used (wrapped
around as needed).

Signed-off-by: Anton Ivanov 
---
 lib/byteq.c | 17 -
 lib/byteq.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/lib/byteq.c b/lib/byteq.c
index 3f865cf9e..da40c2530 100644
--- a/lib/byteq.c
+++ b/lib/byteq.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include "util.h"
+#include "ovs-atomic.h"
 
 /* Initializes 'q' as an empty byteq that uses the 'size' bytes of 'buffer' to
  * store data.  'size' must be a power of 2.
@@ -32,13 +33,16 @@ byteq_init(struct byteq *q, uint8_t *buffer, size_t size)
 q->buffer = buffer;
 q->size = size;
 q->head = q->tail = 0;
+q->used = ATOMIC_VAR_INIT(0);
 }
 
 /* Returns the number of bytes current queued in 'q'. */
 int
 byteq_used(const struct byteq *q)
 {
-return q->head - q->tail;
+int retval;
+atomic_read_relaxed(>used, );
+return retval;
 }
 
 /* Returns the number of bytes that can be added to 'q' without overflow. */
@@ -68,9 +72,11 @@ byteq_is_full(const struct byteq *q)
 void
 byteq_put(struct byteq *q, uint8_t c)
 {
+int discard;
 ovs_assert(!byteq_is_full(q));
 *byteq_head(q) = c;
 q->head++;
+atomic_add(>used, 1, );
 }
 
 /* Adds the 'n' bytes in 'p' at the head of 'q', which must have at least 'n'
@@ -79,6 +85,7 @@ void
 byteq_putn(struct byteq *q, const void *p_, size_t n)
 {
 const uint8_t *p = p_;
+int discard;
 ovs_assert(byteq_avail(q) >= n);
 while (n > 0) {
 size_t chunk = MIN(n, byteq_headroom(q));
@@ -86,6 +93,7 @@ byteq_putn(struct byteq *q, const void *p_, size_t n)
 byteq_advance_head(q, chunk);
 p += chunk;
 n -= chunk;
+atomic_add(>used, chunk, );
 }
 }
 
@@ -103,9 +111,11 @@ uint8_t
 byteq_get(struct byteq *q)
 {
 uint8_t c;
+int discard;
 ovs_assert(!byteq_is_empty(q));
 c = *byteq_tail(q);
 q->tail++;
+atomic_sub(>used, 1, );
 return c;
 }
 
@@ -168,8 +178,10 @@ byteq_tail(const struct byteq *q)
 void
 byteq_advance_tail(struct byteq *q, unsigned int n)
 {
+int discard;
 ovs_assert(byteq_tailroom(q) >= n);
 q->tail += n;
+atomic_sub_relaxed(>used, n, );
 }
 
 /* Returns the byte after the last in-use byte of 'q', the point at which new
@@ -195,6 +207,9 @@ byteq_headroom(const struct byteq *q)
 void
 byteq_advance_head(struct byteq *q, unsigned int n)
 {
+int discard;
 ovs_assert(byteq_headroom(q) >= n);
 q->head += n;
+atomic_thread_fence(memory_order_release);
+atomic_add_relaxed(>used, n, );
 }
diff --git a/lib/byteq.h b/lib/byteq.h
index d73e3684e..e829efab0 100644
--- a/lib/byteq.h
+++ b/lib/byteq.h
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include "ovs-atomic.h"
 
 /* General-purpose circular queue of bytes. */
 struct byteq {
@@ -26,6 +27,7 @@ struct byteq {
 unsigned int size;  /* Number of bytes allocated for 'buffer'. */
 unsigned int head;  /* Head of queue. */
 unsigned int tail;  /* Chases the head. */
+atomic_int used;
 };
 
 void byteq_init(struct byteq *, uint8_t *buffer, size_t size);
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 3/5] Fixes for build with extra warnings

2020-06-02 Thread anton . ivanov
From: Anton Ivanov 

Signed-off-by: Anton Ivanov 
---
 lib/byteq.c | 14 +++---
 lib/byteq.h | 12 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/lib/byteq.c b/lib/byteq.c
index da40c2530..8a7b4f1dc 100644
--- a/lib/byteq.c
+++ b/lib/byteq.c
@@ -38,16 +38,16 @@ byteq_init(struct byteq *q, uint8_t *buffer, size_t size)
 
 /* Returns the number of bytes current queued in 'q'. */
 int
-byteq_used(const struct byteq *q)
+byteq_used(struct byteq *q)
 {
-int retval;
+atomic_int retval;
 atomic_read_relaxed(>used, );
 return retval;
 }
 
 /* Returns the number of bytes that can be added to 'q' without overflow. */
 int
-byteq_avail(const struct byteq *q)
+byteq_avail(struct byteq *q)
 {
 return q->size - byteq_used(q);
 }
@@ -55,7 +55,7 @@ byteq_avail(const struct byteq *q)
 /* Returns true if no bytes are queued in 'q',
  * false if at least one byte is queued.  */
 bool
-byteq_is_empty(const struct byteq *q)
+byteq_is_empty(struct byteq *q)
 {
 return !byteq_used(q);
 }
@@ -63,7 +63,7 @@ byteq_is_empty(const struct byteq *q)
 /* Returns true if 'q' has no room to queue additional bytes,
  * false if 'q' has room for at least one more byte.  */
 bool
-byteq_is_full(const struct byteq *q)
+byteq_is_full(struct byteq *q)
 {
 return !byteq_avail(q);
 }
@@ -158,7 +158,7 @@ byteq_read(struct byteq *q, int fd)
 /* Returns the number of contiguous bytes of in-use space starting at the tail
  * of 'q'. */
 int
-byteq_tailroom(const struct byteq *q)
+byteq_tailroom(struct byteq *q)
 {
 int used = byteq_used(q);
 int tail_to_end = q->size - (q->tail & (q->size - 1));
@@ -195,7 +195,7 @@ byteq_head(struct byteq *q)
 /* Returns the number of contiguous bytes of free space starting at the head
  * of 'q'. */
 int
-byteq_headroom(const struct byteq *q)
+byteq_headroom(struct byteq *q)
 {
 int avail = byteq_avail(q);
 int head_to_end = q->size - (q->head & (q->size - 1));
diff --git a/lib/byteq.h b/lib/byteq.h
index e829efab0..fde9722ed 100644
--- a/lib/byteq.h
+++ b/lib/byteq.h
@@ -31,10 +31,10 @@ struct byteq {
 };
 
 void byteq_init(struct byteq *, uint8_t *buffer, size_t size);
-int byteq_used(const struct byteq *);
-int byteq_avail(const struct byteq *);
-bool byteq_is_empty(const struct byteq *);
-bool byteq_is_full(const struct byteq *);
+int byteq_used(struct byteq *);
+int byteq_avail(struct byteq *);
+bool byteq_is_empty(struct byteq *);
+bool byteq_is_full(struct byteq *);
 void byteq_put(struct byteq *, uint8_t c);
 void byteq_putn(struct byteq *, const void *, size_t n);
 void byteq_put_string(struct byteq *, const char *);
@@ -43,9 +43,9 @@ int byteq_write(struct byteq *, int fd);
 int byteq_read(struct byteq *, int fd);
 
 uint8_t *byteq_head(struct byteq *);
-int byteq_headroom(const struct byteq *);
+int byteq_headroom(struct byteq *);
 void byteq_advance_head(struct byteq *, unsigned int n);
-int byteq_tailroom(const struct byteq *);
+int byteq_tailroom(struct byteq *);
 const uint8_t *byteq_tail(const struct byteq *);
 void byteq_advance_tail(struct byteq *, unsigned int n);
 
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 5/5] Disable "merge updates on backlogged connections" test

2020-06-02 Thread anton . ivanov
From: Anton Ivanov 

The merge on backlogged behavior is predicated on prohibiting
processing of incoming transactions while there is an outstanding
backlog.

This behavior does not make sense if the xmit/recv is async from
actual business logic.

Signed-off-by: Anton Ivanov 
---
 tests/ovsdb-server.at | 190 +-
 1 file changed, 95 insertions(+), 95 deletions(-)

diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at
index 0b15758f2..e389d721a 100644
--- a/tests/ovsdb-server.at
+++ b/tests/ovsdb-server.at
@@ -1128,103 +1128,103 @@ AT_KEYWORDS([ovsdb server convert needs-conversion 
cluster])
 ovsdb_check_online_conversion cluster
 AT_CLEANUP
 
-AT_SETUP([ovsdb-server combines updates on backlogged connections])
-on_exit 'kill `cat *.pid`'
-
-# The maximum socket receive buffer size is important for this test, which
-# tests behavior when the receive buffer overflows.
-if test -e /proc/sys/net/core/rmem_max; then
-# Linux
-rmem_max=`cat /proc/sys/net/core/rmem_max`
-elif rmem_max=`sysctl -n net.inet.tcp.recvbuf_max 2>/dev/null`; then
-: # FreeBSD, NetBSD
-else
-# Don't know how to get maximum socket receive buffer on this OS
-AT_SKIP_IF([:])
-fi
-
-# Calculate the number of iterations we need to queue.  Each of the
-# iterations we execute, by itself, yields a monitor update of about
-# 25 kB, so fill up that much space plus a few for luck.
-n_iterations=`expr $rmem_max / 25000 + 5`
-echo rmem_max=$rmem_max n_iterations=$n_iterations
-
-# If there's too much queuing skip the test to avoid timing out.
-AT_SKIP_IF([test $rmem_max -gt 1048576])
-
-# Calculate the exact number of monitor updates expected for $n_iterations,
-# assuming no updates are combined.  The "extra" update is for the initial
-# contents of the database.
-n_updates=`expr $n_iterations \* 3 + 1`
-
-# Start an ovsdb-server with the vswitchd schema.
-OVSDB_INIT([db])
-AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --log-file 
--remote=punix:db.sock db],
-  [0], [ignore], [ignore])
-
-# Executes a set of transactions that add a bridge with 100 ports, and
-# then deletes that bridge.  This yields three monitor updates that
-# add up to about 25 kB in size.
+#AT_SETUP([ovsdb-server combines updates on backlogged connections])
+#on_exit 'kill `cat *.pid`'
 #
-# The update also increments a counter held in the database so that we can
-# verify that the overall effect of the transactions took effect (e.g.
-# monitor updates at the end weren't just dropped).  We add an arbitrary
-# string to the counter to make grepping for it more reliable.
-counter=0
-trigger_big_update () {
-counter=`expr $counter + 1`
-ovs-vsctl --no-wait -- set open_vswitch . system_version=xyzzy$counter
-ovs-vsctl --no-wait -- add-br br0 $add
-ovs-vsctl --no-wait -- del-br br0
-}
-add_ports () {
-for j in `seq 1 100`; do
-printf " -- add-port br0 p%d" $j
-done
-}
-add=`add_ports`
-
-AT_CAPTURE_FILE([ovsdb-client.err])
-AT_CAPTURE_FILE([ovsdb-client-nonblock.err])
-
-
-# Start an ovsdb-client monitoring all changes to the database,
-# By default, it is non-blocking, and will get update message
-# for each ovsdb-server transaactions.
-AT_CHECK([ovsdb-client --detach --no-chdir --pidfile=nonblock.pid monitor ALL 
>ovsdb-client-nonblock.out 2>ovsdb-client-nonblock.err])
-
-# Start an ovsdb-client monitoring all changes to the database,
-# make it block to force the buffers to fill up, and then execute
-# enough iterations that ovsdb-server starts combining updates.
-AT_CHECK([ovsdb-client --detach --no-chdir --pidfile monitor ALL 
>ovsdb-client.out 2>ovsdb-client.err])
-AT_CHECK([ovs-appctl -t ovsdb-client ovsdb-client/block])
-for i in `seq 1 $n_iterations`; do
-echo "blocked update ($i of $n_iterations)"
-trigger_big_update $i
-done
-AT_CHECK([ovs-appctl -t ovsdb-client ovsdb-client/unblock])
-OVS_WAIT_UNTIL([grep "xyzzy$counter" ovsdb-client.out])
-OVS_WAIT_UNTIL([grep "xyzzy$counter" ovsdb-client-nonblock.out])
-OVS_APP_EXIT_AND_WAIT([ovsdb-client])
-AT_CHECK([kill `cat nonblock.pid`])
-
-# Count the number of updates in the ovsdb-client output, by counting
-# the number of changes to the Open_vSwitch table.  (All of our
-# transactions modify the Open_vSwitch table.)  It should be less than
-# $n_updates updates.
+## The maximum socket receive buffer size is important for this test, which
+## tests behavior when the receive buffer overflows.
+#if test -e /proc/sys/net/core/rmem_max; then
+## Linux
+#rmem_max=`cat /proc/sys/net/core/rmem_max`
+#elif rmem_max=`sysctl -n net.inet.tcp.recvbuf_max 2>/dev/null`; then
+#: # FreeBSD, NetBSD
+#else
+## Don't know how to get maximum socket receive buffer on this OS
+#AT_SKIP_IF([:])
+#fi
 #
-# Check that the counter is what we expect.
-logged_updates=`grep -c '^Open_vSwitch' ovsdb-client.out`
-logged_nonblock_updates=`grep -c '^Open_vSwitch' ovsdb-client-nonblock.out`
-echo 

[ovs-dev] Updated async IO patch series

2020-06-02 Thread anton . ivanov
I have updated the async io patch series and added to them the patch
to switch from "jsonrpc echo" to kernel probing where supported by the
OS. 

The patchset is tested with OVN at scale and results in better stability
of OVN at high scales comparet to master. F.e. - I get 1 out of 3 OVN scale
runs fail for master when using 150 fake nodes on my righ (8 x 4 core/8G
VMs + 1 4 core/24G VM for OVN central). With this patchse the failure rate
drops to zero. 

The performance gain (tested on the same rig) is ~ 5%.

The first patch in the series can be used separately - it solves multiple
problems related to connection drops at high load due to incorrect probe
handling - raft, control connections, etc.


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovs rcu: update rcu pointer first

2020-06-02 Thread Yanqin Wei
Hi Haifeng,

One more comment. Since this is a bug fix, it is possible that the maintainer 
will backport to the previous branch. Therefore, it is recommended to split 
into several patches and add fixes tag.
http://docs.openvswitch.org/en/latest/internals/contributing/submitting-patches/?highlight=submit
e.g.
"Fixes: 63bc9fb1c69f ("packets: Reorder CS_* flags to remove gap.")
If you would like to record which commit introduced a bug being fixed, you may 
do that with a "Fixes" header. This assists in determining which OVS releases 
have the bug, so the patch can be applied to all affected versions. The easiest 
way to generate the header in the proper format is with this git command. This 
command also CCs the author of the commit being fixed, which makes sense unless 
the author also made the fix or is already named in another tag:
$ git log -1 --pretty=format:"CC: %an <%ae>%nFixes: %h (\"%s\")" \
  --abbrev=12 COMMIT_REF"

Best Regards,
Wei Yanqin

> -Original Message-
> From: Linhaifeng 
> Sent: Tuesday, June 2, 2020 3:13 PM
> To: Yanqin Wei ; d...@openvswitch.org
> Cc: nd 
> Subject: RE: [PATCH] ovs rcu: update rcu pointer first
> 
> Hi Yanqin,
> 
> Thank you for your suggestions. I will send a new patch.
> 
> -Original Message-
> From: Yanqin Wei [mailto:yanqin@arm.com]
> Sent: Tuesday, June 2, 2020 11:51 AM
> To: Linhaifeng ; d...@openvswitch.org
> Cc: nd 
> Subject: RE: [PATCH] ovs rcu: update rcu pointer first
> 
> Hi Haifeng,
> 
> It looks indeed a risk for using ovs-rcu. A few comments inline.
> 
> Best Regards,
> Wei Yanqin
> 
> > -Original Message-
> > From: dev  On Behalf Of Linhaifeng
> > Sent: Monday, June 1, 2020 11:13 AM
> > To: d...@openvswitch.org
> > Subject: [ovs-dev] [PATCH] ovs rcu: update rcu pointer first
> >
> > We should update rcu pointer first then use ovsrcu_postpone to free
> > otherwise maybe cause use-after-free.
> >
> > e.g, thead are two threads A and B:
> >
> > 1. thread A call ovsrcu_postpone and flush cbset, this time have not
> > call ovsrcu_quiesce
> >
> > 2. thread rcu wait all threads call ovsrcu_quiesce
> >
> > 3. thread B call ovsrcu_quiesce
> >
> > 4. thread B get the old pointer next round
> >
> > 5. thrad A call ovsrcu_quiesce, now all threads have called
> > ovsrcu_quiesce
> [Yanqin]   Thread A is a writer and does not have to call ovsrcu_quiesce. I 
> think
> this scenario can be simplified as follows: reader indicates momentary
> quiescent and access old pointer after writer postpone free old pointer and
> before setting new pointer.
> 
> >
> > 6. thread rcu free old pointer
> >
> > 7. thread B use-after-free
> >
> > Signed-off-by: Linhaifeng 
> > ---
> >  lib/classifier.c  |  4 ++--
> >  lib/ovs-rcu.h |  2 +-
> >  lib/pvector.c | 15 ---
> >  ofproto/ofproto-dpif-mirror.c |  4 ++-- ofproto/ofproto-dpif-upcall.c
> > |  3 +--
> >  5 files changed, 14 insertions(+), 14 deletions(-)
> >
> > diff --git a/lib/classifier.c b/lib/classifier.c index
> > f2c3497c2..6bff76e07 100644
> > --- a/lib/classifier.c
> > +++ b/lib/classifier.c
> > @@ -249,11 +249,11 @@ cls_rule_set_conjunctions(struct cls_rule *cr,
> >  unsigned int old_n = old ? old->n : 0;
> >
> >  if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof
> > *conj))) {
> > +ovsrcu_set(>conj_set,
> > +   cls_conjunction_set_alloc(match, conj, n));
> >  if (old) {
> >  ovsrcu_postpone(free, old);
> >  }
> > -ovsrcu_set(>conj_set,
> > -   cls_conjunction_set_alloc(match, conj, n));
> >  }
> >  }
> >
> > diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h index ecc4c9201..a66d868ea
> > 100644
> > --- a/lib/ovs-rcu.h
> > +++ b/lib/ovs-rcu.h
> > @@ -119,9 +119,9 @@
> >   * change_flow(struct flow *new_flow)
> >   * {
> >   * ovs_mutex_lock();
> > + * ovsrcu_set(, new_flow);
> >   * ovsrcu_postpone(free,
> >   * ovsrcu_get_protected(struct flow *, ));
> [Yanqin] flowp has been set to new flow pointer here. Maybe a new variable is
> needed to store old pointer.
> > - * ovsrcu_set(, new_flow);
> >   * ovs_mutex_unlock();
> >   * }
> >   *
> > diff --git a/lib/pvector.c b/lib/pvector.c index cc527fdc4..aa8c6cb24
> > 100644
> > --- a/lib/pvector.c
> > +++ b/lib/pvector.c
> > @@ -67,10 +67,11 @@ pvector_init(struct pvector *pvec)  void
> > pvector_destroy(struct pvector *pvec)  {
> > +struct pvector_impl *old = pvector_impl_get(pvec);
> >  free(pvec->temp);
> >  pvec->temp = NULL;
> > -ovsrcu_postpone(free, pvector_impl_get(pvec));
> >  ovsrcu_set(>impl, NULL); /* Poison. */
> > +ovsrcu_postpone(free, old);
> >  }
> >
> >  /* Iterators for callers that need the 'index' afterward. */ @@
> > -205,11 +206,11 @@ pvector_change_priority(struct pvector *pvec, void
> > *ptr, int priority)
> >  /* Make the modified pvector available for iteration. */  void
> > 

Re: [ovs-dev] [PATCH] ovs rcu: update rcu pointer first

2020-06-02 Thread Linhaifeng
Hi Yanqin,

Thank you for your suggestions. I will send a new patch.

-Original Message-
From: Yanqin Wei [mailto:yanqin@arm.com] 
Sent: Tuesday, June 2, 2020 11:51 AM
To: Linhaifeng ; d...@openvswitch.org
Cc: nd 
Subject: RE: [PATCH] ovs rcu: update rcu pointer first

Hi Haifeng,

It looks indeed a risk for using ovs-rcu. A few comments inline.

Best Regards,
Wei Yanqin

> -Original Message-
> From: dev  On Behalf Of Linhaifeng
> Sent: Monday, June 1, 2020 11:13 AM
> To: d...@openvswitch.org
> Subject: [ovs-dev] [PATCH] ovs rcu: update rcu pointer first
> 
> We should update rcu pointer first then use ovsrcu_postpone to free 
> otherwise maybe cause use-after-free.
> 
> e.g, thead are two threads A and B:
> 
> 1. thread A call ovsrcu_postpone and flush cbset, this time have not 
> call ovsrcu_quiesce
> 
> 2. thread rcu wait all threads call ovsrcu_quiesce
> 
> 3. thread B call ovsrcu_quiesce
> 
> 4. thread B get the old pointer next round
> 
> 5. thrad A call ovsrcu_quiesce, now all threads have called 
> ovsrcu_quiesce
[Yanqin]   Thread A is a writer and does not have to call ovsrcu_quiesce. I 
think this scenario can be simplified as follows: reader indicates momentary 
quiescent and access old pointer after writer postpone free old pointer and 
before setting new pointer. 

> 
> 6. thread rcu free old pointer
> 
> 7. thread B use-after-free
> 
> Signed-off-by: Linhaifeng 
> ---
>  lib/classifier.c  |  4 ++--
>  lib/ovs-rcu.h |  2 +-
>  lib/pvector.c | 15 ---
>  ofproto/ofproto-dpif-mirror.c |  4 ++--  
> ofproto/ofproto-dpif-upcall.c |  3 +--
>  5 files changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/classifier.c b/lib/classifier.c index 
> f2c3497c2..6bff76e07 100644
> --- a/lib/classifier.c
> +++ b/lib/classifier.c
> @@ -249,11 +249,11 @@ cls_rule_set_conjunctions(struct cls_rule *cr,
>  unsigned int old_n = old ? old->n : 0;
> 
>  if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof 
> *conj))) {
> +ovsrcu_set(>conj_set,
> +   cls_conjunction_set_alloc(match, conj, n));
>  if (old) {
>  ovsrcu_postpone(free, old);
>  }
> -ovsrcu_set(>conj_set,
> -   cls_conjunction_set_alloc(match, conj, n));
>  }
>  }
> 
> diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h index ecc4c9201..a66d868ea 
> 100644
> --- a/lib/ovs-rcu.h
> +++ b/lib/ovs-rcu.h
> @@ -119,9 +119,9 @@
>   * change_flow(struct flow *new_flow)
>   * {
>   * ovs_mutex_lock();
> + * ovsrcu_set(, new_flow);
>   * ovsrcu_postpone(free,
>   * ovsrcu_get_protected(struct flow *, ));
[Yanqin] flowp has been set to new flow pointer here. Maybe a new variable is 
needed to store old pointer.
> - * ovsrcu_set(, new_flow);
>   * ovs_mutex_unlock();
>   * }
>   *
> diff --git a/lib/pvector.c b/lib/pvector.c index cc527fdc4..aa8c6cb24 
> 100644
> --- a/lib/pvector.c
> +++ b/lib/pvector.c
> @@ -67,10 +67,11 @@ pvector_init(struct pvector *pvec)  void 
> pvector_destroy(struct pvector *pvec)  {
> +struct pvector_impl *old = pvector_impl_get(pvec);
>  free(pvec->temp);
>  pvec->temp = NULL;
> -ovsrcu_postpone(free, pvector_impl_get(pvec));
>  ovsrcu_set(>impl, NULL); /* Poison. */
> +ovsrcu_postpone(free, old);
>  }
> 
>  /* Iterators for callers that need the 'index' afterward. */ @@ 
> -205,11 +206,11 @@ pvector_change_priority(struct pvector *pvec, void 
> *ptr, int priority)
>  /* Make the modified pvector available for iteration. */  void 
> pvector_publish__(struct pvector *pvec)  {
> -struct pvector_impl *temp = pvec->temp;
> -
> +struct pvector_impl *new = pvec->temp;
> +struct pvector_impl *old = ovsrcu_get_protected(struct pvector_impl *,
> +   >impl);
>  pvec->temp = NULL;
> -pvector_impl_sort(temp); /* Also removes gaps. */
> -ovsrcu_postpone(free, ovsrcu_get_protected(struct pvector_impl *,
> -   >impl));
> -ovsrcu_set(>impl, temp);
> +pvector_impl_sort(new); /* Also removes gaps. */
> +ovsrcu_set(>impl, new);
> +ovsrcu_postpone(free, old);
>  }
> diff --git a/ofproto/ofproto-dpif-mirror.c 
> b/ofproto/ofproto-dpif-mirror.c index
> 343b75f0e..343100c08 100644
> --- a/ofproto/ofproto-dpif-mirror.c
> +++ b/ofproto/ofproto-dpif-mirror.c
> @@ -276,9 +276,9 @@ mirror_set(struct mbridge *mbridge, void *aux, 
> const char *name,
>  hmapx_destroy(_map);
> 
>  if (vlans || src_vlans) {
> +unsigned long *new_vlans = vlan_bitmap_clone(src_vlans);
> +ovsrcu_set(>vlans, new_vlans);
>  ovsrcu_postpone(free, vlans);
> -vlans = vlan_bitmap_clone(src_vlans);
> -ovsrcu_set(>vlans, vlans);
>  }
> 
>  mirror->out = out;
> diff --git a/ofproto/ofproto-dpif-upcall.c 
> b/ofproto/ofproto-dpif-upcall.c index
> 

[ovs-dev] [PATCH v1 1/6] netdev: avoid unnecessary packet batch refilling in netdev feature check

2020-06-02 Thread Yanqin Wei
Before sending packets on netdev, feature compatibility is always checked
and incompatible traffic should be dropped. But, packet batch is refilled
even when no packet needs to be dropped. This patch improves it by keeping
the original batch if no packet should be dropped.

Reviewed-by: Lijian Zhang 
Reviewed-by: Malvika Gupta 
Signed-off-by: Yanqin Wei 
---
 lib/dp-packet.h | 12 
 lib/netdev.c| 13 ++---
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 0430cca8e..1345f46e7 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -762,12 +762,12 @@ dp_packet_batch_size(const struct dp_packet_batch *batch)
 return batch->count;
 }
 
-/* Clear 'batch' for refill. Use dp_packet_batch_refill() to add
+/* Clear 'batch' from 'offset' for refill. Use dp_packet_batch_refill() to add
  * packets back into the 'batch'. */
 static inline void
-dp_packet_batch_refill_init(struct dp_packet_batch *batch)
+dp_packet_batch_refill_prepare(struct dp_packet_batch *batch, size_t offset)
 {
-batch->count = 0;
+batch->count = offset;
 };
 
 static inline void
@@ -801,6 +801,10 @@ dp_packet_batch_is_full(const struct dp_packet_batch 
*batch)
 for (size_t IDX = 0; IDX < dp_packet_batch_size(BATCH); IDX++)  \
 if (PACKET = BATCH->packets[IDX], true)
 
+#define DP_PACKET_BATCH_FOR_EACH_WITH_SIZE(IDX, SIZE, PACKET, BATCH) \
+ for (size_t IDX = 0; IDX < SIZE; IDX++) \
+if (PACKET = BATCH->packets[IDX], true)
+
 /* Use this macro for cases where some packets in the 'BATCH' may be
  * dropped after going through each packet in the 'BATCH'.
  *
@@ -813,7 +817,7 @@ dp_packet_batch_is_full(const struct dp_packet_batch *batch)
  * the 'const' modifier since it should not be modified by
  * the iterator.  */
 #define DP_PACKET_BATCH_REFILL_FOR_EACH(IDX, SIZE, PACKET, BATCH)   \
-for (dp_packet_batch_refill_init(BATCH), IDX=0; IDX < SIZE; IDX++)  \
+for (dp_packet_batch_refill_prepare(BATCH, 0), IDX=0; IDX < SIZE; IDX++)  \
  if (PACKET = BATCH->packets[IDX], true)
 
 static inline void
diff --git a/lib/netdev.c b/lib/netdev.c
index 90962eec6..7934a00d4 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -838,15 +838,22 @@ netdev_send_prepare_batch(const struct netdev *netdev,
   struct dp_packet_batch *batch)
 {
 struct dp_packet *packet;
-size_t i, size = dp_packet_batch_size(batch);
+size_t batch_cnt = dp_packet_batch_size(batch);;
+bool refill = false;
 
-DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
+DP_PACKET_BATCH_FOR_EACH_WITH_SIZE (i, batch_cnt, packet, batch) {
 char *errormsg = NULL;
 
 if (netdev_send_prepare_packet(netdev->ol_flags, packet, )) {
-dp_packet_batch_refill(batch, packet, i);
+if ( OVS_UNLIKELY(refill) ) {
+dp_packet_batch_refill(batch, packet, i);
+}
 } else {
 dp_packet_delete(packet);
+if( !refill ) {
+dp_packet_batch_refill_prepare(batch, i);
+refill = true;
+}
 COVERAGE_INC(netdev_send_prepare_drops);
 VLOG_WARN_RL(, "%s: Packet dropped: %s",
  netdev_get_name(netdev), errormsg);
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 3/6] dpif-netdev: improve emc lookup performance by contiguous storage of hash value.

2020-06-02 Thread Yanqin Wei
In the emc lookup function, hash/flow/key are checked for matching entry.
Each entry comparison loads several cachelines into CPU. So in the
multifow case, processor will wait for the data to be fetched from lower
level cacheline or main memory because of cache miss.
This patch modifies emc table layout to contiguously store the hash items.
It can reduce the number of cache miss for fetching hash value in EMC
lookup miss case. And EMC lookup hit case can also benefit because
processor can parallelly load and match hash and key in different cacheline.

Reviewed-by: Lijian Zhang 
Reviewed-by: Malvika Gupta 
Reviewed-by: Ruifeng Wang 
Signed-off-by: Yanqin Wei 
---
 lib/dpif-netdev.c | 55 +++
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c94d5e8c7..3994f41e4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -192,13 +192,19 @@ static struct odp_support dp_netdev_support = {
 #define DEFAULT_EM_FLOW_INSERT_INV_PROB 100
 #define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / \
 DEFAULT_EM_FLOW_INSERT_INV_PROB)
+struct emc_key {
+uint32_t len;/* Length of the following miniflow (incl. map). */
+struct miniflow mf;
+uint64_t buf[FLOW_MAX_PACKET_U64S];
+};
 
 struct emc_entry {
 struct dp_netdev_flow *flow;
-struct netdev_flow_key key;   /* key.hash used for emc hash value. */
+struct emc_key key;
 };
 
 struct emc_cache {
+uint32_t hash[EM_FLOW_HASH_ENTRIES];
 struct emc_entry entries[EM_FLOW_HASH_ENTRIES];
 int sweep_idx;/* For emc_cache_slow_sweep(). */
 };
@@ -220,9 +226,9 @@ struct dfc_cache {
 
 /* Iterate in the exact match cache through every entry that might contain a
  * miniflow with hash 'HASH'. */
-#define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH) \
+#define EMC_FOR_EACH_POS_WITH_HASH(ID, HASH) \
 for (uint32_t i__ = 0, srch_hash__ = (HASH); \
- (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & EM_FLOW_HASH_MASK], \
+ (ID) = srch_hash__ & EM_FLOW_HASH_MASK, \
  i__ < EM_FLOW_HASH_SEGS;\
  i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT)
 
@@ -877,8 +883,8 @@ emc_cache_init(struct emc_cache *flow_cache)
 
 flow_cache->sweep_idx = 0;
 for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
+flow_cache->hash[i] = 0;
 flow_cache->entries[i].flow = NULL;
-flow_cache->entries[i].key.hash = 0;
 flow_cache->entries[i].key.len = sizeof(struct miniflow);
 flowmap_init(_cache->entries[i].key.mf.map);
 }
@@ -2733,12 +2739,12 @@ netdev_flow_key_equal(const struct netdev_flow_key *a,
 return a->hash == b->hash && !memcmp(>mf, >mf, a->len);
 }
 
-/* Used to compare 'netdev_flow_key' in the exact match cache to a miniflow.
+/* Used to compare 'emc_key' in the exact match cache to a miniflow.
  * The maps are compared bitwise, so both 'key->mf' and 'mf' must have been
  * generated by miniflow_extract. */
 static inline bool
-netdev_flow_key_equal_mf(const struct netdev_flow_key *key,
- const struct miniflow *mf)
+emc_key_equal_mf(const struct emc_key *key,
+ const struct miniflow *mf)
 {
 return !memcmp(>mf, mf, key->len);
 }
@@ -2840,7 +2846,8 @@ emc_change_entry(struct emc_entry *ce, struct 
dp_netdev_flow *flow,
 }
 }
 if (key) {
-netdev_flow_key_clone(>key, key);
+ce->key.len = key->len;
+memcpy(>key.mf, >mf, key->len);
 }
 }
 
@@ -2849,12 +2856,14 @@ emc_insert(struct emc_cache *cache, const struct 
netdev_flow_key *key,
struct dp_netdev_flow *flow)
 {
 struct emc_entry *to_be_replaced = NULL;
-struct emc_entry *current_entry;
+uint32_t *to_be_replaced_hash = NULL;
+uint32_t id;
 
-EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
-if (netdev_flow_key_equal(_entry->key, key)) {
+EMC_FOR_EACH_POS_WITH_HASH (id, key->hash) {
+if (key->hash == cache->hash[id]
+&& emc_key_equal_mf(>entries[id].key, >mf)) {
 /* We found the entry with the 'mf' miniflow */
-emc_change_entry(current_entry, flow, NULL);
+emc_change_entry(>entries[id], flow, NULL);
 return;
 }
 
@@ -2862,14 +2871,15 @@ emc_insert(struct emc_cache *cache, const struct 
netdev_flow_key *key,
  * in the first entry where it can be */
 if (!to_be_replaced
 || (emc_entry_alive(to_be_replaced)
-&& !emc_entry_alive(current_entry))
-|| current_entry->key.hash < to_be_replaced->key.hash) {
-to_be_replaced = current_entry;
+&& !emc_entry_alive(>entries[id]))
+|| cache->hash[id] < *to_be_replaced_hash) {
+to_be_replaced = 

[ovs-dev] [PATCH v1 5/6] dpif-netdev: remove unnecessary key length calculation in fast path

2020-06-02 Thread Yanqin Wei
Key length is only useful for emc table. This patch moves the key length
calculation into emc_change_entry for fast path performance.

Reviewed-by: Lijian Zhang 
Reviewed-by: Malvika Gupta 
Reviewed-by: Ruifeng Wang 
Signed-off-by: Yanqin Wei 
---
 lib/dpif-netdev.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d575edefd..6ff8194ab 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2846,8 +2846,8 @@ emc_change_entry(struct emc_entry *ce, struct 
dp_netdev_flow *flow,
 }
 }
 if (key) {
-ce->key.len = key->len;
-memcpy(>key.mf, >mf, key->len);
+ce->key.len = netdev_flow_key_size(miniflow_n_values(>mf));
+memcpy(>key.mf, >mf, ce->key.len);
 }
 }
 
@@ -6541,8 +6541,6 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
 tcp_flags = miniflow_get_tcp_flags([i].mf);
 
 /* SMC hit and emc miss, we insert into EMC */
-keys[i].len =
-netdev_flow_key_size(miniflow_n_values([i].mf));
 emc_probabilistic_insert(pmd, [i], flow);
 /* Add these packets into the flow map in the same order
  * as received.
@@ -6831,10 +6829,6 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 int lookup_cnt = 0, add_lookup_cnt;
 bool any_miss;
 
-for (size_t i = 0; i < cnt; i++) {
-/* Key length is needed in all the cases, hash computed on demand. */
-keys[i]->len = netdev_flow_key_size(miniflow_n_values([i]->mf));
-}
 /* Get the classifier for the in_port */
 cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
 if (OVS_LIKELY(cls)) {
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 2/6] dpif-netdev: add tunnel_valid flag to skip ip/ipv6 address comparison

2020-06-02 Thread Yanqin Wei
miniflow_extract checks the validation of tunnel metadata by comparing
tunnel destination address, including 16 bytes ipv6 address.
This patch introduces a 'tunnel_valid' flag. If it is false,
md->cacheline2 will not be touched. This improvement is beneficial to
miniflow_extract performance for all kinds of traffic.

Reviewed-by: Lijian Zhang 
Reviewed-by: Malvika Gupta 
Signed-off-by: Yanqin Wei 
---
 lib/dpif-netdev.c | 14 +++---
 lib/flow.c|  2 +-
 lib/packets.h | 46 --
 3 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 51c888501..c94d5e8c7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6625,12 +6625,16 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
 if (i != cnt - 1) {
 struct dp_packet **packets = packets_->packets;
 /* Prefetch next packet data and metadata. */
-OVS_PREFETCH(dp_packet_data(packets[i+1]));
-pkt_metadata_prefetch_init([i+1]->md);
+OVS_PREFETCH(dp_packet_data(packets[i + 1]));
+if (md_is_valid) {
+pkt_metadata_prefetch([i + 1]->md);
+} else {
+pkt_metadata_prefetch_init([i + 1]->md);
+}
 }
 
 if (!md_is_valid) {
-pkt_metadata_init(>md, port_no);
+pkt_metadata_datapath_init(>md, port_no);
 }
 
 if ((*recirc_depth_get() == 0) &&
@@ -6730,6 +6734,10 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
 miniflow_expand(>mf, );
 memset(, 0, sizeof match.wc);
 
+if (!packet->md.tunnel_valid) {
+pkt_metadata_tnl_dst_init(>md);
+}
+
 ofpbuf_clear(actions);
 ofpbuf_clear(put_actions);
 
diff --git a/lib/flow.c b/lib/flow.c
index cc1b3f2db..1f0b3d4dc 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -747,7 +747,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow 
*dst)
 ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;
 
 /* Metadata. */
-if (flow_tnl_dst_is_set(>tunnel)) {
+if (md->tunnel_valid && flow_tnl_dst_is_set(>tunnel)) {
 miniflow_push_words(mf, tunnel, >tunnel,
 offsetof(struct flow_tnl, metadata) /
 sizeof(uint64_t));
diff --git a/lib/packets.h b/lib/packets.h
index 447e6f6fa..3b507d2a3 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -103,15 +103,16 @@ PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, 
cacheline0,
action. */
 uint32_t skb_priority;  /* Packet priority for QoS. */
 uint32_t pkt_mark;  /* Packet mark. */
+struct conn *conn;  /* Cached conntrack connection. */
 uint8_t  ct_state;  /* Connection state. */
 bool ct_orig_tuple_ipv6;
 uint16_t ct_zone;   /* Connection zone. */
 uint32_t ct_mark;   /* Connection mark. */
 ovs_u128 ct_label;  /* Connection label. */
 union flow_in_port in_port; /* Input port. */
-struct conn *conn;  /* Cached conntrack connection. */
 bool reply; /* True if reply direction. */
 bool icmp_related;  /* True if ICMP related. */
+bool tunnel_valid;
 );
 
 PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
@@ -141,6 +142,7 @@ pkt_metadata_init_tnl(struct pkt_metadata *md)
  * are before this and as long as they are empty, the options won't
  * be looked at. */
 memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts));
+md->tunnel_valid = true;
 }
 
 static inline void
@@ -151,6 +153,25 @@ pkt_metadata_init_conn(struct pkt_metadata *md)
 
 static inline void
 pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
+{
+/* Initialize only till ct_state. Once the ct_state is zeroed out rest
+ * of ct fields will not be looked at unless ct_state != 0.
+ */
+memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
+
+/* It can be expensive to zero out all of the tunnel metadata. However,
+ * we can just zero out ip_dst and the rest of the data will never be
+ * looked at. */
+md->tunnel_valid = true;
+md->tunnel.ip_dst = 0;
+md->tunnel.ipv6_dst = in6addr_any;
+
+md->in_port.odp_port = port;
+}
+
+/* This function initializes those members used by userspace datapath */
+static inline void
+pkt_metadata_datapath_init(struct pkt_metadata *md, odp_port_t port)
 {
 /* This is called for every packet in userspace datapath and affects
  * performance if all the metadata is initialized. Hence, fields should
@@ -162,12 +183,19 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t 
port)
 memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
 
 /* It can be expensive to zero out all of the tunnel metadata. However,
- * we can just zero out ip_dst and the rest of the data will never be
- * looked at. */
+ * we can just clear 

[ovs-dev] [PATCH v1 0/6] Memory access optimization for flow scalability of userspace datapath.

2020-06-02 Thread Yanqin Wei
OVS userspace datapath is a program with heavy memory access. It needs to
load/store a large number of memory, including packet header, metadata,
EMC/SMC/DPCLS tables and so on. It causes a lot of cache line missing and
refilling, which has a great impact on flow scalability. And in some cases,
EMC has a negative impact on the overall performance. It is difficult for
user to dynamically manage the enabling of EMC. 

This series of patches improve memory access of userspace datapath as
follows:
1. Reduce the number of metadata cache line accessed by non-tunnel traffic. 
2. Decrease unnecessary memory load/store for batch/flow. 
3. Modify the layout of EMC data struct. Centralize the storage of hash
value. 

In the NIC2NIC traffic tests, the overall performance improvement is
observed, especially in multi-flow cases. 
Flows   delta
1-1K flows  5-10%
10K flows   20%
100K flows  40%
EMC disable 10%

Malvika Gupta (1):
  [ovs-dev] dpif-netdev: Modify dfc_processing function to void function

Yanqin Wei (5):
  netdev: avoid unnecessary packet batch refilling in netdev feature
check
  dpif-netdev: add tunnel_valid flag to skip ip/ipv6 address comparison
  dpif-netdev: improve emc lookup performance by contiguous storage of
hash value.
  dpif-netdev: skip flow hash calculation in case of smc disabled
  dpif-netdev: remove unnecessary key length calculation in fast path

 lib/dp-packet.h   |  12 +++--
 lib/dpif-netdev.c | 115 --
 lib/flow.c|   2 +-
 lib/netdev.c  |  13 --
 lib/packets.h |  46 ---
 5 files changed, 120 insertions(+), 68 deletions(-)

-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 4/6] dpif-netdev: skip flow hash calculation in case of smc disabled

2020-06-02 Thread Yanqin Wei
In case of 10k+ flows, emc lookup will usually miss. Flow hash value is
always calculated in this case no matter smc is enabled or not. This patch
moves it from smc_insert function into fast_path_processing and
handle_packet_upcall function to avoid unnecessary hash calculation and memory
access(flow->ufid) in smc disabled case.

Reviewed-by: Lijian Zhang 
Reviewed-by: Malvika Gupta 
Reviewed-by: Lance Yang 
Signed-off-by: Yanqin Wei 
---
 lib/dpif-netdev.c | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3994f41e4..d575edefd 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2957,14 +2957,8 @@ smc_insert(struct dp_netdev_pmd_thread *pmd,
 struct smc_bucket *bucket = _cache->buckets[key->hash & SMC_MASK];
 uint16_t index;
 uint32_t cmap_index;
-bool smc_enable_db;
 int i;
 
-atomic_read_relaxed(>dp->smc_enable_db, _enable_db);
-if (!smc_enable_db) {
-return;
-}
-
 cmap_index = cmap_find_index(>flow_table, hash);
 index = (cmap_index >= UINT16_MAX) ? UINT16_MAX : (uint16_t)cmap_index;
 
@@ -6794,8 +6788,13 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
  add_actions->size);
 }
 ovs_mutex_unlock(>flow_mutex);
-uint32_t hash = dp_netdev_flow_hash(_flow->ufid);
-smc_insert(pmd, key, hash);
+
+bool smc_enable_db;
+atomic_read_relaxed(>dp->smc_enable_db, _enable_db);
+if (smc_enable_db) {
+uint32_t hash = dp_netdev_flow_hash(_flow->ufid);
+smc_insert(pmd, key, hash);
+}
 emc_probabilistic_insert(pmd, key, netdev_flow);
 }
 if (pmd_perf_metrics_enabled(pmd)) {
@@ -6904,9 +6903,13 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 }
 
 flow = dp_netdev_flow_cast(rules[i]);
-uint32_t hash =  dp_netdev_flow_hash(>ufid);
-smc_insert(pmd, keys[i], hash);
 
+bool smc_enable_db;
+atomic_read_relaxed(>dp->smc_enable_db, _enable_db);
+if (smc_enable_db) {
+uint32_t hash =  dp_netdev_flow_hash(>ufid);
+smc_insert(pmd, keys[i], hash);
+}
 emc_probabilistic_insert(pmd, keys[i], flow);
 /* Add these packets into the flow map in the same order
  * as received.
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 6/6] dpif-netdev: Modify dfc_processing function to void function

2020-06-02 Thread Yanqin Wei
From: Malvika Gupta 

dfc_processing function returns the number of packets left to be processed
in 'packets' array via dp_packet_batch_size() function. dfc_processing function
is called only from dp_netdev_input__ and its return value is not checked upon
function return. Moreover, dp_packet_batch_is_empty() called after function
return from dfc_processing itself calls dp_packet_batch_size to check if
'packets' array is empty. This patch modifies the dfc_processing function to a
void function to remove the above code redundancy and cleans the code.

Reviewed-by: Yanqin Wei 
Signed-off-by: Malvika Gupta 
---
 lib/dpif-netdev.c | 17 +
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6ff8194ab..b3750017b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6577,16 +6577,13 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
  * beginning of the 'packets' array. The pointers of missed keys are put in the
  * missed_keys pointer array for future processing.
  *
- * The function returns the number of packets that needs to be processed in the
- * 'packets' array (they have been moved to the beginning of the vector).
- *
  * For performance reasons a caller may choose not to initialize the metadata
  * in 'packets_'.  If 'md_is_valid' is false, the metadata in 'packets'
  * is not valid and must be initialized by this function using 'port_no'.
  * If 'md_is_valid' is true, the metadata is already valid and 'port_no'
  * will be ignored.
  */
-static inline size_t
+static inline void
 dfc_processing(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets_,
struct netdev_flow_key *keys,
@@ -6707,15 +6704,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
 
 pmd_perf_update_counter(>perf_stats, PMD_STAT_EXACT_HIT, n_emc_hit);
 
-if (!smc_enable_db) {
-return dp_packet_batch_size(packets_);
+if (smc_enable_db) {
+/* Packets miss EMC will do a batch lookup in SMC if enabled */
+smc_lookup_batch(pmd, keys, missed_keys, packets_,
+  n_missed, flow_map, index_map);
 }
-
-/* Packets miss EMC will do a batch lookup in SMC if enabled */
-smc_lookup_batch(pmd, keys, missed_keys, packets_,
- n_missed, flow_map, index_map);
-
-return dp_packet_batch_size(packets_);
 }
 
 static inline int
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC v3 PATCH 3/5] dpif-netdev: Skip encap action during datapath execution

2020-06-02 Thread Eli Britstein



On 6/1/2020 8:29 PM, Sriharsha Basavapatna wrote:

On Mon, Jun 1, 2020 at 9:18 PM Eli Britstein  wrote:


On 6/1/2020 6:15 PM, Sriharsha Basavapatna wrote:

On Mon, Jun 1, 2020 at 7:58 PM Eli Britstein  wrote:

On 5/28/2020 11:19 AM, Sriharsha Basavapatna wrote:

In this patch we check if action processing (apart from OUTPUT action),
should be skipped for a given dp_netdev_flow. Specifically, we check if
the action is TNL_PUSH and if it has been offloaded to HW, then we do not
push the tunnel header in SW. The datapath only executes the OUTPUT action.
The packet will be encapsulated in HW during transmit.

Signed-off-by: Sriharsha Basavapatna 
---
lib/dpif-netdev.c | 46 --
1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 781b233f4..3e175c25e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -112,6 +112,7 @@ COVERAGE_DEFINE(datapath_drop_recirc_error);
COVERAGE_DEFINE(datapath_drop_invalid_port);
COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
+COVERAGE_DEFINE(datapath_skip_tunnel_push);

/* Protects against changes to 'dp_netdevs'. */
static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
@@ -538,6 +539,16 @@ struct dp_netdev_flow {
bool dead;
uint32_t mark;   /* Unique flow mark assigned to a flow */

+/* The next two members are used to support partial offloading of
+ * actions. The boolean flag tells if this flow has its actions partially
+ * offloaded. The egress port# tells if the action should be offloaded
+ * on the egress (output) port instead of the in-port for the flow. Note
+ * that we support flows with a single egress port action.
+ * (see MAX_ACTION_ATTRS for related comments).
+ */
+bool partial_actions_offloaded;
+odp_port_t  egress_offload_port;
+
/* Statistics. */
struct dp_netdev_flow_stats stats;

@@ -791,7 +802,8 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
  bool should_steal,
  const struct flow *flow,
  const struct nlattr *actions,
-  size_t actions_len);
+  size_t actions_len,
+  const struct dp_netdev_flow *dp_flow);
static void dp_netdev_input(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *, odp_port_t port_no);
static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
@@ -3828,7 +3840,7 @@ dpif_netdev_execute(struct dpif *dpif, struct 
dpif_execute *execute)
dp_packet_batch_init_packet(, execute->packet);
pp.do_not_steal = true;
dp_netdev_execute_actions(pmd, , false, execute->flow,
-  execute->actions, execute->actions_len);
+  execute->actions, execute->actions_len, NULL);
dp_netdev_pmd_flush_output_packets(pmd, true);

if (pmd->core_id == NON_PMD_CORE_ID) {
@@ -6456,7 +6468,7 @@ packet_batch_per_flow_execute(struct 
packet_batch_per_flow *batch,
actions = dp_netdev_flow_get_actions(flow);

dp_netdev_execute_actions(pmd, >array, true, >flow,
-  actions->actions, actions->size);
+  actions->actions, actions->size, flow);
}

static inline void
@@ -6764,7 +6776,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
 * we'll send the packet up twice. */
dp_packet_batch_init_packet(, packet);
dp_netdev_execute_actions(pmd, , true, ,
-  actions->data, actions->size);
+  actions->data, actions->size, NULL);

add_actions = put_actions->size ? put_actions : actions;
if (OVS_LIKELY(error != ENOSPC)) {
@@ -6999,6 +7011,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_execute_aux {
struct dp_netdev_pmd_thread *pmd;
const struct flow *flow;
+const struct dp_netdev_flow *dp_flow;/* for partial action offload */
};

static void
@@ -7143,7 +7156,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread 
*pmd,
if (!error || error == ENOSPC) {
dp_packet_batch_init_packet(, packet);
dp_netdev_execute_actions(pmd, , should_steal, flow,
-  actions->data, actions->size);
+  actions->data, actions->size, NULL);
} else if (should_steal) {
dp_packet_delete(packet);
COVERAGE_INC(datapath_drop_userspace_action_error);
@@ -7162,6 +7175,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
*packets_,
int type = nl_attr_type(a);
struct 

[ovs-dev] Goodbye from our Newsletter

2020-06-02 Thread IP Supply Pty Ltd
Goodbye from our Newsletter, sorry to see you go.

You have been unsubscribed from our newsletters.

This is the last email you will receive from us. Our newsletter system,
phpList,
will refuse to send you any further messages, without manual intervention
by our administrator.

If there is an error in this information, you can re-subscribe:
please go to https://lists.ipsupply.com.au/lists/?p=subscribe  and follow
the steps.

Thank you

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev