[ovs-dev] [PATCH v2] [python] Add monitor_cond_since support

2021-11-02 Thread Terry Wilson
Add support for monitor_cond_since / update3 to python-ovs to
allow more efficient reconnections when connecting to clustered
OVSDB servers.

Signed-off-by: Terry Wilson 
---
 python/ovs/db/idl.py | 55 +++-
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
index 87ee06cde..3a43dcc8a 100644
--- a/python/ovs/db/idl.py
+++ b/python/ovs/db/idl.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import collections
+import enum
 import functools
 import uuid
 
@@ -36,6 +37,7 @@ ROW_DELETE = "delete"
 
 OVSDB_UPDATE = 0
 OVSDB_UPDATE2 = 1
+OVSDB_UPDATE3 = 2
 
 CLUSTERED = "clustered"
 RELAY = "relay"
@@ -45,6 +47,12 @@ Notice = collections.namedtuple('Notice', ('event', 'row', 
'updates'))
 Notice.__new__.__defaults__ = (None,)  # default updates=None
 
 
+class Monitor(enum.IntEnum):
+monitor = OVSDB_UPDATE
+monitor_cond = OVSDB_UPDATE2
+monitor_cond_since = OVSDB_UPDATE3
+
+
 class Idl(object):
 """Open vSwitch Database Interface Definition Language (OVSDB IDL).
 
@@ -102,7 +110,13 @@ class Idl(object):
 IDL_S_SERVER_MONITOR_REQUESTED = 2
 IDL_S_DATA_MONITOR_REQUESTED = 3
 IDL_S_DATA_MONITOR_COND_REQUESTED = 4
-IDL_S_MONITORING = 5
+IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED = 5
+IDL_S_MONITORING = 6
+
+monitor_map = {
+Monitor.monitor: IDL_S_SERVER_MONITOR_REQUESTED,
+Monitor.monitor_cond: IDL_S_DATA_MONITOR_COND_REQUESTED,
+Monitor.monitor_cond_since: IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED}
 
 def __init__(self, remote, schema_helper, probe_interval=None,
  leader_only=True):
@@ -150,6 +164,7 @@ class Idl(object):
 self._last_seqno = None
 self.change_seqno = 0
 self.uuid = uuid.uuid1()
+self.last_id = str(uuid.UUID(int=0))
 
 # Server monitor.
 self._server_schema_request_id = None
@@ -264,8 +279,12 @@ class Idl(object):
 msg = self._session.recv()
 if msg is None:
 break
-
 if (msg.type == ovs.jsonrpc.Message.T_NOTIFY
+and msg.method == "update3"
+and len(msg.params) == 3):
+self.__parse_update(msg.params[2], OVSDB_UPDATE3)
+self.last_id = msg.params[1]
+elif (msg.type == ovs.jsonrpc.Message.T_NOTIFY
 and msg.method == "update2"
 and len(msg.params) == 2):
 # Database contents changed.
@@ -291,7 +310,10 @@ class Idl(object):
 self.change_seqno += 1
 self._monitor_request_id = None
 self.__clear()
-if self.state == self.IDL_S_DATA_MONITOR_COND_REQUESTED:
+if (self.state ==
+self.IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED):
+self.__parse_update(msg.result[2], OVSDB_UPDATE3)
+elif self.state == self.IDL_S_DATA_MONITOR_COND_REQUESTED:
 self.__parse_update(msg.result, OVSDB_UPDATE2)
 else:
 assert self.state == self.IDL_S_DATA_MONITOR_REQUESTED
@@ -368,11 +390,17 @@ class Idl(object):
 elif msg.type == ovs.jsonrpc.Message.T_NOTIFY and msg.id == "echo":
 # Reply to our echo request.  Ignore it.
 pass
+elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
+  self.state == (
+  self.IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED) and
+  self._monitor_request_id == msg.id):
+if msg.error == "unknown method":
+self.__send_monitor_request(Monitor.monitor_cond)
 elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
   self.state == self.IDL_S_DATA_MONITOR_COND_REQUESTED and
   self._monitor_request_id == msg.id):
 if msg.error == "unknown method":
-self.__send_monitor_request()
+self.__send_monitor_request(Monitor.monitor)
 elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
   self._server_schema_request_id is not None and
   self._server_schema_request_id == msg.id):
@@ -571,11 +599,13 @@ class Idl(object):
 self._db_change_aware_request_id = msg.id
 self._session.send(msg)
 
-def __send_monitor_request(self):
-if (self.state in [self.IDL_S_SERVER_MONITOR_REQUESTED,
-   self.IDL_S_INITIAL]):
+def __send_monitor_request(self, max_version=Monitor.monitor_cond_since):
+if self.state == self.IDL_S_INITIAL:
 self.state = self.IDL_S_DATA_MONITOR_COND_REQUESTED
 method = "monitor_cond"
+elif self.state == self.IDL_S_SERVER_MONITOR_REQUESTED:
+self.state = 

[ovs-dev] [PATCH] [python] Add monitor_cond_since support

2021-11-02 Thread Terry Wilson
Add support for monitor_cond_since / update3 to python-ovs to
allow more efficient reconnections when connecting to clustered
OVSDB servers.

Signed-off-by: Terry Wilson 
---
 python/ovs/db/idl.py | 60 ++--
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
index 87ee06cde..d9c9a8dc9 100644
--- a/python/ovs/db/idl.py
+++ b/python/ovs/db/idl.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import collections
+import enum
 import functools
 import uuid
 
@@ -36,6 +37,7 @@ ROW_DELETE = "delete"
 
 OVSDB_UPDATE = 0
 OVSDB_UPDATE2 = 1
+OVSDB_UPDATE3 = 2
 
 CLUSTERED = "clustered"
 RELAY = "relay"
@@ -45,6 +47,12 @@ Notice = collections.namedtuple('Notice', ('event', 'row', 
'updates'))
 Notice.__new__.__defaults__ = (None,)  # default updates=None
 
 
+class Monitor(enum.IntEnum):
+monitor = OVSDB_UPDATE
+monitor_cond = OVSDB_UPDATE2
+monitor_cond_since = OVSDB_UPDATE3
+
+
 class Idl(object):
 """Open vSwitch Database Interface Definition Language (OVSDB IDL).
 
@@ -102,7 +110,13 @@ class Idl(object):
 IDL_S_SERVER_MONITOR_REQUESTED = 2
 IDL_S_DATA_MONITOR_REQUESTED = 3
 IDL_S_DATA_MONITOR_COND_REQUESTED = 4
-IDL_S_MONITORING = 5
+IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED = 5
+IDL_S_MONITORING = 6
+
+monitor_map = {
+Monitor.monitor: IDL_S_SERVER_MONITOR_REQUESTED,
+Monitor.monitor_cond: IDL_S_DATA_MONITOR_COND_REQUESTED,
+Monitor.monitor_cond_since: IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED}
 
 def __init__(self, remote, schema_helper, probe_interval=None,
  leader_only=True):
@@ -150,6 +164,7 @@ class Idl(object):
 self._last_seqno = None
 self.change_seqno = 0
 self.uuid = uuid.uuid1()
+self.last_id = str(uuid.UUID(int=0))
 
 # Server monitor.
 self._server_schema_request_id = None
@@ -264,8 +279,12 @@ class Idl(object):
 msg = self._session.recv()
 if msg is None:
 break
-
 if (msg.type == ovs.jsonrpc.Message.T_NOTIFY
+and msg.method == "update3"
+and len(msg.params) == 3):
+self.__parse_update(msg.params[2], OVSDB_UPDATE3)
+self.last_id = msg.params[1]
+elif (msg.type == ovs.jsonrpc.Message.T_NOTIFY
 and msg.method == "update2"
 and len(msg.params) == 2):
 # Database contents changed.
@@ -291,7 +310,10 @@ class Idl(object):
 self.change_seqno += 1
 self._monitor_request_id = None
 self.__clear()
-if self.state == self.IDL_S_DATA_MONITOR_COND_REQUESTED:
+if (self.state ==
+self.IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED):
+self.__parse_update(msg.result[2], OVSDB_UPDATE3)
+elif self.state == self.IDL_S_DATA_MONITOR_COND_REQUESTED:
 self.__parse_update(msg.result, OVSDB_UPDATE2)
 else:
 assert self.state == self.IDL_S_DATA_MONITOR_REQUESTED
@@ -368,11 +390,17 @@ class Idl(object):
 elif msg.type == ovs.jsonrpc.Message.T_NOTIFY and msg.id == "echo":
 # Reply to our echo request.  Ignore it.
 pass
+elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
+  self.state == (
+  self.IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED) and
+  self._monitor_request_id == msg.id):
+if msg.error == "unknown method":
+self.__send_monitor_request(Monitor.monitor_cond)
 elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
   self.state == self.IDL_S_DATA_MONITOR_COND_REQUESTED and
   self._monitor_request_id == msg.id):
 if msg.error == "unknown method":
-self.__send_monitor_request()
+self.__send_monitor_request(Monitor.monitor)
 elif (msg.type == ovs.jsonrpc.Message.T_ERROR and
   self._server_schema_request_id is not None and
   self._server_schema_request_id == msg.id):
@@ -571,11 +599,13 @@ class Idl(object):
 self._db_change_aware_request_id = msg.id
 self._session.send(msg)
 
-def __send_monitor_request(self):
-if (self.state in [self.IDL_S_SERVER_MONITOR_REQUESTED,
-   self.IDL_S_INITIAL]):
+def __send_monitor_request(self, max_version=Monitor.monitor_cond_since):
+if self.state == self.IDL_S_INITIAL:
 self.state = self.IDL_S_DATA_MONITOR_COND_REQUESTED
 method = "monitor_cond"
+elif self.state == self.IDL_S_SERVER_MONITOR_REQUESTED:
+self.state = 

[ovs-dev] [PATCH 0/1] [python] Add monitor_cond_since support

2021-11-02 Thread Terry Wilson
I added monitor_cond_since / update3 support to the python IDL. I
think ultimately it would be good to refactor a lot of the code to be
a little more ovsdb_cs-like, but I'd rather come back and do that
without also adding features.

Terry Wilson (1):
  [python] Add monitor_cond_since support

 python/ovs/db/idl.py | 61 ++--
 1 file changed, 48 insertions(+), 13 deletions(-)

-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2] ovn-northd: Virtual port add ND/ARP responder flows for IPv6 VIPs.

2021-11-02 Thread Numan Siddique
On Wed, Sep 22, 2021 at 1:58 PM  wrote:
>
> From: Mohammad Heib 
>
> currently ovn-northd only handles virtual ports with VIP IPv4 and ignores
> virtual ports with VIP IPv6.
>
> This patch adds support for virtual ports with VIP IPv6 by adding
> lflows to the lsp_in_arp_rsp logical switch pipeline.
> Those lflows handle Neighbor Solicitations and Neighbor Advertisement requests
> that target the virtual port VIPs and bind the virtual port to the desired 
> VIF.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2003091
> Fixes: 054f4c85c413 ("Add a new logical switch port type - 'virtual'")
> Signed-off-by: Mohammad Heib 

Hi Mohammad,

Thanks for the patch.

I didn't do a thorough review.  I have just a few comments.

You need to update the documentation in northd/ovn-northd.8.xml about
the new flows added.

PSB for a couple of  small nit comments.

Thanks
Numan

> ---
>  northd/northd.c | 80 ++---
>  tests/ovn.at| 66 
>  2 files changed, 116 insertions(+), 30 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index cf2467fe1..b934819fc 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -7382,16 +7382,28 @@ build_lswitch_arp_nd_responder_known_ips(struct 
> ovn_port *op,
>   *  - ARP reply from the virtual ip which belongs to a logical
>   *port of type 'virtual' and bind that port.
>   * */
> -ovs_be32 ip;
> +struct in6_addr ip;
> +
>  const char *virtual_ip = smap_get(>nbsp->options,
>"virtual-ip");
>  const char *virtual_parents = smap_get(>nbsp->options,
> "virtual-parents");
> -if (!virtual_ip || !virtual_parents ||
> -!ip_parse(virtual_ip, )) {
> +if (!virtual_ip || !virtual_parents) {
>  return;
>  }
>
> +bool is_ipv4 = strchr(virtual_ip, '.') ? true : false;
> +if (is_ipv4) {
> +ovs_be32 ipv4;
> +if (!ip_parse(virtual_ip, )) {
> + return;
> +}
> +} else{
> +if (!ipv6_parse(virtual_ip, )) {
> + return;
> +}
> +}
> +
>  char *tokstr = xstrdup(virtual_parents);
>  char *save_ptr = NULL;
>  char *vparent;
> @@ -7404,13 +7416,31 @@ build_lswitch_arp_nd_responder_known_ips(struct 
> ovn_port *op,
>  continue;
>  }
>
> -ds_clear(match);
> -ds_put_format(match, "inport == \"%s\" && "
> -  "((arp.op == 1 && arp.spa == %s && "
> -  "arp.tpa == %s) || (arp.op == 2 && "
> -  "arp.spa == %s))",
> -  vparent, virtual_ip, virtual_ip,
> -  virtual_ip);
> +if (is_ipv4) {
> +ds_clear(match);
> +ds_put_format(match, "inport == \"%s\" && "
> +"((arp.op == 1 && arp.spa == %s && "
> +"arp.tpa == %s) || (arp.op == 2 && "
> +"arp.spa == %s))",
> +vparent, virtual_ip, virtual_ip,
> +virtual_ip);
> +} else{
> +struct ipv6_netaddr na;
> +/* Find VIP multicast group */
> +in6_addr_solicited_node(_addr, );
> +inet_ntop(AF_INET6, _addr, na.sn_addr_s, sizeof 
> na.sn_addr_s);
> +
> +ds_clear(match);
> +ds_put_format(match, "inport == \"%s\" && "
> +"((nd_ns && ip6.dst == {%s, %s} && nd.target == 
> %s) ||"
> +"(nd_na && nd.target == %s))",
> +vparent,
> +virtual_ip,
> +na.sn_addr_s,
> +virtual_ip,
> +virtual_ip);
> +}
> +
>  ds_clear(actions);
>  ds_put_format(actions,
>  "bind_vport(%s, inport); "
> @@ -11030,17 +11060,29 @@ build_arp_resolve_flows_for_lrouter_port(
>   * 00:00:00:00:00:00 and advance to next table so that ARP is
>   * resolved by router pipeline using the arp{} action.
>   * The MAC_Binding entry for the virtual ip might be invalid. */
> -ovs_be32 ip;
>
>  const char *vip = smap_get(>nbsp->options,
> "virtual-ip");
>  const char *virtual_parents = smap_get(>nbsp->options,
> "virtual-parents");
> -if (!vip || 

Re: [ovs-dev] [PATCH ovn] utilities: nbctl: add --may-exist parameter to meter-add cmd

2021-11-02 Thread Numan Siddique
On Wed, Oct 27, 2021 at 10:54 AM Lorenzo Bianconi
 wrote:
>
> Add --may-exist support to meter-add command in order to update
> meter parameters if it has already been created
>
> Signed-off-by: Lorenzo Bianconi 
> ---
>  tests/ovn-nbctl.at|  6 +++--
>  tests/ovn-northd.at   | 11 +-
>  utilities/ovn-nbctl.c | 51 ---
>  3 files changed, 43 insertions(+), 25 deletions(-)
>
> diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
> index 9b80ae410..a8946fef8 100644
> --- a/tests/ovn-nbctl.at
> +++ b/tests/ovn-nbctl.at
> @@ -371,6 +371,8 @@ dnl Add duplicate meter name
>  AT_CHECK([ovn-nbctl meter-add meter1 drop 10 kbps], [1], [], [stderr])
>  AT_CHECK([grep 'already exists' stderr], [0], [ignore])
>
> +AT_CHECK([ovn-nbctl --may-exist meter-add meter1 drop 11 kbps])
> +
>  dnl Add reserved meter name
>  AT_CHECK([ovn-nbctl meter-add __meter1 drop 10 kbps], [1], [], [stderr])
>  AT_CHECK([grep 'reserved' stderr], [0], [ignore])
> @@ -396,7 +398,7 @@ AT_CHECK([ovn-nbctl meter-add meter5 drop 10 10001011 
> kbps], [1], [],
>
>  AT_CHECK([ovn-nbctl meter-list], [0], [dnl
>  meter1: bands:
> -  drop: 10 kbps
> +  drop: 11 kbps
>  meter2: bands:
>drop: 3 kbps, 2 kb burst
>  meter3: bands:
> @@ -409,7 +411,7 @@ dnl Delete a single meter.
>  AT_CHECK([ovn-nbctl meter-del meter2])
>  AT_CHECK([ovn-nbctl meter-list], [0], [dnl
>  meter1: bands:
> -  drop: 10 kbps
> +  drop: 11 kbps
>  meter3: bands:
>drop: 100 kbps, 200 kb burst
>  meter4: (fair) bands:
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index e2b9924b6..ca1b8a117 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -3235,7 +3235,16 @@ event-elb: meter0
>
>  AT_CHECK([ovn-sbctl list logical_flow | grep trigger_event -A 2 | grep -q 
> meter0])
>
> -check ovn-nbctl --wait=hv meter-add meter1 drop 200 pktps 10
> +check ovn-nbctl --wait=hv meter-add meter1 drop 300 pktps 10
> +AT_CHECK([ovn-nbctl meter-list |grep meter1 -A 1], [0], [dnl
> +meter1: bands:
> +  drop: 300 pktps, 10 packet burst
> +])
> +check ovn-nbctl --wait=hv --may-exist meter-add meter1 drop 200 pktps 10
> +AT_CHECK([ovn-nbctl meter-list |grep meter1 -A 1], [0], [dnl
> +meter1: bands:
> +  drop: 200 pktps, 10 packet burst
> +])
>  check ovn-nbctl --wait=hv lr-copp-add r0 arp meter1
>  AT_CHECK([ovn-nbctl lr-copp-list r0], [0], [dnl
>  arp: meter1
> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
> index b04b24d4b..1f71cae46 100644
> --- a/utilities/ovn-nbctl.c
> +++ b/utilities/ovn-nbctl.c
> @@ -2585,19 +2585,6 @@ meter_cmp(const void *meter1_, const void *meter2_)
>  return strcmp(meter1->name, meter2->name);
>  }
>
> -static void
> -nbctl_pre_meter_list(struct ctl_context *ctx)
> -{
> -ovsdb_idl_add_column(ctx->idl, _meter_col_name);
> -ovsdb_idl_add_column(ctx->idl, _meter_col_fair);
> -ovsdb_idl_add_column(ctx->idl, _meter_col_bands);
> -ovsdb_idl_add_column(ctx->idl, _meter_col_unit);
> -
> -ovsdb_idl_add_column(ctx->idl, _meter_band_col_action);
> -ovsdb_idl_add_column(ctx->idl, _meter_band_col_rate);
> -ovsdb_idl_add_column(ctx->idl, _meter_band_col_burst_size);
> -}
> -

Hi Lorenzo,

Thanks for the patch.  I applied this patch to the main branch with one change.

I kept the function nbctl_pre_meter_list() AS IS even though
nbctl_pre_meter_add()
is exactly the same.   I felt it's a bit odd to call
nbctl_pre_meter_add() in the meter-list pre check command.

Thanks
Numan



>  static void
>  nbctl_meter_list(struct ctl_context *ctx)
>  {
> @@ -2650,18 +2637,34 @@ static void
>  nbctl_pre_meter_add(struct ctl_context *ctx)
>  {
>  ovsdb_idl_add_column(ctx->idl, _meter_col_name);
> +ovsdb_idl_add_column(ctx->idl, _meter_col_fair);
> +ovsdb_idl_add_column(ctx->idl, _meter_col_bands);
> +ovsdb_idl_add_column(ctx->idl, _meter_col_unit);
> +
> +ovsdb_idl_add_column(ctx->idl, _meter_band_col_action);
> +ovsdb_idl_add_column(ctx->idl, _meter_band_col_rate);
> +ovsdb_idl_add_column(ctx->idl, _meter_band_col_burst_size);
>  }
>
>  static void
>  nbctl_meter_add(struct ctl_context *ctx)
>  {
> -const struct nbrec_meter *meter;
> +const struct nbrec_meter *meter = NULL, *iter;
> +struct nbrec_meter_band *band = NULL;
>
>  const char *name = ctx->argv[1];
> -NBREC_METER_FOR_EACH (meter, ctx->idl) {
> -if (!strcmp(meter->name, name)) {
> -ctl_error(ctx, "meter with name \"%s\" already exists", name);
> -return;
> +NBREC_METER_FOR_EACH (iter, ctx->idl) {
> +if (!strcmp(iter->name, name)) {
> +if (!shash_find(>options, "--may-exist")) {
> +ctl_error(ctx, "meter with name \"%s\" already exists", 
> name);
> +return;
> +} else {
> +meter = iter;
> +if (meter->n_bands) {
> +band = meter->bands[0];
> +}
> +break;
> +}
>  }
>  

Re: [ovs-dev] [PATCH ovn] ovn.at: Fix flaky test "router - check packet length - icmp defrag"

2021-11-02 Thread Numan Siddique
On Tue, Oct 19, 2021 at 11:58 AM Xavier Simonart  wrote:
>
> This test was failing randomly, probably on slow systems.
> It was waiting for gratuitous ARPs, which were potentially generated earlier.
>
> Fixes: 1c9e46ab ("northd: add check_pkt_larger lflows for ingress traffic")
> Signed-off-by: Xavier Simonart 

Thanks.  Applied.

Numan

> ---
>  tests/ovn.at | 23 +++
>  1 file changed, 11 insertions(+), 12 deletions(-)
>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 05eac4e5f..9483e4f75 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -16895,7 +16895,6 @@ test_ip_packet_larger() {
>  expected=${expected}
>  expected=${expected}
>  echo $expected > br_phys_n1.expected
> -echo $gw_ip_garp >> br_phys_n1.expected
>  else
>  src_ip=`ip_to_hex 10 0 0 1`
>  dst_ip=`ip_to_hex 10 0 0 3`
> @@ -16916,7 +16915,9 @@ test_ip_packet_larger() {
>  check as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
>
>  if test $mtu -ge $mtu_needed; then
> -OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br_phys_n1.expected])
> +# Ignore gratuitous ARPs; there is no guarantee to get them here as 
> they might
> +# already have been generated
> +OVN_CHECK_PACKETS_CONTAIN([hv1/br-phys_n1-tx.pcap], 
> [br_phys_n1.expected])
>  $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap  > pkts
>  # hv1/vif1-tx.pcap can receive the GARP packet generated by 
> ovn-controller
>  # for the gateway router port. So ignore this packet.
> @@ -16953,7 +16954,9 @@ test_ip_packet_larger_ext() {
>  orig_packet_l3=${orig_packet_l3}
>  packet=${packet}${orig_packet_l3}
>
> -
> gw_ip_garp=202012130806000108000604000120201213aca80064aca80064
> +# A Gratuitous ARP (as shown next line) might be transmitted, but
> +# it is also possible that it was transmitted earlier, so do not wait 
> for it.
> +# 
> optional_gw_ip_garp=202012130806000108000604000120201213aca80064aca80064
>  
> ext_ip_garp=0012af11080600010800060400010012af11aca80004aca80004
>
>  src_ip=`ip_to_hex 172 168 0 100`
> @@ -16967,8 +16970,6 @@ test_ip_packet_larger_ext() {
>  icmp_reply=${icmp_reply}${orig_packet_l3}
>  echo $icmp_reply > br-phys_n1.expected
>
> -echo $gw_ip_garp >> br-phys_n1.expected
> -
>  as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
>  as hv1 reset_pcap_file hv1-vif1 hv1/vif1
>
> @@ -16977,7 +16978,7 @@ test_ip_packet_larger_ext() {
>  # Send packet from sw0-port1 to outside
>  check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
>
> -OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +OVN_CHECK_PACKETS_CONTAIN([hv1/br-phys_n1-tx.pcap], 
> [br-phys_n1.expected])
>  }
>
>  # IPv6 outgoing traffic generated inside the cluster
> @@ -17053,9 +17054,7 @@ test_ip6_packet_larger_ext() {
>  local ip6_hdr=60583afe${ipv6_src}${ipv6_dst}
>  local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f1${payload}
>
> -local 
> ns=202012130806000108000604000120201213aca80064aca80064
> -echo $ns > br-phys_n1.expected
> -
> +# Some ** ARP ** packets might still be received - ignore them
>  as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
>  as hv1 reset_pcap_file hv1-vif1 hv1/vif1
>
> @@ -17078,9 +17077,9 @@ test_ip6_packet_larger_ext() {
>  outer_packet=${outer_ip6}${outer_icmp6_and_payload}
>
>  icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
> -echo $icmp6_reply >> br-phys_n1.expected
> +echo $icmp6_reply > br-phys_n1.expected
>
> -OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +OVN_CHECK_PACKETS_CONTAIN([hv1/br-phys_n1-tx.pcap], 
> [br-phys_n1.expected])
>  }
>
>  wait_for_ports_up
> @@ -17200,7 +17199,7 @@ OVS_WAIT_FOR_OUTPUT([
>  grep "check_pkt_larger(118)" ext-br-int-gw-flows-100 | wc -l], [0], [3
>  ])
>
> -AS_BOX([testing ingress traffic mtu 100 for gw router - IPv6])
> +AS_BOX([testing ingress traffic mtu 100 for gw router - IPv4])
>  test_ip_packet_larger_ext 100
>
>  AS_BOX([testing ingress traffic mtu 100 for gw router - IPv6])
> --
> 2.31.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] tests: Fix requested-chassis localport test

2021-11-02 Thread Numan Siddique
On Tue, Nov 2, 2021 at 12:40 PM Dumitru Ceara  wrote:
>
> On 11/2/21 2:31 PM, Frode Nordahl wrote:
> > The ovn-controller requested-chassis localport test currently
> > makes a change to the NB DB with --wait=sb, and subsequently does
> > an immediate check for change on a chassis.  This does not always
> > succeed.
> >
> > Update the test to use `--wait=hv` instead.
> >
> > Fixes: ad77db239d9a ("controller: Make use of 
> > Port_Binding:requested_chassis.")
> > Signed-off-by: Frode Nordahl 
> > ---
>
> Hi Frode,
>
> This looks good to me, thanks!
>
> Acked-by: Dumitru Ceara 

Thanks.   Applied.

Numan

>
> Regards,
> Dumitru
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 ovn 1/3] CoPP: add self-test for icmp{4, 6}_error controller action

2021-11-02 Thread Numan Siddique
On Fri, Oct 29, 2021 at 4:48 AM Mark Gray  wrote:
>
> On 21/10/2021 23:18, Lorenzo Bianconi wrote:
> > Introduce CoPP selftest for icmp{4,6}_error controller action
> > Remove sleep in CoPP test and rely on tcpdump "-l" option.
> >
> > Signed-off-by: Lorenzo Bianconi 
> > ---
> >  tests/ovn-northd.at | 23 +++
> >  tests/system-ovn.at | 43 ---
> >  2 files changed, 51 insertions(+), 15 deletions(-)
> >
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index 544820764..3ff0029f8 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -3249,6 +3249,29 @@ AT_CHECK([ovn-nbctl lr-copp-list r0], [0], [dnl
> >
> >  AT_CHECK([ovn-sbctl list logical_flow | grep arp -A 2 | grep -q 
> > meter1],[1])
> >
> > +check ovn-nbctl --wait=hv meter-add meter2 drop 400 pktps 10
> > +check ovn-nbctl --wait=hv lr-copp-add r0 icmp4-error meter2
> > +AT_CHECK([ovn-nbctl lr-copp-list r0], [0], [dnl
> > +icmp4-error: meter2
> > +])
> > +
> > +AT_CHECK([ovn-sbctl list logical_flow | grep icmp4 -A 2 | grep -q meter2])
> > +
> > +check ovn-nbctl --wait=hv lr-copp-del r0 icmp4-error
> > +AT_CHECK([ovn-nbctl lr-copp-list r0], [0], [dnl
> > +])
> > +
> > +check ovn-nbctl --wait=hv lr-copp-add r0 icmp6-error meter2
> > +AT_CHECK([ovn-nbctl lr-copp-list r0], [0], [dnl
> > +icmp6-error: meter2
> > +])
> > +
> > +AT_CHECK([ovn-sbctl list logical_flow | grep icmp6 -A 2 | grep -q meter2])
> > +
> > +check ovn-nbctl --wait=hv lr-copp-del r0 icmp6-error
> > +AT_CHECK([ovn-nbctl lr-copp-list r0], [0], [dnl
> > +])
> > +
> >  check ovn-nbctl --wait=hv ls-copp-del sw1 event-elb
> >  AT_CHECK([ovn-nbctl ls-copp-list sw1], [0], [dnl
> >  ])
> > diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> > index 345384223..d003843c3 100644
> > --- a/tests/system-ovn.at
> > +++ b/tests/system-ovn.at
> > @@ -6669,7 +6669,7 @@ check ovn-nbctl lsp-add public public1 \
> >  -- lsp-set-type public1 localnet \
> >  -- lsp-set-options public1 network_name=phynet
> >
> > -NS_EXEC([sw01], [tcpdump -n -i sw01 icmp -Q in > reject.pcap &])
> > +NS_EXEC([sw01], [tcpdump -l -n -i sw01 icmp -Q in > reject.pcap &])
> >  check ovn-nbctl meter-add acl-meter drop 1 pktps 0
> >  check ovn-nbctl --wait=hv ls-copp-add sw0 reject acl-meter
> >  check ovn-nbctl acl-add sw0 from-lport 1002 'inport == "sw01" && ip && 
> > udp' reject
> > @@ -6679,37 +6679,33 @@ reject: acl-meter
> >  ])
> >
> >  ip netns exec sw01 scapy -H <<-EOF
> > -p = IP(src="192.168.1.2", dst="192.168.1.1")/ UDP(dport = 12345) / 
> > Raw(b"X"*64)
> > +p = IP(src="192.168.1.2", dst="192.168.1.1") / UDP(dport = 12345) / 
> > Raw(b"X"*64)
> >  send (p, iface='sw01', loop = 0, verbose = 0, count = 20)
> >  EOF
> >
> > -sleep 2
> > -kill $(pidof tcpdump)
> > -
> >  # 1pps + 1 burst size
> >  OVS_WAIT_UNTIL([
> >  n_reject=$(grep unreachable reject.pcap | wc -l)
> >  test "${n_reject}" = "2"
> >  ])
> > +kill $(pidof tcpdump)
> >
> >  rm -f reject.pcap
> > -NS_EXEC([sw01], [tcpdump -n -i sw01 icmp -Q in > reject.pcap &])
> > +NS_EXEC([sw01], [tcpdump -l -n -i sw01 icmp -Q in > reject.pcap &])
> >  check ovn-nbctl --wait=hv ls-copp-del sw0 reject
> >
> >  ip netns exec sw01 scapy -H <<-EOF
> > -p = IP(src="192.168.1.2", dst="192.168.1.1")/ UDP(dport = 12345) / 
> > Raw(b"X"*64)
> > +p = IP(src="192.168.1.2", dst="192.168.1.1") / UDP(dport = 12345) / 
> > Raw(b"X"*64)
> >  send (p, iface='sw01', loop = 0, verbose = 0, count = 20)
> >  EOF
> >
> > -sleep 2
> > -kill $(pidof tcpdump)
> > -
> >  OVS_WAIT_UNTIL([
> >  n_reject=$(grep unreachable reject.pcap | wc -l)
> >  test "${n_reject}" = "20"
> >  ])
> > +kill $(pidof tcpdump)
> >
> > -NS_EXEC([server], [tcpdump -n -i s1 arp[[24:4]]=0xac100164 > arp.pcap &])
> > +NS_EXEC([server], [tcpdump -l -n -i s1 arp[[24:4]]=0xac100164 > arp.pcap 
> > &])
> >  check ovn-nbctl meter-add arp-meter drop 1 pktps 0
> >  check ovn-nbctl --wait=hv lr-copp-add R1 arp-resolve arp-meter
> >  AT_CHECK([ovn-nbctl lr-copp-list R1], [0], [dnl
> > @@ -6717,18 +6713,35 @@ arp-resolve: arp-meter
> >  ])
> >
> >  ip netns exec sw01 scapy -H <<-EOF
> > -p = IP(src="192.168.1.2", dst="172.16.1.100")/ TCP(dport = 80, flags="S") 
> > / Raw(b"X"*64)
> > +p = IP(src="192.168.1.2", dst="172.16.1.100") / TCP(dport = 80, flags="S") 
> > / Raw(b"X"*64)
> >  send (p, iface='sw01', loop = 0, verbose = 0, count = 100)
> >  EOF
> >
> > -sleep 2
> > -kill $(pidof tcpdump)
> > -
> >  # 1pps + 1 burst size
> >  OVS_WAIT_UNTIL([
> >  n_arp=$(grep ARP arp.pcap | wc -l)
> >  test "${n_arp}" = "2"
> >  ])
> > +kill $(pidof tcpdump)
> > +
> > +check ovn-nbctl meter-add icmp-meter drop 1 pktps 0
> > +check ovn-nbctl --wait=hv lr-copp-add R1 icmp4-error icmp-meter
> > +AT_CHECK([ovn-nbctl lr-copp-list R1 |grep icmp4-error], [0], [dnl
> > +icmp4-error: icmp-meter
> > +])
> > +
> > +NS_EXEC([sw01], [tcpdump -l -n -i sw01 icmp > icmp.pcap &])
> > +ip netns exec sw01 scapy -H <<-EOF
> > +p = 

Re: [ovs-dev] [PATCH] ovsdb-idl: Add memory report function.

2021-11-02 Thread Dumitru Ceara
On 10/14/21 1:46 PM, Ilya Maximets wrote:
> Added new function to return memory usage statistics for database
> objects inside IDL.  Statistics similar to what ovsdb-server reports.
> Not counting _Server database as it should be small, hence it isn't
> worth adding extra code to the ovsdb-cs module.  Can be added later
> if needed.
> 
> ovs-vswitchd is a user in OVS, but this API will be mostly useful for
> OVN daemons.
> 
> Signed-off-by: Ilya Maximets 
> ---
>  lib/ovsdb-idl.c   | 24 
>  lib/ovsdb-idl.h   |  3 +++
>  vswitchd/bridge.c |  2 ++
>  3 files changed, 29 insertions(+)
> 
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index 383a601f6..b22492d5e 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -42,6 +42,7 @@
>  #include "openvswitch/poll-loop.h"
>  #include "openvswitch/shash.h"
>  #include "skiplist.h"
> +#include "simap.h"
>  #include "sset.h"
>  #include "svec.h"
>  #include "util.h"
> @@ -465,6 +466,29 @@ ovsdb_idl_wait(struct ovsdb_idl *idl)
>  ovsdb_cs_wait(idl->cs);
>  }
>  
> +/* Returns memory usage statistics. */
> +void
> +ovsdb_idl_get_memory_usage(struct ovsdb_idl *idl, struct simap *usage)
> +{
> +unsigned int cells = 0;
> +
> +if (!idl) {
> +return;
> +}
> +
> +for (size_t i = 0; i < idl->class_->n_tables; i++) {
> +struct ovsdb_idl_table *table = >tables[i];
> +unsigned int n_columns = shash_count(>columns);

Nit: even though this is also constant time we already have the number
of columns stored in the table class, we could instead do:

unsigned int n_columns = table->class_->n_columns;

I guess this can be fixed up at apply time, therefore:

Acked-by: Dumitru Ceara 

Thanks,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-21.09 0/3] Improve Load Balancer performance.

2021-11-02 Thread Dumitru Ceara
On 11/2/21 8:30 PM, Dumitru Ceara wrote:
> This series backports the load balancer performance improvements to
> stable branch-21.09.
> 
> - patch 1/3 changes the way ARP responder flows are generated for load
>   balancer VIPs, using an address set, making SB updates incremental
>   and also reducing memory usage on the SB server side.
> - patch 2/3 introduces a new feature, Load_Balancer_Group, which
>   simplifies load balancer configuration in large scale scenarios and
>   at the same time improves performance due to the large reduction in
>   database refereces between rows.
> - patch 3/3 is a follow up fix of a bug originally introduced by
>   patch 1/3.
> 
> Usually new features and performance fixes are not to be backported to
> stable branches, however due to the (mis)alignment of upstream and
> downstream release schedules, and due to the fact that ovn-kubernetes
> requires a database table to be part of a schema that's tagged in a
> release in ovn-org/ovn repo, if load balancer groups would be available
> only starting with v21.12.0 then downstream ovn-kubernetes (and OpenShift)
> will not be able to consume the feature for two release cycles (~6 months).
> 
> On the other hand the feature itself is quite contained, doesn't break
> backwards compatibility, and, when used, hugely improves some CMSs
> (OpenShift) load balancer use cases.
> 
> Dumitru Ceara (3):
>   northd: Use address sets for ARP responder flows for VIPs.
>   nb: Add support for Load_Balancer_Groups.
>   northd: Always generate valid load balancer address set names.
> 
>  NEWS  |2 
>  northd/northd.c   |  334 
> +
>  ovn-nb.ovsschema  |   24 +++-
>  ovn-nb.xml|   37 +
>  tests/ovn-northd.at   |  293 ++-
>  utilities/ovn-nbctl.c |3 
>  6 files changed, 517 insertions(+), 176 deletions(-)
> 

Adding OVN maintainers explicitly in order to discuss the possibility of
including this in a v21.09.1.

Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-21.09 3/3] northd: Always generate valid load balancer address set names.

2021-11-02 Thread Dumitru Ceara
Address set names have a fixed format ([a-zA-Z_.][a-zA-Z_.0-9]*) while
logical router names are free form.  This means we cannot directly embed
the logical router name inside the address set name.

To make sure that address set names are unique and valid use instead
the Datapath_Binding.tunnel_key value which is guaranteed to be unique
across logical routers.

Fixes: c1e3896c0a39 ("northd: Use address sets for ARP responder flows for 
VIPs.")
Signed-off-by: Dumitru Ceara 
(cherry picked from commit beed00c9206d439c89da3bcaf924163abf606215)
---
 northd/northd.c |   39 +++
 tests/ovn-northd.at |   27 +++
 2 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index c1d83548e..d237f693d 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -878,6 +878,37 @@ lr_has_lb_vip(struct ovn_datapath *od)
 return false;
 }
 
+/* Builds a unique address set compatible name ([a-zA-Z_.][a-zA-Z_.0-9]*)
+ * for the router's load balancer VIP address set, combining the logical
+ * router's datapath tunnel key and address family.
+ *
+ * Also prefixes the name with 'prefix'.
+ */
+static char *
+lr_lb_address_set_name_(const struct ovn_datapath *od, const char *prefix,
+int addr_family)
+{
+ovs_assert(od->nbr);
+return xasprintf("%s_rtr_lb_%"PRIu32"_ip%s", prefix, od->tunnel_key,
+ addr_family == AF_INET ? "4" : "6");
+}
+
+/* Builds the router's load balancer VIP address set name. */
+static char *
+lr_lb_address_set_name(const struct ovn_datapath *od, int addr_family)
+{
+return lr_lb_address_set_name_(od, "", addr_family);
+}
+
+/* Builds a string that refers to the the router's load balancer VIP address
+ * set name, that is: $.
+ */
+static char *
+lr_lb_address_set_ref(const struct ovn_datapath *od, int addr_family)
+{
+return lr_lb_address_set_name_(od, "$", addr_family);
+}
+
 static void
 init_lb_for_datapath(struct ovn_datapath *od)
 {
@@ -12040,7 +12071,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
 }
 
 /* Create a single ARP rule for all IPs that are used as VIPs. */
-char *lb_ips_v4_as = xasprintf("$%s_lb_ip4", op->od->nbr->name);
+char *lb_ips_v4_as = lr_lb_address_set_ref(op->od, AF_INET);
 build_lrouter_arp_flow(op->od, op, lb_ips_v4_as,
REG_INPORT_ETH_ADDR,
match, false, 90, NULL, lflows);
@@ -12056,7 +12087,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
 }
 
 /* Create a single ND rule for all IPs that are used as VIPs. */
-char *lb_ips_v6_as = xasprintf("$%s_lb_ip6", op->od->nbr->name);
+char *lb_ips_v6_as = lr_lb_address_set_ref(op->od, AF_INET6);
 build_lrouter_nd_flow(op->od, op, "nd_na", lb_ips_v6_as, NULL,
   REG_INPORT_ETH_ADDR, match, false, 90,
   NULL, lflows, meter_groups);
@@ -13687,7 +13718,7 @@ sync_address_sets(struct northd_context *ctx, struct 
hmap *datapaths)
 }
 
 if (sset_count(>lb_ips_v4)) {
-char *ipv4_addrs_name = xasprintf("%s_lb_ip4", od->nbr->name);
+char *ipv4_addrs_name = lr_lb_address_set_name(od, AF_INET);
 const char **ipv4_addrs = sset_array(>lb_ips_v4);
 
 sync_address_set(ctx, ipv4_addrs_name, ipv4_addrs,
@@ -13697,7 +13728,7 @@ sync_address_sets(struct northd_context *ctx, struct 
hmap *datapaths)
 }
 
 if (sset_count(>lb_ips_v6)) {
-char *ipv6_addrs_name = xasprintf("%s_lb_ip6", od->nbr->name);
+char *ipv6_addrs_name = lr_lb_address_set_name(od, AF_INET6);
 const char **ipv6_addrs = sset_array(>lb_ips_v6);
 
 sync_address_set(ctx, ipv6_addrs_name, ipv6_addrs,
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 166723502..3f21edee9 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1701,10 +1701,13 @@ ovn-nbctl lr-lb-add lr lb4
 ovn-nbctl lr-lb-add lr lb5
 
 ovn-nbctl --wait=sb sync
+lr_key=$(fetch_column sb:datapath_binding tunnel_key external_ids:name=lr)
+lb_as_v4="_rtr_lb_${lr_key}_ip4"
+lb_as_v6="_rtr_lb_${lr_key}_ip6"
 
 # Check generated VIP address sets.
-check_column '192.168.2.1 192.168.2.4 192.168.2.5 192.168.2.6' Address_Set 
addresses name=lr_lb_ip4
-check_column 'fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set 
addresses name=lr_lb_ip6
+check_column '192.168.2.1 192.168.2.4 192.168.2.5 192.168.2.6' Address_Set 
addresses name=${lb_as_v4}
+check_column 'fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set 
addresses name=${lb_as_v6}
 
 # Ingress router port ETH address is stored in lr_in_admission.
 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_admission.*xreg0\[[0..47\]]" | 
sort], [0], [dnl
@@ -1723,7 +1726,7 @@ action=(xreg0[[0..47]] = 00:00:00:00:01:00; 

[ovs-dev] [PATCH ovn branch-21.09 2/3] nb: Add support for Load_Balancer_Groups.

2021-11-02 Thread Dumitru Ceara
For deployments when a large number of load balancers are associated
to multiple logical switches/routers, introduce syntactic sugar
in the OVN_Northbound database (Load_Balancer_Groups) to simplify
configuration.

Instead of associating N Load_Balancer records to M Logical_Switches
(M x N references in the NB database) we can instead create a single
Load_Balancer_Group record, associate all N Load_Balancer records to
it, and associate it to all M Logical_Switches (in total M + N
references in the NB database).

This makes it easier for the CMS to configure Load Balancers (e.g., in
the ovn-kubernetes use case cluster load balancers are applied to all
node logical switches and node logical gateway routers) but also
drastically improves performance on the ovsdb-server NB side.  This
happens thanks to the fact that ovsdb-server now has to track M times
less references.

With a micro benchmark which creates 120 logical switches and
associates 1000 load balancers to them (with ovn-nbctl daemon) we
measure:

 CPU Time NB DB  CPU Time ovn-nbctl
  -
  Plain LB: 30s 35s
  LB Groups: 1s  2s

Reported-at: https://bugzilla.redhat.com/2001528
Signed-off-by: Dumitru Ceara 
(cherry picked from commit f6aba21c9de8952beccf7ee7e98cfa28618f1edf)
---
 NEWS  |2 
 northd/northd.c   |  242 +++-
 ovn-nb.ovsschema  |   24 -
 ovn-nb.xml|   37 ++-
 tests/ovn-northd.at   |  246 +
 utilities/ovn-nbctl.c |3 +
 6 files changed, 424 insertions(+), 130 deletions(-)

diff --git a/NEWS b/NEWS
index 8fce36482..63d765aa4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,7 @@
 OVN v21.09.1 - xx xxx 
 --
+  - Added Load_Balancer_Group support, which simplifies large scale
+configurations of load balancers.
 
 OVN v21.09.0 - 01 Oct 2021
 --
diff --git a/northd/northd.c b/northd/northd.c
index 502c263cc..c1d83548e 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -827,17 +827,74 @@ static void destroy_router_enternal_ips(struct 
ovn_datapath *od)
 sset_destroy(>external_ips);
 }
 
+static bool
+lb_has_vip(const struct nbrec_load_balancer *lb)
+{
+return !smap_is_empty(>vips);
+}
+
+static bool
+lb_group_has_vip(const struct nbrec_load_balancer_group *lb_group)
+{
+for (size_t i = 0; i < lb_group->n_load_balancer; i++) {
+if (lb_has_vip(lb_group->load_balancer[i])) {
+return true;
+}
+}
+return false;
+}
+
+static bool
+ls_has_lb_vip(struct ovn_datapath *od)
+{
+for (size_t i = 0; i < od->nbs->n_load_balancer; i++) {
+if (lb_has_vip(od->nbs->load_balancer[i])) {
+return true;
+}
+}
+
+for (size_t i = 0; i < od->nbs->n_load_balancer_group; i++) {
+if (lb_group_has_vip(od->nbs->load_balancer_group[i])) {
+return true;
+}
+}
+return false;
+}
+
+static bool
+lr_has_lb_vip(struct ovn_datapath *od)
+{
+for (size_t i = 0; i < od->nbr->n_load_balancer; i++) {
+if (lb_has_vip(od->nbr->load_balancer[i])) {
+return true;
+}
+}
+
+for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) {
+if (lb_group_has_vip(od->nbr->load_balancer_group[i])) {
+return true;
+}
+}
+return false;
+}
+
 static void
-init_lb_ips(struct ovn_datapath *od)
+init_lb_for_datapath(struct ovn_datapath *od)
 {
 sset_init(>lb_ips_v4);
 sset_init(>lb_ips_v4_routable);
 sset_init(>lb_ips_v6);
 sset_init(>lb_ips_v6_routable);
+
+if (od->nbs) {
+od->has_lb_vip = ls_has_lb_vip(od);
+} else {
+od->has_lb_vip = lr_has_lb_vip(od);
+}
 }
 
 static void
-destroy_lb_ips(struct ovn_datapath *od)
+destroy_lb_for_datapath(struct ovn_datapath *od)
 {
 if (!od->nbs && !od->nbr) {
 return;
@@ -895,7 +952,7 @@ ovn_datapath_destroy(struct hmap *datapaths, struct 
ovn_datapath *od)
 free(od->router_ports);
 destroy_nat_entries(od);
 destroy_router_enternal_ips(od);
-destroy_lb_ips(od);
+destroy_lb_for_datapath(od);
 free(od->nat_entries);
 free(od->localnet_ports);
 free(od->l3dgw_ports);
@@ -1224,7 +1281,7 @@ join_datapaths(struct northd_context *ctx, struct hmap 
*datapaths,
 
 init_ipam_info_for_datapath(od);
 init_mcast_info_for_datapath(od);
-init_lb_ips(od);
+init_lb_for_datapath(od);
 }
 
 const struct nbrec_logical_router *nbr;
@@ -1257,7 +1314,7 @@ join_datapaths(struct northd_context *ctx, struct hmap 
*datapaths,
 init_mcast_info_for_datapath(od);
 init_nat_entries(od);
 init_router_external_ips(od);
-init_lb_ips(od);
+init_lb_for_datapath(od);
 if 

[ovs-dev] [PATCH ovn branch-21.09 1/3] northd: Use address sets for ARP responder flows for VIPs.

2021-11-02 Thread Dumitru Ceara
Otherwise the S_ROUTER_IN_IP_INPUT aggregated flows that reply to ARP
requests targeting load balancer VIPs get completely regenerated every
time a new VIP/LB is added.  This affects SB memory usage as RAFT log
entries are huge.  Use an address set instead.  Updating an address set
is incremental, because it's performed with a "mutate" operation.

On a large scale ovn-kubernetes deployment with a high number of
load balancers (services) this change reduces memory usage of
ovsdb-servers running the OVN_Southbound cluster by 50%, from ~2GB
RSS to ~1GB RSS.

Signed-off-by: Dumitru Ceara 
(cherry picked from commit c1e3896c0a393c035f301db6c1a8431adda57eb0)
---
 northd/northd.c |   61 +--
 tests/ovn-northd.at |   44 +
 2 files changed, 59 insertions(+), 46 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index aa1040628..502c263cc 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -11967,39 +11967,28 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
   op->cr_port->json_key);
 }
 
-struct ds load_balancer_ips_v4 = DS_EMPTY_INITIALIZER;
-
-/* For IPv4 we can just create one rule with all required IPs. */
-ds_put_cstr(_balancer_ips_v4, "{ ");
-ds_put_and_free_cstr(_balancer_ips_v4,
- sset_join(>od->lb_ips_v4, ", ", " }"));
-
-build_lrouter_arp_flow(op->od, op, ds_cstr(_balancer_ips_v4),
+/* Create a single ARP rule for all IPs that are used as VIPs. */
+char *lb_ips_v4_as = xasprintf("$%s_lb_ip4", op->od->nbr->name);
+build_lrouter_arp_flow(op->od, op, lb_ips_v4_as,
REG_INPORT_ETH_ADDR,
match, false, 90, NULL, lflows);
-ds_destroy(_balancer_ips_v4);
+free(lb_ips_v4_as);
 }
 
 if (sset_count(>od->lb_ips_v6)) {
 ds_clear(match);
-ds_clear(actions);
-
-struct ds load_balancer_ips_v6 = DS_EMPTY_INITIALIZER;
-
-ds_put_cstr(_balancer_ips_v6, "{ ");
-ds_put_and_free_cstr(_balancer_ips_v6,
- sset_join(>od->lb_ips_v6, ", ", " }"));
 
 if (is_l3dgw_port(op)) {
 ds_put_format(match, "is_chassis_resident(%s)",
   op->cr_port->json_key);
 }
-build_lrouter_nd_flow(op->od, op, "nd_na",
-  ds_cstr(_balancer_ips_v6), NULL,
+
+/* Create a single ND rule for all IPs that are used as VIPs. */
+char *lb_ips_v6_as = xasprintf("$%s_lb_ip6", op->od->nbr->name);
+build_lrouter_nd_flow(op->od, op, "nd_na", lb_ips_v6_as, NULL,
   REG_INPORT_ETH_ADDR, match, false, 90,
   NULL, lflows, meter_groups);
-
-ds_destroy(_balancer_ips_v6);
+free(lb_ips_v6_as);
 }
 
 if (!op->od->is_gw_router && !op->od->n_l3dgw_ports) {
@@ -13571,7 +13560,7 @@ sync_address_set(struct northd_context *ctx, const char 
*name,
  * in OVN_Northbound, so that the address sets used in Logical_Flows in
  * OVN_Southbound is checked against the proper set.*/
 static void
-sync_address_sets(struct northd_context *ctx)
+sync_address_sets(struct northd_context *ctx, struct hmap *datapaths)
 {
 struct shash sb_address_sets = SHASH_INITIALIZER(_address_sets);
 
@@ -13618,6 +13607,34 @@ sync_address_sets(struct northd_context *ctx)
 svec_destroy(_addrs);
 }
 
+/* Sync router load balancer VIP generated address sets. */
+struct ovn_datapath *od;
+HMAP_FOR_EACH (od, key_node, datapaths) {
+if (!od->nbr) {
+continue;
+}
+
+if (sset_count(>lb_ips_v4)) {
+char *ipv4_addrs_name = xasprintf("%s_lb_ip4", od->nbr->name);
+const char **ipv4_addrs = sset_array(>lb_ips_v4);
+
+sync_address_set(ctx, ipv4_addrs_name, ipv4_addrs,
+ sset_count(>lb_ips_v4), _address_sets);
+free(ipv4_addrs_name);
+free(ipv4_addrs);
+}
+
+if (sset_count(>lb_ips_v6)) {
+char *ipv6_addrs_name = xasprintf("%s_lb_ip6", od->nbr->name);
+const char **ipv6_addrs = sset_array(>lb_ips_v6);
+
+sync_address_set(ctx, ipv6_addrs_name, ipv6_addrs,
+ sset_count(>lb_ips_v6), _address_sets);
+free(ipv6_addrs_name);
+free(ipv6_addrs);
+}
+}
+
 /* sync user defined address sets, which may overwrite port group
  * generated address sets if same name is used */
 const struct nbrec_address_set *nb_address_set;
@@ -14358,7 +14375,7 @@ ovnnb_db_run(struct northd_context *ctx,
 stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());

[ovs-dev] [PATCH ovn branch-21.09 0/3] Improve Load Balancer performance.

2021-11-02 Thread Dumitru Ceara
This series backports the load balancer performance improvements to
stable branch-21.09.

- patch 1/3 changes the way ARP responder flows are generated for load
  balancer VIPs, using an address set, making SB updates incremental
  and also reducing memory usage on the SB server side.
- patch 2/3 introduces a new feature, Load_Balancer_Group, which
  simplifies load balancer configuration in large scale scenarios and
  at the same time improves performance due to the large reduction in
  database references between rows.
- patch 3/3 is a follow up fix of a bug originally introduced by
  patch 1/3.

Usually new features and performance fixes are not to be backported to
stable branches, however due to the (mis)alignment of upstream and
downstream release schedules, and due to the fact that ovn-kubernetes
requires a database table to be part of a schema that's tagged in a
release in ovn-org/ovn repo, if load balancer groups would be available
only starting with v21.12.0 then downstream ovn-kubernetes (and OpenShift)
will not be able to consume the feature for two release cycles (~6 months).

On the other hand the feature itself is quite contained, doesn't break
backwards compatibility, and, when used, hugely improves some CMSs
(OpenShift) load balancer use cases.

Dumitru Ceara (3):
  northd: Use address sets for ARP responder flows for VIPs.
  nb: Add support for Load_Balancer_Groups.
  northd: Always generate valid load balancer address set names.

 NEWS  |2 
 northd/northd.c   |  334 +
 ovn-nb.ovsschema  |   24 +++-
 ovn-nb.xml|   37 +
 tests/ovn-northd.at   |  293 ++-
 utilities/ovn-nbctl.c |3 
 6 files changed, 517 insertions(+), 176 deletions(-)

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH branch-2.14] dpdk: Use DPDK 19.11.10 release

2021-11-02 Thread Ilya Maximets
On 10/28/21 13:22, Suneetha Kalahasthi wrote:
> Modify ci linux build script to use the latest DPDK stable release 19.11.10.
> Modify Documentation to use the latest DPDK stable release 19.11.10.
> Update NEWS file to reflect the latest DPDK stable release 19.11.10.
> FAQ is updated to reflect the latest DPDK for each OVS branch.
> 
> Signed-off-by: Suneetha Kalahasthi 
> ---
>  .ci/linux-build.sh   | 2 +-
>  Documentation/faq/releases.rst   | 4 ++--
>  Documentation/intro/install/dpdk.rst | 6 +++---
>  NEWS | 2 +-
>  4 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
> index 7e16ebb6a..db63bfea1 100755
> --- a/.ci/linux-build.sh
> +++ b/.ci/linux-build.sh
> @@ -187,7 +187,7 @@ fi
>  
>  if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then
>  if [ -z "$DPDK_VER" ]; then
> -DPDK_VER="19.11.8"
> +DPDK_VER="19.11.10"
>  fi
>  install_dpdk $DPDK_VER
>  if [ "$CC" = "clang" ]; then
> diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
> index 9e8382611..f606ef2f8 100644
> --- a/Documentation/faq/releases.rst
> +++ b/Documentation/faq/releases.rst
> @@ -195,8 +195,8 @@ Q: What DPDK version does each Open vSwitch release work 
> with?
>  2.10.x   17.11.10
>  2.11.x   18.11.11
>  2.12.x   18.11.11
> -2.13.x   19.11.8
> -2.14.x   19.11.8
> +2.13.x   19.11.10
> +2.14.x   19.11.10
>   
>  
>  Q: Are all the DPDK releases that OVS versions work with maintained?
> diff --git a/Documentation/intro/install/dpdk.rst 
> b/Documentation/intro/install/dpdk.rst
> index 2835c837e..bb3a5368f 100644
> --- a/Documentation/intro/install/dpdk.rst
> +++ b/Documentation/intro/install/dpdk.rst
> @@ -71,9 +71,9 @@ Install DPDK
>  #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
>  
> $ cd /usr/src/
> -   $ wget https://fast.dpdk.org/rel/dpdk-19.11.8.tar.xz
> -   $ tar xf dpdk-19.11.8.tar.xz
> -   $ export DPDK_DIR=/usr/src/dpdk-stable-19.11.8
> +   $ wget https://fast.dpdk.org/rel/dpdk-19.11.10.tar.xz
> +   $ tar xf dpdk-19.11.10.tar.xz
> +   $ export DPDK_DIR=/usr/src/dpdk-stable-19.11.10
> $ cd $DPDK_DIR
>  
>  #. (Optional) Configure DPDK as a shared library
> diff --git a/NEWS b/NEWS
> index 7d8d6931c..99ad98e5f 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -5,7 +5,7 @@ v2.14.3 - 21 Oct 2021
>  -
> - Bug fixes
> - DPDK:
> - * OVS validated with DPDK 19.11.8. It is recommended to use this version
> + * OVS validated with DPDK 19.11.10. It is recommended to use this 
> version
> until further releases.
> - OVS now reports the datapath capability 'ct_zero_snat', which reflects
>   whether the SNAT with all-zero IP address is supported.
> 

Same here.  This update should go to 2.14.4.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH branch-2.13] dpdk: Use DPDK 19.11.10 release

2021-11-02 Thread Ilya Maximets
On 10/28/21 13:22, Suneetha Kalahasthi wrote:
> Modify ci linux build script to use the latest DPDK stable release 19.11.10.
> Modify Documentation to use the latest DPDK stable release 19.11.10.
> Update NEWS file to reflect the latest DPDK stable release 19.11.10.
> FAQ is updated to reflect the latest DPDK for each OVS branch.
> 
> Signed-off-by: Suneetha Kalahasthi 
> ---
>  .ci/linux-build.sh   | 2 +-
>  Documentation/faq/releases.rst   | 2 +-
>  Documentation/intro/install/dpdk.rst | 6 +++---
>  NEWS | 3 +++
>  4 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
> index b22ad9531..178487896 100755
> --- a/.ci/linux-build.sh
> +++ b/.ci/linux-build.sh
> @@ -182,7 +182,7 @@ fi
>  
>  if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then
>  if [ -z "$DPDK_VER" ]; then
> -DPDK_VER="19.11.8"
> +DPDK_VER="19.11.10"
>  fi
>  install_dpdk $DPDK_VER
>  # Enable pdump support in OVS.
> diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
> index 70a0e9b22..0df2e4163 100644
> --- a/Documentation/faq/releases.rst
> +++ b/Documentation/faq/releases.rst
> @@ -192,7 +192,7 @@ Q: What DPDK version does each Open vSwitch release work 
> with?
>  2.10.x   17.11.10
>  2.11.x   18.11.11
>  2.12.x   18.11.11
> -2.13.x   19.11.8
> +2.13.x   19.11.10
>   
>  
>  Q: Are all the DPDK releases that OVS versions work with maintained?
> diff --git a/Documentation/intro/install/dpdk.rst 
> b/Documentation/intro/install/dpdk.rst
> index b603b6f0b..7e33f0682 100644
> --- a/Documentation/intro/install/dpdk.rst
> +++ b/Documentation/intro/install/dpdk.rst
> @@ -71,9 +71,9 @@ Install DPDK
>  #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
>  
> $ cd /usr/src/
> -   $ wget https://fast.dpdk.org/rel/dpdk-19.11.8.tar.xz
> -   $ tar xf dpdk-19.11.8.tar.xz
> -   $ export DPDK_DIR=/usr/src/dpdk-stable-19.11.8
> +   $ wget https://fast.dpdk.org/rel/dpdk-19.11.10.tar.xz
> +   $ tar xf dpdk-19.11.10.tar.xz
> +   $ export DPDK_DIR=/usr/src/dpdk-stable-19.11.10
> $ cd $DPDK_DIR
>  
>  #. (Optional) Configure DPDK as a shared library
> diff --git a/NEWS b/NEWS
> index 840909735..6c5de557a 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -7,6 +7,9 @@ v2.13.5 - 21 Oct 2021
> - OVS now reports the datapath capability 'ct_zero_snat', which reflects
>   whether the SNAT with all-zero IP address is supported.
>   See ovs-vswitchd.conf.db(5) for details.
> +   - DPDK:
> + * OVS validated with DPDK 19.11.10. It is recommended to use this 
> version
> +   until further releases.

Hi.  Thanks for the patch, but please, don't change NEWS for already
released versions.  This update should go to 2.13.6.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH dpdk-latest 2/2] netdev-dpdk: Fix build with 21.11-rc1.

2021-11-02 Thread David Marchand
On Tue, Nov 2, 2021 at 3:32 PM Stokes, Ian  wrote:
>
> > PKT_[RT]X_* and other mbuf macros have been prefixed with RTE_MBUF_ [1].
> > Update accordingly.
> >
> > 1: https://git.dpdk.org/dpdk/commit/?id=daa02b5cddbb
> >
> > Signed-off-by: David Marchand 
>
> HI David, thanks for the patches, I was just running these through github and 
> spotted there was a failure in the build not sure if it's related to these 
> patches or possibly a different patch that has been upstream to DPDK main and 
> may require a follow on patch
>
> Error and build link below.
>
> ../../lib/ofp-packet.c: note: in included file (through 
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_flow.h, 
> ../../lib/netdev-dpdk.h, ../../lib/dp-packet.h):
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:92:37: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:93:37: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:94:40: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:95:37: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:96:40: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:97:37: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:98:37: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:99:40: error: 
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:100:39: error: 
> invalid bitfield specifier for type restricted ovs_be16.

The sparse issue was coming from a DPDK issue.
The patch is in next-net and will be in -rc2.
https://git.dpdk.org/next/dpdk-next-net/commit/?id=ea278f0a42849c4b60d13d2167d54562cf7e9852


FYI, there will be a new issue in 21.11-rc2, since a struct rename is
about to get merge for vhost.
https://patchwork.dpdk.org/project/dpdk/patch/20211102104748.57078-1-maxime.coque...@redhat.com/

I can respin this series with additional fix.


-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1] checkpatch: Detect "trojan source" attack

2021-11-02 Thread Mike Pattrick
Recently there has been a lot of press about the "trojan source" attack,
where Unicode characters are used to obfuscate the true functionality of
code. This attack didn't affect OVS, but adding the check here will help
guard against it sneaking in later.

Signed-off-by: Mike Pattrick 
---
 utilities/checkpatch.py | 12 
 1 file changed, 12 insertions(+)

diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
index 16f46c78e..cbe9b9a4a 100755
--- a/utilities/checkpatch.py
+++ b/utilities/checkpatch.py
@@ -181,6 +181,8 @@ __regex_added_doc_rst = re.compile(
 __regex_empty_return = re.compile(r'\s*return;')
 __regex_if_macros = re.compile(r'^ +(%s) \([\S]([\s\S]+[\S])*\) { +\\' %
__parenthesized_constructs)
+__regex_bidi_characters = re.compile('\u061C|\u200E|\u200F|\u2066'
+'|\u2067|\u2068|\u2069|\u202A|\u202B|\u202C|\u202D|\u202E')
 
 skip_leading_whitespace_check = False
 skip_trailing_whitespace_check = False
@@ -294,6 +296,11 @@ def pointer_whitespace_check(line):
 return __regex_ptr_declaration_missing_whitespace.search(line) is not None
 
 
+def bidi_character_check(line):
+"""Return TRUE if inappropriate Unicode characters are detected """
+return __regex_bidi_characters.search(line) is not None
+
+
 def cast_whitespace_check(line):
 """Return TRUE if there is no space between the '()' used in a cast and
the expression whose type is cast, i.e.: '(void *)foo'"""
@@ -565,6 +572,11 @@ checks = [
  'print':
  lambda: print_error("Inappropriate spacing in pointer declaration")},
 
+{'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None,
+ 'check': lambda x: bidi_character_check(x),
+ 'print':
+ lambda: print_error("Inappropriate Unicode characters detected.")},
+
 {'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None,
  'prereq': lambda x: not is_comment_line(x),
  'check': lambda x: cast_whitespace_check(x),
-- 
2.30.2



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v8] net: openvswitch: IPv6: Add IPv6 extension header support

2021-11-02 Thread Jakub Kicinski
On Fri, 29 Oct 2021 11:20:08 -0700 Toms Atteka wrote:
> This change adds a new OpenFlow field OFPXMT_OFB_IPV6_EXTHDR and
> packets can be filtered using ipv6_ext flag.
> 
> Signed-off-by: Toms Atteka 

Hi! This patch didn't get reviewed in time for the v5.16 merge window,
please continue the work but you'll have to repost in two weeks after
v5.16-rc1 has been cut. If you repost before that point please use RFC
designation.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] dpif-netdev: Sync PMD ALB state with user commands.

2021-11-02 Thread Kevin Traynor
Previously, when a user enabled PMD auto load balancer with
pmd-auto-lb="true", some conditions such as number of PMDs/RxQs
that were required for a rebalance to take place were checked.

If the configuration meant that a rebalance would not take place
then PMD ALB was logged as 'disabled' and not run.

Later, if the PMD/RxQ configuration changed whereby a rebalance
could be effective, PMD ALB was logged as 'enabled' and would run at
the appropriate time.

This worked ok from a functional view but it is unintuitive for the
user reading the logs.

e.g. with one PMD (PMD ALB would not be effective)

User enables ALB, but logs say it is disabled because it won't run.
$ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
|dpif_netdev|INFO|PMD auto load balance is disabled

No dry run takes place.

Add more PMDs (PMD ALB may be effective).
$ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50
|dpif_netdev|INFO|PMD auto load balance is enabled ...

Dry run takes place.
|dpif_netdev|DBG|PMD auto load balance performing dry run.

A better approach is to simply reflect back the user enable/disable
state in the logs and deal with whether the rebalance will be effective
when needed. That is the approach taken in this patch.

To cut down on unnecessary work, some basic checks are also made before
starting a PMD ALB dry run and debug logs can indicate this to the user.

e.g. with one PMD (PMD ALB would not be effective)

User enables ALB, and logs confirm the user has enabled it.
$ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
|dpif_netdev|INFO|PMD auto load balance is enabled...

No dry run takes place.
|dpif_netdev|DBG|PMD auto load balance nothing to do, not enough non-isolated 
PMDs or RxQs.

Add more PMDs (PMD ALB may be effective).
$ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50

Dry run takes place.
|dpif_netdev|DBG|PMD auto load balance performing dry run.

Signed-off-by: Kevin Traynor 
Acked-by: Sunil Pai G 
Reviewed-by: David Marchand 
Acked-by: Gaetan Rivet 

---
v3:
- add minor fixes from v2 that were not committed
- add unit tests for dry run config checking
- Keeping Acks as there is just additional UTs added here (Let me know if any 
issue with that)

v2:
- minor: punctuation change for one log and one indent fix
- Add Ack/Review tags

GH actions:
https://github.com/kevintraynor/ovs/actions/runs/1413417595
---
 lib/dpif-netdev.c | 120 +-
 tests/alb.at  |  91 +++
 2 files changed, 145 insertions(+), 66 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b078c2da5..21b7ee487 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -217,5 +217,6 @@ struct dp_meter {
 
 struct pmd_auto_lb {
-bool auto_lb_requested; /* Auto load balancing requested by user. */
+bool do_dry_run;
+bool recheck_config;
 bool is_enabled;/* Current status of Auto load balancing. */
 uint64_t rebalance_intvl;
@@ -4149,52 +4150,25 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op 
**ops, size_t n_ops,
 /* Enable or Disable PMD auto load balancing. */
 static void
-set_pmd_auto_lb(struct dp_netdev *dp, bool always_log)
+set_pmd_auto_lb(struct dp_netdev *dp, bool state, bool always_log)
 {
-unsigned int cnt = 0;
-struct dp_netdev_pmd_thread *pmd;
 struct pmd_auto_lb *pmd_alb = >pmd_alb;
-uint8_t rebalance_load_thresh;
 
-bool enable_alb = false;
-bool multi_rxq = false;
-enum sched_assignment_type pmd_rxq_assign_type = dp->pmd_rxq_assign_type;
-
-/* Ensure that there is at least 2 non-isolated PMDs and
- * one of them is polling more than one rxq. */
-CMAP_FOR_EACH (pmd, node, >poll_threads) {
-if (pmd->core_id == NON_PMD_CORE_ID || pmd->isolated) {
-continue;
-}
-
-if (hmap_count(>poll_list) > 1) {
-multi_rxq = true;
-}
-if (cnt && multi_rxq) {
-enable_alb = true;
-break;
-}
-cnt++;
-}
-
-/* Enable auto LB if requested and not using roundrobin assignment. */
-enable_alb = enable_alb && pmd_rxq_assign_type != SCHED_ROUNDROBIN &&
-pmd_alb->auto_lb_requested;
-
-if (pmd_alb->is_enabled != enable_alb || always_log) {
-pmd_alb->is_enabled = enable_alb;
+if (pmd_alb->is_enabled != state || always_log) {
+pmd_alb->is_enabled = state;
 if (pmd_alb->is_enabled) {
+uint8_t rebalance_load_thresh;
+
 atomic_read_relaxed(_alb->rebalance_load_thresh,
 _load_thresh);
-VLOG_INFO("PMD auto load balance is enabled "
+VLOG_INFO("PMD auto load balance is enabled, "
   "interval %"PRIu64" mins, "
   "pmd load threshold %"PRIu8"%%, "
-  "improvement threshold %"PRIu8"%%",
+  "improvement threshold 

[ovs-dev] [PATCH 4/4] Tunnel: Snoop ingress packets and update neigh cache if needed.

2021-11-02 Thread Paolo Valerio
In case of native tunnel with bfd enabled, if the MAC address of the
remote end's interface changes (e.g. because it got rebooted, and the
MAC address is allocated dynamically), the BFD session will never be
re-established.

This happens because the local tunnel neigh entry doesn't get updated,
and the local end keeps sending BFD packets with the old destination
MAC address. This was not an issue until
b23ddcc57d41 ("tnl-neigh-cache: tighten arp and nd snooping.")
because ARP requests were snooped as well avoiding the problem.

Fix this by snooping the incoming packets in the slow path, and
updating the neigh cache accordingly.

Signed-off-by: Paolo Valerio 
Fixes: b23ddcc57d41 ("tnl-neigh-cache: tighten arp and nd snooping.")
---
 lib/tnl-neigh-cache.c |   12 ++--
 lib/tnl-neigh-cache.h |2 ++
 ofproto/ofproto-dpif-xlate.c  |   14 ++
 tests/tunnel-push-pop-ipv6.at |   36 
 tests/tunnel-push-pop.at  |   35 +++
 5 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 58dba5e5c..1a67b3c55 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -136,9 +136,9 @@ tnl_neigh_delete(struct tnl_neigh_entry *neigh)
 ovsrcu_postpone(neigh_entry_free, neigh);
 }
 
-static void
-tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
-const struct eth_addr mac)
+void
+tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
+  const struct eth_addr mac)
 {
 ovs_mutex_lock();
 struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
@@ -169,7 +169,7 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst,
 const struct eth_addr mac)
 {
 struct in6_addr dst6 = in6_addr_mapped_ipv4(dst);
-tnl_neigh_set__(name, , mac);
+tnl_neigh_set(name, , mac);
 }
 
 static int
@@ -214,7 +214,7 @@ tnl_nd_snoop(const struct flow *flow, struct flow_wildcards 
*wc,
 memset(>masks.nd_target, 0xff, sizeof wc->masks.nd_target);
 
 if (update) {
-tnl_neigh_set__(name, >nd_target, flow->arp_tha);
+tnl_neigh_set(name, >nd_target, flow->arp_tha);
 }
 return 0;
 }
@@ -362,7 +362,7 @@ tnl_neigh_cache_add(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 return;
 }
 
-tnl_neigh_set__(br_name, , mac);
+tnl_neigh_set(br_name, , mac);
 unixctl_command_reply(conn, "OK");
 }
 
diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
index 10724d8b4..a6d018fff 100644
--- a/lib/tnl-neigh-cache.h
+++ b/lib/tnl-neigh-cache.h
@@ -33,6 +33,8 @@
 
 int tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc,
 const char dev_name[IFNAMSIZ], bool update);
+void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
+   const struct eth_addr mac);
 int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst,
  struct eth_addr *mac);
 void tnl_neigh_cache_init(void);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 2f09bcca1..cc433d6ab 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -4099,6 +4099,20 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct 
flow *flow,
  is_neighbor_reply_correct(ctx, flow)) {
 tnl_neigh_snoop(flow, wc, ctx->xbridge->name,
 ctx->xin->allow_side_effects);
+} else if (*tnl_port != ODPP_NONE &&
+   ctx->xin->allow_side_effects &&
+   (flow->dl_type == htons(ETH_TYPE_IP) ||
+flow->dl_type == htons(ETH_TYPE_IPV6))) {
+struct eth_addr mac = flow->dl_src;
+struct in6_addr s_ip6;
+
+if (flow->dl_type == htons(ETH_TYPE_IP)) {
+in6_addr_set_mapped_ipv4(_ip6, flow->nw_src);
+} else {
+s_ip6 = flow->ipv6_src;
+}
+
+tnl_neigh_set(ctx->xbridge->name, _ip6, mac);
 }
 }
 
diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
index 766a4bcf8..5c4dd248b 100644
--- a/tests/tunnel-push-pop-ipv6.at
+++ b/tests/tunnel-push-pop-ipv6.at
@@ -462,6 +462,42 @@ AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 
'in_port(6081)'], [0], [dnl
 
tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0x,type=0x80,len=4,0xa/0xf}{class=0x,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
 packets:0, bytes:0, used:never, 
actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535))
 ])
 
+dnl Receive VXLAN with different MAC and verify that the neigh cache gets 
updated
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 

[ovs-dev] [PATCH 3/4] Native tunnel: Do not refresh the entry while revalidating.

2021-11-02 Thread Paolo Valerio
This is a minor issue but visible e.g. when you try to flush the neigh
cache while the ARP flow is still present in the datapath, triggering
the revalidation of the datapath flows which subsequently
refreshes/adds the entry in the cache.

Signed-off-by: Paolo Valerio 
---
 lib/tnl-neigh-cache.c|   20 +---
 lib/tnl-neigh-cache.h|2 +-
 ofproto/ofproto-dpif-xlate.c |3 ++-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 8eacaccd8..58dba5e5c 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -174,7 +174,7 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst,
 
 static int
 tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
-  const char name[IFNAMSIZ])
+  const char name[IFNAMSIZ], bool update)
 {
 /* Snoop normal ARP replies and gratuitous ARP requests/replies only */
 if (!is_arp(flow)
@@ -184,13 +184,17 @@ tnl_arp_snoop(const struct flow *flow, struct 
flow_wildcards *wc,
 return EINVAL;
 }
 
-tnl_arp_set(name, FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src), 
flow->arp_sha);
+memset(>masks.nw_src, 0xff, sizeof wc->masks.nw_src);
+
+if (update) {
+tnl_arp_set(name, flow->nw_src, flow->arp_sha);
+}
 return 0;
 }
 
 static int
 tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char name[IFNAMSIZ])
+ const char name[IFNAMSIZ], bool update)
 {
 if (!is_nd(flow, wc) || flow->tp_src != htons(ND_NEIGHBOR_ADVERT)) {
 return EINVAL;
@@ -209,20 +213,22 @@ tnl_nd_snoop(const struct flow *flow, struct 
flow_wildcards *wc,
 memset(>masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
 memset(>masks.nd_target, 0xff, sizeof wc->masks.nd_target);
 
-tnl_neigh_set__(name, >nd_target, flow->arp_tha);
+if (update) {
+tnl_neigh_set__(name, >nd_target, flow->arp_tha);
+}
 return 0;
 }
 
 int
 tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc,
-const char name[IFNAMSIZ])
+const char name[IFNAMSIZ], bool update)
 {
 int res;
-res = tnl_arp_snoop(flow, wc, name);
+res = tnl_arp_snoop(flow, wc, name, update);
 if (res != EINVAL) {
 return res;
 }
-return tnl_nd_snoop(flow, wc, name);
+return tnl_nd_snoop(flow, wc, name, update);
 }
 
 void
diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
index e4b42b059..10724d8b4 100644
--- a/lib/tnl-neigh-cache.h
+++ b/lib/tnl-neigh-cache.h
@@ -32,7 +32,7 @@
 #include "util.h"
 
 int tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc,
-const char dev_name[IFNAMSIZ]);
+const char dev_name[IFNAMSIZ], bool update);
 int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst,
  struct eth_addr *mac);
 void tnl_neigh_cache_init(void);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 8723cb4e8..2f09bcca1 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -4097,7 +4097,8 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct 
flow *flow,
 (flow->dl_type == htons(ETH_TYPE_ARP) ||
  flow->nw_proto == IPPROTO_ICMPV6) &&
  is_neighbor_reply_correct(ctx, flow)) {
-tnl_neigh_snoop(flow, wc, ctx->xbridge->name);
+tnl_neigh_snoop(flow, wc, ctx->xbridge->name,
+ctx->xin->allow_side_effects);
 }
 }
 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/4] Native tunnel: Add tnl/neigh/ageing command.

2021-11-02 Thread Paolo Valerio
With the command it is now possible to change the ageing time of the
cache entries.

For the existing entries the ageing time is updated only if the
current expiration is greater than the new one. In any case, the next
refresh will set it to the new value.

This is intended mostly for debugging purpose.

Signed-off-by: Paolo Valerio 
---
 NEWS|3 ++
 lib/tnl-neigh-cache.c   |   77 ++-
 ofproto/ofproto-tnl-unixctl.man |9 +
 tests/tunnel-push-pop-ipv6.at   |   30 +++
 tests/tunnel-push-pop.at|   30 +++
 5 files changed, 140 insertions(+), 9 deletions(-)

diff --git a/NEWS b/NEWS
index 90f4b1590..148dd5d61 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,9 @@ Post-v2.16.0
limiting behavior.
  * Add hardware offload support for matching IPv4/IPv6 frag types
(experimental).
+   - Native tunnel:
+ * Added new ovs-appctl tnl/neigh/ageing to read/write the neigh ageing
+   time.
 
 
 v2.16.0 - 16 Aug 2021
diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 2769e5c3d..8eacaccd8 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -47,6 +47,7 @@
 
 /* In milliseconds */
 #define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60 * 1000)
+#define NEIGH_ENTRY_MAX_AGEING_TIME(3600 * 1000)
 
 struct tnl_neigh_entry {
 struct cmap_node cmap_node;
@@ -58,6 +59,7 @@ struct tnl_neigh_entry {
 
 static struct cmap table = CMAP_INITIALIZER;
 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static atomic_uint32_t neigh_ageing;
 
 static uint32_t
 tnl_neigh_hash(const struct in6_addr *ip)
@@ -75,6 +77,15 @@ tnl_neigh_expired(struct tnl_neigh_entry *neigh)
 return expired <= time_msec();
 }
 
+static uint32_t
+tnl_neigh_get_ageing(void)
+{
+unsigned int ageing;
+
+atomic_read_relaxed(_ageing, );
+return ageing;
+}
+
 static struct tnl_neigh_entry *
 tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
 {
@@ -89,7 +100,7 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const 
struct in6_addr *dst)
 }
 
 atomic_store_relaxed(>expires, time_msec() +
- NEIGH_ENTRY_DEFAULT_IDLE_TIME);
+ tnl_neigh_get_ageing());
 return neigh;
 }
 }
@@ -134,7 +145,7 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct 
in6_addr *dst,
 if (neigh) {
 if (eth_addr_equals(neigh->mac, mac)) {
 atomic_store_relaxed(>expires, time_msec() +
- NEIGH_ENTRY_DEFAULT_IDLE_TIME);
+ tnl_neigh_get_ageing());
 ovs_mutex_unlock();
 return;
 }
@@ -147,7 +158,7 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct 
in6_addr *dst,
 neigh->ip = *dst;
 neigh->mac = mac;
 atomic_store_relaxed(>expires, time_msec() +
- NEIGH_ENTRY_DEFAULT_IDLE_TIME);
+ tnl_neigh_get_ageing());
 ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
 cmap_insert(, >cmap_node, tnl_neigh_hash(>ip));
 ovs_mutex_unlock();
@@ -273,6 +284,43 @@ tnl_neigh_cache_flush(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 unixctl_command_reply(conn, "OK");
 }
 
+static void
+tnl_neigh_cache_ageing(struct unixctl_conn *conn, int argc,
+const char *argv[], void *aux OVS_UNUSED)
+{
+long long int new_exp, curr_exp;
+struct tnl_neigh_entry *neigh;
+uint32_t ageing;
+
+if (argc == 1) {
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(, "%"PRIu32, tnl_neigh_get_ageing() / 1000);
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+
+return;
+}
+
+if (!ovs_scan(argv[1], "%"SCNu32, ) ||
+!ageing || ageing > NEIGH_ENTRY_MAX_AGEING_TIME) {
+unixctl_command_reply_error(conn, "bad ageing value");
+return;
+}
+
+ageing *= 1000;
+atomic_store_relaxed(_ageing, ageing);
+new_exp = time_msec() + ageing;
+
+CMAP_FOR_EACH (neigh, cmap_node, ) {
+atomic_read_relaxed(>expires, _exp);
+if (new_exp < curr_exp) {
+atomic_store_relaxed(>expires, new_exp);
+}
+}
+
+unixctl_command_reply(conn, "OK");
+}
+
 static int
 lookup_any(const char *host_name, struct in6_addr *address)
 {
@@ -347,10 +395,21 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 void
 tnl_neigh_cache_init(void)
 {
-unixctl_command_register("tnl/arp/show", "", 0, 0, tnl_neigh_cache_show, 
NULL);
-unixctl_command_register("tnl/arp/set", "BRIDGE IP MAC", 3, 3, 
tnl_neigh_cache_add, NULL);
-unixctl_command_register("tnl/arp/flush", "", 0, 0, tnl_neigh_cache_flush, 
NULL);
-unixctl_command_register("tnl/neigh/show", "", 0, 0, tnl_neigh_cache_show, 
NULL);
-unixctl_command_register("tnl/neigh/set", "BRIDGE IP MAC", 3, 3, 

[ovs-dev] [PATCH 1/4] Native tunnel: Read/write expires atomically.

2021-11-02 Thread Paolo Valerio
Signed-off-by: Paolo Valerio 
---
 lib/tnl-neigh-cache.c |   32 +++-
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 5bda4af7e..2769e5c3d 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -32,6 +32,7 @@
 #include "errno.h"
 #include "flow.h"
 #include "netdev.h"
+#include "ovs-atomic.h"
 #include "ovs-thread.h"
 #include "packets.h"
 #include "openvswitch/poll-loop.h"
@@ -44,14 +45,14 @@
 #include "openvswitch/vlog.h"
 
 
-/* In seconds */
-#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60)
+/* In milliseconds */
+#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60 * 1000)
 
 struct tnl_neigh_entry {
 struct cmap_node cmap_node;
 struct in6_addr ip;
 struct eth_addr mac;
-time_t expires; /* Expiration time. */
+atomic_llong expires;   /* Expiration time in ms. */
 char br_name[IFNAMSIZ];
 };
 
@@ -64,6 +65,16 @@ tnl_neigh_hash(const struct in6_addr *ip)
 return hash_bytes(ip->s6_addr, 16, 0);
 }
 
+static bool
+tnl_neigh_expired(struct tnl_neigh_entry *neigh)
+{
+long long expired;
+
+atomic_read_relaxed(>expires, );
+
+return expired <= time_msec();
+}
+
 static struct tnl_neigh_entry *
 tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
 {
@@ -73,11 +84,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const 
struct in6_addr *dst)
 hash = tnl_neigh_hash(dst);
 CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, ) {
 if (ipv6_addr_equals(>ip, dst) && !strcmp(neigh->br_name, 
br_name)) {
-if (neigh->expires <= time_now()) {
+if (tnl_neigh_expired(neigh)) {
 return NULL;
 }
 
-neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+atomic_store_relaxed(>expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME);
 return neigh;
 }
 }
@@ -121,7 +133,8 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct 
in6_addr *dst,
 struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
 if (neigh) {
 if (eth_addr_equals(neigh->mac, mac)) {
-neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+atomic_store_relaxed(>expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME);
 ovs_mutex_unlock();
 return;
 }
@@ -133,7 +146,8 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct 
in6_addr *dst,
 
 neigh->ip = *dst;
 neigh->mac = mac;
-neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+atomic_store_relaxed(>expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME);
 ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
 cmap_insert(, >cmap_node, tnl_neigh_hash(>ip));
 ovs_mutex_unlock();
@@ -208,7 +222,7 @@ tnl_neigh_cache_run(void)
 
 ovs_mutex_lock();
 CMAP_FOR_EACH(neigh, cmap_node, ) {
-if (neigh->expires <= time_now()) {
+if (tnl_neigh_expired(neigh)) {
 tnl_neigh_delete(neigh);
 changed = true;
 }
@@ -319,7 +333,7 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 
 ds_put_format(, ETH_ADDR_FMT"   %s",
   ETH_ADDR_ARGS(neigh->mac), neigh->br_name);
-if (neigh->expires <= time_now()) {
+if (tnl_neigh_expired(neigh)) {
 ds_put_format(, " STALE");
 }
 ds_put_char(, '\n');

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/4] Native tunnel: Update neigh entries in tnl termination.

2021-11-02 Thread Paolo Valerio
- Dropped the RFC tag
- Included Flavio's suggestions
- Squashed patch #5 into #2 and #3

The series is composed of the following patches:

#1: Expires is modified in different contexts (revalidator, pmd-rx, bfd-tx).
It's probably not very racy for many reasons, but it seems a good idea
to use an atomic here (especially with the introduction of #3).

#2: Introduces the possibility to set and read the entries timeout.
It's useful for debugging purpose. Includes a self-test.

#3: Fixes a use case in which the tnl neigh entry never
gets updated (further details in the commit message).
Includes a self-test.

#4: While working on #2 and #3 I noticed unstable timeouts (more
visible when the ageing time is low) during nd snoop.
It turned out to be a problem of revalidation. Additional
details can be found in the commit description.

The use case fixed in #3 could be fixed relaxing the check in
tnl_arp_snoop(), allowing to snoop ARP requests as well, but such
approach could add unneeded entries in the cache.

Paolo Valerio (4):
  Native tunnel: Read/write expires atomically.
  Native tunnel: Add tnl/neigh/ageing command.
  Native tunnel: Do not refresh the entry while revalidating.
  Tunnel: Snoop ingress packets and update neigh cache if needed.


 NEWS|   3 +
 lib/tnl-neigh-cache.c   | 133 +---
 lib/tnl-neigh-cache.h   |   4 +-
 ofproto/ofproto-dpif-xlate.c|  17 +++-
 ofproto/ofproto-tnl-unixctl.man |   9 +++
 tests/tunnel-push-pop-ipv6.at   |  66 
 tests/tunnel-push-pop.at|  65 
 7 files changed, 268 insertions(+), 29 deletions(-)

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] dpif-netdev: Sync PMD ALB state with user commands.

2021-11-02 Thread Kevin Traynor

On 02/11/2021 16:52, Gaëtan Rivet wrote:

On Tue, Nov 2, 2021, at 11:35, Kevin Traynor wrote:

On 02/11/2021 01:34, Gaëtan Rivet wrote:

On Mon, Nov 1, 2021, at 15:42, Kevin Traynor wrote:

Previously, when a user enabled PMD auto load balancer with
pmd-auto-lb="true", some conditions such as number of PMDs/RxQs
that were required for a rebalance to take place were checked.

If the configuration meant that a rebalance would not take place
then PMD ALB was logged as 'disabled' and not run.

Later, if the PMD/RxQ configuration changed whereby a rebalance
could be effective, PMD ALB was logged as 'enabled' and would run at
the appropriate time.

This worked ok from a functional view but it is unintuitive for the
user reading the logs.

e.g. with one PMD (PMD ALB would not be effective)

User enables ALB, but logs say it is disabled because it won't run.
$ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
|dpif_netdev|INFO|PMD auto load balance is disabled

No dry run takes place.

Add more PMDs (PMD ALB may be effective).
$ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50
|dpif_netdev|INFO|PMD auto load balance is enabled ...

Dry run takes place.
|dpif_netdev|DBG|PMD auto load balance performing dry run.

A better approach is to simply reflect back the user enable/disable
state in the logs and deal with whether the rebalance will be effective
when needed. That is the approach taken in this patch.

To cut down on unnecessary work, some basic checks are also made before
starting a PMD ALB dry run and debug logs can indicate this to the user.

e.g. with one PMD (PMD ALB would not be effective)

User enables ALB, and logs confirm the user has enabled it.
$ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
|dpif_netdev|INFO|PMD auto load balance is enabled...

No dry run takes place.
|dpif_netdev|DBG|PMD auto load balance nothing to do, not enough
non-isolated PMDs or RxQs.

Add more PMDs (PMD ALB may be effective).
$ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50

Dry run takes place.
|dpif_netdev|DBG|PMD auto load balance performing dry run.

Signed-off-by: Kevin Traynor 
Acked-by: Sunil Pai G 
Reviewed-by: David Marchand 



Hi Kevin,

The changes make sense and the code looks good.
For the test coverage, given the new behavior it seems more straightforward
as far as checking the ALB state.

If I'm not mistaken however, the previous tests were not only checking the
ALB state (following pmd-auto-lb=true), but also that it would be active given 
the
proper conditions (n_rxq, pmd-cpu-mask > 1).

The test changes kept the first part, but removed the second.
It seems it could be worth testing, WDYT?



Hi Gaetan, thanks for reviewing. Yes, you are right. The issue is that
those checks are currently config time checks, so those conditions can
easily be created in UT. Now that the checks are changed to be during
runtime and only when a CPU threshold is detected for 1 min, it is a
more difficult condition to create in UT.

I can take a look and see if there is something quick I can think of for
this specific case, but otherwise I would rather have a separate task of
seeing if there is a way to fake CPU and RxQ cycles over a period of
time. Then several other runtime tests could be added including ones for
these checks.

Kevin.


It makes sense. Adding a way to simulate load on a PMD is out of scope for
this patch. I think it's otherwise fine so:

Acked-by: Gaetan Rivet 



Thanks Gaetan. Actually, I found a way to test these config checks 
without simulating a load, so I will be sending a v3 once I refine them 
a bit more. While doing that I also noticed I had made but not committed 
the minor log changes from David's comments in v2 :/


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] dpif-netdev: Sync PMD ALB state with user commands.

2021-11-02 Thread Gaëtan Rivet
On Tue, Nov 2, 2021, at 11:35, Kevin Traynor wrote:
> On 02/11/2021 01:34, Gaëtan Rivet wrote:
>> On Mon, Nov 1, 2021, at 15:42, Kevin Traynor wrote:
>>> Previously, when a user enabled PMD auto load balancer with
>>> pmd-auto-lb="true", some conditions such as number of PMDs/RxQs
>>> that were required for a rebalance to take place were checked.
>>>
>>> If the configuration meant that a rebalance would not take place
>>> then PMD ALB was logged as 'disabled' and not run.
>>>
>>> Later, if the PMD/RxQ configuration changed whereby a rebalance
>>> could be effective, PMD ALB was logged as 'enabled' and would run at
>>> the appropriate time.
>>>
>>> This worked ok from a functional view but it is unintuitive for the
>>> user reading the logs.
>>>
>>> e.g. with one PMD (PMD ALB would not be effective)
>>>
>>> User enables ALB, but logs say it is disabled because it won't run.
>>> $ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
>>> |dpif_netdev|INFO|PMD auto load balance is disabled
>>>
>>> No dry run takes place.
>>>
>>> Add more PMDs (PMD ALB may be effective).
>>> $ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50
>>> |dpif_netdev|INFO|PMD auto load balance is enabled ...
>>>
>>> Dry run takes place.
>>> |dpif_netdev|DBG|PMD auto load balance performing dry run.
>>>
>>> A better approach is to simply reflect back the user enable/disable
>>> state in the logs and deal with whether the rebalance will be effective
>>> when needed. That is the approach taken in this patch.
>>>
>>> To cut down on unnecessary work, some basic checks are also made before
>>> starting a PMD ALB dry run and debug logs can indicate this to the user.
>>>
>>> e.g. with one PMD (PMD ALB would not be effective)
>>>
>>> User enables ALB, and logs confirm the user has enabled it.
>>> $ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
>>> |dpif_netdev|INFO|PMD auto load balance is enabled...
>>>
>>> No dry run takes place.
>>> |dpif_netdev|DBG|PMD auto load balance nothing to do, not enough
>>> non-isolated PMDs or RxQs.
>>>
>>> Add more PMDs (PMD ALB may be effective).
>>> $ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50
>>>
>>> Dry run takes place.
>>> |dpif_netdev|DBG|PMD auto load balance performing dry run.
>>>
>>> Signed-off-by: Kevin Traynor 
>>> Acked-by: Sunil Pai G 
>>> Reviewed-by: David Marchand 
>>>
>> 
>> Hi Kevin,
>> 
>> The changes make sense and the code looks good.
>> For the test coverage, given the new behavior it seems more straightforward
>> as far as checking the ALB state.
>> 
>> If I'm not mistaken however, the previous tests were not only checking the
>> ALB state (following pmd-auto-lb=true), but also that it would be active 
>> given the
>> proper conditions (n_rxq, pmd-cpu-mask > 1).
>> 
>> The test changes kept the first part, but removed the second.
>> It seems it could be worth testing, WDYT?
>> 
>
> Hi Gaetan, thanks for reviewing. Yes, you are right. The issue is that 
> those checks are currently config time checks, so those conditions can 
> easily be created in UT. Now that the checks are changed to be during 
> runtime and only when a CPU threshold is detected for 1 min, it is a 
> more difficult condition to create in UT.
>
> I can take a look and see if there is something quick I can think of for 
> this specific case, but otherwise I would rather have a separate task of 
> seeing if there is a way to fake CPU and RxQ cycles over a period of 
> time. Then several other runtime tests could be added including ones for 
> these checks.
>
> Kevin.

It makes sense. Adding a way to simulate load on a PMD is out of scope for
this patch. I think it's otherwise fine so:

Acked-by: Gaetan Rivet 

-- 
Gaetan Rivet
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] problem: long tcp session instantiation with conntrack and OOT ovs kmod

2021-11-02 Thread Vladislav Odintsov
Hi,

it's just a quick reminder that if you know how to debug/fix this, please let
me know.
Thanks.

Regards,
Vladislav Odintsov

> On 11 Oct 2021, at 15:48, Vladislav Odintsov  wrote:
> 
> Hi Greg, Pravin,
> 
> I was suggested to contact you with my OVS/conntrack/OOT KMOD problem. Could 
> you please look at
> this and give any thoughts.
> 
> Initially I posted this question to OVN dev mail list [1] and it seems that 
> OVN is not involved in my issue.
> I was able to reproduce the problem with only OVS and OOT OVS kernel module.
> The problem was seen in OVS 2.13, but it reproduces with OVS master branch 
> code too.
> 
> The problem: long tcp session instantiation.
> 
> If to follow configuration steps from ovs-conntrack tutorial [2] and run in 
> netns "left" some tcp service
> (in my example, httpd), enable sysctl -w net.ipv4.tcp_tw_recycle=1 and run 
> curl from "right" netns to "left"
> passing argument --local-port with same source tcp port value, first tcp 
> session would be established
> correctly (SYN-SYN/ACK-ACK). If we run curl again with same local port, this 
> tcp session instantiation
> will take 1 second to establish because 1st TCP SYN packet will be dropped 
> somewhere in conntrack, because
> conntrack record with same tcp source/dest IP/port was existed. After 1st SYN 
> this record got dropped.
> Then after 1 second tcp makes a retry which is already successful.
> 
> tcpdump from originating veth for such scenario looks like this:
> 
> [root@ovn-1 ~]# tcpdump  -ni veth_r0
> tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
> listening on veth_r0, link-type EN10MB (Ethernet), capture size 262144 bytes
> 15:18:53.690544 IP 192.168.0.2.12346 > 192.168.0.1.http: Flags [S], seq 
> 4033940784, win 29200, options [mss 1460,sackOK,TS val 957559592 ecr 
> 0,nop,wscale 7], length 0
> 15:18:54.692030 IP 192.168.0.2.12346 > 192.168.0.1.http: Flags [S], seq 
> 4033940784, win 29200, options [mss 1460,sackOK,TS val 957560594 ecr 
> 0,nop,wscale 7], length 0
> 15:18:54.692127 IP 192.168.0.1.http > 192.168.0.2.12346: Flags [S.], seq 
> 3684571696, ack 4033940785, win 28960, options [mss 1460,sackOK,TS val 
> 957560594 ecr 957560594,nop,wscale 7], length 0
> 15:18:54.692150 IP 192.168.0.2.12346 > 192.168.0.1.http: Flags [.], ack 1, 
> win 229, options [nop,nop,TS val 957560594 ecr 957560594], length 0
> 
> Conntrack output (while true loop) while this syn-syn-syn/ack is running:
> 
> [root@ovn-1 ~]# while true; do sleep 0.5; date; grep 192.168.0. 
> /proc/net/nf_conntrack; done
> ipv4 2 tcp  6 117 TIME_WAIT src=192.168.0.1 dst=192.168.0.2 sport=80 
> dport=12345 src=192.168.0.2 dst=192.168.0.1 sport=12345 dport=80 [ASSURED] 
> mark=0 zone=0 use=2
> Mon Oct 11 15:46:24 MSK 2021  # <<< -here I ran curl 
> ipv4 2 tcp  6 117 TIME_WAIT src=192.168.0.1 dst=192.168.0.2 sport=80 
> dport=12345 src=192.168.0.2 dst=192.168.0.1 sport=12345 dport=80 [ASSURED] 
> mark=0 zone=0 use=2
> Mon Oct 11 15:46:25 MSK 2021
> Mon Oct 11 15:46:25 MSK 2021
> Mon Oct 11 15:46:26 MSK 2021
> ipv4 2 tcp  6 119 TIME_WAIT src=192.168.0.1 dst=192.168.0.2 sport=80 
> dport=12345 src=192.168.0.2 dst=192.168.0.1 sport=12345 dport=80 [ASSURED] 
> mark=0 zone=0 use=2  <<< - retry SYN
> Mon Oct 11 15:46:26 MSK 2021
> ipv4 2 tcp  6 119 TIME_WAIT src=192.168.0.1 dst=192.168.0.2 sport=80 
> dport=12345 src=192.168.0.2 dst=192.168.0.1 sport=12345 dport=80 [ASSURED] 
> mark=0 zone=0 use=2
> 
> 
> I would highly appreciate if you can look at this as it's quite important 
> problem for my installation.
> Thanks in advance.
> 
> 1: https://mail.openvswitch.org/pipermail/ovs-dev/2021-September/387623.html 
> 
> 2: https://docs.openvswitch.org/en/latest/tutorials/ovs-conntrack/ 
> 
> 
> Regards,
> Vladislav Odintsov
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] tests: Fix requested-chassis localport test

2021-11-02 Thread Dumitru Ceara
On 11/2/21 2:31 PM, Frode Nordahl wrote:
> The ovn-controller requested-chassis localport test currently
> makes a change to the NB DB with --wait=sb, and subsequently does
> an immediate check for change on a chassis.  This does not allways
> succeed.
> 
> Update the test to use `--wait=hv` instead.
> 
> Fixes: ad77db239d9a ("controller: Make use of 
> Port_Binding:requested_chassis.")
> Signed-off-by: Frode Nordahl 
> ---

Hi Frode,

This looks good to me, thanks!

Acked-by: Dumitru Ceara 

Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovsdb: Don't let transaction history grow larger than the database.

2021-11-02 Thread Dumitru Ceara
On 10/14/21 4:58 PM, Ilya Maximets wrote:
> If user frequently changes a lot of rows in a database, transaction
> history could grow way larger than the database itself.  This wastes
> a lot of memory and also makes monitor_cond_since slower than
> usual monitor_cond if the transaction id is old enough, because
> re-construction of the changes from a history is slower than just
> creation of initial database snapshot.  This is also the case if
> user deleted a lot of data, so transaction history still holds all of
> it while the database itself doesn't.
> 
> In case of current lb-per-service model in ovn-kubernetes, each
> load-balancer is added to every logical switch/router.  Such a
> transaction touches more than a half of a OVN_Northbound database.

Hi Ilya

This is also being fixed in ovn-kubernetes and load balancer groups will
be used instead; this will avoid generating such inefficient
transactions.  Nevertheless, limiting transaction history size makes
sense in general.

I have a tiny comment below; with it addressed:

Acked-by: Dumitru Ceara 

Thanks,
Dumitru

> And each of these transactions is added to the transaction history.
> Since transaction history depth is 100, in worst case scenario,
> it will hold 100 copies of a database increasing memory consumption
> dramatically.  In tests with 3000 LBs and 120 LSs, memory goes up
> to 3 GB, while holding at 30 MB if transaction history disabled in
> the code.
> 
> Fixing that by keeping count of the number of ovsdb_atom's in the
> database and not allowing the total number of atoms in transaction
> history to grow larger than this value.  Counting atoms is fairly
> cheap because we don't need to iterate over them, so it doesn't have
> significant performance impact.  It would be ideal to measure the
> size of individual atoms, but that will hit the performance.
> Counting cells instead of atoms is not sufficient, because OVN
> users are adding hundreds or thousands of atoms to a single cell,
> so they are largely different in size.
> 
> Signed-off-by: Ilya Maximets 
> ---
>  ovsdb/ovsdb.c   |  5 
>  ovsdb/ovsdb.h   |  3 +++
>  ovsdb/transaction.c | 56 +++--
>  3 files changed, 62 insertions(+), 2 deletions(-)
> 
> diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
> index 126d16a2f..e6d866182 100644
> --- a/ovsdb/ovsdb.c
> +++ b/ovsdb/ovsdb.c
> @@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct 
> ovsdb_storage *storage)
>  ovs_list_init(>triggers);
>  db->run_triggers_now = db->run_triggers = false;
>  
> +db->n_atoms = 0;
> +
>  db->is_relay = false;
>  ovs_list_init(>txn_forward_new);
>  hmap_init(>txn_forward_sent);
> @@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct 
> simap *usage)
>  }
>  
>  simap_increase(usage, "cells", cells);
> +simap_increase(usage, "atoms", db->n_atoms);
> +simap_increase(usage, "txn-history", db->n_txn_history);
> +simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);
>  
>  if (db->storage) {
>  ovsdb_storage_get_memory_usage(db->storage, usage);
> diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
> index 4a7bd0f0e..ec2d235ec 100644
> --- a/ovsdb/ovsdb.h
> +++ b/ovsdb/ovsdb.h
> @@ -90,8 +90,11 @@ struct ovsdb {
>  /* History trasanctions for incremental monitor transfer. */
>  bool need_txn_history; /* Need to maintain history of transactions. 
> */
>  unsigned int n_txn_history; /* Current number of history transactions. */
> +unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
>  struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. 
> */
>  
> +size_t n_atoms;  /* Total number of ovsdb atoms in the database. */
> +
>  /* Relay mode. */
>  bool is_relay;  /* True, if database is in relay mode. */
>  /* List that holds transactions waiting to be forwarded to the server. */
> diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
> index d61c0..a7ca09b58 100644
> --- a/ovsdb/transaction.c
> +++ b/ovsdb/transaction.c
> @@ -41,6 +41,9 @@ struct ovsdb_txn {
>  struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
>  struct ds comment;
>  struct uuid txnid; /* For clustered mode only. It is the eid. */
> +size_t n_atoms;/* Number of atoms in all transaction rows. */
> +ssize_t n_atoms_diff;  /* Difference between number of added and
> +* removed atoms. */
>  };
>  
>  /* A table modified by a transaction. */
> @@ -842,6 +845,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED,
>  return NULL;
>  }
>  
> +static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
> +count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
> +{
> +struct ovsdb_table *table = txn_row->table;
> +ssize_t n_atoms_old = 0, n_atoms_new = 0;
> +struct shash_node *node;
> +
> +SHASH_FOR_EACH (node, >schema->columns) 

Re: [ovs-dev] [PATCH] ovsdb: transaction: Incremental reassessment of weak refs.

2021-11-02 Thread Dumitru Ceara
On 10/16/21 3:20 AM, Ilya Maximets wrote:
> The main idea is to not store list of weak references in the source
> row, so they all don't need to be re-checked/updated on every
> modification of that source row.  The point is that source row already
> knows UUIDs of all destination rows stored in the data, so there is no
> much profit in storing this information somewhere else.  If needed,
> destination row can be looked up and reference can be looked up in the
> destination row.  For the fast lookup, destination row now stores
> references in a hash map.
> 
> Weak reference structure now contains the table and uuid of a source
> row instead of a direct pointer.  This allows to replace/update the
> source row without breaking any weak references stored in destination
> rows.
> 
> Structure also now contains the key-value pair of atoms that triggered
> creation of this reference.  These atoms can be used to quickly
> subtract removed references from a source row.  During reassessment,
> ovsdb now only needs to care about new added or removed atoms, and
> atoms that got removed due to removal of the destination rows, but
> these are marked for reassessment by the destination row.
> 
> ovsdb_datum_subtract() is used to remove atoms that point to removed
> or incorrect rows, so there is no need to re-sort datum in the end.
> 
> Results of an OVN load-balancer benchmark that adds 3K load-balancers
> to each of 120 logical switches and 120 logical routers in the OVN
> sandbox with clustered Northbound database and then removes them:
> 
> Before:
> 
>   %CPU  CPU Time  CMD
>   86.8  00:16:05  ovsdb-server nb1.db
>   44.1  00:08:11  ovsdb-server nb2.db
>   43.2  00:08:00  ovsdb-server nb3.db
> 
> After:
> 
>   %CPU  CPU Time  CMD
>   54.9  00:02:58  ovsdb-server nb1.db
>   33.3  00:01:48  ovsdb-server nb2.db
>   32.2  00:01:44  ovsdb-server nb3.db
> 
> So, on a cluster leader the processing time dropped by 5.4x, on
> followers - by 4.5x.  More load-balancers - larger the performance
> difference.  There is a slight increase of memory usage, because new
> reference structure is larger, but the difference is not significant.
> 
> Signed-off-by: Ilya Maximets 
> ---

Hi Ilya,

We've been running quite a few scale tests with this change applied
downstream and performance improvement is visible and we didn't detect
any regressions AFAICT.

The code looks good to me, I left a couple of tiny nits that can be
fixed at apply time.

Acked-by: Dumitru Ceara 

Thanks,
Dumitru

>  ovsdb/row.c |  97 +---
>  ovsdb/row.h |  34 +-
>  ovsdb/transaction.c | 262 ++--
>  3 files changed, 293 insertions(+), 100 deletions(-)
> 
> diff --git a/ovsdb/row.c b/ovsdb/row.c
> index 65a054621..7c6914773 100644
> --- a/ovsdb/row.c
> +++ b/ovsdb/row.c
> @@ -38,8 +38,7 @@ allocate_row(const struct ovsdb_table *table)
>  struct ovsdb_row *row = xmalloc(row_size);
>  row->table = CONST_CAST(struct ovsdb_table *, table);
>  row->txn_row = NULL;
> -ovs_list_init(>src_refs);
> -ovs_list_init(>dst_refs);
> +hmap_init(>dst_refs);
>  row->n_refs = 0;
>  return row;
>  }
> @@ -61,6 +60,63 @@ ovsdb_row_create(const struct ovsdb_table *table)
>  return row;
>  }
>  
> +static struct ovsdb_weak_ref *
> +ovsdb_weak_ref_clone(struct ovsdb_weak_ref *src)
> +{
> +struct ovsdb_weak_ref *weak = xzalloc(sizeof *weak);
> +
> +weak->src_table = src->src_table;
> +weak->src = src->src;
> +weak->dst_table = src->dst_table;
> +weak->dst = src->dst;
> +ovsdb_type_clone(>type, >type);
> +ovsdb_atom_clone(>key, >key, src->type.key.type);
> +if (src->type.value.type != OVSDB_TYPE_VOID) {
> +ovsdb_atom_clone(>value, >value, src->type.value.type);
> +}
> +weak->by_key = src->by_key;
> +weak->column_idx = src->column_idx;
> +ovs_list_init(>src_node);
> +hmap_node_nullify(>dst_node);

Nit: I think we can initialize 'weak''s fields in the same order as
they're defined in the ovsdb_weak_ref structure.

> +return weak;
> +}
> +
> +uint32_t
> +ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *weak)
> +{
> +return uuid_hash(>src);
> +}
> +
> +static bool
> +ovsdb_weak_ref_equals(const struct ovsdb_weak_ref *a,
> +  const struct ovsdb_weak_ref *b)
> +{
> +if (a == b) {
> +return true;
> +}
> +return a->src_table == b->src_table
> +   && a->dst_table == b->dst_table
> +   && uuid_equals(>src, >src)
> +   && uuid_equals(>dst, >dst)
> +   && a->column_idx == b->column_idx
> +   && a->by_key == b->by_key
> +   && ovsdb_atom_equals(>key, >key, a->type.key.type);
> +}
> +
> +struct ovsdb_weak_ref *
> +ovsdb_row_find_weak_ref(const struct ovsdb_row *row,
> +const struct ovsdb_weak_ref *ref)
> +{
> +struct ovsdb_weak_ref *weak;
> +HMAP_FOR_EACH_WITH_HASH (weak, dst_node,
> +

Re: [ovs-dev] [PATCH v2] jsonrpc: Sort JSON objects only if debug is on

2021-11-02 Thread Mike Pattrick
Hi Anton,

Anton Ivanov  writes:

> From: Anton Ivanov 
> 
> There is no point to sort JSON objects when nobody is
> observing them. Machines do not care if it is sorted or
> not.
> 
> Signed-off-by: Anton Ivanov 

This makes sense to me. The automated CI seems to have had 
an issue when this was submitted, but it does pass the
tests.

I see previously there was a discussion about how this
might affect human readability, but I think the ability
to pipe unsorted JSON into command line prettifiers like
"python -m json.tool" should cover most manual inspection
use cases.

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ipf: add ipf context

2021-11-02 Thread Aaron Conole
Hi Peng,

Peng He  writes:

> ipf_postprocess will emit packets into the datapath pipeline ignoring
> the conntrack context, this might cause weird issues when a packet
> batch has less space to contain all the fragments belonging to a single
> packet.
>
> Given the below ruleset and consider sending a 64K ICMP packet which
> is split into 64 fragments.
>
> priority=1,action=drop
> priority=10,arp,action=normal
> priority=100,in_port=1,ct_state=-trk,icmp,action=ct(zone=9,table=0)
> priority=100,in_port=1,ct_state=+new+trk,icmp,action=ct(zone=9,commit),2
> priority=100,in_port=1,ct_state=-new+est+trk,icmp,action=2
> priority=100,in_port=2,ct_state=-trk,icmp,action=ct(table=0,zone=9)
> priority=100,in_port=2,ct_state=+trk+est-new,icmp,action=1
>
> Batch 1:
> the first 32 packets will be buffered in the ipf preprocessing, nothing
> more proceeds.
>
> Batch 2:
> the second 32 packets succeed the fragment reassembly and goes to ct
> and ipf_post will emits the first 32 packets due to the limit of batch
> size.
>
> the first 32 packets goes to the datapath again due to the
> recirculation, and again buffered at ipf preprocessing before ct,
> then the ovs tries to call ct commit and ipf_postprocessing which emits
> the last 32 packets, in this case the last 32 packets will follow
> the current action list which will be sent to port 2 directly without
> recirculation and going to ipf preprocessing again.
>
> This will cause the first 32 packets to never get the chance to
> reassemble, and eventually this large ICMP packet fails to transmit.
>
> this patch fixes this issue by adding firstly ipf context to avoid
> ipf_postprocessing emits packets in the wrong context. Then by
> re-executing the action list again to emit the last 32 packets
> in the right context to correctly transmitting multiple fragments.
> ---

There are quite a few splats from checkpatch checks.  I will look a bit
closer when v2 comes around.  Thank you also for adding a unit test with
it to showcase the issue.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH dpdk-latest 2/2] netdev-dpdk: Fix build with 21.11-rc1.

2021-11-02 Thread Stokes, Ian
> > PKT_[RT]X_* and other mbuf macros have been prefixed with RTE_MBUF_ [1].
> > Update accordingly.
> >
> > 1: https://git.dpdk.org/dpdk/commit/?id=daa02b5cddbb
> >
> > Signed-off-by: David Marchand 
> 
> HI David, thanks for the patches, I was just running these through github and
> spotted there was a failure in the build not sure if it's related to these 
> patches or
> possibly a different patch that has been upstream to DPDK main and may require
> a follow on patch
> 
> Error and build link below.

Apologies, forgot to add the build link.

https://github.com/istokes/ovs/actions/runs/1412616124

Regards
Ian
> 
> ../../lib/ofp-packet.c: note: in included file (through
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_flow.h, ../../lib/netdev-
> dpdk.h, ../../lib/dp-packet.h):
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:92:37: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:93:37: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:94:40: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:95:37: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:96:40: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:97:37: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:98:37: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:99:40: error:
> invalid bitfield specifier for type restricted ovs_be16.
> /home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:100:39: error:
> invalid bitfield specifier for type restricted ovs_be16.
> 
> 
> Regards
> Ian
> 
> > ---
> >  lib/dp-packet.h   | 26 ++
> >  lib/netdev-dpdk.c | 18 +-
> >  2 files changed, 23 insertions(+), 21 deletions(-)
> >
> > diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> > index 3dc582fbfd..ee0805ae69 100644
> > --- a/lib/dp-packet.h
> > +++ b/lib/dp-packet.h
> > @@ -58,29 +58,31 @@ enum OVS_PACKED_ENUM dp_packet_source {
> >  enum dp_packet_offload_mask {
> >  /* Value 0 is not used. */
> >  /* Is the 'rss_hash' valid? */
> > -DEF_OL_FLAG(DP_PACKET_OL_RSS_HASH, PKT_RX_RSS_HASH, 0x1),
> > +DEF_OL_FLAG(DP_PACKET_OL_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
> > 0x1),
> >  /* Is the 'flow_mark' valid? */
> > -DEF_OL_FLAG(DP_PACKET_OL_FLOW_MARK, PKT_RX_FDIR_ID, 0x2),
> > +DEF_OL_FLAG(DP_PACKET_OL_FLOW_MARK, RTE_MBUF_F_RX_FDIR_ID,
> > 0x2),
> >  /* Bad L4 checksum in the packet. */
> > -DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_BAD,
> > PKT_RX_L4_CKSUM_BAD, 0x4),
> > +DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_BAD,
> > RTE_MBUF_F_RX_L4_CKSUM_BAD, 0x4),
> >  /* Bad IP checksum in the packet. */
> > -DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_BAD,
> > PKT_RX_IP_CKSUM_BAD, 0x8),
> > +DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_BAD,
> > RTE_MBUF_F_RX_IP_CKSUM_BAD, 0x8),
> >  /* Valid L4 checksum in the packet. */
> > -DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_GOOD,
> > PKT_RX_L4_CKSUM_GOOD, 0x10),
> > +DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_GOOD,
> > RTE_MBUF_F_RX_L4_CKSUM_GOOD,
> > +0x10),
> >  /* Valid IP checksum in the packet. */
> > -DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD,
> > PKT_RX_IP_CKSUM_GOOD, 0x20),
> > +DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD,
> > RTE_MBUF_F_RX_IP_CKSUM_GOOD,
> > +0x20),
> >  /* TCP Segmentation Offload. */
> > -DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, PKT_TX_TCP_SEG, 0x40),
> > +DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, RTE_MBUF_F_TX_TCP_SEG,
> > 0x40),
> >  /* Offloaded packet is IPv4. */
> > -DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, PKT_TX_IPV4, 0x80),
> > +DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, RTE_MBUF_F_TX_IPV4, 0x80),
> >  /* Offloaded packet is IPv6. */
> > -DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, PKT_TX_IPV6, 0x100),
> > +DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, RTE_MBUF_F_TX_IPV6, 0x100),
> >  /* Offload TCP checksum. */
> > -DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CKSUM, PKT_TX_TCP_CKSUM,
> > 0x200),
> > +DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CKSUM,
> > RTE_MBUF_F_TX_TCP_CKSUM, 0x200),
> >  /* Offload UDP checksum. */
> > -DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, PKT_TX_UDP_CKSUM,
> > 0x400),
> > +DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM,
> > RTE_MBUF_F_TX_UDP_CKSUM, 0x400),
> >  /* Offload SCTP checksum. */
> > -DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM,
> > 0x800),
> > +DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM,
> > RTE_MBUF_F_TX_SCTP_CKSUM, 0x800),
> >  /* Adding new field requires adding to 

Re: [ovs-dev] [PATCH dpdk-latest 2/2] netdev-dpdk: Fix build with 21.11-rc1.

2021-11-02 Thread Stokes, Ian
> PKT_[RT]X_* and other mbuf macros have been prefixed with RTE_MBUF_ [1].
> Update accordingly.
> 
> 1: https://git.dpdk.org/dpdk/commit/?id=daa02b5cddbb
> 
> Signed-off-by: David Marchand 

Hi David, thanks for the patches. I was just running these through github and 
spotted there was a failure in the build; not sure if it's related to these 
patches or possibly a different patch that has been upstreamed to DPDK main and 
may require a follow-on patch.

Error and build link below.

../../lib/ofp-packet.c: note: in included file (through 
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_flow.h, 
../../lib/netdev-dpdk.h, ../../lib/dp-packet.h):
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:92:37: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:93:37: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:94:40: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:95:37: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:96:40: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:97:37: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:98:37: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:99:40: error: 
invalid bitfield specifier for type restricted ovs_be16.
/home/runner/work/ovs/ovs/dpdk-dir/build/include/rte_l2tpv2.h:100:39: error: 
invalid bitfield specifier for type restricted ovs_be16.


Regards
Ian

> ---
>  lib/dp-packet.h   | 26 ++
>  lib/netdev-dpdk.c | 18 +-
>  2 files changed, 23 insertions(+), 21 deletions(-)
> 
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 3dc582fbfd..ee0805ae69 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -58,29 +58,31 @@ enum OVS_PACKED_ENUM dp_packet_source {
>  enum dp_packet_offload_mask {
>  /* Value 0 is not used. */
>  /* Is the 'rss_hash' valid? */
> -DEF_OL_FLAG(DP_PACKET_OL_RSS_HASH, PKT_RX_RSS_HASH, 0x1),
> +DEF_OL_FLAG(DP_PACKET_OL_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
> 0x1),
>  /* Is the 'flow_mark' valid? */
> -DEF_OL_FLAG(DP_PACKET_OL_FLOW_MARK, PKT_RX_FDIR_ID, 0x2),
> +DEF_OL_FLAG(DP_PACKET_OL_FLOW_MARK, RTE_MBUF_F_RX_FDIR_ID,
> 0x2),
>  /* Bad L4 checksum in the packet. */
> -DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_BAD,
> PKT_RX_L4_CKSUM_BAD, 0x4),
> +DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_BAD,
> RTE_MBUF_F_RX_L4_CKSUM_BAD, 0x4),
>  /* Bad IP checksum in the packet. */
> -DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_BAD,
> PKT_RX_IP_CKSUM_BAD, 0x8),
> +DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_BAD,
> RTE_MBUF_F_RX_IP_CKSUM_BAD, 0x8),
>  /* Valid L4 checksum in the packet. */
> -DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_GOOD,
> PKT_RX_L4_CKSUM_GOOD, 0x10),
> +DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_GOOD,
> RTE_MBUF_F_RX_L4_CKSUM_GOOD,
> +0x10),
>  /* Valid IP checksum in the packet. */
> -DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD,
> PKT_RX_IP_CKSUM_GOOD, 0x20),
> +DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD,
> RTE_MBUF_F_RX_IP_CKSUM_GOOD,
> +0x20),
>  /* TCP Segmentation Offload. */
> -DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, PKT_TX_TCP_SEG, 0x40),
> +DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, RTE_MBUF_F_TX_TCP_SEG,
> 0x40),
>  /* Offloaded packet is IPv4. */
> -DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, PKT_TX_IPV4, 0x80),
> +DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, RTE_MBUF_F_TX_IPV4, 0x80),
>  /* Offloaded packet is IPv6. */
> -DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, PKT_TX_IPV6, 0x100),
> +DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, RTE_MBUF_F_TX_IPV6, 0x100),
>  /* Offload TCP checksum. */
> -DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CKSUM, PKT_TX_TCP_CKSUM,
> 0x200),
> +DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CKSUM,
> RTE_MBUF_F_TX_TCP_CKSUM, 0x200),
>  /* Offload UDP checksum. */
> -DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, PKT_TX_UDP_CKSUM,
> 0x400),
> +DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM,
> RTE_MBUF_F_TX_UDP_CKSUM, 0x400),
>  /* Offload SCTP checksum. */
> -DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM,
> 0x800),
> +DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM,
> RTE_MBUF_F_TX_SCTP_CKSUM, 0x800),
>  /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK.
> */
>  };
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 6d3fd6beda..db08aec440 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2165,14 +2165,14 @@ netdev_dpdk_prep_hwol_packet(struct
> netdev_dpdk *dev, struct rte_mbuf *mbuf)
>  {
>  struct dp_packet 

[ovs-dev] [PATCH ovn] tests: Fix requested-chassis localport test

2021-11-02 Thread Frode Nordahl
The ovn-controller requested-chassis localport test currently
makes a change to the NB DB with --wait=sb, and subsequently does
an immediate check for change on a chassis.  This does not always
succeed.

Update the test to use `--wait=hv` instead.

Fixes: ad77db239d9a ("controller: Make use of Port_Binding:requested_chassis.")
Signed-off-by: Frode Nordahl 
---
 tests/ovn.at | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ovn.at b/tests/ovn.at
index 7cfdede77..72a397ae0 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -28723,12 +28723,12 @@ AT_CHECK([as hv1 ovs-ofctl -O OpenFlow15 dump-flows 
br-int table=0 | grep -q in_
 AT_CHECK([as hv2 ovs-ofctl -O OpenFlow15 dump-flows br-int table=0 | grep -q 
in_port=13], [0], [])
 
 # Set requested-chassis to one of the HVs
-check ovn-nbctl --wait=sb lsp-set-options sw0-lport requested-chassis="hv1"
+check ovn-nbctl --wait=hv lsp-set-options sw0-lport requested-chassis="hv1"
 AT_CHECK([as hv1 ovs-ofctl -O OpenFlow15 dump-flows br-int table=0 | grep -q 
in_port=13], [0], [])
 AT_CHECK([as hv2 ovs-ofctl -O OpenFlow15 dump-flows br-int table=0 | grep -q 
in_port=13], [1], [])
 
 # Set requested-chassis to empty string
-check ovn-nbctl --wait=sb lsp-set-options sw0-lport requested-chassis=""
+check ovn-nbctl --wait=hv lsp-set-options sw0-lport requested-chassis=""
 AT_CHECK([as hv1 ovs-ofctl -O OpenFlow15 dump-flows br-int table=0 | grep -q 
in_port=13], [0], [])
 AT_CHECK([as hv2 ovs-ofctl -O OpenFlow15 dump-flows br-int table=0 | grep -q 
in_port=13], [0], [])
 
-- 
2.32.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v8 1/4] lib: Add infrastructure for VIF plug providers.

2021-11-02 Thread Frode Nordahl
New lib/vif-plug-provider module contains the infrastructure for
registering VIF plug provider classes which may be hosted inside
or outside the core OVN repository.

New controller/vif-plug module adds internal interface for
interacting with the VIF plug providers.

Extend build system to allow building of built-in VIF plug
providers and linking an externally built VIF plug provider.

Signed-off-by: Frode Nordahl 
---
 Documentation/automake.mk |   2 +
 Documentation/topics/index.rst|   1 +
 .../topics/vif-plug-providers/index.rst   |  32 +
 .../vif-plug-providers/vif-plug-providers.rst | 209 ++
 acinclude.m4  |  49 ++
 configure.ac  |   2 +
 controller/automake.mk|   4 +-
 controller/test-vif-plug.c|  72 ++
 controller/vif-plug.c | 632 ++
 controller/vif-plug.h |  79 +++
 lib/automake.mk   |  10 +-
 lib/vif-plug-provider.c   | 204 ++
 lib/vif-plug-provider.h   | 163 +
 lib/vif-plug-providers/dummy/vif-plug-dummy.c | 120 
 ovn-architecture.7.xml|  35 +-
 tests/automake.mk |  13 +-
 tests/ovn-vif-plug.at |   8 +
 17 files changed, 1619 insertions(+), 16 deletions(-)
 create mode 100644 Documentation/topics/vif-plug-providers/index.rst
 create mode 100644 
Documentation/topics/vif-plug-providers/vif-plug-providers.rst
 create mode 100644 controller/test-vif-plug.c
 create mode 100644 controller/vif-plug.c
 create mode 100644 controller/vif-plug.h
 create mode 100644 lib/vif-plug-provider.c
 create mode 100644 lib/vif-plug-provider.h
 create mode 100644 lib/vif-plug-providers/dummy/vif-plug-dummy.c
 create mode 100644 tests/ovn-vif-plug.at

diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index b3fd3d62b..704c80671 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -28,6 +28,8 @@ DOC_SOURCE = \
Documentation/topics/ovn-news-2.8.rst \
Documentation/topics/role-based-access-control.rst \
Documentation/topics/debugging-ddlog.rst \
+   Documentation/topics/vif-plug-providers/index.rst \
+   Documentation/topics/vif-plug-providers/vif-plug-providers.rst \
Documentation/howto/index.rst \
Documentation/howto/docker.rst \
Documentation/howto/firewalld.rst \
diff --git a/Documentation/topics/index.rst b/Documentation/topics/index.rst
index d58d5618b..e9e49c742 100644
--- a/Documentation/topics/index.rst
+++ b/Documentation/topics/index.rst
@@ -41,6 +41,7 @@ OVN
high-availability
role-based-access-control
ovn-news-2.8
+   vif-plug-providers/index
testing
 
 .. list-table::
diff --git a/Documentation/topics/vif-plug-providers/index.rst 
b/Documentation/topics/vif-plug-providers/index.rst
new file mode 100644
index 0..b7552ac4c
--- /dev/null
+++ b/Documentation/topics/vif-plug-providers/index.rst
@@ -0,0 +1,32 @@
+..
+  Licensed under the Apache License, Version 2.0 (the "License"); you may
+  not use this file except in compliance with the License. You may obtain
+  a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  License for the specific language governing permissions and limitations
+  under the License.
+
+  Convention for heading levels in OVN documentation:
+
+  ===  Heading 0 (reserved for the title in a document)
+  ---  Heading 1
+  ~~~  Heading 2
+  +++  Heading 3
+  '''  Heading 4
+
+  Avoid deeper levels because they do not render well.
+
+==
+VIF Plug Providers
+==
+
+
+.. toctree::
+   :maxdepth: 2
+
+   vif-plug-providers
diff --git a/Documentation/topics/vif-plug-providers/vif-plug-providers.rst 
b/Documentation/topics/vif-plug-providers/vif-plug-providers.rst
new file mode 100644
index 0..77ecf7e0f
--- /dev/null
+++ b/Documentation/topics/vif-plug-providers/vif-plug-providers.rst
@@ -0,0 +1,209 @@
+..
+  Licensed under the Apache License, Version 2.0 (the "License"); you may
+  not use this file except in compliance with the License. You may obtain
+  a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  License for the specific language governing permissions and 

[ovs-dev] [PATCH ovn v8 3/4] controller: Consider plugging VIF on CMS request.

2021-11-02 Thread Frode Nordahl
When OVN is linked with an appropriate VIF plug provider,
CMS can request an OVN controller to plug individual lports into
the Open vSwitch instance it manages.

The port and interface record will be maintained throughout the
lifetime of the lport and it will be removed on release of lport.

Signed-off-by: Frode Nordahl 
---
 controller/ovn-controller.c |  61 +-
 ovn-nb.xml  |  21 +++
 tests/ovn-macros.at |   2 +-
 tests/ovn.at| 121 
 4 files changed, 203 insertions(+), 2 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index dd09d57e2..b4d4636d8 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -106,6 +106,7 @@ static unixctl_cb_func debug_delay_nb_cfg_report;
 #define IF_STATUS_MGR_UPDATE_STOPWATCH_NAME "if-status-mgr-update"
 #define OFCTRL_SEQNO_RUN_STOPWATCH_NAME "ofctrl-seqno-run"
 #define BFD_RUN_STOPWATCH_NAME "bfd-run"
+#define VIF_PLUG_RUN_STOPWATCH_NAME "vif-plug-run"
 
 #define OVS_NB_CFG_NAME "ovn-nb-cfg"
 #define OVS_NB_CFG_TS_NAME "ovn-nb-cfg-ts"
@@ -937,6 +938,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
 binding_register_ovs_idl(ovs_idl);
 bfd_register_ovs_idl(ovs_idl);
 physical_register_ovs_idl(ovs_idl);
+vif_plug_register_ovs_idl(ovs_idl);
 }
 
 #define SB_NODES \
@@ -3205,6 +3207,7 @@ main(int argc, char *argv[])
 stopwatch_create(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME, SW_MS);
 stopwatch_create(OFCTRL_SEQNO_RUN_STOPWATCH_NAME, SW_MS);
 stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);
+stopwatch_create(VIF_PLUG_RUN_STOPWATCH_NAME, SW_MS);
 
 /* Define inc-proc-engine nodes. */
 ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
@@ -3440,6 +3443,11 @@ main(int argc, char *argv[])
 };
 struct if_status_mgr *if_mgr = ctrl_engine_ctx.if_mgr;
 
+struct shash vif_plug_deleted_iface_ids =
+SHASH_INITIALIZER(_plug_deleted_iface_ids);
+struct shash vif_plug_changed_iface_ids =
+SHASH_INITIALIZER(_plug_changed_iface_ids);
+
 char *ovn_version = ovn_get_internal_version();
 VLOG_INFO("OVN internal version is : [%s]", ovn_version);
 
@@ -3650,6 +3658,37 @@ main(int argc, char *argv[])
 ovsrec_port_table_get(ovs_idl_loop.idl),
 br_int, chassis, _data->local_datapaths);
 stopwatch_stop(PATCH_RUN_STOPWATCH_NAME, time_msec());
+if (vif_plug_provider_has_providers() && ovs_idl_txn) {
+struct vif_plug_ctx_in vif_plug_ctx_in = {
+.ovs_idl_txn = ovs_idl_txn,
+.sbrec_port_binding_by_name =
+sbrec_port_binding_by_name,
+.sbrec_port_binding_by_requested_chassis =
+sbrec_port_binding_by_requested_chassis,
+.ovsrec_port_by_interfaces =
+ovsrec_port_by_interfaces,
+.ovs_table = ovs_table,
+.br_int = br_int,
+.iface_table =
+ovsrec_interface_table_get(
+ovs_idl_loop.idl),
+.chassis_rec = chassis,
+.local_bindings =
+_data->lbinding_data.bindings,
+};
+struct vif_plug_ctx_out vif_plug_ctx_out = {
+.deleted_iface_ids =
+_plug_deleted_iface_ids,
+.changed_iface_ids =
+_plug_changed_iface_ids,
+};
+stopwatch_start(VIF_PLUG_RUN_STOPWATCH_NAME,
+time_msec());
+vif_plug_run(_plug_ctx_in,
+ _plug_ctx_out);
+stopwatch_stop(VIF_PLUG_RUN_STOPWATCH_NAME,
+   time_msec());
+}
 stopwatch_start(PINCTRL_RUN_STOPWATCH_NAME,
 time_msec());
 pinctrl_run(ovnsb_idl_txn,
@@ -3806,7 +3845,16 @@ main(int argc, char *argv[])
 engine_set_force_recompute(true);
 }
 
-if (ovsdb_idl_loop_commit_and_wait(_idl_loop) == 1) {
+int ovs_txn_status = ovsdb_idl_loop_commit_and_wait(_idl_loop);
+if (!ovs_txn_status) {
+/* The transaction failed. */
+vif_plug_clear_deleted(
+_plug_deleted_iface_ids);
+vif_plug_clear_changed(

[ovs-dev] [PATCH ovn v8 0/4] Introduce infrastructure for VIF plug providers

2021-11-02 Thread Frode Nordahl
Introduce infrastructure for VIF plug providers and add feature to
ovn-controller to add and remove ports on the integration bridge as
directed by CMS through Logical_Switch_Port options.

Traditionally it has been the CMS's responsibility to create Virtual
Interfaces (VIFs) as part of instance (Container, Pod, Virtual
Machine etc.) life cycle, and subsequently manage plug/unplug
operations on the Open vSwitch integration bridge.

With the advent of NICs connected to multiple distinct CPUs we can
have a topology where the instance runs on one host and Open
vSwitch and OVN runs on a different host, the smartnic control plane
CPU.  The host facing interfaces will be visible to Open vSwitch
and OVN as representor ports.

The actions necessary for plugging and unplugging the representor
port in Open vSwitch running on the smartnic control plane CPU would
be the same for every CMS.

Hardware or platform specific details for initialization and lookup
of representor ports is provided by an plugging provider hosted
inside or outside the core OVN repository, and linked at OVN build
time.

RFC1 -> RFC2:
 - Introduce the plug-provider interface, remove hardware/platform
   dependent code.
 - Add ovsport module.
 - Integrate with binding module.
 - Split into multiple patches.

RFC2 -> v1:
 - Extend build system, `--with-plug-provider`.
 - Check for required data in b_ctx_in and lbinding data structures.
 - Split consider_plug_lport into update and create processing
   functions.
 - Consider unplug on release where relevant.
 - Add ovn-controller `--enable-dummy-plug` option for testing.
 - Consistent function naming and move ovsport module to controller/.
 - Rename plug-test.c -> plug-dummy.c.
 - Add functional- and unit- tests.

v1 -> v2:
 - Move update to controller/binding.h from patch 6 -> patch 5.
 - Fix lint problems reported by 0-day Robot.

v2 -> v3:
 - Fix build system extension for plug provider.
 - Implement DDlog version of northd change.
 - Rebase on tip.

v3 -> v4:
 - sb:Port_Binding:plugged_by -> sb:Port_Binding:requested_chassis.
 - Move documentation of plugin specific options to plugin
   implementation.
 - ovn-northd-ddlog: squash changes into same patch as C version, rework
   the DDlog implementation. 
 - controller: Use requested_chassis column instead of parsing options.
 - plug-provider: Remove the extra class instantiation layer and
   refcounting.  Add documentation and various tweaks.
 - controller: Add engine node for plug provider so that a plugin run
   function can run at an appropriate time and also allow feeding back
   information about changes that can not be handled incrementally.
 - binding: Fix return values for plug functions so they adhere to the
   incremental processing engine contract.
 - Add support for building in-tree plug providers.

v4 -> v5:
 - binding: Make some data structures and functions public to allow
   other modules to make use of its local binding tracking data
   structures.
 - Add separate incremental processing engine nodes for the plug
   provider.
 - Do change handling in the controller/plug module rather than piggy
   backing on the binding module.
 - Deal with asynchronous notification to plug provider class and
   subsequent freeing of data structures when the transaction commits.
 - Drop the representor plugin as in-tree plugin to address concerns about
   it being Linux-only and having netlink/kernel dependencies.  Will
   upstream to ovn-org/ovn-vif instead.

v5 -> v6:
 - Fix 0-day Robot documentation lexer warning treated as error:
https://mail.openvswitch.org/pipermail/ovs-build/2021-September/017538.html

v6 -> v7:
 - lport: Fall back to strcmp when requested-chassis option is set
  but Port_Binding->requested_chassis is not filled yet.
 - physical: Make use of common lport_can_bind_on_this_chassis helper
 instead of duplicating the inverse check in the code.
 - tests: Add test cases to cover currently uncovered requested-chassis
  functionality.
 - plug: Make smaps struct members instead of pointers.
 - plug: Don't use is_deleted function when not iterating over tracked
 records.  Avoid adding multiple delete or update iface requests
 to the same transaction.
 - plug: For full recompute, don't process until northd has populated
 Port_Binding->requested_chassis to avoid thrashing on startup.
 Also fix order of processing.
 - plug: Handle failed transactions appropriately.
 - tests: Clean up and extend plug test case.

v7 -> v8:
 - Patches 1 through 9 from v7 was merged.
 - Use Port_Binding:requested_chassis index when iterating over PBs.
 - Drop separate I-P engine nodes for the VIF plug providers at this
   time, we can revisit the need for it in the event of VIF plug
   providers gaining support for Scalable Functions and the increased
   density that entails.
 - Rename module, objects and folders from 'plug-' to 'vif-plug-*' to
   avoid using too generic names.
 - Don't call 

[ovs-dev] [PATCH ovn v8 4/4] NEWS: Add note on infrastructure for VIF plug providers.

2021-11-02 Thread Frode Nordahl
Signed-off-by: Frode Nordahl 
---
 NEWS | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/NEWS b/NEWS
index 5f448e67d..65bf7d30f 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,13 @@ Post v21.09.0
 if not desired.
   - Added Load_Balancer_Group support, which simplifies large scale
 configurations of load balancers.
+  - Introduced infrastructure for VIF plug providers.  When OVN is linked with
+an appropriate VIF plug provider, such as OVN VIF [0], CMS can request OVN
+to plug lports.  This is particularly useful in topologies where the
+ovn-controller process is running on SmartNIC control plane CPUs.  Please
+refer to Documentation/topics/vif-plug-providers/vif-plug-providers.rst for
+more information.
+[0] https://github.com/ovn-org/ovn-vif
 
 OVN v21.09.0 - 01 Oct 2021
 --
-- 
2.32.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v8 2/4] ovn-controller: Prepare VIF plug provider infrastructure.

2021-11-02 Thread Frode Nordahl
Add port by interfaces index - To be able to effectively remove
ports previously plugged by us we need to look up ports by
interface records.

Add Port_Binding by requested_chassis index - To be able to
effectively iterate over ports destined to our chassis we need to
look up Port_Binding records by requested_chassis.

Add `enable-dummy-plug` option - To enable testing of the VIF plug
provider infrastructure without building OVN with an external VIF
plug provider we include a dummy implementation which can be
enabled using this command line option.

Signed-off-by: Frode Nordahl 
---
 controller/ovn-controller.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 92ba50d65..dd09d57e2 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -56,6 +56,8 @@
 #include "lib/ovn-sb-idl.h"
 #include "lib/ovn-util.h"
 #include "patch.h"
+#include "vif-plug.h"
+#include "vif-plug-provider.h"
 #include "physical.h"
 #include "pinctrl.h"
 #include "openvswitch/poll-loop.h"
@@ -3082,11 +3084,17 @@ main(int argc, char *argv[])
 patch_init();
 pinctrl_init();
 lflow_init();
+vif_plug_provider_initialize();
 
 /* Connect to OVS OVSDB instance. */
 struct ovsdb_idl_loop ovs_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
 ovsdb_idl_create(ovs_remote, _idl_class, false, true));
 ctrl_register_ovs_idl(ovs_idl_loop.idl);
+
+struct ovsdb_idl_index *ovsrec_port_by_interfaces
+= ovsdb_idl_index_create1(ovs_idl_loop.idl,
+  _port_col_interfaces);
+
 ovsdb_idl_get_initial_snapshot(ovs_idl_loop.idl);
 
 /* Configure OVN SB database. */
@@ -3122,6 +3130,9 @@ main(int argc, char *argv[])
 struct ovsdb_idl_index *sbrec_port_binding_by_type
 = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
   _port_binding_col_type);
+struct ovsdb_idl_index *sbrec_port_binding_by_requested_chassis
+= ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
+  _port_binding_col_requested_chassis);
 struct ovsdb_idl_index *sbrec_datapath_binding_by_key
 = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
   _datapath_binding_col_tunnel_key);
@@ -3348,8 +3359,12 @@ main(int argc, char *argv[])
 sbrec_port_binding_by_key);
 engine_ovsdb_node_add_index(_sb_port_binding, "datapath",
 sbrec_port_binding_by_datapath);
+engine_ovsdb_node_add_index(_sb_port_binding, "requested_chassis",
+sbrec_port_binding_by_requested_chassis);
 engine_ovsdb_node_add_index(_sb_datapath_binding, "key",
 sbrec_datapath_binding_by_key);
+engine_ovsdb_node_add_index(_ovs_port, "interfaces",
+ovsrec_port_by_interfaces);
 
 struct ed_type_lflow_output *lflow_output_data =
 engine_get_internal_data(_lflow_output);
@@ -3879,6 +3894,7 @@ loop_done:
 pinctrl_destroy();
 patch_destroy();
 if_status_mgr_destroy(if_mgr);
+vif_plug_provider_destroy_all();
 
 ovsdb_idl_loop_destroy(_idl_loop);
 ovsdb_idl_loop_destroy(_idl_loop);
@@ -3899,6 +3915,7 @@ parse_options(int argc, char *argv[])
 VLOG_OPTION_ENUMS,
 OVN_DAEMON_OPTION_ENUMS,
 SSL_OPTION_ENUMS,
+OPT_ENABLE_DUMMY_VIF_PLUG,
 };
 
 static struct option long_options[] = {
@@ -3909,6 +3926,8 @@ parse_options(int argc, char *argv[])
 STREAM_SSL_LONG_OPTIONS,
 {"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT},
 {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT},
+{"enable-dummy-vif-plug", no_argument, NULL,
+ OPT_ENABLE_DUMMY_VIF_PLUG},
 {NULL, 0, NULL, 0}
 };
 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
@@ -3954,6 +3973,10 @@ parse_options(int argc, char *argv[])
 stream_ssl_set_ca_cert_file(optarg, true);
 break;
 
+case OPT_ENABLE_DUMMY_VIF_PLUG:
+vif_plug_dummy_enable();
+break;
+
 case '?':
 exit(EXIT_FAILURE);
 
-- 
2.32.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] dpif-netdev: Sync PMD ALB state with user commands.

2021-11-02 Thread Kevin Traynor

On 02/11/2021 01:34, Gaƫtan Rivet wrote:

On Mon, Nov 1, 2021, at 15:42, Kevin Traynor wrote:

Previously, when a user enabled PMD auto load balancer with
pmd-auto-lb="true", some conditions such as number of PMDs/RxQs
that were required for a rebalance to take place were checked.

If the configuration meant that a rebalance would not take place
then PMD ALB was logged as 'disabled' and not run.

Later, if the PMD/RxQ configuration changed whereby a rebalance
could be effective, PMD ALB was logged as 'enabled' and would run at
the appropriate time.

This worked ok from a functional view but it is unintuitive for the
user reading the logs.

e.g. with one PMD (PMD ALB would not be effective)

User enables ALB, but logs say it is disabled because it won't run.
$ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
|dpif_netdev|INFO|PMD auto load balance is disabled

No dry run takes place.

Add more PMDs (PMD ALB may be effective).
$ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50
|dpif_netdev|INFO|PMD auto load balance is enabled ...

Dry run takes place.
|dpif_netdev|DBG|PMD auto load balance performing dry run.

A better approach is to simply reflect back the user enable/disable
state in the logs and deal with whether the rebalance will be effective
when needed. That is the approach taken in this patch.

To cut down on unnecessary work, some basic checks are also made before
starting a PMD ALB dry run, and debug logs can indicate this to the user.

e.g. with one PMD (PMD ALB would not be effective)

User enables ALB, and logs confirm the user has enabled it.
$ ovs-vsctl set open_vSwitch . other_config:pmd-auto-lb="true"
|dpif_netdev|INFO|PMD auto load balance is enabled...

No dry run takes place.
|dpif_netdev|DBG|PMD auto load balance nothing to do, not enough
non-isolated PMDs or RxQs.

Add more PMDs (PMD ALB may be effective).
$ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=50

Dry run takes place.
|dpif_netdev|DBG|PMD auto load balance performing dry run.

Signed-off-by: Kevin Traynor 
Acked-by: Sunil Pai G 
Reviewed-by: David Marchand 



Hi Kevin,

The changes make sense and the code looks good.
For the test coverage, given the new behavior it seems more straightforward
as far as checking the ALB state.

If I'm not mistaken however, the previous tests were not only checking the
ALB state (following pmd-auto-lb=true), but also that it would be active given 
the
proper conditions (n_rxq, pmd-cpu-mask > 1).

The test changes kept the first part, but removed the second.
It seems it could be worth testing, WDYT?



Hi Gaetan, thanks for reviewing. Yes, you are right. The issue is that 
those checks are currently config time checks, so those conditions can 
easily be created in UT. Now that the checks are changed to be during 
runtime and only when a CPU threshold is detected for 1 min, it is a 
more difficult condition to create in UT.


I can take a look and see if there is something quick I can think of for 
this specific case, but otherwise I would rather have a separate task of 
seeing if there is a way to fake CPU and RxQ cycles over a period of 
time. Then several other runtime tests could be added including ones for 
these checks.


Kevin.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v7 12/12] controller: Consider plugging of ports on CMS request.

2021-11-02 Thread Frode Nordahl
On Thu, Oct 28, 2021 at 10:19 AM Frode Nordahl
 wrote:
>
> On Thu, Oct 28, 2021 at 8:51 AM Han Zhou  wrote:
> >
> >
> >
> > On Tue, Oct 19, 2021 at 3:14 AM Frode Nordahl  
> > wrote:
> > >
> > > When OVN is linked with an appropriate plugging implementation,
> > > CMS can request OVN to plug individual lports into the local
> > > Open vSwitch instance.
> > >
> > > The port and instance record will be maintained during the lifetime
> > > of the lport and it will be removed on release of lport.
> > >
> > > Signed-off-by: Frode Nordahl 
> > > ---
> > >  controller/ovn-controller.c | 222 +++-
> > >  ovn-nb.xml  |  21 
> > >  tests/ovn-controller.at |   1 -
> > >  tests/ovn-macros.at |   2 +-
> > >  tests/ovn.at| 121 
> > >  5 files changed, 364 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index ef2bdadc8..a92d2820a 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -937,6 +937,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> > >  binding_register_ovs_idl(ovs_idl);
> > >  bfd_register_ovs_idl(ovs_idl);
> > >  physical_register_ovs_idl(ovs_idl);
> > > +plug_register_ovs_idl(ovs_idl);
> > >  }
> > >
> > >  #define SB_NODES \
> > > @@ -2978,6 +2979,180 @@ flow_output_lflow_output_handler(struct 
> > > engine_node *node,
> > >  return true;
> > >  }
> > >
> > > +static void *
> > > +en_plug_provider_lookup_init(struct engine_node *node OVS_UNUSED,
> > > + struct engine_arg *arg OVS_UNUSED)
> > > +{
> > > +return NULL;
> > > +}
> > > +
> > > +static void
> > > +en_plug_provider_lookup_cleanup(void *data OVS_UNUSED)
> > > +{
> > > +
> > > +}
> > > +
> > > +static void
> > > +en_plug_provider_lookup_run(struct engine_node *node,
> > > +void *data OVS_UNUSED)
> > > +{
> > > +if (!plug_provider_has_providers()) {
> > > +engine_set_node_state(node, EN_UNCHANGED);
> > > +return;
> > > +}
> > > +
> > > +if (plug_provider_run_all()) {
> > > +engine_set_node_state(node, EN_UPDATED);
> >
> > Formally, this engine node should have its data, which may be simply a 
> > boolean flag, telling if something has changed in the "provider's internal 
> > lookup table", and the plug_provider node would take this as its input and 
> > read the flag. But it looks ok this way since there is no change handler 
> > needed for this input and it would always trigger a full run (recompute) in 
> > the plug_provider node. So I am fine with it.
>
> Thank you for providing this insight.
>
> > > +} else {
> > > +engine_set_node_state(node, EN_UNCHANGED);
> > > +}
> > > +}
> > > +
> > > +
> > > +struct ed_type_plug_provider {
> > > +struct shash deleted_iface_ids;
> > > +struct shash changed_iface_ids;
> > > +bool pb_handler_has_run;
> > > +};
> > > +
> > > +static void *
> > > +en_plug_provider_init(struct engine_node *node OVS_UNUSED,
> > > +  struct engine_arg *arg OVS_UNUSED)
> > > +{
> > > +struct ed_type_plug_provider *data = xzalloc(sizeof *data);
> > > +
> > > +shash_init(>deleted_iface_ids);
> > > +shash_init(>changed_iface_ids);
> > > +data->pb_handler_has_run = false;
> > > +return data;
> > > +}
> > > +
> > > +static void
> > > +en_plug_provider_cleanup(void *data)
> > > +{
> > > +struct ed_type_plug_provider *plug_provider_data = data;
> > > +
> > > +shash_destroy_free_data(_provider_data->deleted_iface_ids);
> > > +shash_destroy_free_data(_provider_data->changed_iface_ids);
> > > +}
> > > +
> > > +static void
> > > +init_plug_ctx(struct engine_node *node,
> > > +  void *data,
> > > +  struct plug_ctx_in *plug_ctx_in,
> > > +  struct plug_ctx_out *plug_ctx_out)
> > > +{
> > > +struct ovsrec_open_vswitch_table *ovs_table =
> > > +(struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> > > +engine_get_input("OVS_open_vswitch", node));
> > > +struct ovsrec_bridge_table *bridge_table =
> > > +(struct ovsrec_bridge_table *)EN_OVSDB_GET(
> > > +engine_get_input("OVS_bridge", node));
> > > +const char *chassis_id = get_ovs_chassis_id(ovs_table);
> > > +const struct ovsrec_bridge *br_int = get_br_int(bridge_table, 
> > > ovs_table);
> > > +
> > > +ovs_assert(br_int && chassis_id);
> > > +
> > > +struct ovsdb_idl_index *sbrec_chassis_by_name =
> > > +engine_ovsdb_node_get_index(
> > > +engine_get_input("SB_chassis", node),
> > > +"name");
> > > +
> > > +const struct sbrec_chassis *chassis
> > > += chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> > > +ovs_assert(chassis);
> > > +
> > > +struct ovsrec_interface_table *iface_table =
> > > +(struct 

Re: [ovs-dev] [PATCH v3] dpcls: Change info-get function to fetch dpcls usage stats.

2021-11-02 Thread Eelco Chaudron
Hi Kumar,

Thanks for fixing the previously mentioned issues, however, we still need to 
finish the API discussion. I copied over the discussion from v2 below.

//Eelco

On 28 Oct 2021, at 11:50, Kumar Amber wrote:

> Modified the dplcs info-get command output to include
> the count for different dpcls implementations.
>
> $ovs-appctl dpif-netdev/subtable-lookup-prio-get
>
> Available dpcls implementations:
>   autovalidator (Use count: 1, Priority: 5)
>   generic (Use count: 0, Priority: 1)
>   avx512_gather (Use count: 0, Priority: 3)
>
> Test case to verify changes:
>   1021: PMD - dpcls configuration ok
>
> Signed-off-by: Kumar Amber 
> Signed-off-by: Harry van Haaren 
> Co-authored-by: Harry van Haaren 
>
> ---
> v3:
> - Fix comments on the patch.
> - Function API remains same, see discussion on OVS ML here:
>   "https://mail.openvswitch.org/pipermail/ovs-dev/2021-October/388737.html;
> v2:
> - Dependency merged rebased to master.
>
> ---
> ---
>  Documentation/topics/dpdk/bridge.rst | 16 +++
>  lib/dpif-netdev-lookup.c | 72 +++-
>  lib/dpif-netdev-lookup.h | 19 +++-
>  lib/dpif-netdev.c| 31 +---
>  tests/pmd.at | 16 +++
>  5 files changed, 106 insertions(+), 48 deletions(-)
>
> diff --git a/Documentation/topics/dpdk/bridge.rst 
> b/Documentation/topics/dpdk/bridge.rst
> index f645b9ade..63a54da1c 100644
> --- a/Documentation/topics/dpdk/bridge.rst
> +++ b/Documentation/topics/dpdk/bridge.rst
> @@ -156,10 +156,10 @@ OVS provides multiple implementations of dpcls. The 
> following command enables
>  the user to check what implementations are available in a running instance ::
>
>  $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
> -Available lookup functions (priority : name)
> -0 : autovalidator
> -1 : generic
> -0 : avx512_gather
> +Available dpcls implementations:
> +autovalidator (Use count: 1, Priority: 5)
> +generic (Use count: 0, Priority: 1)
> +avx512_gather (Use count: 0, Priority: 3)
>
>  To set the priority of a lookup function, run the ``prio-set`` command ::
>
> @@ -172,10 +172,10 @@ function due to the command being run. To verify the 
> prioritization, re-run the
>  get command, note the updated priority of the ``avx512_gather`` function ::
>
>  $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
> -Available lookup functions (priority : name)
> -0 : autovalidator
> -1 : generic
> -5 : avx512_gather
> +Available dpcls implementations:
> +autovalidator (Use count: 0, Priority: 0)
> +generic (Use count: 0, Priority: 0)
> +avx512_gather (Use count: 1, Priority: 5)
>
>  If two lookup functions have the same priority, the first one in the list is
>  chosen, and the 2nd occurance of that priority is not used. Put in logical
> diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c
> index bd0a99abe..8a95c84e1 100644
> --- a/lib/dpif-netdev-lookup.c
> +++ b/lib/dpif-netdev-lookup.c
> @@ -36,18 +36,21 @@ static struct dpcls_subtable_lookup_info_t 
> subtable_lookups[] = {
>  { .prio = 0,
>  #endif
>.probe = dpcls_subtable_autovalidator_probe,
> -  .name = "autovalidator", },
> +  .name = "autovalidator",
> +  .usage_cnt.count = 0,},

Use the ATOMIC_COUNT_INIT() macro, and also add a space after the comma:

  .usage_cnt = ATOMIC_COUNT_INIT(0), },
>
>  /* The default scalar C code implementation. */
>  { .prio = 1,
>.probe = dpcls_subtable_generic_probe,
> -  .name = "generic", },
> +  .name = "generic",
> +  .usage_cnt.count = 0,},

Same as above

>
>  #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
>  /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
>  { .prio = 0,
>.probe = dpcls_subtable_avx512_gather_probe,
> -  .name = "avx512_gather", },
> +  .name = "avx512_gather",
> +  .usage_cnt.count = 0,},

Same as above

>  #else
>  /* Disabling AVX512 at compile time, as compile time requirements not 
> met.
>   * This could be due to a number of reasons:
> @@ -93,25 +96,44 @@ dpcls_subtable_set_prio(const char *name, uint8_t 
> priority)
>  }
>
>  dpcls_subtable_lookup_func
> -dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count)
> +dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count,
> + dpcls_subtable_lookup_func old_func,
> + bool will_use_result)
>  {
>  /* Iter over each subtable impl, and get highest priority one. */
>  int32_t prio = -1;
> +uint32_t i;

In this code you use a lot of (u)int32_t; however, in OVS, for this kind of use 
case please stick to just int, and if really needed, unsigned int.

>  const char *name = NULL;
> +uint32_t best_idx =