[ovs-dev] [PATCH v3] ofproto-dpif-xlate: No clone when tunnel push is last action

2022-05-19 Thread Rosemarie O'Riorden
When OVS sees a tunnel push with a nested list next, it will not clone
the packet, as a clone is not needed.  However, a clone action will
still be created with the tunnel push encapsulated inside.  There is no
need to create the clone action in this case; it only forces extra
parsing to be performed, which is less efficient.
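As a simplified illustration (placeholders only, not actual output from the
updated tests), the datapath actions change shape roughly as follows when the
tunnel push is the final action:

    Before:  ...,clone(tnl_push(...))
    After:   ...,tnl_push(...)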

Signed-off-by: Rosemarie O'Riorden 
---
v2:
 - Fixes tests that are affected by the change
v3:
 - Adds support for hardware offloading

 lib/netdev-offload-dpdk.c | 36 +--
 lib/netlink.c |  4 +-
 lib/netlink.h |  2 +-
 ofproto/ofproto-dpif-xlate.c  | 21 ++---
 tests/nsh.at  |  6 +--
 tests/packet-type-aware.at| 24 +-
 tests/tunnel-push-pop-ipv6.at | 20 -
 tests/tunnel-push-pop.at  | 83 ++-
 8 files changed, 129 insertions(+), 67 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 12d299603..8749a7e7f 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -2058,6 +2058,19 @@ parse_vlan_push_action(struct flow_actions *actions,
 return 0;
 }
 
+static void
+tunnel_push_action(struct flow_actions *actions,
+ const struct ovs_action_push_tnl *tnl_push) {
+ struct rte_flow_action_raw_encap *raw_encap;
+ raw_encap = xzalloc(sizeof *raw_encap);
+ raw_encap->data = (uint8_t *) tnl_push->header;
+ raw_encap->preserve = NULL;
+ raw_encap->size = tnl_push->header_len;
+
+ add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
+raw_encap);
+}
+
 static int
 parse_clone_actions(struct netdev *netdev,
 struct flow_actions *actions,
@@ -2072,20 +2085,7 @@ parse_clone_actions(struct netdev *netdev,
 
 if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
 const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
-struct rte_flow_action_raw_encap *raw_encap;
-
-if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
-!add_vxlan_encap_action(actions, tnl_push->header)) {
-continue;
-}
-
-raw_encap = xzalloc(sizeof *raw_encap);
-raw_encap->data = (uint8_t *) tnl_push->header;
-raw_encap->preserve = NULL;
-raw_encap->size = tnl_push->header_len;
-
-add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
-raw_encap);
+tunnel_push_action(actions, tnl_push);
 } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
 if (add_output_action(netdev, actions, ca)) {
 return -1;
@@ -2188,6 +2188,14 @@ parse_flow_actions(struct netdev *netdev,
 }
 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) {
 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL);
+} else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_PUSH) {
+const struct ovs_action_push_tnl *tnl_push = nl_attr_get(nla);
+
+if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
+!add_vxlan_encap_action(actions, tnl_push->header)) {
+continue;
+}
+tunnel_push_action(actions, tnl_push);
 } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE &&
left <= NLA_ALIGN(nla->nla_len)) {
 const struct nlattr *clone_actions = nl_attr_get(nla);
diff --git a/lib/netlink.c b/lib/netlink.c
index 8204025a5..06929ff9c 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -536,7 +536,7 @@ nl_msg_end_nested(struct ofpbuf *msg, size_t offset)
 /* Cancel a nested Netlink attribute in 'msg'.  'offset' should be the value
  * returned by nl_msg_start_nested(). */
 void
-nl_msg_cancel_nested(struct ofpbuf *msg, size_t offset)
+nl_msg_reset_size(struct ofpbuf *msg, size_t offset)
 {
 msg->size = offset;
 }
@@ -552,7 +552,7 @@ nl_msg_end_non_empty_nested(struct ofpbuf *msg, size_t offset)
 
 struct nlattr *attr = ofpbuf_at_assert(msg, offset, sizeof *attr);
 if (!nl_attr_get_size(attr)) {
-nl_msg_cancel_nested(msg, offset);
+nl_msg_reset_size(msg, offset);
 return true;
 } else {
 return false;
diff --git a/lib/netlink.h b/lib/netlink.h
index b97470743..0f6916179 100644
--- a/lib/netlink.h
+++ b/lib/netlink.h
@@ -82,7 +82,7 @@ void nl_msg_put_string(struct ofpbuf *, uint16_t type, const char *value);
 
 size_t nl_msg_start_nested(struct ofpbuf *, uint16_t type);
 void nl_msg_end_nested(struct ofpbuf *, size_t offset);
-void nl_msg_cancel_nested(struct ofpbuf *, size_t offset);
+void nl_msg_reset_size(struct ofpbuf *, size_t offset);
 bool nl_msg_end_non_empty_nested(struct ofpbuf *, size_t offset);
 void nl_msg_put_nested(struct ofpbuf *, uint16_t type,
const void *data, size_t size);
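
For context, a minimal sketch (not part of this patch) of how a caller can
pair the renamed helper with nl_msg_start_nested()/nl_msg_end_nested();
emit_nested_actions() is a hypothetical stand-in for real attribute emission:

    static void
    put_optional_nested(struct ofpbuf *msg, uint16_t type)
    {
        size_t offset = nl_msg_start_nested(msg, type);

        if (emit_nested_actions(msg)) {      /* hypothetical helper */
            nl_msg_end_nested(msg, offset);  /* keep the nested attribute */
        } else {
            nl_msg_reset_size(msg, offset);  /* drop everything added after 'offset' */
        }
    }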
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 

[ovs-dev] [PATCH v3] dpif-netdev: ct maxconns config persistence

2022-05-19 Thread lic121
The maximum number of allowed userspace datapath conntrack entries is
configurable with the 'ovs-appctl dpctl/ct-set-maxconns' command.  In
real deployments this configuration is expected to survive host reboots
and OVS service restarts, so this patch moves the setting to the
database as 'other_config:userspace-ct-maxconns'.
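
For example (the value is illustrative), the new knob can be set persistently
and the runtime value checked with the existing appctl command:

    $ ovs-vsctl set Open_vSwitch . other_config:userspace-ct-maxconns=50000
    $ ovs-appctl dpctl/ct-get-maxconns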

Signed-off-by: lic121 
---

Notes:
v3:
  - add a warning to dpctl_ct_set_maxconns
  - add NEWS entry
v2:
  - rename "ct-maxconns" to "userspace-ct-maxconns"

 NEWS|  5 +
 lib/dpctl.c |  3 +++
 lib/dpctl.man   |  4 +++-
 lib/dpif-netdev.c   | 11 +++
 tests/system-traffic.at | 10 ++
 vswitchd/vswitch.xml|  7 +++
 6 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index eece0d0..e75b2af 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,11 @@ Post-v2.17.0
- Windows:
  * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
  * IPv6 Geneve tunnel support.
+   - Userspace datapath:
+ * 'ovs-appctl dpctl/ct-set-maxconns' is deprecated for lack of persistence
+   capability.
+ * New configuration knob 'other_config:userspace-ct-maxconns' to set
+   maximum number of connection tracker entries for userspace datapath.
 
 
 v2.17.0 - 17 Feb 2022
diff --git a/lib/dpctl.c b/lib/dpctl.c
index 29041fa..73cf14c 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -1990,6 +1990,9 @@ dpctl_ct_set_maxconns(int argc, const char *argv[],
 struct dpif *dpif;
int error = opt_dpif_open(argc, argv, dpctl_p, 3, &dpif);
 if (!error) {
+dpctl_print(dpctl_p,
+"Warning: dpctl/ct-set-maxconns is deprecated by "
+"other_config:userspace-ct-maxconns");
 uint32_t maxconns;
if (ovs_scan(argv[argc - 1], "%"SCNu32, &maxconns)) {
 error = ct_dpif_set_maxconns(dpif, maxconns);
diff --git a/lib/dpctl.man b/lib/dpctl.man
index c100d0a..4f08a3f 100644
--- a/lib/dpctl.man
+++ b/lib/dpctl.man
@@ -343,7 +343,9 @@ system due to connection tracking or simply limiting connection
 tracking.  If the number of connections is already over the new maximum
 limit request then the new maximum limit will be enforced when the
 number of connections decreases to that limit, which normally happens
-due to connection expiry.  Only supported for userspace datapath.
+due to connection expiry.  Only supported for userspace datapath.  This
+command is deprecated in favor of the ovsdb setting
+other_config:userspace-ct-maxconns, which persists across restarts.
 .
 .TP
 \*(DX\fBct\-get\-maxconns\fR [\fIdp\fR]
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 21277b2..bfbe6db 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4828,6 +4828,17 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
 }
 }
 
+uint32_t ct_maxconns, cur_maxconns;
+ct_maxconns = smap_get_int(other_config, "userspace-ct-maxconns",
+   UINT32_MAX);
+/* Leave runtime value as it is when cfg is removed. */
+if (ct_maxconns < UINT32_MAX) {
+conntrack_get_maxconns(dp->conntrack, &cur_maxconns);
+if (ct_maxconns != cur_maxconns) {
+conntrack_set_maxconns(dp->conntrack, ct_maxconns);
+}
+}
+
 bool smc_enable = smap_get_bool(other_config, "smc-enable", false);
 bool cur_smc;
atomic_read_relaxed(&dp->smc_enable_db, &cur_smc);
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 1d20366..cb1eb16 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -2305,6 +2305,16 @@ AT_CHECK([ovs-appctl dpctl/ct-get-maxconns], [], [dnl
 10
 ])
 
+AT_CHECK([ovs-vsctl set Open_vswitch . other_config:userspace-ct-maxconns=20], [0])
+AT_CHECK([ovs-appctl dpctl/ct-get-maxconns], [], [dnl
+20
+])
+
+AT_CHECK([ovs-vsctl remove Open_vswitch . other_config userspace-ct-maxconns], [0])
+AT_CHECK([ovs-appctl dpctl/ct-get-maxconns], [], [dnl
+20
+])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index cc1dd77..f2324be 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -183,6 +183,13 @@
 
   
 
+  
+The maximum number of connection tracker entries allowed in the
+userspace datapath.  This deprecates the
+"ovs-appctl dpctl/ct-set-maxconns" command.
+  
+
   
 
-- 
1.8.3.1



Re: [ovs-dev] [PATCH v2] handlers: Fix handlers mapping

2022-05-19 Thread Ilya Maximets
On 5/19/22 23:02, Aaron Conole wrote:
> Ilya Maximets  writes:
> 
>> On 5/5/22 05:52, Michael Santana wrote:
>>> On Mon, Apr 25, 2022 at 2:32 PM Ilya Maximets  wrote:

 On 4/19/22 05:19, Michael Santana wrote:
> On Tue, Apr 5, 2022 at 6:32 PM Ilya Maximets  wrote:
>>
>> On 4/4/22 14:36, Michael Santana wrote:
>>> The handler and CPU mapping in upcalls are incorrect, and this is
>>> especially noticeable on systems with cpu isolation enabled.
>>>
>>> Say we have a 12 core system where only every even number CPU is enabled
>>> C0, C2, C4, C6, C8, C10
>>>
>>> This means we will create an array of size 6 that will be sent to
>>> kernel that is populated with sockets [S0, S1, S2, S3, S4, S5]
>>>
>>> The problem is when the kernel does an upcall it checks the socket array
>>> via the index of the CPU, effectively adding additional load on some
>>> CPUs while leaving no work on other CPUs.
>>>
>>> e.g.
>>>
>>> C0  indexes to S0
>>> C2  indexes to S2 (should be S1)
>>> C4  indexes to S4 (should be S2)
>>>
>>> Modulo of 6 (size of socket array) is applied, so we wrap back to S0
>>> C6  indexes to S0 (should be S3)
>>> C8  indexes to S2 (should be S4)
>>> C10 indexes to S4 (should be S5)
>>>
>>> Effectively sockets S0, S2, S4 get overloaded while sockets S1, S3, S5
>>> get no work assigned to them
>>>
>>> This leads to the kernel to throw the following message:
>>> "openvswitch: cpu_id mismatch with handler threads"
>>>
>>> To fix this we send the kernel a corrected array of sockets the size
>>> of all CPUs in the system. In the above example we would create a
>>> corrected array as follows:
>>> [S0, S1, S1, S2, S2, S3, S3, S4, S4, S5, S5, S0]
>>>
>>> This guarantees that regardless of which CPU a packet comes in the 
>>> kernel
>>> will correctly map it to the correct socket
>>
>> Hey, Michael.  Thanks for working on this issue!
>>
>> I agree that the array is too short and ovs-vswitchd has to supply
>> a full array with values for each physical core.
>> This can be even a separate patch with just filling in the array
>> for each physical core in a round-robin manner.
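
A minimal sketch of that round-robin fill, assuming a per-CPU array sized to
the number of system CPUs and an array of existing handler socket ids (all
names here are illustrative, not taken from the patch):

    static void
    fill_upcall_pids(uint32_t *upcall_pids, size_t n_cpus,
                     const uint32_t *handler_pids, size_t n_handlers)
    {
        /* Every possible CPU id indexes to a valid handler socket. */
        for (size_t cpu = 0; cpu < n_cpus; cpu++) {
            upcall_pids[cpu] = handler_pids[cpu % n_handlers];
        }
    }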
>>
>> However, I'm not sure it is possible to predict what the good
>> distribution of sockets among cores should look like.
>>
>> Taking your current implementation as an example.  I'm assuming
>> that suggested distribution (socket duplication for neighboring
>> CPU cores) can be good for a system with 2 NUMA nodes and cores
>> enumerated as odd - NUMA0, even - NUMA1.  But the enumeration
>> scheme of CPU cores is not set in stone, most multi-NUMA systems
>> has a BIOS configuration knob to change the enumeration method.
>> One of the most popular ones is: lower cores - NUMA0, higher cores
>> - NUMA1.  In this case, since interrupts from a NIC are likely
>> arriving on the cores from the same NUMA, sockets S1-S3 will
>> carry all the traffic, while sockets S4-S5 will remain unused.
>>
>> Another point is that we don't really know how many irq vectors
>> are registered for NICs and what is the irq affinity for them.
>> IOW, we don't know on which cores actual interrupts will be
>> handled and on which sockets they will end up.  IRQ affinity
>> and number of queues are also dynamic and can be changed over
>> time.
>>
>> So, AFAICT, regardless of the positioning of the sockets in the
>> array, there will be very good and very bad cases for it with
>> more or less same probability.
>>
>> It might be that the only option to guarantee more or less even
>> load distribution is to always create N handlers, where the N
>> is the number of actual physical cores.  And let the scheduler
>> and in-kernel IRQ affinity to deal with load balancing.
>> Yes, we're adding an extra overhead in case of limited CPU
>> affinity for the ovs-vswitchd, but OTOH if we have high volume
>> of traffic received on big number of CPU cores, while OVS is
>> limited to a single core, that sounds like a misconfiguration.
>> Some performance testing is required, for sure.
>>
>> Thoughts?
>
>
> Thanks for the feedback. There are a couple of points I would like to
> make regarding your suggestions and a couple clarifications I would like 
> to make
> (aaron++ thanks for helping me draft this)
>
> In this patch, when we pass the array of port ids to the kernel we
> fill the entire array with valid port ids, as such we create an array
> that is the size of the number of CPUs on the system.
>
> The question now becomes how do we handle isolated cores. There
> shouldn't be any packet processing occurring on any isolated core
> since the cores are supposedly isolated. The way the current code
> 

Re: [ovs-dev] [PATCH ovn] nb: Add Load_Balancer.options:neighbor_responder knob.

2022-05-19 Thread Tom Parrott

Thanks!

On 2022-05-18 18:53, Mark Michelson wrote:


I applied this to main.

On 5/11/22 14:48, Mark Michelson wrote: Thanks for this Dumitru.

Acked-by: Mark Michelson 

On 4/28/22 11:33, Dumitru Ceara wrote: This allows CMS to tweak the way
logical routers reply to ARP/ND packets targeting load balancer VIPs.  By
default a router only replies for VIPs that are reachable locally (they're
part of a subnet configured on the router).  There are cases though when
it's desirable for routers to reply for all VIPs.

Reported-at: https://github.com/ovn-org/ovn/issues/124
Reported-by: Tom Parrott 
Signed-off-by: Dumitru Ceara 
---
NEWS|  4 
northd/northd.c | 20 
ovn-nb.xml  |  9 +
tests/ovn-northd.at | 10 --
4 files changed, 41 insertions(+), 2 deletions(-)
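
A minimal CMS-side example of the new knob (it mirrors the test change below;
the load balancer name is illustrative):

    $ ovn-nbctl set Load_Balancer lb1 options:neighbor_responder=all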

diff --git a/NEWS b/NEWS
index dbe89e9cf..ed735c32c 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,10 @@ Post v22.03.0
- Support NAT for logical routers with multiple distributed gateway ports.
- Add global option (NB_Global.options:default_acl_drop) to enable
implicit drop behavior on logical switches with ACLs applied.
+  - Add NB.Load_Balancer.options:neighbor_responder to allow the CMS to
+explicitly request routers to reply to any ARP/ND request for a VIP
+(when set to "all") and only for reachable VIPs (when set to "reachable"
+or by default).
OVN v22.03.0 - 11 Mar 2022
--
diff --git a/northd/northd.c b/northd/northd.c
index a5297..f01bd2cf7 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -3920,6 +3920,26 @@ static void
build_lrouter_lb_reachable_ips(struct ovn_datapath *od,
const struct ovn_northd_lb *lb)
{
+const char *neighbor_responder_mode =
+smap_get_def(&od->nlb->options, "neighbor_responder", "reachable");
+
+/* If configured to reply to neighbor requests for all VIPs force them
+ * all to be considered "reachable".
+ */
+if (!strcmp(neighbor_responder_mode, "all")) {
+for (size_t i = 0; i < lb->n_vips; i++) {
+if (IN6_IS_ADDR_V4MAPPED(&lb->vips[i].vip)) {
+sset_add(&od->lb_ips_v4_reachable, lb->vips[i].vip_str);
+} else {
+sset_add(&od->lb_ips_v6_reachable, lb->vips[i].vip_str);
+}
+}
+return;
+}
+
+/* Otherwise, a VIP is reachable if there's at least one router
+ * subnet that includes it.
+ */
for (size_t i = 0; i < lb->n_vips; i++) {
if (IN6_IS_ADDR_V4MAPPED(&lb->vips[i].vip)) {
ovs_be32 vip_ip4 = in6_addr_get_mapped_ipv4(&lb->vips[i].vip);
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 9010240a8..756c2a378 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -1856,6 +1856,15 @@
more information about what flows are added for IP routes, please
see the ovn-northd manpage section on IP Routing.

+
+  <column name="options" key="neighbor_responder">
+If set to <code>all</code>, then routers on which the load balancer
+is applied reply to ARP/neighbor discovery requests for all VIPs
+of the load balancer.  If set to <code>reachable</code>, then routers
+on which the load balancer is applied reply to ARP/neighbor discovery
+requests only for VIPs that are part of a router's subnet.  The default
+value of this option, if not specified, is <code>reachable</code>.
+  </column>


diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 69ad85533..8e256de2e 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1708,6 +1708,10 @@ ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:101]]:8080" "[[fe02::200:ff:fe00:101]]
ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:102]]:8080" "[[fe02::200:ff:fe00:102]]:8080"

ovn-nbctl lb-add lb6 "43.43.43.43:8080" "10.0.0.8:8080" udp
ovn-nbctl lb-add lb7 "[[4343::4343]]:8080" "[[10::10]]:8080" udp
+ovn-nbctl lb-add lb8 "44.44.44.44:8080" "10.0.0.8:8080" udp
+ovn-nbctl set Load_Balancer lb8 options:neighbor_responder=all
+ovn-nbctl lb-add lb9 "[[::]]:8080" "[[10::10]]:8080" udp
+ovn-nbctl set Load_Balancer lb9 options:neighbor_responder=all
ovn-nbctl lr-lb-add lr lb1
ovn-nbctl lr-lb-add lr lb2
@@ -1716,6 +1720,8 @@ ovn-nbctl lr-lb-add lr lb4
ovn-nbctl lr-lb-add lr lb5
ovn-nbctl lr-lb-add lr lb6
ovn-nbctl lr-lb-add lr lb7
+ovn-nbctl lr-lb-add lr lb8
+ovn-nbctl lr-lb-add lr lb9
ovn-nbctl --wait=sb sync
lr_key=$(fetch_column sb:datapath_binding tunnel_key external_ids:name=lr)

@@ -1723,8 +1729,8 @@ lb_as_v4="_rtr_lb_${lr_key}_ip4"
lb_as_v6="_rtr_lb_${lr_key}_ip6"
# Check generated VIP address sets (only reachable IPs).
-check_column '43.43.43.43' Address_Set addresses name=${lb_as_v4}
-check_column '4343::4343 fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=${lb_as_v6}
+check_column '43.43.43.43 44.44.44.44' Address_Set addresses name=${lb_as_v4}
+check_column '4343::4343 :: fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=${lb_as_v6}

# Ingress router port ETH address is stored in lr_in_admission.
AT_CHECK([ovn-sbctl lflow-list | grep -E 

Re: [ovs-dev] [PATCH ovn] Allow for setting the Next server IP in the DHCP header

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 4:11 AM Lucas Alvares Gomes  wrote:
>
> Hi,
>
> Thanks Numan for the review. See the replies below.
>
> On Thu, May 19, 2022 at 12:36 AM Numan Siddique  wrote:
> >
> > On Wed, May 11, 2022 at 11:34 AM  wrote:
> > >
> > > From: Lucas Alvares Gomes 
> > >
> > > In order to PXE boot a baremetal server using the OVN DHCP server we
> > > need to allow users to set the "next-server" (siaddr) [0] field in the
> > > DHCP header.
> > >
> > > While investigating this issue by comparing the DHCPOFFER and DHCPACK
> > > packets sent by dnsmasq and OVN we saw that the "next-server" field
> > > was the problem for OVN, without it PXE booting was timing out while
> > > fetching the iPXE image from the TFTP server (see the bugzilla ticket
> > > below for reference).
> > >
> > > To confirm this problem we created a bogus patch hardcoding the TFTP
> > > address in the siaddr of the DHCP header (see the discussion in the
> > > maillist below) and with this in place we were able to deploy a
> > > baremetal node using the OVN DHCP end-to-end.
> > >
> > > This patch is a proper implementation that creates a new DHCP
> > > configuration option called "next_server" to allow users to set this
> > > field dynamically. This patch uses the DHCP code 253, which is an unused
> > > code in the DHCP specification, as this is not a normal DHCP option but a
> > > special use case in OVN.
> > >
> > > [0]
> > > https://github.com/openvswitch/ovs/blob/9dd3031d2e0e9597449e95428320ccaaff7d8b3d/lib/dhcp.h#L42
> > >
> > > Reported-by: https://bugzilla.redhat.com/show_bug.cgi?id=2083629
> > > Reported-by:
> > > https://mail.openvswitch.org/pipermail/ovs-discuss/2022-May/051821.html
> > > Signed-off-by: Lucas Alvares Gomes 
> > > ---
> > >  controller/pinctrl.c | 69 ++--
> > >  lib/actions.c| 13 -
> > >  lib/ovn-l7.h |  8 +
> > >  northd/ovn-northd.c  |  1 +
> > >  ovn-nb.xml   |  7 +
> > >  tests/ovn.at |  6 ++--
> > >  tests/test-ovn.c |  1 +
> > >  7 files changed, 80 insertions(+), 25 deletions(-)
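
As a hypothetical usage sketch (the 'next_server' option name comes from this
patch; the UUID variable and addresses are placeholders), a CMS could set the
new option alongside the other DHCPv4 options once the patch is applied:

    $ ovn-nbctl dhcp-options-set-options $DHCP_OPTIONS_UUID \
          lease_time=3600 router=192.168.10.1 server_id=192.168.10.1 \
          server_mac=00:00:00:00:10:01 next_server=192.168.10.2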
> > >
> > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > > index ae3da332c..428863293 100644
> > > --- a/controller/pinctrl.c
> > > +++ b/controller/pinctrl.c
> > > @@ -2203,30 +2203,56 @@ pinctrl_handle_put_dhcp_opts(
> > >   *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
> > >   * --
> > >   */
> > > -struct dhcp_opt_header *in_dhcp_opt =
> > > -(struct dhcp_opt_header *)reply_dhcp_opts_ptr->data;
> > > -if (in_dhcp_opt->code == DHCP_OPT_BOOTFILE_CODE) {
> > > -unsigned char *ptr = (unsigned char *)in_dhcp_opt;
> > > -int len = sizeof *in_dhcp_opt + in_dhcp_opt->len;
> > > -struct dhcp_opt_header *next_dhcp_opt =
> > > -(struct dhcp_opt_header *)(ptr + len);
> > > -
> > > -if (next_dhcp_opt->code == DHCP_OPT_BOOTFILE_ALT_CODE) {
> > > -if (!ipxe_req) {
> > > -ofpbuf_pull(reply_dhcp_opts_ptr, len);
> > > -next_dhcp_opt->code = DHCP_OPT_BOOTFILE_CODE;
> > > -} else {
> > > -char *buf = xmalloc(len);
> > > +ovs_be32 next_server = in_dhcp_data->siaddr;
> > > +bool bootfile_name_set = false;
> > > +in_dhcp_ptr = reply_dhcp_opts_ptr->data;
> > > +end = (const char *)reply_dhcp_opts_ptr->data + 
> > > reply_dhcp_opts_ptr->size;
> > > +
> >
> > Hi Lucas,
> >
> > Thanks for adding this support.
> >
> > It seems to me this while loop can be avoided since lib/actions.c
> > always encodes offer_ip, DHCP_OPT_BOOTFILE_CODE,
> > DHCP_OPT_BOOTFILE_ALT_CODE
> > and now DHCP_OPT_NEXT_SERVER_CODE  in the userdata buffer before
> > encoding other dhcp options.
> >
> > Since it is deterministic,  can't we do something like below instead
> > of the while loop ?
> >
> > struct dhcp_opt_header *in_dhcp_opt =
> > (struct dhcp_opt_header *)reply_dhcp_opts_ptr->data;
> > if (in_dhcp_opt->code == DHCP_OPT_BOOTFILE_CODE) {
> >
> >advance in_dhcp_opt to the next option
> > } else if (in_dhcp_opt->code == DHCP_OPT_BOOTFILE_ALT_CODE) {
> >   ..
> >  advance in_dhcp_opt to the next option
> > }
> >
> > if (in_dhcp_opt->code == DHCP_OPT_NEXT_SERVER_CODE) {
> > next_server = get_unaligned_be32(DHCP_OPT_PAYLOAD(in_dhcp_opt));
> > }
> >
> > Let me know if this gets complicated because of my above suggestion.
> > In that case,I'm fine to run the below while loop.
> >
>
> That's how I coded it the first time when I was testing the patch, but
> I found a problem where if more than one option was set, for example:
> bootfile_name and next_server only the first encoded option would be
> processed. In order to process all options I needed to offset to the
> next one like:
>
> unsigned char *ptr = (unsigned char *)in_dhcp_opt;
> int len = sizeof *in_dhcp_opt + in_dhcp_opt->len;
> struct dhcp_opt_header *next_dhcp_opt = (struct dhcp_opt_header *)(ptr + len);

Re: [ovs-dev] [PATCH v2] handlers: Fix handlers mapping

2022-05-19 Thread Aaron Conole
Ilya Maximets  writes:

> On 5/5/22 05:52, Michael Santana wrote:
>> On Mon, Apr 25, 2022 at 2:32 PM Ilya Maximets  wrote:
>>>
>>> On 4/19/22 05:19, Michael Santana wrote:
 On Tue, Apr 5, 2022 at 6:32 PM Ilya Maximets  wrote:
>
> On 4/4/22 14:36, Michael Santana wrote:
>> The handler and CPU mapping in upcalls are incorrect, and this is
>> especially noticeable on systems with cpu isolation enabled.
>>
>> Say we have a 12 core system where only every even number CPU is enabled
>> C0, C2, C4, C6, C8, C10
>>
>> This means we will create an array of size 6 that will be sent to
>> kernel that is populated with sockets [S0, S1, S2, S3, S4, S5]
>>
>> The problem is when the kernel does an upcall it checks the socket array
>> via the index of the CPU, effectively adding additional load on some
>> CPUs while leaving no work on other CPUs.
>>
>> e.g.
>>
>> C0  indexes to S0
>> C2  indexes to S2 (should be S1)
>> C4  indexes to S4 (should be S2)
>>
>> Modulo of 6 (size of socket array) is applied, so we wrap back to S0
>> C6  indexes to S0 (should be S3)
>> C8  indexes to S2 (should be S4)
>> C10 indexes to S4 (should be S5)
>>
>> Effectively sockets S0, S2, S4 get overloaded while sockets S1, S3, S5
>> get no work assigned to them
>>
>> This leads to the kernel to throw the following message:
>> "openvswitch: cpu_id mismatch with handler threads"
>>
>> To fix this we send the kernel a corrected array of sockets the size
>> of all CPUs in the system. In the above example we would create a
>> corrected array as follows:
>> [S0, S1, S1, S2, S2, S3, S3, S4, S4, S5, S5, S0]
>>
>> This guarantees that regardless of which CPU a packet comes in the kernel
>> will correctly map it to the correct socket
>
> Hey, Michael.  Thanks for working on this issue!
>
> I agree that the array is too short and ovs-vswitchd has to supply
> a full array with values for each physical core.
> This can be even a separate patch with just filling in the array
> for each physical core in a round-robin manner.
>
> However, I'm not sure it is possible to predict what the good
> distribution of sockets among cores should look like.
>
> Taking your current implementation as an example.  I'm assuming
> that suggested distribution (socket duplication for neighboring
> CPU cores) can be good for a system with 2 NUMA nodes and cores
> enumerated as odd - NUMA0, even - NUMA1.  But the enumeration
> scheme of CPU cores is not set in stone, most multi-NUMA systems
> has a BIOS configuration knob to change the enumeration method.
> One of the most popular ones is: lower cores - NUMA0, higher cores
> - NUMA1.  In this case, since interrupts from a NIC are likely
> arriving on the cores from the same NUMA, sockets S1-S3 will
> carry all the traffic, while sockets S4-S5 will remain unused.
>
> Another point is that we don't really know how many irq vectors
> are registered for NICs and what is the irq affinity for them.
> IOW, we don't know on which cores actual interrupts will be
> handled and on which sockets they will end up.  IRQ affinity
> and number of queues are also dynamic and can be changed over
> time.
>
> So, AFAICT, regardless of the positioning of the sockets in the
> array, there will be very good and very bad cases for it with
> more or less same probability.
>
> It might be that the only option to guarantee more or less even
> load distribution is to always create N handlers, where the N
> is the number of actual physical cores.  And let the scheduler
> and in-kernel IRQ affinity to deal with load balancing.
> Yes, we're adding an extra overhead in case of limited CPU
> affinity for the ovs-vswitchd, but OTOH if we have high volume
> of traffic received on big number of CPU cores, while OVS is
> limited to a single core, that sounds like a misconfiguration.
> Some performance testing is required, for sure.
>
> Thoughts?


 Thanks for the feedback. There are a couple of points I would like to
 make regarding your suggestions and a couple clarifications I would like 
 to make
 (aaron++ thanks for helping me draft this)

 In this patch, when we pass the array of port ids to the kernel we
 fill the entire array with valid port ids, as such we create an array
 that is the size of the number of CPUs on the system.

 The question now becomes how do we handle isolated cores. There
 shouldn't be any packet processing occurring on any isolated core
 since the cores are supposedly isolated. The way the current code
 handles isolated cores is stated in the original commit message.
 Moving forward we have two choices to deal with the isolated cores. 1.
 fill the 

Re: [ovs-dev] [PATCH] classifier: adjust segment boundary to execute prerequisite processing

2022-05-19 Thread Aaron Conole
"Ferriter, Cian"  writes:

> Hi Aaron,

Hi Cian,

> 
>
>> ---
>>  include/openvswitch/flow.h |  7 +++
>>  tests/classifier.at| 27 +++
>>  2 files changed, 30 insertions(+), 4 deletions(-)
>> 
>> diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
>> index 3054015d93..df10cf579e 100644
>> --- a/include/openvswitch/flow.h
>> +++ b/include/openvswitch/flow.h
>> @@ -141,15 +141,14 @@ struct flow {
>>  uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */
>>  uint8_t nw_ttl; /* IP TTL/Hop Limit. */
>>  uint8_t nw_proto;   /* IP protocol or low 8 bits of ARP opcode. 
>> */
>> +/* L4 (64-bit aligned) */
>>  struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
>>  struct eth_addr arp_sha;/* ARP/ND source hardware address. */
>>  struct eth_addr arp_tha;/* ARP/ND target hardware address. */
>> -ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type.
>> - * With L3 to avoid matching L4. */
>> +ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. */
>>  ovs_be16 pad2;  /* Pad to 64 bits. */
>>  struct ovs_key_nsh nsh; /* Network Service Header keys */
>> 
>> -/* L4 (64-bit aligned) */
>>  ovs_be16 tp_src;/* TCP/UDP/SCTP source port/ICMP type. */
>>  ovs_be16 tp_dst;/* TCP/UDP/SCTP destination port/ICMP code. 
>> */
>>  ovs_be16 ct_tp_src; /* CT original tuple source port/ICMP type. 
>> */
>> @@ -179,7 +178,7 @@ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) 
>> + sizeof(uint32_t)
>
> About the following BUILD_ASSRT_DECL which is in include/openvswitch/flow.h 
> directly below struct flow.h:
> /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
> BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
>   == sizeof(struct flow_tnl) + sizeof(struct ovs_key_nsh) + 
> 300
>   && FLOW_WC_SEQ == 42);
>
> Should we increment the FLOW_WC_SEQ value? The comment reads:
> /* This sequence number should be incremented whenever anything involving 
> flows
>  * or the wildcarding of flows changes.  This will cause build assertion
>  * failures in places which likely need to be updated. */
>
> We haven't changed anything in the order or content of struct flow but we 
> have changed (fixed) how flows are wildcarded. It doesn't look like any of 
> the code asserting FLOW_WC_SEQ == 42 and relying on struct flow order and 
> content needs updating.
> Just wondering your/others thoughts about whether to increment to 43 or not.

I decided not to increment it because it's really a subtable
configuration for the classifier rather than a flow structure related
change.  In ofproto/ofproto.c we initialize the classifier with the map
stored in flow.c that holds the various segments.  I consider it more of
a classifier configuration, in that sense.  As you note, it doesn't
modify the layout or size of flow struct.  Maybe we can have a more
precise comment around that area.

>>  enum {
>>  FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst),
>>  FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src),
>> -FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src),
>> +FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, nd_target),
>>  };
>>  BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0);
>>  BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0);
>
> IIUC, we are moving the L4 boundary 56B earlier in struct flow. This
> means L3/L4 segment boundary is still at a U64 boundary. I know we
> have BUILD_ASSERTs checking this, but I just thought to double check
> myself.

Yes - it's 448 bits or 7 u64 words.  This is a bit lucky for us -
otherwise we might have had to get more creative :)

>> diff --git a/tests/classifier.at b/tests/classifier.at
>> index cdcd72c156..a0a8fe0359 100644
>> --- a/tests/classifier.at
>> +++ b/tests/classifier.at
>> @@ -129,6 +129,33 @@ Datapath actions: 3
>>  OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
>>  AT_CLEANUP
>> 
>> +AT_SETUP([flow classifier - ipv6 ND dependency])
>> +OVS_VSWITCHD_START
>> +add_of_ports br0 1 2
>> +AT_DATA([flows.txt], [dnl
>> + table=0,priority=100,ipv6,ipv6_src=1000::/10 actions=resubmit(,1)
>> + table=0,priority=0 actions=NORMAL
>> + table=1,priority=110,ipv6,ipv6_dst=1000::3 actions=resubmit(,2)
>> + table=1,priority=100,ipv6,ipv6_dst=1000::4 actions=resubmit(,2)
>> + table=1,priority=0 actions=NORMAL
>> + 
>> table=2,priority=120,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1
>>  actions=NORMAL
>> + table=2,priority=100,tcp actions=NORMAL
>> + table=2,priority=100,icmp6 actions=NORMAL
>> + table=2,priority=0 actions=NORMAL
>> +])
>> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
>> +
>> +# test ICMPv6 echo request (which should have no nd_target field)
>> 

Re: [ovs-dev] [PATCH ovn] ovs-sandbox: Allow specifying initial contents for OVS and VTEP databases.

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 4:19 PM Numan Siddique  wrote:
>
> On Thu, May 19, 2022 at 4:17 PM Mark Michelson  wrote:
> >
> > Acked-by: Mark Michelson 
> >
> > Since this is a new feature introduced after soft freeze for 22.06, this
> > shouldn't be a candidate for 22.06. However, personally I feel this is
> > so low-risk (and so useful) that it should be included. Does anyone
> > disagree?
>
> I'm fine.  It would be helpful for debugging.
> I can take a look and merge it.

Applied to main,

Thanks
Numan

>
> Numan
>
> >
> > On 5/17/22 06:47, Dumitru Ceara wrote:
> > > This makes it easier to troubleshoot OVN in a sandbox when initial
> > > contents for the NB/SB/OVS DBs are provided (e.g., from a production
> > > environment).
> > >
> > > A way to load the DBs locally and run OVN against them is:
> > > $ SANDBOXFLAGS="--nbdb-source=path-to-nb.db --sbdb-source=path-to-sb.db 
> > > --ovs-source=path-to-conf.db" make sandbox
> > >
> > > Signed-off-by: Dumitru Ceara 
> > > ---
> > >   tutorial/ovs-sandbox | 28 ++--
> > >   1 file changed, 26 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/tutorial/ovs-sandbox b/tutorial/ovs-sandbox
> > > index d81e00496..2219b0e99 100755
> > > --- a/tutorial/ovs-sandbox
> > > +++ b/tutorial/ovs-sandbox
> > > @@ -79,6 +79,8 @@ ovn_rbac=true
> > >   n_northds=1
> > >   n_ics=1
> > >   n_controllers=1
> > > +ovs_source=
> > > +vtep_source=
> > >   nbdb_model=standalone
> > >   nbdb_servers=3
> > >   nbdb_source=
> > > @@ -307,6 +309,18 @@ EOF
> > >   --sbdb-so*)
> > >   prev=ovnsb_source
> > >   ;;
> > > +--ovs-so*=*)
> > > +ovs_source=$optarg
> > > +;;
> > > +--ovs-so*)
> > > +prev=ovs_source
> > > +;;
> > > +--vtep-so*=*)
> > > +vtep_source=$optarg
> > > +;;
> > > +--vtep-so*)
> > > +prev=vtep_source
> > > +;;
> > >   --ic-nb-s*=*)
> > >   ic_nb_servers=$optarg
> > >   ic_nb_model=clustered
> > > @@ -474,10 +488,20 @@ trap 'kill `cat "$sandbox"/*.pid`' 0 1 2 3 13 14 15
> > >
> > >   # Create database and start ovsdb-server.
> > >   touch "$sandbox"/.conf.db.~lock~
> > > -run ovsdb-tool create conf.db "$schema"
> > > +if test ! -e "$ovs_source"; then
> > > +run ovsdb-tool create conf.db "$schema"
> > > +else
> > > +run cp "$ovs_source" conf.db
> > > +run ovsdb-tool convert conf.db "$schema"
> > > +fi
> > >   ovsdb_server_args=
> > >
> > > -run ovsdb-tool create vtep.db "$vtep_schema"
> > > +if test ! -e "$vtep_source"; then
> > > +run ovsdb-tool create vtep.db "$vtep_schema"
> > > +else
> > > +run cp "$vtep_source" vtep.db
> > > +run ovsdb-tool convert vtep.db "$vtep_schema"
> > > +fi
> > >   ovsdb_server_args="vtep.db conf.db"
> > >
> > >   if [ "$HAVE_OPENSSL" = yes ]; then
> > >
> >


Re: [ovs-dev] [PATCH v2] handlers: Fix handlers mapping

2022-05-19 Thread Ilya Maximets
On 5/5/22 05:52, Michael Santana wrote:
> On Mon, Apr 25, 2022 at 2:32 PM Ilya Maximets  wrote:
>>
>> On 4/19/22 05:19, Michael Santana wrote:
>>> On Tue, Apr 5, 2022 at 6:32 PM Ilya Maximets  wrote:

 On 4/4/22 14:36, Michael Santana wrote:
> The handler and CPU mapping in upcalls are incorrect, and this is
> especially noticeable on systems with cpu isolation enabled.
>
> Say we have a 12 core system where only every even number CPU is enabled
> C0, C2, C4, C6, C8, C10
>
> This means we will create an array of size 6 that will be sent to
> kernel that is populated with sockets [S0, S1, S2, S3, S4, S5]
>
> The problem is when the kernel does an upcall it checks the socket array
> via the index of the CPU, effectively adding additional load on some
> CPUs while leaving no work on other CPUs.
>
> e.g.
>
> C0  indexes to S0
> C2  indexes to S2 (should be S1)
> C4  indexes to S4 (should be S2)
>
> Modulo of 6 (size of socket array) is applied, so we wrap back to S0
> C6  indexes to S0 (should be S3)
> C8  indexes to S2 (should be S4)
> C10 indexes to S4 (should be S5)
>
> Effectively sockets S0, S2, S4 get overloaded while sockets S1, S3, S5
> get no work assigned to them
>
> This leads to the kernel to throw the following message:
> "openvswitch: cpu_id mismatch with handler threads"
>
> To fix this we send the kernel a corrected array of sockets the size
> of all CPUs in the system. In the above example we would create a
> corrected array as follows:
> [S0, S1, S1, S2, S2, S3, S3, S4, S4, S5, S5, S0]
>
> This guarantees that regardless of which CPU a packet comes in the kernel
> will correctly map it to the correct socket

 Hey, Michael.  Thanks for working on this issue!

 I agree that the array is too short and ovs-vswitchd has to supply
 a full array with values for each physical core.
 This can be even a separate patch with just filling in the array
 for each physical core in a round-robin manner.

 However, I'm not sure it is possible to predict what the good
 distribution of sockets among cores should look like.

 Taking your current implementation as an example.  I'm assuming
 that suggested distribution (socket duplication for neighboring
 CPU cores) can be good for a system with 2 NUMA nodes and cores
 enumerated as odd - NUMA0, even - NUMA1.  But the enumeration
 scheme of CPU cores is not set in stone, most multi-NUMA systems
 has a BIOS configuration knob to change the enumeration method.
 One of the most popular ones is: lower cores - NUMA0, higher cores
 - NUMA1.  In this case, since interrupts from a NIC are likely
 arriving on the cores from the same NUMA, sockets S1-S3 will
 carry all the traffic, while sockets S4-S5 will remain unused.

 Another point is that we don't really know how many irq vectors
 are registered for NICs and what is the irq affinity for them.
 IOW, we don't know on which cores actual interrupts will be
 handled and on which sockets they will end up.  IRQ affinity
 and number of queues are also dynamic and can be changed over
 time.

 So, AFAICT, regardless of the positioning of the sockets in the
 array, there will be very good and very bad cases for it with
 more or less same probability.

 It might be that the only option to guarantee more or less even
 load distribution is to always create N handlers, where the N
 is the number of actual physical cores.  And let the scheduler
 and in-kernel IRQ affinity to deal with load balancing.
 Yes, we're adding an extra overhead in case of limited CPU
 affinity for the ovs-vswitchd, but OTOH if we have high volume
 of traffic received on big number of CPU cores, while OVS is
 limited to a single core, that sounds like a misconfiguration.
 Some performance testing is required, for sure.

 Thoughts?
>>>
>>>
>>> Thanks for the feedback. There are a couple of points I would like to
>>> make regarding your suggestions and a couple clarifications I would like to 
>>> make
>>> (aaron++ thanks for helping me draft this)
>>>
>>> In this patch, when we pass the array of port ids to the kernel we
>>> fill the entire array with valid port ids, as such we create an array
>>> that is the size of the number of CPUs on the system.
>>>
>>> The question now becomes how do we handle isolated cores. There
>>> shouldn't be any packet processing occurring on any isolated core
>>> since the cores are supposedly isolated. The way the current code
>>> handles isolated cores is stated in the original commit message.
>>> Moving forward we have two choices to deal with the isolated cores. 1.
>>> fill the array with handler portids to allow upcalls to pass, 2. fill
>>> the array with '0' on the isolated cores to drop them.  We chose

Re: [ovs-dev] [PATCH ovn v2 2/2] Prepare for post-22.06.0.

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 4:15 PM Mark Michelson  wrote:
>
> Signed-off-by: Mark Michelson 

Acked-by: Numan Siddique 

Numan

> ---
>  NEWS | 3 +++
>  configure.ac | 2 +-
>  debian/changelog | 6 ++
>  3 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/NEWS b/NEWS
> index 74d4029ff..2a394f8d6 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,3 +1,6 @@
> +Post v22.06.0
> +-
> +
>  OVN v22.06.0 - XX XXX XXX
>  -
>- Support IGMP and MLD snooping on transit logical switches that connect
> diff --git a/configure.ac b/configure.ac
> index b649441bc..8665e71f4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -13,7 +13,7 @@
>  # limitations under the License.
>
>  AC_PREREQ(2.63)
> -AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
> +AC_INIT(ovn, 22.06.90, b...@openvswitch.org)
>  AC_CONFIG_MACRO_DIR([m4])
>  AC_CONFIG_AUX_DIR([build-aux])
>  AC_CONFIG_HEADERS([config.h])
> diff --git a/debian/changelog b/debian/changelog
> index 27bb40232..21126029c 100644
> --- a/debian/changelog
> +++ b/debian/changelog
> @@ -1,3 +1,9 @@
> +ovn (22.06.90-1) unstable; urgency=low
> +
> +   * New upstream version
> +
> + -- OVN team   Thu, 19 May 2022 15:11:38 -0400
> +
>  ovn (22.06.0-1) unstable; urgency=low
>
> * New upstream version
> --
> 2.31.1
>


Re: [ovs-dev] [PATCH ovn v2 1/2] Prepare for 22.06.0.

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 4:15 PM Mark Michelson  wrote:
>
> Signed-off-by: Mark Michelson 
> ---
>  NEWS | 4 ++--
>  configure.ac | 2 +-
>  debian/changelog | 4 ++--
>  3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 1cdba4bfc..74d4029ff 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,5 @@
> -Post v22.03.0
> --
> +OVN v22.06.0 - XX XXX XXX

Looks like you missed out an 'X' as Ilya mentioned in v1.

I suppose you can correct it before applying.

Acked-by: Numan Siddique 


> +-
>- Support IGMP and MLD snooping on transit logical switches that connect
>  different OVN Interconnection availability zones.
>- Replaced the usage of masked ct_label by ct_mark in most cases to work
> diff --git a/configure.ac b/configure.ac
> index eba85eede..b649441bc 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -13,7 +13,7 @@
>  # limitations under the License.
>
>  AC_PREREQ(2.63)
> -AC_INIT(ovn, 22.03.90, b...@openvswitch.org)
> +AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
>  AC_CONFIG_MACRO_DIR([m4])
>  AC_CONFIG_AUX_DIR([build-aux])
>  AC_CONFIG_HEADERS([config.h])
> diff --git a/debian/changelog b/debian/changelog
> index f2355c3be..27bb40232 100644
> --- a/debian/changelog
> +++ b/debian/changelog
> @@ -1,8 +1,8 @@
> -ovn (22.03.90-1) unstable; urgency=low
> +ovn (22.06.0-1) unstable; urgency=low
>
> * New upstream version
>
> - -- OVN team   Thu, 24 Feb 2022 16:55:45 -0500
> + -- OVN team   Thu, 19 May 2022 15:11:38 -0400
>
>  ovn (22.03.0-1) unstable; urgency=low
>
> --
> 2.31.1
>


Re: [ovs-dev] [PATCH ovn] tests: Fix failing test case - "Port security lflows"

2022-05-19 Thread Mark Michelson

Thanks for the quick fix on this Numan. I went ahead and pushed it to main.

On 5/19/22 16:17, num...@ovn.org wrote:

From: Numan Siddique 

Also added a few comments in the port security related functions
in controller/lflow.c

Fixes: 94974a02bfd5("northd: Add generic port security logical flows.")
Signed-off-by: Numan Siddique 
---
  controller/lflow.c  | 16 +++-
  tests/ovn-northd.at |  2 +-
  2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index e968231492..7a3419305a 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -2731,7 +2731,13 @@ lflow_handle_flows_for_lport(const struct 
sbrec_port_binding *pb,
  return false;
  }
  
-/* Program the port security flows. */

+/* Program the port security flows.
+ * Note: All the port security OF rules are added using the 'uuid'
+ * of the port binding.  Right now port binding 'uuid' is used in
+ * the logical flow table (l_ctx_out->flow_table) only for port
+ * security flows.  Later if new flows are added using the
+ * port binding'uuid', then this function should handle it properly.
+ */
  ofctrl_remove_flows(l_ctx_out->flow_table, >header_.uuid);
  
  if (pb->n_port_security && shash_find(l_ctx_in->binding_lports,

@@ -3399,6 +3405,10 @@ build_out_port_sec_ip4_flows(const struct 
sbrec_port_binding *pb,
  struct ovn_desired_flow_table *flow_table)
  {
  if (!ps_addr->n_ipv4_addrs && !ps_addr->n_ipv6_addrs) {
+ /* No IPv4 and no IPv6 addresses in the port security.
+  * Both IPv4 and IPv6 traffic should be delivered to the
+  * lport. build_out_port_sec_no_ip_flows() takes care of
+  * adding the required flow(s) to allow. */
  return;
  }
  
@@ -3493,6 +3503,10 @@ build_out_port_sec_ip6_flows(const struct sbrec_port_binding *pb,

  struct ovn_desired_flow_table *flow_table)
  {
  if (!ps_addr->n_ipv4_addrs && !ps_addr->n_ipv6_addrs) {
+/* No IPv4 and no IPv6 addresses in the port security.
+ * Both IPv4 and IPv6 traffic should be delivered to the
+ * lport. build_out_port_sec_no_ip_flows() takes care of
+ * adding the required flow(s) to allow. */
  return;
  }
  
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at

index 0912e3bec4..5bd0935e72 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -7404,8 +7404,8 @@ AT_CHECK([cat sw0flows | grep -e port_sec | sort | sed 
's/table=./table=?/' ], [
table=? (ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), 
action=(drop;)
table=? (ls_in_check_port_sec), priority=100  , match=(vlan.present), 
action=(drop;)
table=? (ls_in_check_port_sec), priority=50   , match=(1), 
action=(reg0[[15]] = check_in_port_sec(); next;)
-  table=? (ls_in_check_port_sec), priority=50   , match=(inport == "sw0p1"), 
action=(reg0[[14]] = 1; next(pipeline=ingress, table=16);)
table=? (ls_in_check_port_sec), priority=70   , match=(inport == 
"localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
+  table=? (ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), 
action=(reg0[[14]] = 1; next(pipeline=ingress, table=16);)
table=? (ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), 
action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
table=? (ls_in_apply_port_sec), priority=0, match=(1), action=(next;)
table=? (ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), 
action=(drop;)





Re: [ovs-dev] [PATCH ovn v2 0/3] Adding generic port security flows.

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 1:47 PM Mark Michelson  wrote:
>
> Thank you for the rebase, Numan. I pushed the series to main.

Thanks for applying.  Unfortunately while rebasing I made a mistake
and a test case is failing.
Can you please take a look at this -
https://patchwork.ozlabs.org/project/ovn/patch/20220519201733.2184302-1-num...@ovn.org/

Numan

>
> On 5/19/22 11:17, num...@ovn.org wrote:
> > From: Numan Siddique 
> >
> > This patch series adds generic logical flows for port security in
> > the logical switch pipeline and pushes the actual port security
> > implementation logic to ovn-controller from ovn-northd.
> >
> > ovn-northd will now add logical flows like:
> >
> > table=0 (ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[14] = check_in_port_sec(); next;)
> > table=1 (ls_in_apply_port_sec), priority=50   , match=(reg0[14] == 1), action=(drop;)
> > table=1 (ls_in_apply_port_sec), priority=0, match=(1), action=(next;)
> >
> > OVN action check_in_port_sec() resubmits the packet to openflow table
> > 73.  ovn-controller will add port security flows in table 73,74 and 75
> > for all the logical ports it has claimed.  The port security information
> > is passed down the Port_Binding table in Southbound database.
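
For reference, an illustrative way a CMS configures the per-port addresses
that end up in these flows (the port name and addresses are made up):

    $ ovn-nbctl lsp-set-port-security sw0p1 "00:00:00:00:00:03 10.0.0.3"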
> >
> > The main motivation for the patch is to address scale concerns.
> > This patch series reduces the number of logical flows and ovn-northd
> > CPU utilization time.
> >
> > Did some scale testing and below are the results:
> >
> > Used a Northbound database from a deployment of 120 node cluster.
> > Number of logical switch ports with port security configured: 13711
> >
> > With vanilla ovn-northd
> > ---
> > Number of logical flows : 208061
> > Avg time taken to run build_lflows() : 1301 msec
> > Size of Southbound database after compaction: 104M
> >
> > With ovn-northd using this feature
> > -
> > Number of logical flows : 83396
> > Avg time taken to run build_lflows() : 560  msec
> > Size of Southbound database after compaction: 45M
> >
> >
> > v1 -> v2
> > ---
> >* Rebased to resolve conflicts.
> >* Added Mark's Acks.
> >
> > Numan Siddique (3):
> >ovn-controller: Add OF rules for port security.
> >actions: Add new actions check_in_port_sec and check_out_port_sec.
> >northd: Add generic port security logical flows.
> >
> >   controller/binding.c |  78 +++-
> >   controller/binding.h |  23 +-
> >   controller/lflow.c   | 792 ++-
> >   controller/lflow.h   |   4 +
> >   controller/ovn-controller.c  |  21 +-
> >   include/ovn/actions.h|   6 +
> >   include/ovn/logical-fields.h |   1 +
> >   lib/actions.c|  75 +++-
> >   northd/northd.c  | 557 +---
> >   northd/ovn-northd.8.xml  | 263 ++--
> >   ovn-sb.ovsschema |   7 +-
> >   ovn-sb.xml   |  54 +++
> >   tests/ovn-northd.at  | 431 ---
> >   tests/ovn.at | 381 +++--
> >   tests/test-ovn.c |   2 +
> >   utilities/ovn-trace.c| 313 ++
> >   16 files changed, 2182 insertions(+), 826 deletions(-)
> >
>


Re: [ovs-dev] [PATCH ovn] ovs-sandbox: Allow specifying initial contents for OVS and VTEP databases.

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 4:17 PM Mark Michelson  wrote:
>
> Acked-by: Mark Michelson 
>
> Since this is a new feature introduced after soft freeze for 22.06, this
> shouldn't be a candidate for 22.06. However, personally I feel this is
> so low-risk (and so useful) that it should be included. Does anyone
> disagree?

I'm fine.  It would be helpful for debugging.
I can take a look and merge it.

Numan

>
> On 5/17/22 06:47, Dumitru Ceara wrote:
> > This makes it easier to troubleshoot OVN in a sandbox when initial
> > contents for the NB/SB/OVS DBs are provided (e.g., from a production
> > environment).
> >
> > A way to load the DBs locally and run OVN against them is:
> > $ SANDBOXFLAGS="--nbdb-source=path-to-nb.db --sbdb-source=path-to-sb.db 
> > --ovs-source=path-to-conf.db" make sandbox
> >
> > Signed-off-by: Dumitru Ceara 
> > ---
> >   tutorial/ovs-sandbox | 28 ++--
> >   1 file changed, 26 insertions(+), 2 deletions(-)
> >
> > diff --git a/tutorial/ovs-sandbox b/tutorial/ovs-sandbox
> > index d81e00496..2219b0e99 100755
> > --- a/tutorial/ovs-sandbox
> > +++ b/tutorial/ovs-sandbox
> > @@ -79,6 +79,8 @@ ovn_rbac=true
> >   n_northds=1
> >   n_ics=1
> >   n_controllers=1
> > +ovs_source=
> > +vtep_source=
> >   nbdb_model=standalone
> >   nbdb_servers=3
> >   nbdb_source=
> > @@ -307,6 +309,18 @@ EOF
> >   --sbdb-so*)
> >   prev=ovnsb_source
> >   ;;
> > +--ovs-so*=*)
> > +ovs_source=$optarg
> > +;;
> > +--ovs-so*)
> > +prev=ovs_source
> > +;;
> > +--vtep-so*=*)
> > +vtep_source=$optarg
> > +;;
> > +--vtep-so*)
> > +prev=vtep_source
> > +;;
> >   --ic-nb-s*=*)
> >   ic_nb_servers=$optarg
> >   ic_nb_model=clustered
> > @@ -474,10 +488,20 @@ trap 'kill `cat "$sandbox"/*.pid`' 0 1 2 3 13 14 15
> >
> >   # Create database and start ovsdb-server.
> >   touch "$sandbox"/.conf.db.~lock~
> > -run ovsdb-tool create conf.db "$schema"
> > +if test ! -e "$ovs_source"; then
> > +run ovsdb-tool create conf.db "$schema"
> > +else
> > +run cp "$ovs_source" conf.db
> > +run ovsdb-tool convert conf.db "$schema"
> > +fi
> >   ovsdb_server_args=
> >
> > -run ovsdb-tool create vtep.db "$vtep_schema"
> > +if test ! -e "$vtep_source"; then
> > +run ovsdb-tool create vtep.db "$vtep_schema"
> > +else
> > +run cp "$vtep_source" vtep.db
> > +run ovsdb-tool convert vtep.db "$vtep_schema"
> > +fi
> >   ovsdb_server_args="vtep.db conf.db"
> >
> >   if [ "$HAVE_OPENSSL" = yes ]; then
> >
>


[ovs-dev] [PATCH ovn] tests: Fix failing test case - "Port security lflows"

2022-05-19 Thread numans
From: Numan Siddique 

Also added a few comments in the port security related functions
in controller/lflow.c

Fixes: 94974a02bfd5("northd: Add generic port security logical flows.")
Signed-off-by: Numan Siddique 
---
 controller/lflow.c  | 16 +++-
 tests/ovn-northd.at |  2 +-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index e968231492..7a3419305a 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -2731,7 +2731,13 @@ lflow_handle_flows_for_lport(const struct sbrec_port_binding *pb,
 return false;
 }
 
-/* Program the port security flows. */
+/* Program the port security flows.
+ * Note: All the port security OF rules are added using the 'uuid'
+ * of the port binding.  Right now port binding 'uuid' is used in
+ * the logical flow table (l_ctx_out->flow_table) only for port
+ * security flows.  Later if new flows are added using the
+ * port binding 'uuid', then this function should handle it properly.
+ */
ofctrl_remove_flows(l_ctx_out->flow_table, &pb->header_.uuid);
 
 if (pb->n_port_security && shash_find(l_ctx_in->binding_lports,
@@ -3399,6 +3405,10 @@ build_out_port_sec_ip4_flows(const struct sbrec_port_binding *pb,
 struct ovn_desired_flow_table *flow_table)
 {
 if (!ps_addr->n_ipv4_addrs && !ps_addr->n_ipv6_addrs) {
+ /* No IPv4 and no IPv6 addresses in the port security.
+  * Both IPv4 and IPv6 traffic should be delivered to the
+  * lport. build_out_port_sec_no_ip_flows() takes care of
+  * adding the required flow(s) to allow. */
 return;
 }
 
@@ -3493,6 +3503,10 @@ build_out_port_sec_ip6_flows(const struct sbrec_port_binding *pb,
 struct ovn_desired_flow_table *flow_table)
 {
 if (!ps_addr->n_ipv4_addrs && !ps_addr->n_ipv6_addrs) {
+/* No IPv4 and no IPv6 addresses in the port security.
+ * Both IPv4 and IPv6 traffic should be delivered to the
+ * lport. build_out_port_sec_no_ip_flows() takes care of
+ * adding the required flow(s) to allow. */
 return;
 }
 
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 0912e3bec4..5bd0935e72 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -7404,8 +7404,8 @@ AT_CHECK([cat sw0flows | grep -e port_sec | sort | sed 's/table=./table=?/' ], [
  table=? (ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
  table=? (ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
  table=? (ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
-  table=? (ls_in_check_port_sec), priority=50   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=16);)
  table=? (ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
+  table=? (ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=16);)
  table=? (ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
  table=? (ls_in_apply_port_sec), priority=0, match=(1), action=(next;)
  table=? (ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
-- 
2.35.3



Re: [ovs-dev] [PATCH ovn] ovs-sandbox: Allow specifying initial contents for OVS and VTEP databases.

2022-05-19 Thread Mark Michelson

Acked-by: Mark Michelson 

Since this is a new feature introduced after soft freeze for 22.06, this 
shouldn't be a candidate for 22.06. However, personally I feel this is 
so low-risk (and so useful) that it should be included. Does anyone 
disagree?


On 5/17/22 06:47, Dumitru Ceara wrote:

This makes it easier to troubleshoot OVN in a sandbox when initial
contents for the NB/SB/OVS DBs are provided (e.g., from a production
environment).

A way to load the DBs locally and run OVN against them is:
$ SANDBOXFLAGS="--nbdb-source=path-to-nb.db --sbdb-source=path-to-sb.db 
--ovs-source=path-to-conf.db" make sandbox
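
A hedged sketch of the analogous usage for the OVS and VTEP databases added
by this patch (the file names below are just placeholders):

$ SANDBOXFLAGS="--ovs-source=path-to-conf.db --vtep-source=path-to-vtep.db" make sandbox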

Signed-off-by: Dumitru Ceara 
---
  tutorial/ovs-sandbox | 28 ++--
  1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/tutorial/ovs-sandbox b/tutorial/ovs-sandbox
index d81e00496..2219b0e99 100755
--- a/tutorial/ovs-sandbox
+++ b/tutorial/ovs-sandbox
@@ -79,6 +79,8 @@ ovn_rbac=true
  n_northds=1
  n_ics=1
  n_controllers=1
+ovs_source=
+vtep_source=
  nbdb_model=standalone
  nbdb_servers=3
  nbdb_source=
@@ -307,6 +309,18 @@ EOF
  --sbdb-so*)
  prev=ovnsb_source
  ;;
+--ovs-so*=*)
+ovs_source=$optarg
+;;
+--ovs-so*)
+prev=ovs_source
+;;
+--vtep-so*=*)
+vtep_source=$optarg
+;;
+--vtep-so*)
+prev=vtep_source
+;;
  --ic-nb-s*=*)
  ic_nb_servers=$optarg
  ic_nb_model=clustered
@@ -474,10 +488,20 @@ trap 'kill `cat "$sandbox"/*.pid`' 0 1 2 3 13 14 15
  
  # Create database and start ovsdb-server.

  touch "$sandbox"/.conf.db.~lock~
-run ovsdb-tool create conf.db "$schema"
+if test ! -e "$ovs_source"; then
+run ovsdb-tool create conf.db "$schema"
+else
+run cp "$ovs_source" conf.db
+run ovsdb-tool convert conf.db "$schema"
+fi
  ovsdb_server_args=
  
-run ovsdb-tool create vtep.db "$vtep_schema"

+if test ! -e "$vtep_source"; then
+run ovsdb-tool create vtep.db "$vtep_schema"
+else
+run cp "$vtep_source" vtep.db
+run ovsdb-tool convert vtep.db "$vtep_schema"
+fi
  ovsdb_server_args="vtep.db conf.db"
  
  if [ "$HAVE_OPENSSL" = yes ]; then






[ovs-dev] [PATCH ovn v2 1/2] Prepare for 22.06.0.

2022-05-19 Thread Mark Michelson
Signed-off-by: Mark Michelson 
---
 NEWS | 4 ++--
 configure.ac | 2 +-
 debian/changelog | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 1cdba4bfc..74d4029ff 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
-Post v22.03.0
--
+OVN v22.06.0 - XX XXX XXX
+-
   - Support IGMP and MLD snooping on transit logical switches that connect
 different OVN Interconnection availability zones.
   - Replaced the usage of masked ct_label by ct_mark in most cases to work
diff --git a/configure.ac b/configure.ac
index eba85eede..b649441bc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(ovn, 22.03.90, b...@openvswitch.org)
+AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_HEADERS([config.h])
diff --git a/debian/changelog b/debian/changelog
index f2355c3be..27bb40232 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,8 +1,8 @@
-ovn (22.03.90-1) unstable; urgency=low
+ovn (22.06.0-1) unstable; urgency=low
 
* New upstream version
 
- -- OVN team   Thu, 24 Feb 2022 16:55:45 -0500
+ -- OVN team   Thu, 19 May 2022 15:11:38 -0400
 
 ovn (22.03.0-1) unstable; urgency=low
 
-- 
2.31.1



[ovs-dev] [PATCH ovn v2 2/2] Prepare for post-22.06.0.

2022-05-19 Thread Mark Michelson
Signed-off-by: Mark Michelson 
---
 NEWS | 3 +++
 configure.ac | 2 +-
 debian/changelog | 6 ++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 74d4029ff..2a394f8d6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+Post v22.06.0
+-
+
 OVN v22.06.0 - XX XXX XXX
 -
   - Support IGMP and MLD snooping on transit logical switches that connect
diff --git a/configure.ac b/configure.ac
index b649441bc..8665e71f4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
+AC_INIT(ovn, 22.06.90, b...@openvswitch.org)
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_HEADERS([config.h])
diff --git a/debian/changelog b/debian/changelog
index 27bb40232..21126029c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+ovn (22.06.90-1) unstable; urgency=low
+
+   * New upstream version
+
+ -- OVN team   Thu, 19 May 2022 15:11:38 -0400
+
 ovn (22.06.0-1) unstable; urgency=low
 
* New upstream version
-- 
2.31.1



Re: [ovs-dev] [PATCH ovn] northd: Determine gateway port for NAT when not specified

2022-05-19 Thread Mark Michelson

Hi Abhiram,

This is a great idea. I only have a couple of minor comments below.

On 5/5/22 13:43, Abhiram Sangana wrote:

If multiple distributed gateway ports (DGP) are configured on a
logical router, "gateway_port" column needs to be set for NAT entries
of the logical router for the rules to be applied, as part of commit
2d942be (northd: Add support for NAT with multiple DGP).

This patch updates the behavior by inferring the DGP where the NAT
rule needs to be applied based on the "external_ip" column of the
NAT rule when "gateway_port" column is not set.
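
As a quick illustration (the router and addresses here are hypothetical, not
taken from the patch): with multiple DGPs configured on R1, a NAT rule added
without a gateway port, e.g.

$ ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.0.10 10.0.0.2

would now be applied on the DGP whose network contains 172.16.0.10 instead
of being rejected for lacking a "gateway_port".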

Signed-off-by: Abhiram Sangana 
---
  northd/northd.c   |  49 --
  northd/ovn-northd.8.xml   |  19 ---
  ovn-nb.xml|  19 ---
  tests/ovn-nbctl.at|  47 ++
  tests/ovn-northd.at   | 102 --
  tests/ovn.at  |   2 +-
  utilities/ovn-nbctl.8.xml |   5 +-
  utilities/ovn-nbctl.c |  94 ---
  8 files changed, 228 insertions(+), 109 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index a5297..aa1c2ae05 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -2729,14 +2729,17 @@ join_logical_ports(struct northd_input *input_data,
  }
  }
  
+static const char *find_lrp_member_ip(const struct ovn_port *op,

+  const char *ip_s);
+
  /* Returns an array of strings, each consisting of a MAC address followed
   * by one or more IP addresses, and if the port is a distributed gateway
   * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
   * LPORT_NAME is the name of the L3 redirect port or the name of the
   * logical_port specified in a NAT rule. These strings include the
- * external IP addresses of NAT rules defined on that router which have
- * gateway_port not set or have gateway_port as the router port 'op', and all
- * of the IP addresses used in load balancer VIPs defined on that router.
+ * external IP addresses of NAT rules defined on that router whose
+ * gateway_port is router port 'op', and all of the IP addresses used in
+ * load balancer VIPs defined on that router.
   *
   * The caller must free each of the n returned strings with free(),
   * and must free the returned array when it is no longer needed. */
@@ -2779,7 +2782,9 @@ get_nat_addresses(const struct ovn_port *op, size_t *n, 
bool routable_only,
  
  /* Not including external IP of NAT rules whose gateway_port is

   * not 'op'. */
-if (nat->gateway_port && nat->gateway_port != op->nbrp) {
+if (op->od->n_l3dgw_ports > 1 &&
+((!nat->gateway_port && !find_lrp_member_ip(op, nat->external_ip))
+ || (nat->gateway_port && nat->gateway_port != op->nbrp))) {


This same if statement is repeated later in this patch. It would 
probably be good to factor this into a function.




  continue;
  }
  
@@ -3454,8 +3459,12 @@ ovn_port_update_sbrec(struct northd_input *input_data,

  struct ds nat_addr = DS_EMPTY_INITIALIZER;
  ds_put_format(&nat_addr, "%s", nat_addresses);
  if (l3dgw_ports) {
+const struct ovn_port *l3dgw_port = (
+is_l3dgw_port(op->peer)
+? op->peer
+: op->peer->od->l3dgw_ports[0]);
  ds_put_format(&nat_addr, " is_chassis_resident(%s)",
-op->peer->od->l3dgw_ports[0]->cr_port->json_key);
+l3dgw_port->cr_port->json_key);
  }
  nats[0] = xstrdup(ds_cstr(&nat_addr));
  ds_destroy(&nat_addr);
@@ -10502,9 +10511,11 @@ build_lrouter_port_nat_arp_nd_flow(struct ovn_port *op,
  const struct nbrec_nat *nat = nat_entry->nb;
  struct ds match = DS_EMPTY_INITIALIZER;
  
-/* ARP/ND should be sent from distributed gateway port specified in

+/* ARP/ND should be sent from distributed gateway port relevant to
   * the NAT rule. */
-if (nat->gateway_port && nat->gateway_port != op->nbrp) {
+if (op->od->n_l3dgw_ports > 1 &&
+((!nat->gateway_port && !find_lrp_member_ip(op, nat->external_ip)) ||
+ (nat->gateway_port && nat->gateway_port != op->nbrp))) {
  return;
  }
  
@@ -13287,14 +13298,24 @@ lrouter_check_nat_entry(struct ovn_datapath *od, const struct nbrec_nat *nat,

  /* Validate gateway_port of NAT rule. */
  *nat_l3dgw_port = NULL;
  if (nat->gateway_port == NULL) {
-if (od->n_l3dgw_ports > 1) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-VLOG_WARN_RL(&rl, "NAT configured on logical router: %s with"
- "multiple distributed gateway ports needs to specify"
- "valid gateway_port.", od->nbr->name);
-return -EINVAL;
-} else if 

Re: [ovs-dev] [PATCH ovn 2/2] Prepare for post-22.06.0.

2022-05-19 Thread Ilya Maximets
On 5/19/22 21:20, Mark Michelson wrote:
> Signed-off-by: Mark Michelson 
> ---
>  NEWS | 3 +++
>  configure.ac | 2 +-
>  debian/changelog | 5 +
>  3 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/NEWS b/NEWS
> index 22df13585..4d2e6afc1 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,3 +1,6 @@
> +Post v22.06.0
> +-
> +
>  OVN v22.06.0 - XX XX XXX
>  
>- Support IGMP and MLD snooping on transit logical switches that connect
> diff --git a/configure.ac b/configure.ac
> index b649441bc..8665e71f4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -13,7 +13,7 @@
>  # limitations under the License.
>  
>  AC_PREREQ(2.63)
> -AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
> +AC_INIT(ovn, 22.06.90, b...@openvswitch.org)
>  AC_CONFIG_MACRO_DIR([m4])
>  AC_CONFIG_AUX_DIR([build-aux])
>  AC_CONFIG_HEADERS([config.h])
> diff --git a/debian/changelog b/debian/changelog
> index 27bb40232..b2e11a4a5 100644
> --- a/debian/changelog
> +++ b/debian/changelog
> @@ -1,3 +1,8 @@
> +ovn (22.06.90-1) unstable; urgency=low
> +
> +   * New upstream version
> +
> + -- OVN team   Thu, 19 May 2022 15:11:38 -0400

I believe an empty line is required here.
Otherwise it may break parsing for debian packages.

>  ovn (22.06.0-1) unstable; urgency=low
>  
> * New upstream version



Re: [ovs-dev] [PATCH ovn 1/2] Prepare for 22.06.0.

2022-05-19 Thread Ilya Maximets
On 5/19/22 21:20, Mark Michelson wrote:
> Signed-off-by: Mark Michelson 
> ---
>  NEWS | 4 ++--
>  configure.ac | 2 +-
>  debian/changelog | 4 ++--
>  3 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 1cdba4bfc..22df13585 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,5 @@
> -Post v22.03.0
> --
> +OVN v22.06.0 - XX XX XXX

Strange number of X-es. :)
Shouldn't it be XX XXX  ?

> +
>- Support IGMP and MLD snooping on transit logical switches that connect
>  different OVN Interconnection availability zones.
>- Replaced the usage of masked ct_label by ct_mark in most cases to work
> diff --git a/configure.ac b/configure.ac
> index eba85eede..b649441bc 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -13,7 +13,7 @@
>  # limitations under the License.
>  
>  AC_PREREQ(2.63)
> -AC_INIT(ovn, 22.03.90, b...@openvswitch.org)
> +AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
>  AC_CONFIG_MACRO_DIR([m4])
>  AC_CONFIG_AUX_DIR([build-aux])
>  AC_CONFIG_HEADERS([config.h])
> diff --git a/debian/changelog b/debian/changelog
> index f2355c3be..27bb40232 100644
> --- a/debian/changelog
> +++ b/debian/changelog
> @@ -1,8 +1,8 @@
> -ovn (22.03.90-1) unstable; urgency=low
> +ovn (22.06.0-1) unstable; urgency=low
>  
> * New upstream version
>  
> - -- OVN team   Thu, 24 Feb 2022 16:55:45 -0500
> + -- OVN team   Thu, 19 May 2022 15:11:38 -0400
>  
>  ovn (22.03.0-1) unstable; urgency=low
>  



[ovs-dev] [PATCH ovn 1/2] Prepare for 22.06.0.

2022-05-19 Thread Mark Michelson
Signed-off-by: Mark Michelson 
---
 NEWS | 4 ++--
 configure.ac | 2 +-
 debian/changelog | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 1cdba4bfc..22df13585 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
-Post v22.03.0
--
+OVN v22.06.0 - XX XX XXX
+
   - Support IGMP and MLD snooping on transit logical switches that connect
 different OVN Interconnection availability zones.
   - Replaced the usage of masked ct_label by ct_mark in most cases to work
diff --git a/configure.ac b/configure.ac
index eba85eede..b649441bc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(ovn, 22.03.90, b...@openvswitch.org)
+AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_HEADERS([config.h])
diff --git a/debian/changelog b/debian/changelog
index f2355c3be..27bb40232 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,8 +1,8 @@
-ovn (22.03.90-1) unstable; urgency=low
+ovn (22.06.0-1) unstable; urgency=low
 
* New upstream version
 
- -- OVN team   Thu, 24 Feb 2022 16:55:45 -0500
+ -- OVN team   Thu, 19 May 2022 15:11:38 -0400
 
 ovn (22.03.0-1) unstable; urgency=low
 
-- 
2.31.1



[ovs-dev] [PATCH ovn 2/2] Prepare for post-22.06.0.

2022-05-19 Thread Mark Michelson
Signed-off-by: Mark Michelson 
---
 NEWS | 3 +++
 configure.ac | 2 +-
 debian/changelog | 5 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 22df13585..4d2e6afc1 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+Post v22.06.0
+-
+
 OVN v22.06.0 - XX XX XXX
 
   - Support IGMP and MLD snooping on transit logical switches that connect
diff --git a/configure.ac b/configure.ac
index b649441bc..8665e71f4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(ovn, 22.06.0, b...@openvswitch.org)
+AC_INIT(ovn, 22.06.90, b...@openvswitch.org)
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_HEADERS([config.h])
diff --git a/debian/changelog b/debian/changelog
index 27bb40232..b2e11a4a5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,8 @@
+ovn (22.06.90-1) unstable; urgency=low
+
+   * New upstream version
+
+ -- OVN team   Thu, 19 May 2022 15:11:38 -0400
 ovn (22.06.0-1) unstable; urgency=low
 
* New upstream version
-- 
2.31.1



[ovs-dev] [PATCH ovn] northd: add the capability to inherit logical routers lbs on logical switches

2022-05-19 Thread Lorenzo Bianconi
Add the capability to automatically deploy a load-balancer on each
logical-switch connected to a logical router where the load-balancer
has been installed by the CMS. This patch allows overcoming the
distributed gw router scenario limitation where a load-balancer must be
installed on each datapath in order to be properly reachable.
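
As a sketch of how a CMS might turn this on (the knob location is inferred
from the diff below, which reads the option from the NB_Global options
column):

$ ovn-nbctl set NB_Global . options:install_ls_lb_from_router=true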

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2043543
Signed-off-by: Lorenzo Bianconi 
---
 northd/northd.c | 56 ++
 tests/ovn-northd.at | 59 +
 2 files changed, 115 insertions(+)

diff --git a/northd/northd.c b/northd/northd.c
index 60983308d..4e06d6172 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -63,6 +63,8 @@ static bool lflow_hash_lock_initialized = false;
 
 static bool check_lsp_is_up;
 
+static bool install_ls_lb_from_router;
+
 /* MAC allocated for service monitor usage. Just one mac is allocated
  * for this purpose and ovn-controller's on each chassis will make use
  * of this mac when sending out the packets to monitor the services
@@ -4085,6 +4087,55 @@ build_lrouter_lbs_reachable_ips(struct hmap *datapaths, 
struct hmap *lbs)
 }
 }
 
+static void
+build_lswitch_lbs_from_lrouter(struct hmap *datapaths, struct hmap *lbs)
+{
+if (!install_ls_lb_from_router) {
+return;
+}
+
+struct ovn_datapath *od;
+HMAP_FOR_EACH (od, key_node, datapaths) {
+if (!od->nbs) {
+continue;
+}
+
+struct ovn_port *op;
+LIST_FOR_EACH (op, dp_node, &od->port_list) {
+if (!lsp_is_router(op->nbsp)) {
+continue;
+}
+if (!op->peer) {
+continue;
+}
+
+struct ovn_datapath *peer_od = op->peer->od;
+for (size_t i = 0; i < peer_od->nbr->n_load_balancer; i++) {
+bool installed = false;
+const struct uuid *lb_uuid =
+&peer_od->nbr->load_balancer[i]->header_.uuid;
+struct ovn_northd_lb *lb = ovn_northd_lb_find(lbs, lb_uuid);
+if (!lb) {
+continue;
+}
+
+for (size_t j = 0; j < lb->n_nb_ls; j++) {
+   if (lb->nb_ls[j] == od) {
+   installed = true;
+   break;
+   }
+}
+if (!installed) {
+ovn_northd_lb_add_ls(lb, od);
+}
+if (lb->nlb) {
+od->has_lb_vip |= lb_has_vip(lb->nlb);
+}
+}
+}
+}
+}
+
 /* This must be called after all ports have been processed, i.e., after
  * build_ports() because the reachability check requires the router ports
  * networks to have been parsed.
@@ -4097,6 +4148,7 @@ build_lb_port_related_data(struct hmap *datapaths, struct 
hmap *ports,
 build_lrouter_lbs_check(datapaths);
 build_lrouter_lbs_reachable_ips(datapaths, lbs);
 build_lb_svcs(input_data, ovnsb_txn, ports, lbs);
+build_lswitch_lbs_from_lrouter(datapaths, lbs);
 }
 
 /* Syncs relevant load balancers (applied to logical switches) to the
@@ -15611,6 +15663,10 @@ ovnnb_db_run(struct northd_input *input_data,
  "ignore_lsp_down", true);
 default_acl_drop = smap_get_bool(&nb->options, "default_acl_drop", false);
 
+install_ls_lb_from_router = smap_get_bool(&nb->options,
+  "install_ls_lb_from_router",
+  false);
+
 build_datapaths(input_data, ovnsb_txn, &data->datapaths, &data->lr_list);
 build_lbs(input_data, &data->datapaths, &data->lbs);
 build_ports(input_data, ovnsb_txn, sbrec_chassis_by_name,
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index ea311c7c0..9297c0c46 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -7289,3 +7289,62 @@ AT_CHECK([diff flows1 flows3])
 
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([check install_ls_lb_from_router option])
+ovn_start
+
+ovn-nbctl lr-add R1
+ovn-nbctl set logical_router R1 options:chassis=hv1
+ovn-nbctl lrp-add R1 R1-S0 02:ac:10:01:00:01 10.0.0.1/24
+ovn-nbctl lrp-add R1 R1-S1 02:ac:10:01:01:01 20.0.0.1/24
+ovn-nbctl lrp-add R1 R1-PUB 02:ac:20:01:01:01 172.16.0.1/24
+
+ovn-nbctl ls-add S0
+ovn-nbctl lsp-add S0 S0-R1
+ovn-nbctl lsp-set-type S0-R1 router
+ovn-nbctl lsp-set-addresses S0-R1 02:ac:10:01:00:01
+ovn-nbctl lsp-set-options S0-R1 router-port=R1-S0
+
+ovn-nbctl ls-add S1
+ovn-nbctl lsp-add S1 S1-R1
+ovn-nbctl lsp-set-type S1-R1 router
+ovn-nbctl lsp-set-addresses S1-R1 02:ac:10:01:01:01
+ovn-nbctl lsp-set-options S1-R1 router-port=R1-S1
+
+# Add load balancers on the logical router R1
+ovn-nbctl lb-add lb0 172.16.0.10:80 10.0.0.2:80
+ovn-nbctl lr-lb-add R1 lb0
+
+ovn-sbctl dump-flows S0 > S0flows
+ovn-sbctl dump-flows S1 > S1flows
+
+AT_CAPTURE_FILE([S0flows])
+AT_CAPTURE_FILE([S1flows])
+

Re: [ovs-dev] [PATCH ovn v8 1/3] Update port-up on main chassis only

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 12:09 PM Ihar Hrachyshka  wrote:
>
> In a future patch, there will be a scenario where the same port has
> attachments at multiple (specifically, 2) chassis, so make sure that
> 'up' property is updated by the main chassis only.
>
> Signed-off-by: Ihar Hrachyshka 


The patch LGTM.

Acked-by: Numan Siddique 

Your 2nd patch is not applying cleanly.

Can you please respin another version of the series ?

You can add my Acked-by tag in the v9 patch 1.

Thanks
Numan


> ---
>  controller/binding.c|  9 ++---
>  controller/binding.h|  2 ++
>  controller/if-status.c  | 15 ++-
>  controller/if-status.h  |  1 +
>  controller/ovn-controller.c |  4 ++--
>  5 files changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 0b6e83a74..6bdbe9912 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -698,6 +698,7 @@ local_binding_is_down(struct shash *local_bindings, const 
> char *pb_name)
>
>  void
>  local_binding_set_up(struct shash *local_bindings, const char *pb_name,
> + const struct sbrec_chassis *chassis_rec,
>   const char *ts_now_str, bool sb_readonly,
>   bool ovs_readonly)
>  {
> @@ -717,8 +718,8 @@ local_binding_set_up(struct shash *local_bindings, const 
> char *pb_name,
>  ts_now_str);
>  }
>
> -if (!sb_readonly && lbinding && b_lport && b_lport->pb->n_up
> -&& !b_lport->pb->up[0]) {
> +if (!sb_readonly && lbinding && b_lport && b_lport->pb->n_up &&
> +!b_lport->pb->up[0] && b_lport->pb->chassis == chassis_rec) {
>  VLOG_INFO("Setting lport %s up in Southbound", pb_name);
>  binding_lport_set_up(b_lport, sb_readonly);
>  LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) {
> @@ -729,6 +730,7 @@ local_binding_set_up(struct shash *local_bindings, const 
> char *pb_name,
>
>  void
>  local_binding_set_down(struct shash *local_bindings, const char *pb_name,
> +   const struct sbrec_chassis *chassis_rec,
> bool sb_readonly, bool ovs_readonly)
>  {
>  struct local_binding *lbinding =
> @@ -743,7 +745,8 @@ local_binding_set_down(struct shash *local_bindings, 
> const char *pb_name,
>  OVN_INSTALLED_EXT_ID);
>  }
>
> -if (!sb_readonly && b_lport && b_lport->pb->n_up && b_lport->pb->up[0]) {
> +if (!sb_readonly && b_lport && b_lport->pb->n_up && b_lport->pb->up[0] &&
> +(!b_lport->pb->chassis || b_lport->pb->chassis == chassis_rec)) {
>  VLOG_INFO("Setting lport %s down in Southbound", pb_name);
>  binding_lport_set_down(b_lport, sb_readonly);
>  LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) {
> diff --git a/controller/binding.h b/controller/binding.h
> index e49e1ebb6..6815d1fd6 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -155,9 +155,11 @@ ofp_port_t local_binding_get_lport_ofport(const struct 
> shash *local_bindings,
>  bool local_binding_is_up(struct shash *local_bindings, const char *pb_name);
>  bool local_binding_is_down(struct shash *local_bindings, const char 
> *pb_name);
>  void local_binding_set_up(struct shash *local_bindings, const char *pb_name,
> +  const struct sbrec_chassis *chassis_rec,
>const char *ts_now_str, bool sb_readonly,
>bool ovs_readonly);
>  void local_binding_set_down(struct shash *local_bindings, const char 
> *pb_name,
> +const struct sbrec_chassis *chassis_rec,
>  bool sb_readonly, bool ovs_readonly);
>
>  void binding_register_ovs_idl(struct ovsdb_idl *);
> diff --git a/controller/if-status.c b/controller/if-status.c
> index dbdf8b66f..ad61844d8 100644
> --- a/controller/if-status.c
> +++ b/controller/if-status.c
> @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct if_status_mgr *, 
> struct ovs_iface *,
>
>  static void if_status_mgr_update_bindings(
>  struct if_status_mgr *mgr, struct local_binding_data *binding_data,
> +const struct sbrec_chassis *,
>  bool sb_readonly, bool ovs_readonly);
>
>  struct if_status_mgr *
> @@ -306,6 +307,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>  void
>  if_status_mgr_run(struct if_status_mgr *mgr,
>struct local_binding_data *binding_data,
> +  const struct sbrec_chassis *chassis_rec,
>bool sb_readonly, bool ovs_readonly)
>  {
>  struct ofctrl_acked_seqnos *acked_seqnos =
> @@ -328,8 +330,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
>  ofctrl_acked_seqnos_destroy(acked_seqnos);
>
>  /* Update binding states. */
> -if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
> -  ovs_readonly);

Re: [ovs-dev] [RFC PATCH 0/6] Remove OVS kernel driver

2022-05-19 Thread Gregory Rose




On 4/15/2022 2:42 PM, Greg Rose wrote:

It is time to remove support for the OVS kernel driver and push
towards use of the upstream Linux openvswitch kernel driver
in its place [1].

This patch series represents a first attempt but there are a few
primary remaining issues that I have yet to address.

A) Removal of debian packaging support for the dkms kernel driver
module. The debian/rules are not well known to me - I've never
actually made any changes in that area and do not have a
well-formed understanding of how debian packaging works.  I will
attempt to fix that up in an upcoming patch series.
B) Figuring out the github workflow - I removed the tests I
could find that depend on the Linux kernel (i.e. they use the
install_kernel() function).  Several other tests are failing
that would not seem to depend on the Linux kernel.  I need to
read and understand that code better.
C) There are many Linux specific source modules in the datapath that
will need eventual removal but some headers are still required for
the userspace code (which seems counterintuitive but...)

Reviews, suggestions, etc. are appreciated!

1.  https://mail.openvswitch.org/pipermail/ovs-dev/2022-April/393292.html


I would like to suggest at this time that, rather than removing the OVS
Linux kernel datapath, we "freeze" it at Linux 5.8. This will make it
easier for some consumers of OVS that are continuing to support the
Linux kernel datapath in old distributions.

The ultimate goal of shifting toward DPDK and AFXDP datapaths is still
preserved but we are placing less burden on some consumers of OVS for
older Linux distributions.

Perhaps in suggesting removal of the kernel datapath I was being a bit
overly aggressive.

Thoughts? Concerns? Other suggestions?

Thanks,

- Greg




Greg Rose (6):
   acinclude.m4: Remove support for building the OVS kernel module
   rhel: Remove kernel mode spec
   rhel: Remove RHEL 6 kernel module spec
   tests: Remove support for check-kmod test
   Documentation: Remove kernel module documentation
   Disable unsupported kernel builds

  .github/workflows/build-and-test.yml  |  35 -
  Documentation/faq/releases.rst|   5 +-
  .../contributing/backporting-patches.rst  |   7 +
  Documentation/intro/install/fedora.rst|  24 -
  Documentation/intro/install/general.rst   |  63 --
  acinclude.m4  | 683 +-
  rhel/automake.mk  |  17 -
  rhel/kmod-openvswitch-rhel6.spec.in   | 123 
  rhel/openvswitch-kmod-fedora.spec.in  | 152 
  tests/automake.mk |   6 -
  10 files changed, 11 insertions(+), 1104 deletions(-)
  delete mode 100644 rhel/kmod-openvswitch-rhel6.spec.in
  delete mode 100644 rhel/openvswitch-kmod-fedora.spec.in




Re: [ovs-dev] [PATCH ovn v2 0/3] Adding generic port security flows.

2022-05-19 Thread Mark Michelson

Thank you for the rebase, Numan. I pushed the series to main.

On 5/19/22 11:17, num...@ovn.org wrote:

From: Numan Siddique 

This patch series adds generic logical flows for port security in
the logical switch pipeline and pushes the actual port security
implementation logic to ovn-controller from ovn-northd.

ovn-northd will now add logical flows like:

table=0 (ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[14] = 
check_in_port_sec(); next;)
table=1 (ls_in_apply_port_sec), priority=50   , match=(reg0[14] == 1), 
action=(drop;)
table=1 (ls_in_apply_port_sec), priority=0, match=(1), action=(next;)

OVN action check_in_port_sec() resubmits the packet to openflow table
73.  ovn-controller will add port security flows in tables 73, 74 and 75
for all the logical ports it has claimed.  The port security information
is passed down the Port_Binding table in Southbound database.

The main motivation for the patch is to address scale concerns.
This patch series reduces the number of logical flows and ovn-northd
CPU utilization time.

Did some scale testing and below are the results:

Used a Northbound database from a deployment of 120 node cluster.
Number of logical switch ports with port security configured: 13711

With vanilla ovn-northd
---
Number of logical flows : 208061
Avg time taken to run build_lflows() : 1301 msec
Size of Southbound database after compaction: 104M

With ovn-northd using this feature
-
Number of logical flows : 83396
Avg time taken to run build_lflows() : 560  msec
Size of Southbound database after compaction: 45M


v1 -> v2
---
   * Rebased to resolve conflicts.
   * Added Mark's Acks.

Numan Siddique (3):
   ovn-controller: Add OF rules for port security.
   actions: Add new actions check_in_port_sec and check_out_port_sec.
   northd: Add generic port security logical flows.

  controller/binding.c |  78 +++-
  controller/binding.h |  23 +-
  controller/lflow.c   | 792 ++-
  controller/lflow.h   |   4 +
  controller/ovn-controller.c  |  21 +-
  include/ovn/actions.h|   6 +
  include/ovn/logical-fields.h |   1 +
  lib/actions.c|  75 +++-
  northd/northd.c  | 557 +---
  northd/ovn-northd.8.xml  | 263 ++--
  ovn-sb.ovsschema |   7 +-
  ovn-sb.xml   |  54 +++
  tests/ovn-northd.at  | 431 ---
  tests/ovn.at | 381 +++--
  tests/test-ovn.c |   2 +
  utilities/ovn-trace.c| 313 ++
  16 files changed, 2182 insertions(+), 826 deletions(-)





Re: [ovs-dev] [PATCH] classifier: adjust segment boundary to execute prerequisite processing

2022-05-19 Thread Numan Siddique
On Thu, May 19, 2022 at 9:25 AM Ferriter, Cian  wrote:
>
> Hi Aaron,
>
> 
>
> > ---
> >  include/openvswitch/flow.h |  7 +++
> >  tests/classifier.at| 27 +++
> >  2 files changed, 30 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
> > index 3054015d93..df10cf579e 100644
> > --- a/include/openvswitch/flow.h
> > +++ b/include/openvswitch/flow.h
> > @@ -141,15 +141,14 @@ struct flow {
> >  uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */
> >  uint8_t nw_ttl; /* IP TTL/Hop Limit. */
> >  uint8_t nw_proto;   /* IP protocol or low 8 bits of ARP 
> > opcode. */
> > +/* L4 (64-bit aligned) */
> >  struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
> >  struct eth_addr arp_sha;/* ARP/ND source hardware address. */
> >  struct eth_addr arp_tha;/* ARP/ND target hardware address. */
> > -ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type.
> > - * With L3 to avoid matching L4. */
> > +ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. */
> >  ovs_be16 pad2;  /* Pad to 64 bits. */
> >  struct ovs_key_nsh nsh; /* Network Service Header keys */
> >
> > -/* L4 (64-bit aligned) */
> >  ovs_be16 tp_src;/* TCP/UDP/SCTP source port/ICMP type. */
> >  ovs_be16 tp_dst;/* TCP/UDP/SCTP destination port/ICMP 
> > code. */
> >  ovs_be16 ct_tp_src; /* CT original tuple source port/ICMP 
> > type. */
> > @@ -179,7 +178,7 @@ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) 
> > + sizeof(uint32_t)
>
> About the following BUILD_ASSRT_DECL which is in include/openvswitch/flow.h 
> directly below struct flow.h:
> /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
> BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
>   == sizeof(struct flow_tnl) + sizeof(struct ovs_key_nsh) + 
> 300
>   && FLOW_WC_SEQ == 42);
>
> Should we increment the FLOW_WC_SEQ value? The comment reads:
> /* This sequence number should be incremented whenever anything involving 
> flows
>  * or the wildcarding of flows changes.  This will cause build assertion
>  * failures in places which likely need to be updated. */
>
> We haven't changed anything in the order or content of struct flow but we 
> have changed (fixed) how flows are wildcarded. It doesn't look like any of 
> the code asserting FLOW_WC_SEQ == 42 and relying on struct flow order and 
> content needs updating.
> Just wondering your/others thoughts about whether to increment to 43 or not.
>
> >  enum {
> >  FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst),
> >  FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src),
> > -FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src),
> > +FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, nd_target),
> >  };
> >  BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0);
> >  BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0);
>
> IIUC, we are moving the L4 boundary 56B earlier in struct flow. This means 
> L3/L4 segment boundary is still at a U64 boundary. I know we have 
> BUILD_ASSERTs checking this, but I just thought to double check myself.
>
> > diff --git a/tests/classifier.at b/tests/classifier.at
> > index cdcd72c156..a0a8fe0359 100644
> > --- a/tests/classifier.at
> > +++ b/tests/classifier.at
> > @@ -129,6 +129,33 @@ Datapath actions: 3
> >  OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
> >  AT_CLEANUP
> >
> > +AT_SETUP([flow classifier - ipv6 ND dependancy])
> > +OVS_VSWITCHD_START
> > +add_of_ports br0 1 2
> > +AT_DATA([flows.txt], [dnl
> > + table=0,priority=100,ipv6,ipv6_src=1000::/10 actions=resubmit(,1)
> > + table=0,priority=0 actions=NORMAL
> > + table=1,priority=110,ipv6,ipv6_dst=1000::3 actions=resubmit(,2)
> > + table=1,priority=100,ipv6,ipv6_dst=1000::4 actions=resubmit(,2)
> > + table=1,priority=0 actions=NORMAL
> > + 
> > table=2,priority=120,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1
> >  actions=NORMAL
> > + table=2,priority=100,tcp actions=NORMAL
> > + table=2,priority=100,icmp6 actions=NORMAL
> > + table=2,priority=0 actions=NORMAL
> > +])
> > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> > +
> > +# test ICMPv6 echo request (which should have no nd_target field)
> > +AT_CHECK([ovs-appctl ofproto/trace br0
> > "in_port=1,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_ds
> > t=1000::4,nw_proto=58,icmpv6_type=128,icmpv6_code=0"], [0], [stdout])
> > +AT_CHECK([tail -2 stdout], [0],
> > +  [Megaflow:
> > recirc_id=0,eth,icmp6,in_port=1,dl_src=f6:d2:b0:19:5e:7b,dl_dst=d2:49:19:91:78:fe,ipv6_src=1000::/10,i
> > pv6_dst=1000::4,nw_ttl=0,nw_frag=no
> > +Datapath actions: 100,2
> > +])
> > +
> > +AT_CLEANUP
> > +
> > 

Re: [ovs-dev] [PATCH ovn v8 2/3] Support LSP:options:requested-chassis as a list

2022-05-19 Thread 0-day Robot
Bleep bloop.  Greetings Ihar Hrachyshka, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0001 Support LSP:options:requested-chassis as a list
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot


Re: [ovs-dev] [PATCH] netdev-dpdk: Delay vhost mempool creation on multi-NUMA.

2022-05-19 Thread Kevin Traynor

On 17/05/2022 10:12, David Marchand wrote:

On Fri, May 13, 2022 at 5:35 PM Kevin Traynor  wrote:


A mempool is currently created for a vhost port when it is added.

The NUMA info of the vhost port is not known until a device is added to
the port, so on multi-NUMA systems the initial NUMA node for the mempool
is a best guess based on vswitchd affinity.

When a device is added to the vhost port, the NUMA info can be checked
and if the guess was incorrect a mempool on the correct NUMA node
created.

The current scheme can have the effect of creating a mempool on a NUMA
node that will not be needed and at least for a certain time period
requires memory.

It is also difficult for a user trying to provision memory on different
NUMA nodes, if they are not sure which NUMA node the initial mempool
for a vhost port will be on.

This patch delays the creation of the mempool for a vhost port on
multi-NUMA systems until the vhost NUMA info is known.


I prefer having a single behavior for mono and multi numa (=> no
question about which behavior resulted in mempool
creation/attribution).
Though I don't have a strong opinion against having this difference in behavior.



Originally, I had the same behaviour for both single and multi-numa. I 
changed to only multi-numa as it was the multi-NUMA issue about 
switching mempools that I mostly wanted to remove.


I don't see any downsides for having the same behaviour on single NUMA.
It might be considered a bit of an improvement since the socket-limit
defaults have been removed: the mempool won't be created until it is
needed, and if a device is never added, it won't be created at all.


So, wasn't the main focus of the patch, but yes, it seems like it can be 
an improvement for single NUMA too. I will change to do this (unless I 
find some issue during testing).






Signed-off-by: Kevin Traynor 


Otherwise, this new behavior for multi numa and the patch lgtm.

I tested with a dual numa system, ovs running with pmd threads on numa
1, one bridge with vhost-user ports serving one vm in numa0 and one vm
in numa1.

Running ovs-appctl netdev-dpdk/get-mempool-info | grep ovs,
- before patch, no vm started
mempool @0x17f703180
 ^^
 We can notice that a numa0 mempool was created
even if PMD threads are on numa1 and no port uses this mempool.
- before patch, once vm in numa1 is started,
mempool @0x17f703180
mempool @0x11ffe01e40



- before patch, once vm in numa0 is started too,
mempool @0x17f703180
mempool @0x11ffe01e40


- after patch, no vm started

- after patch, once vm in numa1 is started,
mempool @0x11ffe01e40
 ^^
 Only one mempool in numa1
- after patch, once vm in numa0 is started too,
mempool @0x11ffe01e40
mempool @0x17f51dcc0



This is a nice test, having multiple VMs using both NUMAs and making sure
there is no confusion between them. Thanks for reviewing and testing.




Reviewed-by: David Marchand 





[ovs-dev] [PATCH ovn v8 3/3] Clone packets to both port chassis

2022-05-19 Thread Ihar Hrachyshka
When multiple chassis are set in requested-chassis, port binding is
configured in multiple cluster locations. In a live migration
scenario, only one of the locations runs the workload at a particular
point in time. Yet, it's expected that the workload may switch to
running at an additional chassis at any moment during live migration
(depending on libvirt / qemu migration progress). To make that switch
near instant, do the following:

When a port sends a packet to another port that has multiple
chassis, then, in addition to sending the packet to the main chassis,
also send it to the additional chassis. When the sending port is bound
on either the main or an additional chassis, handle the packet locally
and also send it to all other chassis.

This is achieved with additional flows in tables 37 and 38.

Signed-off-by: Ihar Hrachyshka 
---
 controller/binding.c  |   2 +-
 controller/binding.h  |   3 +
 controller/physical.c | 376 ++-
 ovn-nb.xml|   9 +
 ovn-sb.xml|   9 +
 tests/ovn.at  | 693 ++
 6 files changed, 948 insertions(+), 144 deletions(-)

diff --git a/controller/binding.c b/controller/binding.c
index 066eec9a5..983785df9 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -1007,7 +1007,7 @@ update_port_additional_encap_if_needed(
 return true;
 }
 
-static bool
+bool
 is_additional_chassis(const struct sbrec_port_binding *pb,
   const struct sbrec_chassis *chassis_rec)
 {
diff --git a/controller/binding.h b/controller/binding.h
index 6815d1fd6..c60af8402 100644
--- a/controller/binding.h
+++ b/controller/binding.h
@@ -195,4 +195,7 @@ enum en_lport_type {
 
 enum en_lport_type get_lport_type(const struct sbrec_port_binding *);
 
+bool is_additional_chassis(const struct sbrec_port_binding *pb,
+   const struct sbrec_chassis *chassis_rec);
+
 #endif /* controller/binding.h */
diff --git a/controller/physical.c b/controller/physical.c
index 36f265a8c..af1f41c04 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -60,6 +60,11 @@ struct zone_ids {
 int snat;   /* MFF_LOG_SNAT_ZONE. */
 };
 
+struct tunnel {
+struct ovs_list list_node;
+const struct chassis_tunnel *tun;
+};
+
 static void
 load_logical_ingress_metadata(const struct sbrec_port_binding *binding,
   const struct zone_ids *zone_ids,
@@ -286,25 +291,83 @@ match_outport_dp_and_port_keys(struct match *match,
 match_set_reg(match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
 }
 
+static struct sbrec_encap *
+find_additional_encap_for_chassis(const struct sbrec_port_binding *pb,
+  const struct sbrec_chassis *chassis_rec)
+{
+for (int i = 0; i < pb->n_additional_encap; i++) {
+if (!strcmp(pb->additional_encap[i]->chassis_name,
+chassis_rec->name)) {
+return pb->additional_encap[i];
+}
+}
+return NULL;
+}
+
+static struct ovs_list *
+get_remote_tunnels(const struct sbrec_port_binding *binding,
+   const struct sbrec_chassis *chassis,
+   const struct hmap *chassis_tunnels)
+{
+const struct chassis_tunnel *tun;
+
+struct ovs_list *tunnels = xmalloc(sizeof *tunnels);
+ovs_list_init(tunnels);
+
+if (binding->chassis && binding->chassis != chassis) {
+tun = get_port_binding_tun(binding->encap, binding->chassis,
+   chassis_tunnels);
+if (!tun) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_WARN_RL(
+, "Failed to locate tunnel to reach main chassis %s "
+ "for port %s. Cloning packets disabled for the chassis.",
+binding->chassis->name, binding->logical_port);
+} else {
+struct tunnel *tun_elem = xmalloc(sizeof *tun_elem);
+tun_elem->tun = tun;
+ovs_list_push_back(tunnels, &tun_elem->list_node);
+}
+}
+
+for (int i = 0; i < binding->n_additional_chassis; i++) {
+if (binding->additional_chassis[i] == chassis) {
+continue;
+}
+const struct sbrec_encap *additional_encap;
+additional_encap = find_additional_encap_for_chassis(binding, chassis);
+tun = get_port_binding_tun(additional_encap,
+   binding->additional_chassis[i],
+   chassis_tunnels);
+if (!tun) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_WARN_RL(
+&rl, "Failed to locate tunnel to reach additional chassis %s "
+ "for port %s. Cloning packets disabled for the chassis.",
+binding->additional_chassis[i]->name, binding->logical_port);
+continue;
+}
+struct tunnel *tun_elem = xmalloc(sizeof 

[ovs-dev] [PATCH ovn v8 2/3] Support LSP:options:requested-chassis as a list

2022-05-19 Thread Ihar Hrachyshka
When the option is set to a comma separated list of chassis names, OVN
will attempt to bind the port at any number of other locations in
addition to the main chassis.

This is useful in live migration scenarios where it's important to
prepare the environment for workloads to move to, avoiding costly flow
configuration at the moment of the final port binding location change.

This may also be useful in port mirroring scenarios.
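
As a minimal usage sketch (the port and chassis names are hypothetical):

$ ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1,hv2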

Corresponding database fields (pb->additional_chassis,
pb->requested_additional_chassis, pb->additional_encap) are introduced
as part of the patch. These are similar to fields designed to track the
main chassis (->chassis, ->requested_chassis, ->encap). But because we
support any number of additional chassis, these fields are lists.

Signed-off-by: Ihar Hrachyshka 
---
 NEWS |   1 +
 controller/binding.c | 294 ---
 controller/lport.c   |  46 ---
 controller/lport.h   |  11 +-
 northd/northd.c  |  62 ++---
 northd/ovn-northd.c  |   4 +-
 ovn-nb.xml   |  20 ++-
 ovn-sb.ovsschema |  17 ++-
 ovn-sb.xml   |  63 --
 tests/ovn.at | 218 
 10 files changed, 632 insertions(+), 104 deletions(-)

diff --git a/NEWS b/NEWS
index f18a7d766..856d683f3 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@ Post v22.03.0
 explicitly request routers to reply to any ARP/ND request for a VIP
 (when set to "all") and only for reachable VIPs (when set to "reachable"
 or by default).
+  - Support list of chassis for Logical_Switch_Port:options:requested-chassis.
 
 OVN v22.03.0 - 11 Mar 2022
 --
diff --git a/controller/binding.c b/controller/binding.c
index 6bdbe9912..066eec9a5 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -930,6 +930,130 @@ claimed_lport_set_up(const struct sbrec_port_binding *pb,
 }
 }
 
+typedef void (*set_func)(const struct sbrec_port_binding *pb,
+ const struct sbrec_encap *);
+
+static bool
+update_port_encap_if_needed(const struct sbrec_port_binding *pb,
+const struct sbrec_chassis *chassis_rec,
+const struct ovsrec_interface *iface_rec,
+bool sb_readonly)
+{
+const struct sbrec_encap *encap_rec =
+sbrec_get_port_encap(chassis_rec, iface_rec);
+if ((encap_rec && pb->encap != encap_rec) ||
+(!encap_rec && pb->encap)) {
+if (sb_readonly) {
+return false;
+}
+sbrec_port_binding_set_encap(pb, encap_rec);
+}
+return true;
+}
+
+static void
+append_additional_encap(const struct sbrec_port_binding *pb,
+const struct sbrec_encap *encap)
+{
+struct sbrec_encap **additional_encap = xmalloc(
+(pb->n_additional_encap + 1) * (sizeof *additional_encap));
+memcpy(additional_encap, pb->additional_encap,
+   pb->n_additional_encap * (sizeof *additional_encap));
+additional_encap[pb->n_additional_encap] = (struct sbrec_encap *) encap;
+sbrec_port_binding_set_additional_encap(
+pb, additional_encap, pb->n_additional_encap + 1);
+free(additional_encap);
+}
+
+static void
+remove_additional_encap_for_chassis(const struct sbrec_port_binding *pb,
+const struct sbrec_chassis *chassis_rec)
+{
+struct sbrec_encap **additional_encap = xmalloc(
+pb->n_additional_encap * (sizeof *additional_encap));
+
+int idx = 0;
+for (int i = 0; i < pb->n_additional_encap; i++) {
+if (!strcmp(pb->additional_encap[i]->chassis_name,
+chassis_rec->name)) {
+continue;
+}
+additional_encap[idx++] = pb->additional_encap[i];
+}
+sbrec_port_binding_set_additional_encap(pb, additional_encap, idx);
+free(additional_encap);
+}
+
+static bool
+update_port_additional_encap_if_needed(
+const struct sbrec_port_binding *pb,
+const struct sbrec_chassis *chassis_rec,
+const struct ovsrec_interface *iface_rec,
+bool sb_readonly)
+{
+const struct sbrec_encap *encap_rec =
+sbrec_get_port_encap(chassis_rec, iface_rec);
+if (encap_rec) {
+for (int i = 0; i < pb->n_additional_encap; i++) {
+if (pb->additional_encap[i] == encap_rec) {
+return true;
+}
+}
+if (sb_readonly) {
+return false;
+}
+append_additional_encap(pb, encap_rec);
+}
+return true;
+}
+
+static bool
+is_additional_chassis(const struct sbrec_port_binding *pb,
+  const struct sbrec_chassis *chassis_rec)
+{
+for (int i = 0; i < pb->n_additional_chassis; i++) {
+if (pb->additional_chassis[i] == chassis_rec) {
+return true;
+}
+}
+return false;
+}
+
+static void
+append_additional_chassis(const struct sbrec_port_binding *pb,
+  

[ovs-dev] [PATCH ovn v8 1/3] Update port-up on main chassis only

2022-05-19 Thread Ihar Hrachyshka
In a future patch, there will be a scenario where the same port has
attachments at multiple (specifically, 2) chassis, so make sure that
'up' property is updated by the main chassis only.

Signed-off-by: Ihar Hrachyshka 
---
 controller/binding.c|  9 ++---
 controller/binding.h|  2 ++
 controller/if-status.c  | 15 ++-
 controller/if-status.h  |  1 +
 controller/ovn-controller.c |  4 ++--
 5 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/controller/binding.c b/controller/binding.c
index 0b6e83a74..6bdbe9912 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -698,6 +698,7 @@ local_binding_is_down(struct shash *local_bindings, const 
char *pb_name)
 
 void
 local_binding_set_up(struct shash *local_bindings, const char *pb_name,
+ const struct sbrec_chassis *chassis_rec,
  const char *ts_now_str, bool sb_readonly,
  bool ovs_readonly)
 {
@@ -717,8 +718,8 @@ local_binding_set_up(struct shash *local_bindings, const 
char *pb_name,
 ts_now_str);
 }
 
-if (!sb_readonly && lbinding && b_lport && b_lport->pb->n_up
-&& !b_lport->pb->up[0]) {
+if (!sb_readonly && lbinding && b_lport && b_lport->pb->n_up &&
+!b_lport->pb->up[0] && b_lport->pb->chassis == chassis_rec) {
 VLOG_INFO("Setting lport %s up in Southbound", pb_name);
 binding_lport_set_up(b_lport, sb_readonly);
 LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) {
@@ -729,6 +730,7 @@ local_binding_set_up(struct shash *local_bindings, const 
char *pb_name,
 
 void
 local_binding_set_down(struct shash *local_bindings, const char *pb_name,
+   const struct sbrec_chassis *chassis_rec,
bool sb_readonly, bool ovs_readonly)
 {
 struct local_binding *lbinding =
@@ -743,7 +745,8 @@ local_binding_set_down(struct shash *local_bindings, const 
char *pb_name,
 OVN_INSTALLED_EXT_ID);
 }
 
-if (!sb_readonly && b_lport && b_lport->pb->n_up && b_lport->pb->up[0]) {
+if (!sb_readonly && b_lport && b_lport->pb->n_up && b_lport->pb->up[0] &&
+(!b_lport->pb->chassis || b_lport->pb->chassis == chassis_rec)) {
 VLOG_INFO("Setting lport %s down in Southbound", pb_name);
 binding_lport_set_down(b_lport, sb_readonly);
 LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) {
diff --git a/controller/binding.h b/controller/binding.h
index e49e1ebb6..6815d1fd6 100644
--- a/controller/binding.h
+++ b/controller/binding.h
@@ -155,9 +155,11 @@ ofp_port_t local_binding_get_lport_ofport(const struct 
shash *local_bindings,
 bool local_binding_is_up(struct shash *local_bindings, const char *pb_name);
 bool local_binding_is_down(struct shash *local_bindings, const char *pb_name);
 void local_binding_set_up(struct shash *local_bindings, const char *pb_name,
+  const struct sbrec_chassis *chassis_rec,
   const char *ts_now_str, bool sb_readonly,
   bool ovs_readonly);
 void local_binding_set_down(struct shash *local_bindings, const char *pb_name,
+const struct sbrec_chassis *chassis_rec,
 bool sb_readonly, bool ovs_readonly);
 
 void binding_register_ovs_idl(struct ovsdb_idl *);
diff --git a/controller/if-status.c b/controller/if-status.c
index dbdf8b66f..ad61844d8 100644
--- a/controller/if-status.c
+++ b/controller/if-status.c
@@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct if_status_mgr *, 
struct ovs_iface *,
 
 static void if_status_mgr_update_bindings(
 struct if_status_mgr *mgr, struct local_binding_data *binding_data,
+const struct sbrec_chassis *,
 bool sb_readonly, bool ovs_readonly);
 
 struct if_status_mgr *
@@ -306,6 +307,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
 void
 if_status_mgr_run(struct if_status_mgr *mgr,
   struct local_binding_data *binding_data,
+  const struct sbrec_chassis *chassis_rec,
   bool sb_readonly, bool ovs_readonly)
 {
 struct ofctrl_acked_seqnos *acked_seqnos =
@@ -328,8 +330,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
 ofctrl_acked_seqnos_destroy(acked_seqnos);
 
 /* Update binding states. */
-if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
-  ovs_readonly);
+if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
+  sb_readonly, ovs_readonly);
 }
 
 static void
@@ -390,6 +392,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr, struct 
ovs_iface *iface,
 static void
 if_status_mgr_update_bindings(struct if_status_mgr *mgr,
   struct local_binding_data *binding_data,
+  const struct sbrec_chassis *chassis_rec,
 

[ovs-dev] [PATCH ovn v8 0/3] Support multiple requested-chassis

2022-05-19 Thread Ihar Hrachyshka
This is the same as v7 except that the build failures due to switch
statements are fixed. This series is still missing the RARP activation strategy patch, which
will be sent separately.

v8: build warnings fixed.
v7: dropped the patch that tags all traffic from tunnels as LOCAL_ONLY
(unneeded now that this series doesn't handle local traffic in
remote table).
v7: local_binding_set_up: don't set up when pb->chassis is NULL.
v7: added more test scenarios: 3 chassis, flipping chassis roles (main
to additional and vice versa), check behavior when one of chassis
doesn't claim a port.
v7: don't update_lport_tracking when port is not newly claimed.
v7: release localports from additional chassis too.
v7: properly handle binding when ->chassis not set but chassis name can
be found in the requested-chassis option.
v7: refactored consider_port_binding to simplify the logic, remove
redundant code paths.
v7: remove redundant flows that were left from prior versions of the
series.
v6: rebased, solved git conflicts.
v5: moved activation flows from table=8 to table=0.
v5: removed pause=true from rarp activation flow since we don't rely on
continuations.
v5: make rarp handle resubmit() admitted RARP packet to table=8. This
allows to avoid holding pending packets / waiting for flows deleted
etc.
v5: when cloning packets destined to a local binding to additional
chassis, clone them to tunnels in table=37, not table=38, to stay
consistent with tables' intent.
v5: dropped patch that added chassis-mirroring-enabled option. The
option doesn't resolve the ARP flipping issue. Instead, just
document the behavior of localnet attached switches when ports are
multi-chassis.
v5: (minor) set match's port and dp key inside
put_remote_port_redirect_overlay.
v4: redesign to reuse requested-chassis option
v4: support >2 chassis per port
v4: allow to disable tunneling enforcement when n_chassis >= 2
v3: re-sent as a single series
v2: added ddlog implementation
v2: re-inject RARP packet after vswitch updates flows
v1: split into pieces
v1: renamed options: migration-destination ->
 requested-additional-chassis,
 migration-unblocked ->
 additional-chassis-activated
v1: introduced options:activation-strategy=rarp to allow for other
strategies / having default no-op strategy
v1: implement in-memory port-activated tracking to avoid races
v1: numerous code cleanup / bug fixes
v1: special handling for localnet attached switches
v0: initial draft (single patch)

Ihar Hrachyshka (3):
  Update port-up on main chassis only
  Support LSP:options:requested-chassis as a list
  Clone packets to both port chassis

 NEWS|   1 +
 controller/binding.c| 303 ++--
 controller/binding.h|   5 +
 controller/if-status.c  |  15 +-
 controller/if-status.h  |   1 +
 controller/lport.c  |  46 +-
 controller/lport.h  |  11 +-
 controller/ovn-controller.c |   4 +-
 controller/physical.c   | 376 +--
 northd/northd.c |  62 ++-
 northd/ovn-northd.c |   4 +-
 ovn-nb.xml  |  29 +-
 ovn-sb.ovsschema|  17 +-
 ovn-sb.xml  |  72 ++-
 tests/ovn.at| 911 
 15 files changed, 1600 insertions(+), 257 deletions(-)

-- 
2.34.1



[ovs-dev] [PATCH ovn v2 3/3] northd: Add generic port security logical flows.

2022-05-19 Thread numans
From: Numan Siddique 

ovn-northd will now make use of the newly added OVN actions -
check_in_port_sec and check_out_port_sec to implement the
port security for the logical ports.

Port security rules remain the same and nothing changes from
CMS's perspective.

Scale results:
Used a Northbound database from a deployment of 120 node cluster.
Number of logical switch ports with port security configured: 13711

With vanilla ovn-northd
---
Number of logical flows : 208061
Avg time taken to run build_lflows() : 1301 msec
Size of Southbound database after compaction: 104M

With ovn-northd using this feature
-
Number of logical flows : 83396
Avg time taken to run build_lflows() : 560  msec
Size of Southbound database after compaction: 45M

This patch shows significant improvements and reduces the number of
lflows and ovn-northd CPU usage.  This would benefit large scale
deployments.  This patch also shows significant improvements in
ovn-controller CPU utilization as there will be fewer logical flows
to process.

This change is not backward compatible.  As per the update/upgrade
recommendations, CMS should first update ovn-controllers with
the newer versions before updating ovn-northd.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2078927
Suggested-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Signed-off-by: Numan Siddique 
---
 northd/northd.c | 557 +++-
 northd/ovn-northd.8.xml | 263 +--
 tests/ovn-northd.at | 431 ---
 tests/ovn.at|  77 +++---
 4 files changed, 536 insertions(+), 792 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 6138d42ad9..f0c345015d 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -107,32 +107,31 @@ enum ovn_datapath_type {
 enum ovn_stage {
 #define PIPELINE_STAGES   \
 /* Logical switch ingress stages. */  \
-PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,0, "ls_in_port_sec_l2")   \
-PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,1, "ls_in_port_sec_ip")   \
-PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,2, "ls_in_port_sec_nd")   \
-PIPELINE_STAGE(SWITCH, IN,  LOOKUP_FDB ,3, "ls_in_lookup_fdb")\
-PIPELINE_STAGE(SWITCH, IN,  PUT_FDB,4, "ls_in_put_fdb")   \
-PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,5, "ls_in_pre_acl")   \
-PIPELINE_STAGE(SWITCH, IN,  PRE_LB, 6, "ls_in_pre_lb")\
-PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   7, "ls_in_pre_stateful")  \
-PIPELINE_STAGE(SWITCH, IN,  ACL_HINT,   8, "ls_in_acl_hint")  \
-PIPELINE_STAGE(SWITCH, IN,  ACL,9, "ls_in_acl")   \
-PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,  10, "ls_in_qos_mark")  \
-PIPELINE_STAGE(SWITCH, IN,  QOS_METER, 11, "ls_in_qos_meter") \
-PIPELINE_STAGE(SWITCH, IN,  LB,12, "ls_in_lb")  \
-PIPELINE_STAGE(SWITCH, IN,  ACL_AFTER_LB,  13, "ls_in_acl_after_lb")  \
-PIPELINE_STAGE(SWITCH, IN,  STATEFUL,  14, "ls_in_stateful")  \
-PIPELINE_STAGE(SWITCH, IN,  PRE_HAIRPIN,   15, "ls_in_pre_hairpin")   \
-PIPELINE_STAGE(SWITCH, IN,  NAT_HAIRPIN,   16, "ls_in_nat_hairpin")   \
-PIPELINE_STAGE(SWITCH, IN,  HAIRPIN,   17, "ls_in_hairpin")   \
-PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,18, "ls_in_arp_rsp")   \
-PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  19, "ls_in_dhcp_options")  \
-PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 20, "ls_in_dhcp_response") \
-PIPELINE_STAGE(SWITCH, IN,  DNS_LOOKUP,21, "ls_in_dns_lookup")\
-PIPELINE_STAGE(SWITCH, IN,  DNS_RESPONSE,  22, "ls_in_dns_response")  \
-PIPELINE_STAGE(SWITCH, IN,  EXTERNAL_PORT, 23, "ls_in_external_port") \
-PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,   24, "ls_in_l2_lkup")   \
-PIPELINE_STAGE(SWITCH, IN,  L2_UNKNOWN,25, "ls_in_l2_unknown")\
+PIPELINE_STAGE(SWITCH, IN,  CHECK_PORT_SEC, 0, "ls_in_check_port_sec")   \
+PIPELINE_STAGE(SWITCH, IN,  APPLY_PORT_SEC, 1, "ls_in_apply_port_sec")   \
+PIPELINE_STAGE(SWITCH, IN,  LOOKUP_FDB ,2, "ls_in_lookup_fdb")\
+PIPELINE_STAGE(SWITCH, IN,  PUT_FDB,3, "ls_in_put_fdb")   \
+PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,4, "ls_in_pre_acl")   \
+PIPELINE_STAGE(SWITCH, IN,  PRE_LB, 5, "ls_in_pre_lb")\
+PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   6, "ls_in_pre_stateful")  \
+PIPELINE_STAGE(SWITCH, IN,  ACL_HINT,   7, "ls_in_acl_hint")  \
+PIPELINE_STAGE(SWITCH, IN,  ACL,8, "ls_in_acl")   \
+PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,   9, "ls_in_qos_mark")  \
+PIPELINE_STAGE(SWITCH, IN,  QOS_METER, 10, "ls_in_qos_meter") \
+PIPELINE_STAGE(SWITCH, IN,  LB,11, "ls_in_lb")\
+PIPELINE_STAGE(SWITCH, IN,  

[ovs-dev] [PATCH ovn v2 2/3] actions: Add new actions check_in_port_sec and check_out_port_sec.

2022-05-19 Thread numans
From: Numan Siddique 

The check_in_port_sec action runs the port security checks for the
incoming packet from the logical input port (inport) and stores the
result in a destination register bit.  If the port security check
fails, 1 is stored in that bit.

The check_out_port_sec action runs the port security checks for the
logical output port (outport) and stores the result in a destination
register bit.  If the port security check fails, 1 is stored in that
bit.

Usage:
reg0[1] = check_in_port_sec();
reg1[1] = check_out_port_sec();

The previous commit added the OF rules for port security in openflow
tables 73 and 74 for the in-port security checks and in table 75 for
the out-port security checks.  These logical actions simply translate
to resubmits to those tables.

An upcoming patch will make use of these logical actions in the logical
switch pipeline.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2078927
Suggested-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Signed-off-by: Numan Siddique 
---
 include/ovn/actions.h |   2 +
 lib/actions.c |  75 --
 ovn-sb.xml|  39 ++
 tests/ovn.at  |  32 +
 tests/test-ovn.c  |   2 +
 utilities/ovn-trace.c | 313 ++
 6 files changed, 455 insertions(+), 8 deletions(-)

diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 49a2e26124..1ae496960e 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -116,6 +116,8 @@ struct ovn_extend_table;
 OVNACT(PUT_FDB,   ovnact_put_fdb) \
 OVNACT(GET_FDB,   ovnact_get_fdb) \
 OVNACT(LOOKUP_FDB,ovnact_lookup_fdb)  \
+OVNACT(CHECK_IN_PORT_SEC,  ovnact_result) \
+OVNACT(CHECK_OUT_PORT_SEC, ovnact_result) \
 
 /* enum ovnact_type, with a member OVNACT_ for each action. */
 enum OVS_PACKED_ENUM ovnact_type {
diff --git a/lib/actions.c b/lib/actions.c
index a9c27600c7..36d4a33ae5 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -4004,19 +4004,20 @@ format_CHK_LB_HAIRPIN_REPLY(const struct ovnact_result 
*res, struct ds *s)
 }
 
 static void
-encode_chk_lb_hairpin__(const struct ovnact_result *res,
-uint8_t hairpin_table,
-struct ofpbuf *ofpacts)
+encode_result_action__(const struct ovnact_result *res,
+   uint8_t resubmit_table,
+   int log_flags_result_bit,
+   struct ofpbuf *ofpacts)
 {
 struct mf_subfield dst = expr_resolve_field(>dst);
 ovs_assert(dst.field);
-put_load(0, MFF_LOG_FLAGS, MLF_LOOKUP_LB_HAIRPIN_BIT, 1, ofpacts);
-emit_resubmit(ofpacts, hairpin_table);
+put_load(0, MFF_LOG_FLAGS, log_flags_result_bit, 1, ofpacts);
+emit_resubmit(ofpacts, resubmit_table);
 
 struct ofpact_reg_move *orm = ofpact_put_REG_MOVE(ofpacts);
 orm->dst = dst;
 orm->src.field = mf_from_id(MFF_LOG_FLAGS);
-orm->src.ofs = MLF_LOOKUP_LB_HAIRPIN_BIT;
+orm->src.ofs = log_flags_result_bit;
 orm->src.n_bits = 1;
 }
 
@@ -4025,7 +4026,8 @@ encode_CHK_LB_HAIRPIN(const struct ovnact_result *res,
   const struct ovnact_encode_params *ep,
   struct ofpbuf *ofpacts)
 {
-encode_chk_lb_hairpin__(res, ep->lb_hairpin_ptable, ofpacts);
+encode_result_action__(res, ep->lb_hairpin_ptable,
+   MLF_LOOKUP_LB_HAIRPIN_BIT, ofpacts);
 }
 
 static void
@@ -4033,7 +4035,8 @@ encode_CHK_LB_HAIRPIN_REPLY(const struct ovnact_result 
*res,
 const struct ovnact_encode_params *ep,
 struct ofpbuf *ofpacts)
 {
-encode_chk_lb_hairpin__(res, ep->lb_hairpin_reply_ptable, ofpacts);
+encode_result_action__(res, ep->lb_hairpin_reply_ptable,
+   MLF_LOOKUP_LB_HAIRPIN_BIT, ofpacts);
 }
 
 static void
@@ -4216,6 +4219,54 @@ ovnact_lookup_fdb_free(struct ovnact_lookup_fdb *get_fdb 
OVS_UNUSED)
 {
 }
 
+static void
+parse_check_in_port_sec(struct action_context *ctx,
+const struct expr_field *dst,
+struct ovnact_result *dl)
+{
+parse_ovnact_result(ctx, "check_in_port_sec", NULL, dst, dl);
+}
+
+static void
+format_CHECK_IN_PORT_SEC(const struct ovnact_result *dl, struct ds *s)
+{
+expr_field_format(>dst, s);
+ds_put_cstr(s, " = check_in_port_sec();");
+}
+
+static void
+encode_CHECK_IN_PORT_SEC(const struct ovnact_result *dl,
+ const struct ovnact_encode_params *ep,
+ struct ofpbuf *ofpacts)
+{
+encode_result_action__(dl, ep->in_port_sec_ptable,
+   MLF_CHECK_PORT_SEC_BIT, ofpacts);
+}
+
+static void
+parse_check_out_port_sec(struct action_context *ctx,
+ const struct expr_field *dst,
+ struct ovnact_result *dl)
+{
+parse_ovnact_result(ctx, "check_out_port_sec", NULL, dst, dl);
+}
+
+static 

[ovs-dev] [PATCH ovn v2 1/3] ovn-controller: Add OF rules for port security.

2022-05-19 Thread numans
From: Numan Siddique 

ovn-controller will now generate OF rules for the in-port security and
out-port security checks in OF tables 73, 74 and 75.  These flows
will be added if a port binding has port security defined in the
Port_Binding.Port_Security column, which is newly added in this patch.

The idea of this patch is to program these OF rules directly within
ovn-controller instead of having ovn-northd generate logical flows.
This helps reduce the overall number of logical flows in the
Southbound database.

Upcoming patches will add the necessary OVN actions that ovn-northd
can make use of.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2078927
Suggested-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Signed-off-by: Numan Siddique 
---
 controller/binding.c |  78 +++-
 controller/binding.h |  23 +-
 controller/lflow.c   | 792 ++-
 controller/lflow.h   |   4 +
 controller/ovn-controller.c  |  21 +-
 include/ovn/actions.h|   4 +
 include/ovn/logical-fields.h |   1 +
 ovn-sb.ovsschema |   7 +-
 ovn-sb.xml   |  15 +
 tests/ovn.at | 288 +
 10 files changed, 1199 insertions(+), 34 deletions(-)

diff --git a/controller/binding.c b/controller/binding.c
index 0b6e83a74f..92baebd296 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -524,24 +524,6 @@ update_active_pb_ras_pd(const struct sbrec_port_binding 
*pb,
 }
 }
 
-/* This structure represents a logical port (or port binding)
- * which is associated with 'struct local_binding'.
- *
- * An instance of 'struct binding_lport' is created for a logical port
- *  - If the OVS interface's iface-id corresponds to the logical port.
- *  - If it is a container or virtual logical port and its parent
- *has a 'local binding'.
- *
- */
-struct binding_lport {
-struct ovs_list list_node; /* Node in local_binding.binding_lports. */
-
-char *name;
-const struct sbrec_port_binding *pb;
-struct local_binding *lbinding;
-enum en_lport_type type;
-};
-
 static struct local_binding *local_binding_create(
 const char *name, const struct ovsrec_interface *);
 static void local_binding_add(struct shash *local_bindings,
@@ -584,6 +566,11 @@ static const struct sbrec_port_binding 
*binding_lport_get_parent_pb(
 struct binding_lport *b_lprt);
 static struct binding_lport *binding_lport_check_and_cleanup(
 struct binding_lport *, struct shash *b_lports);
+static bool binding_lport_has_port_sec_changed(
+struct binding_lport *, const struct sbrec_port_binding *);
+static void binding_lport_clear_port_sec(struct binding_lport *);
+static bool binding_lport_update_port_sec(
+struct binding_lport *, const struct sbrec_port_binding *);
 
 static char *get_lport_type_str(enum en_lport_type lport_type);
 static bool ovs_iface_matches_lport_iface_id_ver(
@@ -1107,6 +1094,11 @@ consider_vif_lport_(const struct sbrec_port_binding *pb,
b_ctx_out->tracked_dp_bindings);
 update_related_lport(pb, b_ctx_out);
 update_local_lports(pb->logical_port, b_ctx_out);
+if (binding_lport_update_port_sec(b_lport, pb) &&
+b_ctx_out->tracked_dp_bindings) {
+tracked_datapath_lport_add(pb, TRACKED_RESOURCE_UPDATED,
+   b_ctx_out->tracked_dp_bindings);
+}
 if (b_lport->lbinding->iface && qos_map && b_ctx_in->ovs_idl_txn) {
 get_qos_params(pb, qos_map);
 }
@@ -2799,6 +2791,7 @@ binding_lport_destroy(struct binding_lport *b_lport)
 ovs_list_remove(_lport->list_node);
 }
 
+binding_lport_clear_port_sec(b_lport);
 free(b_lport->name);
 free(b_lport);
 }
@@ -2925,6 +2918,55 @@ cleanup:
 }
 
 
+static bool
+binding_lport_has_port_sec_changed(struct binding_lport *b_lport,
+   const struct sbrec_port_binding *pb)
+{
+if (b_lport->n_port_security != pb->n_port_security) {
+return true;
+}
+
+for (size_t i = 0; i < b_lport->n_port_security; i++) {
+if (strcmp(b_lport->port_security[i], pb->port_security[i])) {
+return true;
+}
+}
+
+return false;
+}
+
+static void
+binding_lport_clear_port_sec(struct binding_lport *b_lport)
+{
+for (size_t i = 0; i < b_lport->n_port_security; i++) {
+free(b_lport->port_security[i]);
+}
+free(b_lport->port_security);
+b_lport->n_port_security = 0;
+}
+
+static bool
+binding_lport_update_port_sec(struct binding_lport *b_lport,
+  const struct sbrec_port_binding *pb)
+{
+if (binding_lport_has_port_sec_changed(b_lport, pb)) {
+binding_lport_clear_port_sec(b_lport);
+b_lport->port_security =
+pb->n_port_security ?
+xmalloc(pb->n_port_security * sizeof *b_lport->port_security) :
+NULL;
+
+   

[ovs-dev] [PATCH ovn v2 0/3] Adding generic port security flows.

2022-05-19 Thread numans
From: Numan Siddique 

This patch series adds generic logical flows for port security in
the logical switch pipeline and pushes the actual port security
implementation logic to ovn-controller from ovn-northd.

ovn-northd will now add logical flows like:

table=0 (ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[14] = check_in_port_sec(); next;)
table=1 (ls_in_apply_port_sec), priority=50   , match=(reg0[14] == 1), action=(drop;)
table=1 (ls_in_apply_port_sec), priority=0, match=(1), action=(next;)

The OVN action check_in_port_sec() resubmits the packet to openflow
table 73.  ovn-controller will add port security flows in tables 73, 74
and 75 for all the logical ports it has claimed.  The port security
information is passed down via the Port_Binding table in the Southbound
database.

The main motivation for this patch series is to address scale concerns:
it reduces the number of logical flows and ovn-northd CPU utilization
time.

Did some scale testing and below are the results:

Used a Northbound database from a deployment of a 120-node cluster.
Number of logical switch ports with port security configured: 13711

With vanilla ovn-northd
---
Number of logical flows : 208061
Avg time taken to run build_lflows() : 1301 msec
Size of Southbound database after compaction: 104M

With ovn-northd using this feature
-
Number of logical flows : 83396
Avg time taken to run build_lflows() : 560  msec
Size of Southbound database after compaction: 45M


v1 -> v2
---
  * Rebased to resolve conflicts.
  * Added Mark's Acks.

Numan Siddique (3):
  ovn-controller: Add OF rules for port security.
  actions: Add new actions check_in_port_sec and check_out_port_sec.
  northd: Add generic port security logical flows.

 controller/binding.c |  78 +++-
 controller/binding.h |  23 +-
 controller/lflow.c   | 792 ++-
 controller/lflow.h   |   4 +
 controller/ovn-controller.c  |  21 +-
 include/ovn/actions.h|   6 +
 include/ovn/logical-fields.h |   1 +
 lib/actions.c|  75 +++-
 northd/northd.c  | 557 +---
 northd/ovn-northd.8.xml  | 263 ++--
 ovn-sb.ovsschema |   7 +-
 ovn-sb.xml   |  54 +++
 tests/ovn-northd.at  | 431 ---
 tests/ovn.at | 381 +++--
 tests/test-ovn.c |   2 +
 utilities/ovn-trace.c| 313 ++
 16 files changed, 2182 insertions(+), 826 deletions(-)

-- 
2.35.3



Re: [ovs-dev] [PATCH dpdk-latest] system-dpdk: Update vhost tests to be compatible with DPDK 22.03.

2022-05-19 Thread Ferriter, Cian



> -Original Message-
> From: Pai G, Sunil 
> Sent: Tuesday 17 May 2022 15:11
> To: d...@openvswitch.org
> Cc: Stokes, Ian ; david.march...@redhat.com; 
> maxime.coque...@redhat.com;
> Ferriter, Cian 
> Subject: [PATCH dpdk-latest] system-dpdk: Update vhost tests to be compatible 
> with DPDK 22.03.
> 
> The DPDK commit [1] improves the socket layer logs in the vhost library
> to ease log filtering and debugging.
> Update the system-dpdk vhost tests to reflect this change.
> 
> [1] c85c35b1d447 ("vhost: improve socket layer logs")
> 
> Signed-off-by: Sunil Pai G 

I've tested the before and after cases here with latest OVS master and latest 
DPDK main and I can verify that:

All 3 vhost-user tests fail before:
  3: OVS-DPDK - add vhost-user-client port   FAILED (system-dpdk.at:66)
  4: OVS-DPDK - ping vhost-user portsFAILED (system-dpdk.at:100)
  5: OVS-DPDK - ping vhost-user-client ports FAILED (system-dpdk.at:175)

All 3 pass after:
  3: OVS-DPDK - add vhost-user-client port   ok
  4: OVS-DPDK - ping vhost-user portsok
  5: OVS-DPDK - ping vhost-user-client ports ok

The AT changes look good to me, I'm happy to Ack.
Acked-by: Cian Ferriter 


Re: [ovs-dev] [PATCH ovn v5] northd: add configuration option for enabling parallelization

2022-05-19 Thread Mark Michelson

I pushed this change to main. Thanks, Xavier.

On 5/19/22 09:15, Mark Michelson wrote:

Thanks for your persistence on this, Xavier.

Acked-by: Mark Michelson 

On 5/19/22 07:20, Xavier Simonart wrote:

This patch is intended to change the way to enable northd lflow build
parallelization, as well as enable runtime change of number of threads.
Before this patch, the following was needed to use parallelization:
- enable parallelization through use_parallel_build in NBDB
- use --dummy-numa to select number of threads.
This second part was needed as otherwise as many threads as cores in the
system were used, while parallelization showed some performance improvement
only until using around 4 (or maybe 8) threads.

With this patch, the number of threads used for lflow parallel build can be
specified either:
- at startup, using --n-threads= as ovn-northd command line option
- using unixctl
If the number of threads specified is > 1, then parallelization is enabled.
If the number is 1, parallelization is disabled.
If the number is < 1, parallelization is disabled at startup and a warning
is logged.
If the number is > 256, parallelization is enabled (with 256 threads) and
a warning is logged.

The following unixctl commands have been added:
- set-n-threads : set the number of threads used.
- get-n-threads: returns the number of threads used
If the number of threads is within <2-256> bounds, parallelization is enabled.
If the number of threads is 1, parallelization is disabled.
Otherwise an error is thrown.

Note that, if set-n-threads failed for any reason (e.g. failure to setup some
semaphore), parallelization is disabled, and get-n-threads will return 1.

A new command line argument (--ovn-northd-n-threads) has also been added to
ovn-ctl.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2078552
Signed-off-by: Xavier Simonart 

---
v2:  - handled Dumitru's comments
  - added missing mutex_destroy
  - fixed issue when use_logical_dp_group is enabled after northd 
startup

  - rebased on top of main
v3:
  - fix mutex_destroy issue
v4:
  - handled Mark's comments
  - rebased on top of main
v5
  - handled Dumitru's comment
  - added --ovn-northd-n-threads option in ovn-ctl
---
  NEWS  |   8 ++
  lib/ovn-parallel-hmap.c   | 291 +-
  lib/ovn-parallel-hmap.h   |  31 ++--
  northd/northd.c   | 176 ---
  northd/northd.h   |   1 +
  northd/ovn-northd-ddlog.c |   6 -
  northd/ovn-northd.8.xml   |  70 +
  northd/ovn-northd.c   |  68 -
  tests/ovn-macros.at   |  59 ++--
  tests/ovn-northd.at   | 109 ++
  utilities/ovn-ctl |   5 +
  11 files changed, 502 insertions(+), 322 deletions(-)

diff --git a/NEWS b/NEWS
index 244824e3f..578d281d7 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,14 @@ Post v22.03.0
  implicit drop behavior on logical switches with ACLs applied.
    - Support (LSP.options:qos_min_rate) to guarantee minimal 
bandwidth available

  for a logical port.
+  - Changed the way to enable northd parallelization.
+    Removed support for:
+    - use_parallel_build in NBDB.
+    - --dummy-numa in northd cmdline.
+    Added support for:
+    -  --n-threads= in northd cmdline.
+    - set-n-threads/get-n-threads unixctls.
+    - --ovn-northd-n-threads command line argument in ovn-ctl
  OVN v22.03.0 - 11 Mar 2022
  --
diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index 7edc4c0b6..828e5a0e2 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -38,14 +38,10 @@ VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
  #ifndef OVS_HAS_PARALLEL_HMAP
-#define WORKER_SEM_NAME "%x-%p-%x"
+#define WORKER_SEM_NAME "%x-%p-%"PRIxSIZE
  #define MAIN_SEM_NAME "%x-%p-main"
-/* These are accessed under mutex inside add_worker_pool().
- * They do not need to be atomic.
- */
  static atomic_bool initial_pool_setup = ATOMIC_VAR_INIT(false);
-static bool can_parallelize = false;
  /* This is set only in the process of exit and the set is
   * accompanied by a fence. It does not need to be atomic or be
@@ -57,18 +53,18 @@ static struct ovs_list worker_pools = 
OVS_LIST_INITIALIZER(_pools);

  static struct ovs_mutex init_mutex = OVS_MUTEX_INITIALIZER;
-static int pool_size;
+static size_t pool_size = 1;
  static int sembase;
  static void worker_pool_hook(void *aux OVS_UNUSED);
-static void setup_worker_pools(bool force);
+static void setup_worker_pools(void);
  static void merge_list_results(struct worker_pool *pool OVS_UNUSED,
 void *fin_result, void *result_frags,
-   int index);
+   size_t index);
  static void merge_hash_results(struct worker_pool *pool OVS_UNUSED,
 void *fin_result, void *result_frags,
-   int index);
+   size_t 

[ovs-dev] [PATCH branch-2.17] dpdk: Use DPDK 21.11.1 release

2022-05-19 Thread Michael Phelan
  Modify the CI Linux build script to use the latest DPDK stable release 21.11.1.
  Modify the Documentation to use the latest DPDK stable release 21.11.1.
  Update the NEWS file to reflect the latest DPDK stable release 21.11.1.
  Update the FAQ to reflect the latest DPDK release for each OVS branch.

Signed-off-by: Michael Phelan 
---
 .ci/linux-build.sh   | 2 +-
 Documentation/faq/releases.rst   | 2 +-
 Documentation/intro/install/dpdk.rst | 8 
 NEWS | 3 +++
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index 6cd38ff3e..c4ec93a39 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -220,7 +220,7 @@ fi
 
 if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then
 if [ -z "$DPDK_VER" ]; then
-DPDK_VER="21.11"
+DPDK_VER="21.11.1"
 fi
 install_dpdk $DPDK_VER
 fi
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index 530d36e25..319ee38c7 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -210,7 +210,7 @@ Q: What DPDK version does each Open vSwitch release work 
with?
 2.14.x   19.11.10
 2.15.x   20.11.4
 2.16.x   20.11.4
-2.17.x   21.11.0
+2.17.x   21.11.1
  
 
 Q: Are all the DPDK releases that OVS versions work with maintained?
diff --git a/Documentation/intro/install/dpdk.rst 
b/Documentation/intro/install/dpdk.rst
index d9f44055d..f8f01bfad 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -42,7 +42,7 @@ Build requirements
 In addition to the requirements described in :doc:`general`, building Open
 vSwitch with DPDK will require the following:
 
-- DPDK 21.11
+- DPDK 21.11.1
 
 - A `DPDK supported NIC`_
 
@@ -73,9 +73,9 @@ Install DPDK
 #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
 
$ cd /usr/src/
-   $ wget https://fast.dpdk.org/rel/dpdk-21.11.tar.xz
-   $ tar xf dpdk-21.11.tar.xz
-   $ export DPDK_DIR=/usr/src/dpdk-21.11
+   $ wget https://fast.dpdk.org/rel/dpdk-21.11.1.tar.xz
+   $ tar xf dpdk-21.11.1.tar.xz
+   $ export DPDK_DIR=/usr/src/dpdk-stable-21.11.1
$ cd $DPDK_DIR
 
 #. Configure and install DPDK using Meson
diff --git a/NEWS b/NEWS
index 8cae5f7de..76e943e4a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,8 @@
 v2.17.2 - xx xxx 
 -
+   - DPDK:
+ * OVS validated with DPDK 21.11.1. It is recommended to use this version
+   until further releases.
 
 v2.17.1 - 08 Apr 2022
 -
-- 
2.25.1



[ovs-dev] [PATCH] dpdk: Use DPDK 21.11.1 release

2022-05-19 Thread Michael Phelan
  Modify the CI Linux build script to use the latest DPDK stable release 21.11.1.
  Modify the Documentation to use the latest DPDK stable release 21.11.1.
  Update the NEWS file to reflect the latest DPDK stable release 21.11.1.
  Update the FAQ to reflect the latest DPDK release for each OVS branch.

Signed-off-by: Michael Phelan 
---
 .ci/linux-build.sh   | 2 +-
 Documentation/faq/releases.rst   | 2 +-
 Documentation/intro/install/dpdk.rst | 8 
 NEWS | 3 +++
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index 6cd38ff3e..c4ec93a39 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -220,7 +220,7 @@ fi
 
 if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then
 if [ -z "$DPDK_VER" ]; then
-DPDK_VER="21.11"
+DPDK_VER="21.11.1"
 fi
 install_dpdk $DPDK_VER
 fi
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index c12ffaf4a..8cfe2d392 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -212,7 +212,7 @@ Q: What DPDK version does each Open vSwitch release work 
with?
 2.14.x   19.11.10
 2.15.x   20.11.4
 2.16.x   20.11.4
-2.17.x   21.11.0
+2.17.x   21.11.1
  
 
 Q: Are all the DPDK releases that OVS versions work with maintained?
diff --git a/Documentation/intro/install/dpdk.rst 
b/Documentation/intro/install/dpdk.rst
index d9f44055d..0f3712c79 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -42,7 +42,7 @@ Build requirements
 In addition to the requirements described in :doc:`general`, building Open
 vSwitch with DPDK will require the following:
 
-- DPDK 21.11
+- DPDK 21.11.1
 
 - A `DPDK supported NIC`_
 
@@ -73,9 +73,9 @@ Install DPDK
 #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
 
$ cd /usr/src/
-   $ wget https://fast.dpdk.org/rel/dpdk-21.11.tar.xz
-   $ tar xf dpdk-21.11.tar.xz
-   $ export DPDK_DIR=/usr/src/dpdk-21.11
+   $ wget https://fast.dpdk.org/rel/dpdk-21.11.1.tar.xz
+   $ tar xf dpdk-21.11.1.tar.xz
+   $ export DPDK_DIR=/usr/src/dpdk-stable-21.11.1
$ cd $DPDK_DIR
 
 #. Configure and install DPDK using Meson
diff --git a/NEWS b/NEWS
index eece0d0b2..b061c4a87 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,9 @@ Post-v2.17.0
- Windows:
  * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
  * IPv6 Geneve tunnel support.
+   - DPDK:
+ * OVS validated with DPDK 21.11.1. It is recommended to use this version
+   until further releases.
 
 
 v2.17.0 - 17 Feb 2022
-- 
2.25.1



Re: [ovs-dev] [ovs-discuss] Commit 355fef6f2 seems to break connectivity in my setup

2022-05-19 Thread Frode Nordahl
On Sat, May 14, 2022 at 2:10 AM Ilya Maximets  wrote:
>
> On 5/13/22 10:36, Frode Nordahl wrote:
> > On Fri, Mar 11, 2022 at 2:04 PM Liam Young  wrote:
> >>
> >> Hi,
> >>
> >>  Commit 355fef6f2 seems to break connectivity in my setup
> >
> > After OVN started colocating sNAT and dNAT operations in the same CT
> > zone [0] the above mentioned OVS commit appears to also break OVN [1].
> > I have been discussing with Numan and he has found a correlation with
> > the behavior of the open vswitch kernel data path conntrack
> > `skb_nfct_cached` function, i.e. if that is changed to always return
> > 'false' the erratic behavior disappears.
> >
> > So I guess the question then becomes, is this a OVS userspace or OVS
> > kernel problem?
> >
> > We have a reproducer in [3].
> >
> > 0: 
> > https://github.com/ovn-org/ovn/commit/4deac4509abbedd6ffaecf27eed01ddefccea40a
> > 1: https://bugs.launchpad.net/ubuntu/+source/ovn/+bug/1967856
> > 2: 
> > https://elixir.bootlin.com/linux/latest/source/net/openvswitch/conntrack.c#L683
> > 3: https://bugs.launchpad.net/ubuntu/+source/ovn/+bug/1967856/comments/6
> >
>
> Hrm.  I think, there is a logical bug in implementations of ct()
> datapath action in both datapaths.
>
> The problem appears when the OpenFlow pipeline for the packet contains
> several ct() actions.  NXAST_CT action (i.e. every ct() action) must
> always put the packet into an untracked state.  Tracked state will be
> set only in the 'recirc_table' where the packet is cloned by the ct()
> action for further processing.
>
> If an OF pipeline looks like this:
>
>   actions=ct(),something,something,ct(),something
>
> each action must be entered with the 'untracked' conntrack state in
> the packet metadata.
>
> However, ct() action in the datapath, unlike OpenFlow, doesn't work
> this way.  It modifies the conntrack state for the packet it is processing.
> During the flow translation OVS inserts recirculation right after the
> datapath ct() action.  This way conntrack state affects only processing
> after recirculation.  Sort of.
>
> The issue is that not all OpenFlow ct() actions have recirc_table
> specified.  These actions supposed to change the state of the conntrack
> subsystem, but they still must leave the packet itself in the untracked
> state with no strings attached.  But since datapath ct() actions doesn't
> work that way and since recirculation is not inserted (no recirc_table
> specified), packet after conntrack execution carries the state along to
> all other actions.
> This doesn't impact normal actions, but seems to break subsequent ct()
> actions on the same pipeline.
>
> In general, it looks to me that ct() action in the datapath is
> not supposed to cache any connection data inside the packet's metadata.
> This seems to be the root cause of the problem.  Fields that OF knows
> about should not trigger any issues if carried along with a packet,
> but datapath-specific metadata can not be cleared, because OFproto
> simply doesn't know about it.
>
> But, I guess, not caching the connection and other internal structures
> might significantly impact datapath performance.  Will it?
>
> Looking at the reproducer in [3], it has, for example, the flow with the
> following actions:
>
>   actions:ct(commit,zone=8,mark=0/0x1,nat(src)),
>   set(eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:06)),
>   set(ipv4(src=172.18.2.10,dst=192.168.100.6,ttl=62)),
>   ct(zone=8),recirc(0x4)
>
> So, if the first ct() will change some datapath-specific packet metadata,
> the second ct() may try to use that information.
>
> It looks like during the flow translation we must add ct_clear action
> explicitly after every ct() action, unless it was the last action in
> the list.  This will end up with adding a lot of ct_clear actions
> everywhere.
>
> Another option is the patch below that tries to track if ct_clear
> is required and adds that action before the next ct() action in
> the datapath.
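
(If I follow the idea, the tracking would look roughly like the sketch below.
This is purely illustrative, with made-up names, and not actual
ofproto-dpif-xlate code.)

    #include <stdbool.h>

    /* Hypothetical per-translation state. */
    struct xlate_ct_sketch {
        bool need_ct_clear;   /* Packet may carry datapath conntrack state. */
    };

    static void
    xlate_ct(struct xlate_ct_sketch *ctx, bool has_recirc_table)
    {
        if (ctx->need_ct_clear) {
            /* emit_ct_clear();  -- clear state left by a previous ct(). */
            ctx->need_ct_clear = false;
        }
        /* emit_ct(); */
        if (!has_recirc_table) {
            /* No recirculation follows this ct(), so the datapath leaves
             * the packet "tracked"; a later ct() on the same pipeline must
             * be preceded by ct_clear. */
            ctx->need_ct_clear = true;
        }
    }
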
>
> BTW, the test [3] fails with both kernel and userspace datapaths.
>
> The change below should fix the problem, I think.
> It looks like we also have to put ct_clear action before translating
> output to the patch port if we're in 'conntracked' state.
>
> And I do not know how to fix the problem for kernels without ct_clear
> support.  I don't think we can clear metadata that is internal to
> the kernel.  Ideas are welcome.
>
> CC: Aaron, Paolo.
>
> Any thoughts?

Thank you so much for the detailed explanation of what's going on and
for providing a proposed fix.

I took it for a spin and it does indeed appear to fix the OVN hairpin
issue, but it does unfortunately not appear to fix the issue Liam
reported for the ML2/OVS use case [4].

When trying that use case with your patch I see this in the Open vSwitch log:
2022-05-19T08:17:02.668Z|1|ofproto_dpif_xlate(handler1)|WARN|over
max translation depth 64 on bridge br-int while processing

Re: [ovs-dev] [PATCH] classifier: adjust segment boundary to execute prerequisite processing

2022-05-19 Thread Ferriter, Cian
Hi Aaron,



> ---
>  include/openvswitch/flow.h |  7 +++
>  tests/classifier.at| 27 +++
>  2 files changed, 30 insertions(+), 4 deletions(-)
> 
> diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
> index 3054015d93..df10cf579e 100644
> --- a/include/openvswitch/flow.h
> +++ b/include/openvswitch/flow.h
> @@ -141,15 +141,14 @@ struct flow {
>  uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */
>  uint8_t nw_ttl; /* IP TTL/Hop Limit. */
>  uint8_t nw_proto;   /* IP protocol or low 8 bits of ARP opcode. 
> */
> +/* L4 (64-bit aligned) */
>  struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
>  struct eth_addr arp_sha;/* ARP/ND source hardware address. */
>  struct eth_addr arp_tha;/* ARP/ND target hardware address. */
> -ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type.
> - * With L3 to avoid matching L4. */
> +ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. */
>  ovs_be16 pad2;  /* Pad to 64 bits. */
>  struct ovs_key_nsh nsh; /* Network Service Header keys */
> 
> -/* L4 (64-bit aligned) */
>  ovs_be16 tp_src;/* TCP/UDP/SCTP source port/ICMP type. */
>  ovs_be16 tp_dst;/* TCP/UDP/SCTP destination port/ICMP code. 
> */
>  ovs_be16 ct_tp_src; /* CT original tuple source port/ICMP type. 
> */
> @@ -179,7 +178,7 @@ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + 
> sizeof(uint32_t)

About the following BUILD_ASSERT_DECL, which is in include/openvswitch/flow.h
directly below struct flow:
/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
  == sizeof(struct flow_tnl) + sizeof(struct ovs_key_nsh) + 300
  && FLOW_WC_SEQ == 42);

Should we increment the FLOW_WC_SEQ value? The comment reads:
/* This sequence number should be incremented whenever anything involving flows
 * or the wildcarding of flows changes.  This will cause build assertion
 * failures in places which likely need to be updated. */

We haven't changed anything in the order or content of struct flow, but we have
changed (fixed) how flows are wildcarded. It doesn't look like any of the code
asserting FLOW_WC_SEQ == 42 and relying on struct flow order and content needs
updating.
Just wondering about your/others' thoughts on whether to increment to 43 or not.

>  enum {
>  FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst),
>  FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src),
> -FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src),
> +FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, nd_target),
>  };
>  BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0);
>  BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0);

IIUC, we are moving the L4 boundary 56B earlier in struct flow. This means the
L3/L4 segment boundary is still at a U64 boundary. I know we have BUILD_ASSERTs
checking this, but I just thought to double-check myself.
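
As a sanity check, the constraint boils down to something like the following
(a hypothetical standalone example; this is not OVS's struct flow or its
BUILD_ASSERT_DECL macro):

    #include <stddef.h>
    #include <stdint.h>

    /* Stand-in for a flow key split into staged lookup segments. */
    struct example_flow {
        uint64_t l2[4];     /* Metadata + L2 fields. */
        uint64_t l3[6];     /* L3 fields. */
        uint64_t l4[2];     /* L4 fields. */
    };

    /* The segment boundary used for staged lookup must stay 64-bit aligned. */
    _Static_assert(offsetof(struct example_flow, l4) % sizeof(uint64_t) == 0,
                   "L3/L4 segment boundary must be 64-bit aligned");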

> diff --git a/tests/classifier.at b/tests/classifier.at
> index cdcd72c156..a0a8fe0359 100644
> --- a/tests/classifier.at
> +++ b/tests/classifier.at
> @@ -129,6 +129,33 @@ Datapath actions: 3
>  OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
>  AT_CLEANUP
> 
> +AT_SETUP([flow classifier - ipv6 ND dependancy])
> +OVS_VSWITCHD_START
> +add_of_ports br0 1 2
> +AT_DATA([flows.txt], [dnl
> + table=0,priority=100,ipv6,ipv6_src=1000::/10 actions=resubmit(,1)
> + table=0,priority=0 actions=NORMAL
> + table=1,priority=110,ipv6,ipv6_dst=1000::3 actions=resubmit(,2)
> + table=1,priority=100,ipv6,ipv6_dst=1000::4 actions=resubmit(,2)
> + table=1,priority=0 actions=NORMAL
> + 
> table=2,priority=120,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1
>  actions=NORMAL
> + table=2,priority=100,tcp actions=NORMAL
> + table=2,priority=100,icmp6 actions=NORMAL
> + table=2,priority=0 actions=NORMAL
> +])
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +
> +# test ICMPv6 echo request (which should have no nd_target field)
> +AT_CHECK([ovs-appctl ofproto/trace br0
> "in_port=1,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_ds
> t=1000::4,nw_proto=58,icmpv6_type=128,icmpv6_code=0"], [0], [stdout])
> +AT_CHECK([tail -2 stdout], [0],
> +  [Megaflow:
> recirc_id=0,eth,icmp6,in_port=1,dl_src=f6:d2:b0:19:5e:7b,dl_dst=d2:49:19:91:78:fe,ipv6_src=1000::/10,i
> pv6_dst=1000::4,nw_ttl=0,nw_frag=no
> +Datapath actions: 100,2
> +])
> +
> +AT_CLEANUP
> +
> +
> +

Nit: The other tests in the file have one line after the "AT_CLEANUP", maybe 
this should be the same instead of 3 lines?

>  AT_BANNER([conjunctive match])
> 
>  AT_SETUP([single conjunctive match])

I've applied the patch to the latest master, run the unit 

Re: [ovs-dev] [PATCH ovn v5] northd: add configuration option for enabling parallelization

2022-05-19 Thread Mark Michelson

Thanks for your persistence on this, Xavier.

Acked-by: Mark Michelson 

On 5/19/22 07:20, Xavier Simonart wrote:

This patch is intended to change the way to enable northd lflow build
parallelization, as well as enable runtime change of number of threads.
Before this patch, the following was needed to use parallelization:
- enable parallelization through use_parallel_build in NBDB
- use --dummy-numa to select number of threads.
This second part was needed as otherwise as many threads as cores in the system
were used, while parallelization showed some performance improvement only until
using around 4 (or maybe 8) threads.

With this patch, the number of threads used for lflow parallel build can be
specified either:
- at startup, using --n-threads= as ovn-northd command line option
- using unixctl
If the number of threads specified is > 1, then parallelization is enabled.
If the number is 1, parallelization is disabled.
If the number is < 1, parallelization is disabled at startup and a warning
is logged.
If the number is > 256, parallelization is enabled (with 256 threads) and
a warning is logged.

The following unixctl commands have been added:
- set-n-threads : set the number of threads used.
- get-n-threads: returns the number of threads used
If the number of threads is within <2-256> bounds, parallelization is enabled.
If the number of threads is 1, parallelization is disabled.
Otherwise an error is thrown.

Note that, if set-n-threads failed for any reason (e.g. failure to setup some
semaphore), parallelization is disabled, and get-n-threads will return 1.

A new command line argument (--ovn-northd-n-threads) has also been added to
ovn-ctl.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2078552
Signed-off-by: Xavier Simonart 

---
v2:  - handled Dumitru's comments
  - added missing mutex_destroy
  - fixed issue when use_logical_dp_group is enabled after northd startup
  - rebased on top of main
v3:
  - fix mutex_destroy issue
v4:
  - handled Mark's comments
  - rebased on top of main
v5
  - handled Dumitru's comment
  - added --ovn-northd-n-threads option in ovn-ctl
---
  NEWS  |   8 ++
  lib/ovn-parallel-hmap.c   | 291 +-
  lib/ovn-parallel-hmap.h   |  31 ++--
  northd/northd.c   | 176 ---
  northd/northd.h   |   1 +
  northd/ovn-northd-ddlog.c |   6 -
  northd/ovn-northd.8.xml   |  70 +
  northd/ovn-northd.c   |  68 -
  tests/ovn-macros.at   |  59 ++--
  tests/ovn-northd.at   | 109 ++
  utilities/ovn-ctl |   5 +
  11 files changed, 502 insertions(+), 322 deletions(-)

diff --git a/NEWS b/NEWS
index 244824e3f..578d281d7 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,14 @@ Post v22.03.0
  implicit drop behavior on logical switches with ACLs applied.
- Support (LSP.options:qos_min_rate) to guarantee minimal bandwidth 
available
  for a logical port.
+  - Changed the way to enable northd parallelization.
+Removed support for:
+- use_parallel_build in NBDB.
+- --dummy-numa in northd cmdline.
+Added support for:
+-  --n-threads= in northd cmdline.
+- set-n-threads/get-n-threads unixctls.
+- --ovn-northd-n-threads command line argument in ovn-ctl
  
  OVN v22.03.0 - 11 Mar 2022

  --
diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index 7edc4c0b6..828e5a0e2 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -38,14 +38,10 @@ VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
  
  #ifndef OVS_HAS_PARALLEL_HMAP
  
-#define WORKER_SEM_NAME "%x-%p-%x"

+#define WORKER_SEM_NAME "%x-%p-%"PRIxSIZE
  #define MAIN_SEM_NAME "%x-%p-main"
  
-/* These are accessed under mutex inside add_worker_pool().

- * They do not need to be atomic.
- */
  static atomic_bool initial_pool_setup = ATOMIC_VAR_INIT(false);
-static bool can_parallelize = false;
  
  /* This is set only in the process of exit and the set is

   * accompanied by a fence. It does not need to be atomic or be
@@ -57,18 +53,18 @@ static struct ovs_list worker_pools = 
OVS_LIST_INITIALIZER(_pools);
  
  static struct ovs_mutex init_mutex = OVS_MUTEX_INITIALIZER;
  
-static int pool_size;

+static size_t pool_size = 1;
  
  static int sembase;
  
  static void worker_pool_hook(void *aux OVS_UNUSED);

-static void setup_worker_pools(bool force);
+static void setup_worker_pools(void);
  static void merge_list_results(struct worker_pool *pool OVS_UNUSED,
 void *fin_result, void *result_frags,
-   int index);
+   size_t index);
  static void merge_hash_results(struct worker_pool *pool OVS_UNUSED,
 void *fin_result, void *result_frags,
-   int index);
+   size_t index);
  
  bool

  ovn_stop_parallel_processing(void)
@@ -76,107 +72,184 @@ 

Re: [ovs-dev] [PATCH v2 0/2] Rxq scheduling code rework.

2022-05-19 Thread Kevin Traynor
Any issues with merging these? They are a dependency for cross-numa 
patches Anurag/Jan are submitting, so it will be easier for reviewers 
and avoid CI failure if they are in beforehand.


On 05/04/2022 15:13, Kevin Traynor wrote:

These are intended to reduce code complexity a bit and make the
code more extensible for additional features like cross-numa
polling [0].

There is no change in behaviour.

v2:
- Address minor comments from David
- Minor rebase as [1] has now merged

GHA: https://github.com/kevintraynor/ovs/actions/runs/2096405657

[0] https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392433.html
[1] https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392689.html

Kevin Traynor (2):
   dpif-netdev: Split function to find lowest loaded PMD thread core.
   dpif-netdev: Restructure rxq schedule logging.

  lib/dpif-netdev.c | 95 +--
  1 file changed, 58 insertions(+), 37 deletions(-)





Re: [ovs-dev] [PATCH ovn v5] northd: add configuration option for enabling parallelization

2022-05-19 Thread 0-day Robot
Bleep bloop.  Greetings Xavier Simonart, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0001 northd: add configuration option for enabling 
parallelization
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot


[ovs-dev] [PATCH ovn v5] northd: add configuration option for enabling parallelization

2022-05-19 Thread Xavier Simonart
This patch is intended to change the way to enable northd lflow build
parallelization, as well as enable runtime change of number of threads.
Before this patch, the following was needed to use parallelization:
- enable parallelization through use_parallel_build in NBDB
- use --dummy-numa to select number of threads.
This second part was needed as otherwise as many threads as cores in the system
were used, while parallelization showed some performance improvement only until
using around 4 (or maybe 8) threads.

With this patch, the number of threads used for lflow parallel build can be
specified either:
- at startup, using --n-threads= as ovn-northd command line option
- using unixctl
If the number of threads specified is > 1, then parallelization is enabled.
If the number is 1, parallelization is disabled.
If the number is < 1, parallelization is disabled at startup and a warning
is logged.
If the number is > 256, parallelization is enabled (with 256 threads) and
a warning is logged.

The following unixctl commands have been added:
- set-n-threads : set the number of threads used.
- get-n-threads: returns the number of threads used
If the number of threads is within <2-256> bounds, parallelization is enabled.
If the number of threads is 1, parallelization is disabled.
Otherwise an error is thrown.

Note that, if set-n-threads failed for any reason (e.g. failure to setup some
semaphore), parallelization is disabled, and get-n-threads will return 1.

A new command line argument (--ovn-northd-n-threads) has also been added to
ovn-ctl.
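
For reference, the thread-count policy described above boils down to roughly
the following (an illustrative sketch with hypothetical names, not the actual
ovn-northd code):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_THREADS 256

    /* Returns the number of threads to use for a requested value 'n' and
     * reports whether parallelization should be enabled. */
    static size_t
    normalize_n_threads(long long n, bool *parallel)
    {
        if (n < 1) {
            fprintf(stderr, "warning: invalid thread count %lld; "
                            "parallelization disabled\n", n);
            *parallel = false;
            return 1;
        } else if (n == 1) {
            *parallel = false;
            return 1;
        } else if (n > MAX_THREADS) {
            fprintf(stderr, "warning: thread count capped at %d\n",
                    MAX_THREADS);
            n = MAX_THREADS;
        }
        *parallel = true;
        return (size_t) n;
    }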

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2078552
Signed-off-by: Xavier Simonart 

---
v2:  - handled Dumitru's comments
 - added missing mutex_destroy
 - fixed issue when use_logical_dp_group is enabled after northd startup
 - rebased on top of main
v3:
 - fix mutex_destroy issue
v4:
 - handled Mark's comments
 - rebased on top of main
v5
 - handled Dumitru's comment
 - added --ovn-northd-n-threads option in ovn-ctl
---
 NEWS  |   8 ++
 lib/ovn-parallel-hmap.c   | 291 +-
 lib/ovn-parallel-hmap.h   |  31 ++--
 northd/northd.c   | 176 ---
 northd/northd.h   |   1 +
 northd/ovn-northd-ddlog.c |   6 -
 northd/ovn-northd.8.xml   |  70 +
 northd/ovn-northd.c   |  68 -
 tests/ovn-macros.at   |  59 ++--
 tests/ovn-northd.at   | 109 ++
 utilities/ovn-ctl |   5 +
 11 files changed, 502 insertions(+), 322 deletions(-)

diff --git a/NEWS b/NEWS
index 244824e3f..578d281d7 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,14 @@ Post v22.03.0
 implicit drop behavior on logical switches with ACLs applied.
   - Support (LSP.options:qos_min_rate) to guarantee minimal bandwidth available
 for a logical port.
+  - Changed the way to enable northd parallelization.
+Removed support for:
+- use_parallel_build in NBDB.
+- --dummy-numa in northd cmdline.
+Added support for:
+-  --n-threads= in northd cmdline.
+- set-n-threads/get-n-threads unixctls.
+- --ovn-northd-n-threads command line argument in ovn-ctl
 
 OVN v22.03.0 - 11 Mar 2022
 --
diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index 7edc4c0b6..828e5a0e2 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -38,14 +38,10 @@ VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
 
 #ifndef OVS_HAS_PARALLEL_HMAP
 
-#define WORKER_SEM_NAME "%x-%p-%x"
+#define WORKER_SEM_NAME "%x-%p-%"PRIxSIZE
 #define MAIN_SEM_NAME "%x-%p-main"
 
-/* These are accessed under mutex inside add_worker_pool().
- * They do not need to be atomic.
- */
 static atomic_bool initial_pool_setup = ATOMIC_VAR_INIT(false);
-static bool can_parallelize = false;
 
 /* This is set only in the process of exit and the set is
  * accompanied by a fence. It does not need to be atomic or be
@@ -57,18 +53,18 @@ static struct ovs_list worker_pools = 
OVS_LIST_INITIALIZER(_pools);
 
 static struct ovs_mutex init_mutex = OVS_MUTEX_INITIALIZER;
 
-static int pool_size;
+static size_t pool_size = 1;
 
 static int sembase;
 
 static void worker_pool_hook(void *aux OVS_UNUSED);
-static void setup_worker_pools(bool force);
+static void setup_worker_pools(void);
 static void merge_list_results(struct worker_pool *pool OVS_UNUSED,
void *fin_result, void *result_frags,
-   int index);
+   size_t index);
 static void merge_hash_results(struct worker_pool *pool OVS_UNUSED,
void *fin_result, void *result_frags,
-   int index);
+   size_t index);
 
 bool
 ovn_stop_parallel_processing(void)
@@ -76,107 +72,184 @@ ovn_stop_parallel_processing(void)
 return workers_must_exit;
 }
 
-bool
-ovn_can_parallelize_hashes(bool force_parallel)
+size_t
+ovn_get_worker_pool_size(void)
 {
-   

Re: [ovs-dev] [PATCH ovn] controller: avoid recomputes triggered by SBDB Port_Binding updates.

2022-05-19 Thread Dumitru Ceara
On 5/17/22 19:36, Han Zhou wrote:
> On Tue, May 17, 2022 at 2:41 AM Xavier Simonart  wrote:
>>
>> Hi Han

Hi Xavier, Han,


>> Thanks for looking into this and for your feedback.
>> Please see comments below
>>
>> On Tue, May 17, 2022 at 8:03 AM Han Zhou  wrote:
>>>
>>>
>>>
>>> On Thu, May 12, 2022 at 2:04 AM Xavier Simonart 
> wrote:

 When VIF ports are claimed on a chassis, SBDB Port_Binding table is
> updated.
 If the SBDB IDL is still read-only ("in transaction") when such an
> update
 is required, the update is not possible and recompute is triggered
> through
 I+P failure.

 This situation can happen:
 - after updating Port_Binding->chassis to SBDB for one port, in a
> following
   iteration, ovn-controller handles
> Interface:external_ids:ovn-installed
   (for the same port) while SBDB is still read-only.
 - after updating Port_Binding->chassis to SBDB for one port, in a
> following
   iteration, ovn-controller updates Port_Binding->chassis for another
> port,
   while SBDB is still read-only.

 This patch prevent the recompute, by having the if-status module
 updating the Port_Binding chassis (if needed), when possible.
 This does not delay Port_Binding chassis update compared to before this
 patch: Port_Binding chassis will be updated as soon as SBDB is again
 writable, as it was the case, through a recompute, before this patch.

>>> Thanks Xavier. I think the approach is good: moving the SB update from
> the I-P engine to if-status module, so it wouldn't need to trigger I-P
> engine recompute, and just let the if-status module update the SB as soon
> as it is writable, through the if-status's state-machine.
>>> However, I have some comments/questions regarding the implementation
> details, primarily confusion on the state transitions. Please see below.
>>>
 Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
 Signed-off-by: Xavier Simonart 
 ---
  controller/binding.c| 124 ++--
  controller/binding.h|   9 ++-
  controller/if-status.c  |  31 +++--
  controller/if-status.h  |   6 +-
  controller/ovn-controller.c |   6 +-
  tests/ovn.at|  55 +++-
  6 files changed, 184 insertions(+), 47 deletions(-)

 diff --git a/controller/binding.c b/controller/binding.c
 index e5ba56b25..d917d8775 100644
 --- a/controller/binding.c
 +++ b/controller/binding.c
 @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct
> shash *local_bindings,
  }

  bool
 -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
 +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
 +const struct sbrec_chassis *chassis_rec)
  {
  struct local_binding *lbinding =
  local_binding_find(local_bindings, pb_name);
  struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
 +if (b_lport && b_lport->pb->chassis != chassis_rec) {
 +return false;
 +}
>>>
>>> Why need the change here? I understand that it is obvious that the
> binding should not be considered up if the chassis is not updated in SB
> port_binding, but I wonder why we need the change in *this* patch.
>>> The function is called to see if an interface state should be moved from
> MARK_UP to INSTALLED, but if the chassis is not set, it shouldn't even be
> moved to MARK_UP.
>>>
>> I probably need to provide a better explanation of the states.
>> In fact, my two goals were: 1) avoid/decrease recomputes and 2) do not
> increase latency (time between interface is claimed and it's up in SB and
> ovn-installed in OVS).
>> There is not really an additional state covering the fact that the
> chassis is set in sbdb.
>> As soon as the state is CLAIMED, we try to set the pb->chassis (by 'try'
> I mean we update pb->chassis if sb is not readonly).
>> The fact that sb is readonly in CLAIMED state does not prevent us to move
> to INSTALL_FLOWS state, where we are installing flows in OVS.
> 
> This may be a problem. If we haven't updated pb->chassis, then the physical
> flows being installed are incomplete, and if we move the state forward, we
> would end up updating the port status (SB pb->up and OVS
> interface->external_ids:installed) too early, which completely voids the
> original purpose of the if-status module, which is to manage the interface
> state and tell CMS when a lport is ready to receive traffic.
> 

Sounds like an issue indeed.

>> As soon as the seqno for those flows was received, we moved to MARK_UP
> state.
>> In those three states, we can update pb->chassis (if not already done).
>> In MARK_UP we set the interface up.
>> We moved to INSTALLED when both up and chassis have been written.
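
(Just to make sure I follow, the lifecycle described above is roughly the
following; a hypothetical sketch, not the actual if-status.c code:)

    /* Hypothetical names for the states described above. */
    enum if_state_sketch {
        IF_CLAIMED,        /* Port claimed; set Port_Binding chassis as soon
                            * as the SB DB is writable. */
        IF_INSTALL_FLOWS,  /* OVS flows are being installed. */
        IF_MARK_UP,        /* Flows installed; mark the interface up. */
        IF_INSTALLED,      /* Both 'up' and 'chassis' written to the SB DB. */
    };
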
>>
  if (lbinding && b_lport && lbinding->iface) {
  if 

Re: [ovs-dev] [PATCH ovn] Allow for setting the Next server IP in the DHCP header

2022-05-19 Thread Lucas Alvares Gomes
Hi,

Thanks Numan for the review. See the replies below.

On Thu, May 19, 2022 at 12:36 AM Numan Siddique  wrote:
>
> On Wed, May 11, 2022 at 11:34 AM  wrote:
> >
> > From: Lucas Alvares Gomes 
> >
> > In order to PXE boot a baremetal server using the OVN DHCP server we
> > need to allow users to set the "next-server" (siaddr) [0] field in the
> > DHCP header.
> >
> > While investigating this issue by comparing the DHCPOFFER and DHCPACK
> > packets sent by dnsmasq and OVN we saw that the "next-server" field
> > was the problem for OVN, without it PXE booting was timing out while
> > fetching the iPXE image from the TFTP server (see the bugzilla ticket
> > below for reference).
> >
> > To confirm this problem we created a bogus patch hardcoding the TFTP
> > address in the siaddr of the DHCP header (see the discussion in the
> > maillist below) and with this in place we were able to deploy a
> > baremetal node using the OVN DHCP end-to-end.
> >
> > This patch is a proper implementation that creates a new DHCP
> > configuration option called "next_server" to allow users to set this
> > field dynamically. This patch uses the DHCP code 253, which is an unused
> > code in the DHCP specification, as this is not a normal DHCP option but a
> > special use case in OVN.
> >
> > [0]
> > https://github.com/openvswitch/ovs/blob/9dd3031d2e0e9597449e95428320ccaaff7d8b3d/lib/dhcp.h#L42
> >
> > Reported-by: https://bugzilla.redhat.com/show_bug.cgi?id=2083629
> > Reported-by:
> > https://mail.openvswitch.org/pipermail/ovs-discuss/2022-May/051821.html
> > Signed-off-by: Lucas Alvares Gomes 
> > ---
> >  controller/pinctrl.c | 69 ++--
> >  lib/actions.c| 13 -
> >  lib/ovn-l7.h |  8 +
> >  northd/ovn-northd.c  |  1 +
> >  ovn-nb.xml   |  7 +
> >  tests/ovn.at |  6 ++--
> >  tests/test-ovn.c |  1 +
> >  7 files changed, 80 insertions(+), 25 deletions(-)
> >
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index ae3da332c..428863293 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -2203,30 +2203,56 @@ pinctrl_handle_put_dhcp_opts(
> >   *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
> >   * --
> >   */
> > -struct dhcp_opt_header *in_dhcp_opt =
> > -(struct dhcp_opt_header *)reply_dhcp_opts_ptr->data;
> > -if (in_dhcp_opt->code == DHCP_OPT_BOOTFILE_CODE) {
> > -unsigned char *ptr = (unsigned char *)in_dhcp_opt;
> > -int len = sizeof *in_dhcp_opt + in_dhcp_opt->len;
> > -struct dhcp_opt_header *next_dhcp_opt =
> > -(struct dhcp_opt_header *)(ptr + len);
> > -
> > -if (next_dhcp_opt->code == DHCP_OPT_BOOTFILE_ALT_CODE) {
> > -if (!ipxe_req) {
> > -ofpbuf_pull(reply_dhcp_opts_ptr, len);
> > -next_dhcp_opt->code = DHCP_OPT_BOOTFILE_CODE;
> > -} else {
> > -char *buf = xmalloc(len);
> > +ovs_be32 next_server = in_dhcp_data->siaddr;
> > +bool bootfile_name_set = false;
> > +in_dhcp_ptr = reply_dhcp_opts_ptr->data;
> > +end = (const char *)reply_dhcp_opts_ptr->data + 
> > reply_dhcp_opts_ptr->size;
> > +
>
> Hi Lucas,
>
> Thanks for adding this support.
>
> It seems to me this while loop can be avoided since lib/actions.c
> always encodes offer_ip, DHCP_OPT_BOOTFILE_CODE,
> DHCP_OPT_BOOTFILE_ALT_CODE
> and now DHCP_OPT_NEXT_SERVER_CODE  in the userdata buffer before
> encoding other dhcp options.
>
> Since it is deterministic,  can't we do something like below instead
> of the while loop ?
>
> struct dhcp_opt_header *in_dhcp_opt =
> (struct dhcp_opt_header *)reply_dhcp_opts_ptr->data;
> if (in_dhcp_opt->code == DHCP_OPT_BOOTFILE_CODE) {
>
>advance in_dhcp_opt to the next option
> } else if (in_dhcp_opt->code == DHCP_OPT_BOOTFILE_ALT_CODE) {
>   ..
>  advance in_dhcp_opt to the next option
> }
>
> if (in_dhcp_opt->code == DHCP_OPT_NEXT_SERVER_CODE) {
> next_server = get_unaligned_be32(DHCP_OPT_PAYLOAD(in_dhcp_opt));
> }
>
> Let me know if this gets complicated because of my above suggestion.
> In that case,I'm fine to run the below while loop.
>

That's how I coded it the first time when I was testing the patch, but
I found a problem: if more than one option was set (for example,
bootfile_name and next_server), only the first encoded option would be
processed. In order to process all the options I needed to offset to the
next one, like:

unsigned char *ptr = (unsigned char *)in_dhcp_opt;
int len = sizeof *in_dhcp_opt + in_dhcp_opt->len;
struct dhcp_opt_header *next_dhcp_opt = (struct dhcp_opt_header *)(ptr + len);

But by then the code wasn't really looking great anymore. That's why I
thought that the looping + switch case just looks better. Plus, it's more
future-proof, since if we need to add another option to this (and we might
with iPXE for IPv6) all we need to