Hi Ilya, thanks for the patch.

I have notes below.

On 1/23/25 18:55, Ilya Maximets wrote:
In L2 topology of OVN cluster, all the VIFs of the same network are
attached to the same logical switch.  Meaning, this switch has one LSP
for each container/VM in the cluster.  So, in a 10-node cluster with
100 containers/VMs per node, the switch will have 1000 ports.

This topology has limitations:

   - Processing of a very large switch may be expensive for northd as
     some changes to the switch may require reprocessing logical flows
     for all the ports.

   - OVN supports up to 32K ports in a single switch, i.e., a single
     network can't have more than 32K containers/VMs, which may be a
     serious limitation for large scale setups, as it can only
     accommodate a single /17 network.

A solution is to split this one large switch into a spine-leaf topology
by having a smaller switch per node that only have local LSPs and a
distributed switch that connects all these node switches together.
(Nothing prevents us from connecting multiple distributed switches,
but it's not the main focus of this work.)

Switches are connected via newly introduced "switch" type ports. These
ports have implicit "unknown" addresses, since the address information
is not leaked between directly connected switches.

Such topology should allow:

   - Better locality of changes: Adding new containers/VMs should only
     require adding LSPs to a single node switch on the same node where
     the container/VM is located.  And the switch updates should be
     cheaper for northd to process.

   - Higher number of containers/VMs in the cluster: We could create
     32K ports per node and not across the whole cluster.

   - Still have an L2 topology without any routers involved for EW
     traffic.

Unlike having node switches with localnet ports, there is no need
to have a dedicated provider network.  At the same time on OpenFlow
level it looks almost like if the spine switch didn't exist, which is
expected since it's a single L2 domain and the whole topology is
known to ovn-controller.  And the separation really exists only on
the logical level, providing a nice separation.

And while this topology may be useful as-is, it makes more sense in
the context of OVN interconnect, support for which will be added in
the next commits.

Implementation doesn't require changes in ovn-controller, we just
create a new 'switch' LSP type that translates into existing "patch"
port binding added into MC_unknown.

'ovn-nbctl lsp-set-type' command is updated to accept optional 'peer'
argument for switch ports, e.g.:

   ovn-nbctl lsp-set-type ls1-p1 switch peer=ls2-p2
          -- lsp-set-type ls2-p2 switch peer=ls1-p1

Adding a new 'lsp-set-peer' seemed excessive when it would only work
for a single port type.

Tests added to cover a local and a distributed spine-leaf topology.

Signed-off-by: Ilya Maximets <[email protected]>
---
  NEWS                      |   2 +
  lib/ovn-util.c            |   1 +
  northd/northd.c           |  42 ++++++-
  northd/northd.h           |   2 +
  ovn-nb.ovsschema          |   5 +-
  ovn-nb.xml                |  21 ++++
  tests/ovn-nbctl.at        |  21 +++-
  tests/ovn-northd.at       |  57 ++++++++++
  tests/ovn.at              | 233 ++++++++++++++++++++++++++++++++++++++
  utilities/ovn-nbctl.8.xml |  10 +-
  utilities/ovn-nbctl.c     |  21 +++-
  11 files changed, 406 insertions(+), 9 deletions(-)

diff --git a/NEWS b/NEWS
index 72c5a6339..07e0f056f 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,8 @@ Post v24.09.0
       and "routing-protocols" are now also usable on distributed gateway ports.
     - ovn-nb: Changed schema of ovn-nb to make networks optional within Logical
       Router Ports.
+   - Added support for Spine-Leaf topology of logical switches by adding
+     a new LSP type 'switch' that can directly connect two logical switches.
     - Bump python version required for building OVN to 3.7.
     - SSL/TLS:
       * TLSv1 and TLSv1.1 protocols are deprecated and disabled by default
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index b78bdbfa1..52479e3cf 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -610,6 +610,7 @@ static const char *OVN_NB_LSP_TYPES[] = {
      "localnet",
      "localport",
      "router",
+    "switch",
      "vtep",
      "external",
      "virtual",
diff --git a/northd/northd.c b/northd/northd.c
index cff0b21f4..1ffabfec5 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -1362,6 +1362,12 @@ lsp_is_router(const struct nbrec_logical_switch_port 
*nbsp)
      return !strcmp(nbsp->type, "router");
  }
+static bool
+lsp_is_switch(const struct nbrec_logical_switch_port *nbsp)
+{
+    return !strcmp(nbsp->type, "switch");
+}
+
  static bool
  lsp_is_remote(const struct nbrec_logical_switch_port *nbsp)
  {
@@ -1449,7 +1455,8 @@ lsp_force_fdb_lookup(const struct ovn_port *op)
  static struct ovn_port *
  ovn_port_get_peer(const struct hmap *lr_ports, struct ovn_port *op)
  {
-    if (!op->nbsp || !lsp_is_router(op->nbsp) || is_cr_port(op)) {
+    if (!op->nbsp || (!lsp_is_router(op->nbsp) && !lsp_is_switch(op->nbsp))
+        || is_cr_port(op)) {
          return NULL;
      }
@@ -2114,6 +2121,12 @@ parse_lsp_addrs(struct ovn_port *op)
      }
      op->n_lsp_non_router_addrs = op->n_lsp_addrs;
+ /* Addresses are not leaked between directly connected switches, so
+     * we should expect unknown addresses behind the port. */
+    if (lsp_is_switch(nbsp)) {
+        op->has_unknown = true;
+    }
+
      op->ps_addrs
          = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
      for (size_t j = 0; j < nbsp->n_port_security; j++) {
@@ -2401,7 +2414,8 @@ join_logical_ports(const struct sbrec_port_binding_table 
*sbrec_pb_table,
      }
/* Connect logical router ports, and logical switch ports of type "router",
-     * to their peers. */
+     * to their peers.  As well as logical switch ports of type "switch" to
+     * theirs. */
      struct ovn_port *op;
      HMAP_FOR_EACH (op, key_node, ports) {
          if (op->nbsp && lsp_is_router(op->nbsp) && !op->primary_port) {
@@ -2452,6 +2466,20 @@ join_logical_ports(const struct sbrec_port_binding_table 
*sbrec_pb_table,
                          arp_proxy, op->nbsp->name);
                  }
              }
+        } else if (op->nbsp && op->nbsp->peer && lsp_is_switch(op->nbsp)) {
+            struct ovn_port *peer = ovn_port_find(ports, op->nbsp->peer);
+
+            if (!peer) {
+                continue;
+            }
+            if (!peer->nbsp || !lsp_is_switch(peer->nbsp)) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+
+                VLOG_WARN_RL(&rl, "Bad configuration: The peer of a switch "
+                                  "port %s in not a 'switch' port", op->key);
+                continue;
+            }

One thing that's missing is a check to ensure that op->peer->nbsp->peer is op->nbsp->name. Right now it's possible for port A to have B as its peer, but B has C as its peer. It's also possible for A to have B as its peer, and for B to not have a peer configured at all.

I think if op->peer is B, but B->peer is not op, then we should keep op->peer NULL to prevent setting invalid or incomplete peer settings on the southbound Port_Binding.

+            op->peer = peer;
          } else if (op->nbrp && op->nbrp->peer && !is_cr_port(op)) {
              struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
              if (peer) {
@@ -3255,9 +3283,15 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
                  sbrec_port_binding_set_chassis(op->sb, NULL);
              }
+ if (lsp_is_switch(op->nbsp) && op->peer) {
+                smap_add(&options, "peer", op->peer->key);
+            }
+
              sbrec_port_binding_set_options(op->sb, &options);
              smap_destroy(&options);
-            if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
+            if (lsp_is_switch(op->nbsp)) {
+                sbrec_port_binding_set_type(op->sb, "patch");
+            } else if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
                  sbrec_port_binding_set_type(op->sb, op->nbsp->type);
              } else {
                  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
@@ -19413,7 +19447,7 @@ handle_port_binding_changes(struct ovsdb_idl_txn 
*ovnsb_txn,
bool up = false; - if (lsp_is_router(op->nbsp)) {
+        if (lsp_is_router(op->nbsp) || lsp_is_switch(op->nbsp)) {
              up = true;
          } else if (sb->chassis) {
              up = !smap_get_bool(&sb->chassis->other_config, "is-remote", 
false)
diff --git a/northd/northd.h b/northd/northd.h
index 9457a7be6..5bc3456f3 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -645,6 +645,8 @@ struct ovn_port {
       *
       *     - Two connected logical router ports have each other as peer.
       *
+     *     - Two connected logical switch ports have each other as peer.
+     *
       *     - Other kinds of ports have no peer. */
      struct ovn_port *peer;
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index e7aa6b2b1..dc2b82fdd 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@
  {
      "name": "OVN_Northbound",
-    "version": "7.9.0",
-    "cksum": "2414335430 38682",
+    "version": "7.10.0",
+    "cksum": "3177074739 38756",
      "tables": {
          "NB_Global": {
              "columns": {
@@ -155,6 +155,7 @@
                  "port_security": {"type": {"key": "string",
                                             "min": 0,
                                             "max": "unlimited"}},
+                "peer": {"type": {"key": "string", "min": 0, "max": 1}},

Why is the type "string" here? Why not use a reference to a Logical_Switch_Port row? Would that violate OVSDB rules for some reason?

                  "up": {"type": {"key": "boolean", "min": 0, "max": 1}},
                  "enabled": {"type": {"key": "boolean", "min": 0, "max": 1}},
                  "dhcpv4_options": {"type": {"key": {"type": "uuid",
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 24ef12f3b..3295295dc 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -987,6 +987,16 @@
              to which this logical switch port is connected.
            </dd>
+ <dt><code>switch</code></dt>
+          <dd>
+            A connection to another logical switch.  The value of <ref
+            column="peer" /> specifies the <ref column="name"/> of the
+            <ref table="Logical_Switch_Port"/> to which this logical switch
+            port is connected.  Such ports always have an implicit "unknown"
+            address, because the address information is not leaked between
+            directly connected switches.
+          </dd>
+
            <dt><code>localnet</code></dt>
            <dd>
              A connection to a locally accessible network from
@@ -1847,6 +1857,17 @@
        </column>
      </group>
+ <column name="peer">
+      <p>
+        For a switch port used to connect two logical switches, this
+        identifies the other switch port in the pair by <ref column="name"/>.
+      </p>

Is there any reason that the peer switch port can't be referred to by its UUID or name? If this must be the name, then I would suggest changing the name of this column to "peer_name" to better indicate that logical switch port UUIDs cannot be used.

+
+      <p>
+        For a switch port attached to a logical router, this column is empty.
+      </p>
+    </column>
+
      <group title="DHCP">
        <column name="dhcpv4_options">
          This column defines the DHCPv4 Options to be included by the
diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
index d4f61cb77..7a6529ee5 100644
--- a/tests/ovn-nbctl.at
+++ b/tests/ovn-nbctl.at
@@ -2419,6 +2419,11 @@ AT_CHECK([ovn-nbctl lsp-get-type lp0], [0], [dnl
  router
  ])
+AT_CHECK([ovn-nbctl lsp-set-type lp0 switch])
+AT_CHECK([ovn-nbctl lsp-get-type lp0], [0], [dnl
+switch
+])
+
  AT_CHECK([ovn-nbctl lsp-set-type lp0 localnet])
  AT_CHECK([ovn-nbctl lsp-get-type lp0], [0], [dnl
  localnet
@@ -2460,8 +2465,22 @@ ovn-nbctl: Logical switch port type 'eggs' is 
unrecognized. Not setting type.
dnl Empty string should work too
  AT_CHECK([ovn-nbctl lsp-set-type lp0 ""])
-AT_CHECK([ovn-nbctl lsp-get-type lp0], [0], [dnl
+AT_CHECK([ovn-nbctl lsp-get-type lp0], [0], [
+])
+dnl Only the 'switch' type should accept peer.
+AT_CHECK([ovn-nbctl lsp-set-type lp0 router peer=qwe], [1], [], [dnl
+ovn-nbctl: Peer can only be set for a logical switch port with type 'switch'.
+])
+AT_CHECK([ovn-nbctl lsp-get-type lp0], [0], [
+])
+dnl Check that peer can be set.
+AT_CHECK([ovn-nbctl lsp-set-type lp0 switch peer=my-peer])
+AT_CHECK([ovn-nbctl lsp-get-type lp0], [0], [dnl
+switch
+])
+AT_CHECK([ovn-nbctl get logical-switch-port lp0 peer], [0], [dnl
+my-peer
  ])])
dnl ---------------------------------------------------------------------
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 2b1791de0..5e108acd1 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -170,6 +170,23 @@ wait_row_count nb:Logical_Switch_Port 1 name=S1-vm1 
'up=true'
  AT_CLEANUP
  ])
+OVN_FOR_EACH_NORTHD_NO_HV([
+AT_SETUP([check up state of switch-switch LSP])
+ovn_start
+
+check ovn-nbctl ls-add S1
+check ovn-nbctl lsp-add S1 S1-p1
+check ovn-nbctl --wait=sb lsp-set-type S1-p1 switch peer=S2-p2
+
+check ovn-nbctl ls-add S2
+check ovn-nbctl lsp-add S2 S2-p2
+check ovn-nbctl --wait=sb lsp-set-type S2-p2 switch peer=S1-p1
+AT_CHECK([test x`ovn-nbctl lsp-get-up S1-p1` = xup])
+AT_CHECK([test x`ovn-nbctl lsp-get-up S2-p2` = xup])
+
+AT_CLEANUP
+])
+
  OVN_FOR_EACH_NORTHD_NO_HV([
  AT_SETUP([check up state of router LSP linked to a distributed LR])
  ovn_start
@@ -7298,7 +7315,47 @@ AT_CHECK([grep -e "ls_in_l2_lkup.*S1-vm1" S1flows | 
ovn_strip_lflows], [0], [dnl
  AT_CLEANUP
  ])
+OVN_FOR_EACH_NORTHD_NO_HV([
+AT_SETUP([implicit unknown addresses on switch-switch LSPs])

This test demonstrates that switch ports of type "switch" have implicit "unknown" addresses on them. But what happens if a user configures explicit IP/MAC addresses on a switch port of type "switch"?

i.e. ovn-nbctl lsp-set-addresses S1-S2 "<some_mac> <some_IP>" ?

This test should probably verify that explicit addresses are ignored and that the addresses are still treated as "unknown" on these types of ports.

+ovn_start NORTHD_TYPE
+check ovn-nbctl ls-add S1
+check ovn-nbctl lsp-add S1 S1-vm
+check ovn-nbctl lsp-set-addresses S1-vm "50:54:00:00:00:01 192.168.0.1"
+
+check ovn-nbctl ls-add S2
+check ovn-nbctl lsp-add S2 S2-vm
+check ovn-nbctl lsp-set-addresses S2-vm "50:54:00:00:00:02 192.168.0.2"
+check ovn-nbctl lsp-add S1 S1-S2
+check ovn-nbctl lsp-add S2 S2-S1
+check ovn-nbctl lsp-set-type S1-S2 switch peer=S2-S1
+check ovn-nbctl lsp-set-type S2-S1 switch peer=S1-S2
+check ovn-nbctl --wait=sb sync
+
+ovn-sbctl dump-flows S1 > S1flows
+AT_CAPTURE_FILE([S1flows])
+
+dnl Check that S2-vm address is not known on S1 and the forwarding to
+dnl _MC_unknown group is configured.
+AT_CHECK([grep -E "ls_in_l2_lkup.*S1-|unknown" S1flows | ovn_strip_lflows], 
[0], [dnl
+  table=??(ls_in_l2_lkup      ), priority=50   , match=(eth.dst == 50:54:00:00:00:01), 
action=(outport = "S1-vm"; output;)
+  table=??(ls_in_l2_unknown   ), priority=0    , match=(1), action=(output;)
+  table=??(ls_in_l2_unknown   ), priority=50   , match=(outport == "none"), 
action=(outport = "_MC_unknown"; output;)
+])
+
+ovn-sbctl dump-flows S2 > S2flows
+AT_CAPTURE_FILE([S2flows])
+
+dnl Check that S1-vm address is not known on S2 and the forwarding to
+dnl _MC_unknown group is configured.
+AT_CHECK([grep -E "ls_in_l2_lkup.*S2-|unknown" S2flows | ovn_strip_lflows], 
[0], [dnl
+  table=??(ls_in_l2_lkup      ), priority=50   , match=(eth.dst == 50:54:00:00:00:02), 
action=(outport = "S2-vm"; output;)
+  table=??(ls_in_l2_unknown   ), priority=0    , match=(1), action=(output;)
+  table=??(ls_in_l2_unknown   ), priority=50   , match=(outport == "none"), 
action=(outport = "_MC_unknown"; output;)
+])
+
+AT_CLEANUP
+])
# Duplicated datapaths shouldn't be created, but in case it is created because
  # of bug or dirty data, it should be properly deleted instead of causing
diff --git a/tests/ovn.at b/tests/ovn.at
index e9144b0cd..5e26a0e45 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -7906,6 +7906,239 @@ OVN_CLEANUP([hv1],[hv2])
  AT_CLEANUP
  ])
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([spine-leaf: 1 HV, 2 LSs, connected via spine switch])
+AT_KEYWORDS([spine leaf])
+AT_SKIP_IF([test $HAVE_SCAPY = no])
+ovn_start
+
+# Logical network:
+# Single network 191.168.1.0/24.  Two switches with VIF ports, connected
+# to a spine logical switch via 'switch' ports.
+
+check ovn-nbctl ls-add spine
+
+check ovn-nbctl ls-add ls1
+check ovn-nbctl ls-add ls2
+
+# Connect ls1 to spine.
+check ovn-nbctl lsp-add spine spine-to-ls1
+check ovn-nbctl lsp-add ls1 ls1-to-spine
+check ovn-nbctl lsp-set-type spine-to-ls1 switch peer=ls1-to-spine
+check ovn-nbctl lsp-set-type ls1-to-spine switch peer=spine-to-ls1
+
+# Connect ls2 to spine.
+check ovn-nbctl lsp-add spine spine-to-ls2
+check ovn-nbctl lsp-add ls2 ls2-to-spine
+check ovn-nbctl lsp-set-type spine-to-ls2 switch peer=ls2-to-spine
+check ovn-nbctl lsp-set-type ls2-to-spine switch peer=spine-to-ls2
+
+# Create logical port ls1-lp1 in ls1
+check ovn-nbctl lsp-add ls1 ls1-lp1 \
+-- lsp-set-addresses ls1-lp1 "f0:00:00:01:02:01 172.16.1.1"
+# Create logical port ls1-lp2 in ls1
+check ovn-nbctl lsp-add ls1 ls1-lp2 \
+-- lsp-set-addresses ls1-lp2 "f0:00:00:01:02:02 172.16.1.2"
+
+# Create logical port ls2-lp1 in ls2
+check ovn-nbctl lsp-add ls2 ls2-lp1 \
+-- lsp-set-addresses ls2-lp1 "f0:00:00:01:02:03 172.16.1.3"
+# Create logical port ls2-lp2 in ls2
+check ovn-nbctl lsp-add ls2 ls2-lp2 \
+-- lsp-set-addresses ls2-lp2 "f0:00:00:01:02:04 172.16.1.4"
+
+# Create one hypervisor and create OVS ports corresponding to logical ports.
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int vif1 -- \
+    set interface vif1 external-ids:iface-id=ls1-lp1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-int vif2 -- \
+    set interface vif2 external-ids:iface-id=ls1-lp2 \
+    options:tx_pcap=hv1/vif2-tx.pcap \
+    options:rxq_pcap=hv1/vif2-rx.pcap \
+    ofport-request=2
+
+ovs-vsctl -- add-port br-int vif3 -- \
+    set interface vif3 external-ids:iface-id=ls2-lp1 \
+    options:tx_pcap=hv1/vif3-tx.pcap \
+    options:rxq_pcap=hv1/vif3-rx.pcap \
+    ofport-request=3
+ovs-vsctl -- add-port br-int vif4 -- \
+    set interface vif4 external-ids:iface-id=ls2-lp2 \
+    options:tx_pcap=hv1/vif4-tx.pcap \
+    options:rxq_pcap=hv1/vif4-rx.pcap \
+    ofport-request=4
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+ovn-sbctl dump-flows > sbflows
+AT_CAPTURE_FILE([sbflows])
+
+# Send ip packets between the two ports.
+src_mac="f0:00:00:01:02:01"
+dst_mac="f0:00:00:01:02:03"
+src_ip=172.16.1.1
+dst_ip=172.16.1.3
+packet=$(fmt_pkt "Ether(dst='${dst_mac}', src='${src_mac}')/ \
+                  IP(src='${src_ip}', dst='${dst_ip}')/ \
+                  UDP(sport=1538, dport=4369)")
+check as hv1 ovs-appctl netdev-dummy/receive vif1 $packet
+
+# Check that datapath is not doing any extra work.
+AT_CHECK([as hv1 ovs-appctl ofproto/trace --names \
+                br-int in_port=vif1 $packet | tail -2], [0], [dnl
+Megaflow: 
recirc_id=0,eth,ip,in_port=vif1,dl_src=f0:00:00:01:02:01,dl_dst=f0:00:00:01:02:03,nw_frag=no
+Datapath actions: vif3
+])
+
+# No modifications expected.
+AT_CHECK([echo $packet > expected])
+
+AT_CHECK([touch empty])
+
+# Check that it is delivered where needed and not delivered where not.
+OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [empty])
+OVN_CHECK_PACKETS([hv1/vif3-tx.pcap], [expected])
+OVN_CHECK_PACKETS([hv1/vif4-tx.pcap], [empty])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([spine-leaf: 2 HVs, 2 LSs, connected via distributed spine switch])
+AT_KEYWORDS([spine leaf])
+AT_SKIP_IF([test $HAVE_SCAPY = no])
+ovn_start
+
+# Logical network:
+# Single network 172.16.1.0/24.  Two switches with VIF ports on two HVs,
+# connected to a spine distributed logical switch via 'switch' ports.
+
+check ovn-nbctl ls-add spine
+
+check ovn-nbctl ls-add ls1
+check ovn-nbctl ls-add ls2
+
+# Connect ls1 to spine.
+check ovn-nbctl lsp-add spine spine-to-ls1
+check ovn-nbctl lsp-add ls1 ls1-to-spine
+check ovn-nbctl lsp-set-type spine-to-ls1 switch peer=ls1-to-spine
+check ovn-nbctl lsp-set-type ls1-to-spine switch peer=spine-to-ls1
+
+# Connect ls2 to spine.
+check ovn-nbctl lsp-add spine spine-to-ls2
+check ovn-nbctl lsp-add ls2 ls2-to-spine
+check ovn-nbctl lsp-set-type spine-to-ls2 switch peer=ls2-to-spine
+check ovn-nbctl lsp-set-type ls2-to-spine switch peer=spine-to-ls2
+
+# Create logical port ls1-lp1 in ls1
+check ovn-nbctl lsp-add ls1 ls1-lp1 \
+-- lsp-set-addresses ls1-lp1 "f0:00:00:01:02:01 172.16.1.1"
+# Create logical port ls1-lp2 in ls1
+check ovn-nbctl lsp-add ls1 ls1-lp2 \
+-- lsp-set-addresses ls1-lp2 "f0:00:00:01:02:02 172.16.1.2"
+
+# Create logical port ls2-lp1 in ls2
+check ovn-nbctl lsp-add ls2 ls2-lp1 \
+-- lsp-set-addresses ls2-lp1 "f0:00:00:01:02:03 172.16.1.3"
+# Create logical port ls2-lp2 in ls2
+check ovn-nbctl lsp-add ls2 ls2-lp2 \
+-- lsp-set-addresses ls2-lp2 "f0:00:00:01:02:04 172.16.1.4"
+
+# Create hypervisors and OVS ports corresponding to logical ports.
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int vif1 -- \
+    set interface vif1 external-ids:iface-id=ls1-lp1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-int vif2 -- \
+    set interface vif2 external-ids:iface-id=ls1-lp2 \
+    options:tx_pcap=hv1/vif2-tx.pcap \
+    options:rxq_pcap=hv1/vif2-rx.pcap \
+    ofport-request=2
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+ovs-vsctl -- add-port br-int vif3 -- \
+    set interface vif3 external-ids:iface-id=ls2-lp1 \
+    options:tx_pcap=hv2/vif3-tx.pcap \
+    options:rxq_pcap=hv2/vif3-rx.pcap \
+    ofport-request=3
+ovs-vsctl -- add-port br-int vif4 -- \
+    set interface vif4 external-ids:iface-id=ls2-lp2 \
+    options:tx_pcap=hv2/vif4-tx.pcap \
+    options:rxq_pcap=hv2/vif4-rx.pcap \
+    ofport-request=4
+
+OVN_POPULATE_ARP
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+ovn-sbctl dump-flows > sbflows
+AT_CAPTURE_FILE([sbflows])
+
+# Send ip packets between the two ports.
+src_mac="f0:00:00:01:02:01"
+dst_mac="f0:00:00:01:02:03"
+src_ip=172.16.1.1
+dst_ip=172.16.1.3
+packet=$(fmt_pkt "Ether(dst='${dst_mac}', src='${src_mac}')/ \
+                  IP(src='${src_ip}', dst='${dst_ip}')/ \
+                  UDP(sport=1538, dport=4369)")
+check as hv1 ovs-appctl netdev-dummy/receive vif1 $packet
+
+# Check that datapath is not doing any extra work and sends the packet out
+# through the tunnel.
+AT_CHECK([as hv1 ovs-appctl ofproto/trace --names \
+                br-int in_port=vif1 $packet > ofproto-trace-1])
+AT_CAPTURE_FILE([ofproto-trace-1])
+AT_CHECK([grep 'Megaflow:' ofproto-trace-1], [0], [dnl
+Megaflow: 
recirc_id=0,eth,ip,in_port=vif1,dl_src=f0:00:00:01:02:01,dl_dst=f0:00:00:01:02:03,nw_ecn=0,nw_frag=no
+])
+AT_CHECK([grep -q \
+  'Datapath actions: tnl_push(tnl_port(genev_sys_6081).*out_port(br-phys))' \
+  ofproto-trace-1])
+
+# It's a little problematic to trace the other side, but we can check
+# datapath actions.
+AT_CHECK([as hv2 ovs-appctl dpctl/dump-flows --names \
+            | grep actions | sed 's/.*\(actions:.*\)/\1/' | sort], [0], [dnl
+actions:tnl_pop(genev_sys_6081)
+actions:vif3
+])
+
+# No modifications expected.
+AT_CHECK([echo $packet > expected])
+
+AT_CHECK([touch empty])
+
+# Check that it is delivered where needed and not delivered where not.
+OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [empty])
+OVN_CHECK_PACKETS([hv2/vif3-tx.pcap], [expected])
+OVN_CHECK_PACKETS([hv2/vif4-tx.pcap], [empty])
+
+OVN_CLEANUP([hv1], [hv2])
+AT_CLEANUP
+])
+
  OVN_FOR_EACH_NORTHD([
  AT_SETUP([icmp_reply: 1 HVs, 2 LSs, 1 lport/LS, 1 LR])
  AT_KEYWORDS([router-icmp-reply])
diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
index 63cefd119..19f121142 100644
--- a/utilities/ovn-nbctl.8.xml
+++ b/utilities/ovn-nbctl.8.xml
@@ -738,7 +738,8 @@
          or <code>disabled</code>.
        </dd>
- <dt><code>lsp-set-type</code> <var>port</var> <var>type</var></dt>
+      <dt><code>lsp-set-type</code> <var>port</var> <var>type</var>
+          [<code>peer=</code><var>peer</var>]</dt>
        <dd>
          <p>
            Set the type for the logical port.  The type must be one of the 
following:
@@ -755,6 +756,13 @@
              A connection to a logical router.
            </dd>
+ <dt><code>switch</code></dt>
+          <dd>
+            A connection to another logical switch.  The optional argument
+            <code>peer</code> identifies a logical switch port that connects
+            to this one.
+          </dd>
+
            <dt><code>localnet</code></dt>
            <dd>
              A connection to a locally accessible network from each 
ovn-controller
diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
index f5277af7c..01985cb82 100644
--- a/utilities/ovn-nbctl.c
+++ b/utilities/ovn-nbctl.c
@@ -881,6 +881,9 @@ print_ls(const struct nbrec_logical_switch *ls, struct ds 
*s)
          if (router_port) {
              ds_put_format(s, "        router-port: %s\n", router_port);
          }
+        if (lsp->peer) {
+            ds_put_format(s, "        peer: %s\n", lsp->peer);
+        }
      }
  }
@@ -913,6 +916,7 @@ nbctl_pre_show(struct ctl_context *ctx)
      ovsdb_idl_add_column(ctx->idl, 
&nbrec_logical_switch_port_col_external_ids);
      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_type);
      ovsdb_idl_add_column(ctx->idl, 
&nbrec_logical_switch_port_col_parent_name);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_peer);
      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_tag);
      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_addresses);
      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_options);
@@ -1731,6 +1735,7 @@ nbctl_pre_lsp_type(struct ctl_context *ctx)
  {
      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_name);
      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_type);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_peer);
  }
static void
@@ -1857,15 +1862,29 @@ nbctl_lsp_set_type(struct ctl_context *ctx)
  {
      const char *id = ctx->argv[1];
      const char *type = ctx->argv[2];
+    const char *peer = (ctx->argc < 4) ? NULL : ctx->argv[3];
      const struct nbrec_logical_switch_port *lsp = NULL;
+ if (peer && strncmp(peer, "peer=", 5)) {
+        ctl_error(ctx, "Unrecognized argument: %s\n", peer);
+        return;
+    }
+
      char *error = lsp_by_name_or_uuid(ctx, id, true, &lsp);
      if (error) {
          ctx->error = error;
          return;
      }
      if (ovn_is_known_nb_lsp_type(type)) {
+        if (peer && strcmp(type, "switch")) {
+            ctl_error(ctx, "Peer can only be set for a logical switch port "
+                           "with type 'switch'.");
+            return;
+        }
          nbrec_logical_switch_port_set_type(lsp, type);
+        if (peer) {
+            nbrec_logical_switch_port_set_peer(lsp, peer + 5);
+        } >       } else {
          ctl_error(ctx, "Logical switch port type '%s' is unrecognized. "
                    "Not setting type.", type);
@@ -8010,7 +8029,7 @@ static const struct ctl_command_syntax nbctl_commands[] = 
{
        nbctl_lsp_set_enabled, NULL, "", RW },
      { "lsp-get-enabled", 1, 1, "PORT", nbctl_pre_lsp_enabled,
        nbctl_lsp_get_enabled, NULL, "", RO },
-    { "lsp-set-type", 2, 2, "PORT TYPE", nbctl_pre_lsp_type,
+    { "lsp-set-type", 2, 3, "PORT TYPE [peer=PEER]", nbctl_pre_lsp_type,
        nbctl_lsp_set_type, NULL, "", RW },
      { "lsp-get-type", 1, 1, "PORT", nbctl_pre_lsp_type,
        nbctl_lsp_get_type, NULL, "", RO },

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to