Commit dd527a283cd8 partially supported multiple encap IPs. It supported
remote encap IP selection based on the destination VIF's encap_ip
configuration. This patch adds the support for selecting local encap IP
based on the source VIF's encap_ip configuration.

Co-authored-by: Lei Huang <[email protected]>
Signed-off-by: Lei Huang <[email protected]>
Signed-off-by: Han Zhou <[email protected]>
---
 NEWS                            |   3 +
 controller/chassis.c            |   2 +-
 controller/local_data.c         |   2 +-
 controller/local_data.h         |   2 +-
 controller/ovn-controller.8.xml |  30 ++++++-
 controller/ovn-controller.c     |  49 ++++++++++++
 controller/physical.c           | 134 ++++++++++++++++++++++----------
 controller/physical.h           |   2 +
 include/ovn/logical-fields.h    |   4 +-
 ovn-architecture.7.xml          |  18 ++++-
 tests/ovn.at                    |  51 +++++++++++-
 11 files changed, 243 insertions(+), 54 deletions(-)

diff --git a/NEWS b/NEWS
index 5f267b4c64cc..5a3eed608617 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,9 @@ Post v23.09.0
   - ovn-northd-ddlog has been removed.
   - A new LSP option "enable_router_port_acl" has been added to enable
     conntrack for the router port whose peer is l3dgw_port if set it true.
+  - Support selecting encapsulation IP based on the source/destination VIF's
+    setting. See ovn-controller(8) 'external_ids:ovn-encap-ip' for more
+    details.
 
 OVN v23.09.0 - 15 Sep 2023
 --------------------------
diff --git a/controller/chassis.c b/controller/chassis.c
index a6f13ccc42d5..55f2beb37674 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -61,7 +61,7 @@ struct ovs_chassis_cfg {
 
     /* Set of encap types parsed from the 'ovn-encap-type' external-id. */
     struct sset encap_type_set;
-    /* Set of encap IPs parsed from the 'ovn-encap-type' external-id. */
+    /* Set of encap IPs parsed from the 'ovn-encap-ip' external-id. */
     struct sset encap_ip_set;
     /* Interface type list formatted in the OVN-SB Chassis required format. */
     struct ds iface_types;
diff --git a/controller/local_data.c b/controller/local_data.c
index a9092783958f..8606414f8728 100644
--- a/controller/local_data.c
+++ b/controller/local_data.c
@@ -514,7 +514,7 @@ chassis_tunnels_destroy(struct hmap *chassis_tunnels)
  */
 struct chassis_tunnel *
 chassis_tunnel_find(const struct hmap *chassis_tunnels, const char *chassis_id,
-                    char *remote_encap_ip, char *local_encap_ip)
+                    char *remote_encap_ip, const char *local_encap_ip)
 {
     /*
      * If the specific encap_ip is given, look for the chassisid_ip entry,
diff --git a/controller/local_data.h b/controller/local_data.h
index bab95bcc3824..ca3905bd20e6 100644
--- a/controller/local_data.h
+++ b/controller/local_data.h
@@ -150,7 +150,7 @@ bool local_nonvif_data_handle_ovs_iface_changes(
 struct chassis_tunnel *chassis_tunnel_find(const struct hmap *chassis_tunnels,
                                            const char *chassis_id,
                                            char *remote_encap_ip,
-                                           char *local_encap_ip);
+                                           const char *local_encap_ip);
 
 bool get_chassis_tunnel_ofport(const struct hmap *chassis_tunnels,
                                const char *chassis_name,
diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index efa65e3fd927..5ebef048d721 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -176,10 +176,32 @@
 
       <dt><code>external_ids:ovn-encap-ip</code></dt>
       <dd>
-        The IP address that a chassis should use to connect to this node
-        using encapsulation types specified by
-        <code>external_ids:ovn-encap-type</code>. Multiple encapsulation IPs
-        may be specified with a comma-separated list.
+        <p>
+          The IP address that a chassis should use to connect to this node
+          using encapsulation types specified by
+          <code>external_ids:ovn-encap-type</code>. Multiple encapsulation IPs
+          may be specified with a comma-separated list.
+        </p>
+        <p>
+          In scenarios where multiple encapsulation IPs are present, distinct
+          tunnels are established for each remote chassis. These tunnels are
+          differentiated by setting unique <code>options:local_ip</code> and
+          <code>options:remote_ip</code> values in the tunnel interface. When
+          transmitting a packet to a remote chassis, the selection of local_ip
+          is guided by the <code>Interface:external_ids:encap-ip</code> from
+          the local OVSDB, corresponding to the VIF originating the packet, if
+          specified. The <code>Interface:external_ids:encap-ip</code> setting
+          of the VIF is also populated to the <code>Port_Binding</code>
+          table in the OVN SB database via the <code>encap</code> column.
+          Consequently, when a remote chassis needs to send a packet to a
+          port-binding associated with this VIF, it utilizes the tunnel with
+          the appropriate <code>options:remote_ip</code> that matches the
+          <code>ip</code> in <code>Port_Binding:encap</code>. This mechanism
+          is particularly beneficial for chassis with multiple physical
+          interfaces designated for tunneling, where each interface is
+          optimized for handling specific traffic associated with particular
+          VIFs.
+        </p>
       </dd>
 
       <dt><code>external_ids:ovn-encap-df_default</code></dt>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 856e5e270822..4ab57fe970fe 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4521,6 +4521,31 @@ struct ed_type_pflow_output {
     struct physical_debug debug;
 };
 
+static void
+parse_encap_ips(const struct ovsrec_open_vswitch_table *ovs_table,
+                size_t *n_encap_ips, const char * **encap_ips)
+{
+    const struct ovsrec_open_vswitch *cfg =
+        ovsrec_open_vswitch_table_first(ovs_table);
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
+    const char *encap_ips_str =
+        get_chassis_external_id_value(&cfg->external_ids, chassis_id,
+                                      "ovn-encap-ip", NULL);
+    struct sset encap_ip_set;
+    sset_init(&encap_ip_set);
+    sset_from_delimited_string(&encap_ip_set, encap_ips_str,  ",");
+
+    /* Sort the ips so that their index is deterministic. */
+    *encap_ips = sset_sort(&encap_ip_set);
+
+    /* Copy the strings so that we can destroy the sset. */
+    for (size_t i = 0; (*encap_ips)[i]; i++) {
+        (*encap_ips)[i] = xstrdup((*encap_ips)[i]);
+    }
+    *n_encap_ips = sset_count(&encap_ip_set);
+    sset_destroy(&encap_ip_set);
+}
+
 static void init_physical_ctx(struct engine_node *node,
                               struct ed_type_runtime_data *rt_data,
                               struct ed_type_non_vif_data *non_vif_data,
@@ -4572,6 +4597,7 @@ static void init_physical_ctx(struct engine_node *node,
         engine_get_input_data("ct_zones", node);
     struct simap *ct_zones = &ct_zones_data->current;
 
+    parse_encap_ips(ovs_table, &p_ctx->n_encap_ips, &p_ctx->encap_ips);
     p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
     p_ctx->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
     p_ctx->port_binding_table = port_binding_table;
@@ -4589,12 +4615,23 @@ static void init_physical_ctx(struct engine_node *node,
     p_ctx->patch_ofports = &non_vif_data->patch_ofports;
     p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels;
 
+
+
     struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx;
     p_ctx->if_mgr = ctrl_ctx->if_mgr;
 
     pflow_output_get_debug(node, &p_ctx->debug);
 }
 
+static void
+destroy_physical_ctx(struct physical_ctx *p_ctx)
+{
+    for (size_t i = 0; i < p_ctx->n_encap_ips; i++) {
+        free((char *)(p_ctx->encap_ips[i]));
+    }
+    free(p_ctx->encap_ips);
+}
+
 static void *
 en_pflow_output_init(struct engine_node *node OVS_UNUSED,
                              struct engine_arg *arg OVS_UNUSED)
@@ -4631,6 +4668,7 @@ en_pflow_output_run(struct engine_node *node, void *data)
     struct physical_ctx p_ctx;
     init_physical_ctx(node, rt_data, non_vif_data, &p_ctx);
     physical_run(&p_ctx, pflow_table);
+    destroy_physical_ctx(&p_ctx);
 
     engine_set_node_state(node, EN_UPDATED);
 }
@@ -4675,6 +4713,7 @@ pflow_output_if_status_mgr_handler(struct engine_node 
*node,
                 bool removed = sbrec_port_binding_is_deleted(binding);
                 if (!physical_handle_flows_for_lport(binding, removed, &p_ctx,
                                                      &pfo->flow_table)) {
+                    destroy_physical_ctx(&p_ctx);
                     return false;
                 }
             }
@@ -4684,11 +4723,13 @@ pflow_output_if_status_mgr_handler(struct engine_node 
*node,
             bool removed = sbrec_port_binding_is_deleted(pb);
             if (!physical_handle_flows_for_lport(pb, removed, &p_ctx,
                                                  &pfo->flow_table)) {
+                destroy_physical_ctx(&p_ctx);
                 return false;
             }
         }
         engine_set_node_state(node, EN_UPDATED);
     }
+    destroy_physical_ctx(&p_ctx);
     return true;
 }
 
@@ -4715,11 +4756,13 @@ pflow_output_sb_port_binding_handler(struct engine_node 
*node,
         bool removed = sbrec_port_binding_is_deleted(pb);
         if (!physical_handle_flows_for_lport(pb, removed, &p_ctx,
                                              &pfo->flow_table)) {
+            destroy_physical_ctx(&p_ctx);
             return false;
         }
     }
 
     engine_set_node_state(node, EN_UPDATED);
+    destroy_physical_ctx(&p_ctx);
     return true;
 }
 
@@ -4739,6 +4782,7 @@ pflow_output_sb_multicast_group_handler(struct 
engine_node *node, void *data)
     physical_handle_mc_group_changes(&p_ctx, &pfo->flow_table);
 
     engine_set_node_state(node, EN_UPDATED);
+    destroy_physical_ctx(&p_ctx);
     return true;
 }
 
@@ -4771,6 +4815,7 @@ pflow_output_runtime_data_handler(struct engine_node 
*node, void *data)
         if (tdp->tracked_type != TRACKED_RESOURCE_UPDATED) {
             /* Fall back to full recompute when a local datapath
              * is added or deleted. */
+            destroy_physical_ctx(&p_ctx);
             return false;
         }
 
@@ -4781,12 +4826,14 @@ pflow_output_runtime_data_handler(struct engine_node 
*node, void *data)
                 lport->tracked_type == TRACKED_RESOURCE_REMOVED ? true: false;
             if (!physical_handle_flows_for_lport(lport->pb, removed, &p_ctx,
                                                  &pfo->flow_table)) {
+                destroy_physical_ctx(&p_ctx);
                 return false;
             }
         }
     }
 
     engine_set_node_state(node, EN_UPDATED);
+    destroy_physical_ctx(&p_ctx);
     return true;
 }
 
@@ -4843,12 +4890,14 @@ pflow_output_activated_ports_handler(struct engine_node 
*node, void *data)
         if (pb) {
             if (!physical_handle_flows_for_lport(pb, false, &p_ctx,
                                                  &pfo->flow_table)) {
+                destroy_physical_ctx(&p_ctx);
                 return false;
             }
             tag_port_as_activated_in_engine(pp);
         }
     }
     engine_set_node_state(node, EN_UPDATED);
+    destroy_physical_ctx(&p_ctx);
     return true;
 }
 
diff --git a/controller/physical.c b/controller/physical.c
index e93bfbd2cffb..c192aed751d5 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -71,6 +71,8 @@ struct tunnel {
 static void
 load_logical_ingress_metadata(const struct sbrec_port_binding *binding,
                               const struct zone_ids *zone_ids,
+                              size_t n_encap_ips,
+                              const char **encap_ips,
                               struct ofpbuf *ofpacts_p);
 static int64_t get_vxlan_port_key(int64_t port_key);
 
@@ -138,14 +140,14 @@ put_resubmit(uint8_t table_id, struct ofpbuf *ofpacts)
  */
 static struct chassis_tunnel *
 get_port_binding_tun(const struct sbrec_encap *remote_encap,
-                     const struct sbrec_encap *local_encap,
                      const struct sbrec_chassis *chassis,
-                     const struct hmap *chassis_tunnels)
+                     const struct hmap *chassis_tunnels,
+                     const char *local_encap_ip)
 {
     struct chassis_tunnel *tun = NULL;
     tun = chassis_tunnel_find(chassis_tunnels, chassis->name,
                               remote_encap ? remote_encap->ip : NULL,
-                              local_encap ? local_encap->ip : NULL);
+                              local_encap_ip);
 
     if (!tun) {
         tun = chassis_tunnel_find(chassis_tunnels, chassis->name, NULL, NULL);
@@ -329,7 +331,8 @@ find_additional_encap_for_chassis(const struct 
sbrec_port_binding *pb,
 static struct ovs_list *
 get_remote_tunnels(const struct sbrec_port_binding *binding,
                    const struct sbrec_chassis *chassis,
-                   const struct hmap *chassis_tunnels)
+                   const struct hmap *chassis_tunnels,
+                   const char *local_encap_ip)
 {
     const struct chassis_tunnel *tun;
 
@@ -337,8 +340,8 @@ get_remote_tunnels(const struct sbrec_port_binding *binding,
     ovs_list_init(tunnels);
 
     if (binding->chassis && binding->chassis != chassis) {
-        tun = get_port_binding_tun(binding->encap, NULL, binding->chassis,
-                                   chassis_tunnels);
+        tun = get_port_binding_tun(binding->encap, binding->chassis,
+                chassis_tunnels, local_encap_ip);
         if (!tun) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
             VLOG_WARN_RL(
@@ -358,9 +361,9 @@ get_remote_tunnels(const struct sbrec_port_binding *binding,
         }
         const struct sbrec_encap *additional_encap;
         additional_encap = find_additional_encap_for_chassis(binding, chassis);
-        tun = get_port_binding_tun(additional_encap, NULL,
+        tun = get_port_binding_tun(additional_encap,
                                    binding->additional_chassis[i],
-                                   chassis_tunnels);
+                                   chassis_tunnels, local_encap_ip);
         if (!tun) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
             VLOG_WARN_RL(
@@ -384,36 +387,48 @@ put_remote_port_redirect_overlay(const struct 
sbrec_port_binding *binding,
                                  struct ofpbuf *ofpacts_p,
                                  const struct sbrec_chassis *chassis,
                                  const struct hmap *chassis_tunnels,
+                                 size_t n_encap_ips,
+                                 const char **encap_ips,
                                  struct ovn_desired_flow_table *flow_table)
 {
     /* Setup encapsulation */
-    struct ovs_list *tuns = get_remote_tunnels(binding, chassis,
-                                               chassis_tunnels);
-    if (!ovs_list_is_empty(tuns)) {
-        bool is_vtep_port = !strcmp(binding->type, "vtep");
-        /* rewrite MFF_IN_PORT to bypass OpenFlow loopback check for ARP/ND
-         * responder in L3 networks. */
-        if (is_vtep_port) {
-            put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16, ofpacts_p);
-        }
+    for (size_t i = 0; i < n_encap_ips; i++) {
+        struct ofpbuf *ofpacts_clone = ofpbuf_clone(ofpacts_p);
+
+        match_set_reg_masked(match, MFF_LOG_ENCAP_ID - MFF_REG0, i << 16,
+                             (uint32_t) 0xFFFF << 16);
+        struct ovs_list *tuns = get_remote_tunnels(binding, chassis,
+                                                   chassis_tunnels,
+                                                   encap_ips[i]);
+        if (!ovs_list_is_empty(tuns)) {
+            bool is_vtep_port = !strcmp(binding->type, "vtep");
+            /* rewrite MFF_IN_PORT to bypass OpenFlow loopback check for ARP/ND
+             * responder in L3 networks. */
+            if (is_vtep_port) {
+                put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16,
+                         ofpacts_clone);
+            }
 
-        struct tunnel *tun;
-        LIST_FOR_EACH (tun, list_node, tuns) {
-            put_encapsulation(mff_ovn_geneve, tun->tun,
-                              binding->datapath, port_key, is_vtep_port,
-                              ofpacts_p);
-            ofpact_put_OUTPUT(ofpacts_p)->port = tun->tun->ofport;
+            struct tunnel *tun;
+            LIST_FOR_EACH (tun, list_node, tuns) {
+                put_encapsulation(mff_ovn_geneve, tun->tun,
+                                  binding->datapath, port_key, is_vtep_port,
+                                  ofpacts_clone);
+                ofpact_put_OUTPUT(ofpacts_clone)->port = tun->tun->ofport;
+            }
+            put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_clone);
+            ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
+                            binding->header_.uuid.parts[0], match,
+                            ofpacts_clone, &binding->header_.uuid);
         }
-        put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
-        ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
-                        binding->header_.uuid.parts[0], match, ofpacts_p,
-                        &binding->header_.uuid);
-    }
-    struct tunnel *tun_elem;
-    LIST_FOR_EACH_POP (tun_elem, list_node, tuns) {
-        free(tun_elem);
+        struct tunnel *tun_elem;
+        LIST_FOR_EACH_POP (tun_elem, list_node, tuns) {
+            free(tun_elem);
+        }
+        free(tuns);
+
+        ofpbuf_delete(ofpacts_clone);
     }
-    free(tuns);
 }
 
 static void
@@ -673,7 +688,8 @@ put_replace_chassis_mac_flows(const struct simap *ct_zones,
         if (tag) {
             ofpact_put_STRIP_VLAN(ofpacts_p);
         }
-        load_logical_ingress_metadata(localnet_port, &zone_ids, ofpacts_p);
+        load_logical_ingress_metadata(localnet_port, &zone_ids, 0, NULL,
+                                      ofpacts_p);
         replace_mac = ofpact_put_SET_ETH_SRC(ofpacts_p);
         replace_mac->mac = router_port_mac;
 
@@ -853,7 +869,7 @@ put_zones_ofpacts(const struct zone_ids *zone_ids, struct 
ofpbuf *ofpacts_p)
 {
     if (zone_ids) {
         if (zone_ids->ct) {
-            put_load(zone_ids->ct, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
+            put_load(zone_ids->ct, MFF_LOG_CT_ZONE, 0, 16, ofpacts_p);
         }
         if (zone_ids->dnat) {
             put_load(zone_ids->dnat, MFF_LOG_DNAT_ZONE, 0, 32, ofpacts_p);
@@ -1014,9 +1030,23 @@ put_local_common_flows(uint32_t dp_key,
     }
 }
 
+static size_t
+encap_ip_to_id(size_t n_encap_ips, const char **encap_ips,
+               const char *ip)
+{
+    for (size_t i = 0; i < n_encap_ips; i++) {
+        if (!strcmp(ip, encap_ips[i])) {
+            return i;
+        }
+    }
+    return 0;
+}
+
 static void
 load_logical_ingress_metadata(const struct sbrec_port_binding *binding,
                               const struct zone_ids *zone_ids,
+                              size_t n_encap_ips,
+                              const char **encap_ips,
                               struct ofpbuf *ofpacts_p)
 {
     put_zones_ofpacts(zone_ids, ofpacts_p);
@@ -1026,6 +1056,14 @@ load_logical_ingress_metadata(const struct 
sbrec_port_binding *binding,
     uint32_t port_key = binding->tunnel_key;
     put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, ofpacts_p);
     put_load(port_key, MFF_LOG_INPORT, 0, 32, ofpacts_p);
+
+    /* Default encap_id 0. */
+    size_t encap_id = 0;
+    if (encap_ips && binding->encap) {
+        encap_id = encap_ip_to_id(n_encap_ips, encap_ips,
+                                  binding->encap->ip);
+    }
+    put_load(encap_id, MFF_LOG_ENCAP_ID, 16, 16, ofpacts_p);
 }
 
 static const struct sbrec_port_binding *
@@ -1070,7 +1108,7 @@ setup_rarp_activation_strategy(const struct 
sbrec_port_binding *binding,
     match_set_dl_type(&match, htons(ETH_TYPE_RARP));
     match_set_in_port(&match, ofport);
 
-    load_logical_ingress_metadata(binding, zone_ids, &ofpacts);
+    load_logical_ingress_metadata(binding, zone_ids, 0, NULL, &ofpacts);
 
     encode_controller_op(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
                          NX_CTLR_NO_METER, &ofpacts);
@@ -1384,7 +1422,7 @@ enforce_tunneling_for_multichassis_ports(
     }
 
     struct ovs_list *tuns = get_remote_tunnels(binding, chassis,
-                                               chassis_tunnels);
+                                               chassis_tunnels, NULL);
     if (ovs_list_is_empty(tuns)) {
         free(tuns);
         return;
@@ -1446,6 +1484,8 @@ consider_port_binding(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
                       const struct sbrec_chassis *chassis,
                       const struct physical_debug *debug,
                       const struct if_status_mgr *if_mgr,
+                      size_t n_encap_ips,
+                      const char **encap_ips,
                       struct ovn_desired_flow_table *flow_table,
                       struct ofpbuf *ofpacts_p)
 {
@@ -1479,9 +1519,10 @@ consider_port_binding(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
         ofpact_put_CT_CLEAR(ofpacts_p);
         put_load(0, MFF_LOG_DNAT_ZONE, 0, 32, ofpacts_p);
         put_load(0, MFF_LOG_SNAT_ZONE, 0, 32, ofpacts_p);
-        put_load(0, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
+        put_load(0, MFF_LOG_CT_ZONE, 0, 16, ofpacts_p);
         struct zone_ids peer_zones = get_zone_ids(peer, ct_zones);
-        load_logical_ingress_metadata(peer, &peer_zones, ofpacts_p);
+        load_logical_ingress_metadata(peer, &peer_zones, n_encap_ips,
+                                      encap_ips, ofpacts_p);
         put_load(0, MFF_LOG_FLAGS, 0, 32, ofpacts_p);
         put_load(0, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
         for (int i = 0; i < MFF_N_LOG_REGS; i++) {
@@ -1697,7 +1738,8 @@ consider_port_binding(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
          * as we're going to remove this with ofpbuf_pull() later. */
         uint32_t ofpacts_orig_size = ofpacts_p->size;
 
-        load_logical_ingress_metadata(binding, &zone_ids, ofpacts_p);
+        load_logical_ingress_metadata(binding, &zone_ids, n_encap_ips,
+                                      encap_ips, ofpacts_p);
 
         if (!strcmp(binding->type, "localport")) {
             /* mark the packet as incoming from a localport */
@@ -1904,7 +1946,7 @@ consider_port_binding(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
     } else {
         put_remote_port_redirect_overlay(
             binding, mff_ovn_geneve, port_key, &match, ofpacts_p,
-            chassis, chassis_tunnels, flow_table);
+            chassis, chassis_tunnels, n_encap_ips, encap_ips, flow_table);
     }
 out:
     if (ha_ch_ordered) {
@@ -2102,7 +2144,7 @@ consider_mc_group(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
 
         int zone_id = simap_get(ct_zones, port->logical_port);
         if (zone_id) {
-            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
+            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 16, &ofpacts);
         }
 
         const char *lport_name = (port->parent_port && *port->parent_port) ?
@@ -2278,7 +2320,10 @@ physical_eval_port_binding(struct physical_ctx *p_ctx,
                           p_ctx->patch_ofports,
                           p_ctx->chassis_tunnels,
                           pb, p_ctx->chassis, &p_ctx->debug,
-                          p_ctx->if_mgr, flow_table, &ofpacts);
+                          p_ctx->if_mgr,
+                          p_ctx->n_encap_ips,
+                          p_ctx->encap_ips,
+                          flow_table, &ofpacts);
     ofpbuf_uninit(&ofpacts);
 }
 
@@ -2402,7 +2447,10 @@ physical_run(struct physical_ctx *p_ctx,
                               p_ctx->patch_ofports,
                               p_ctx->chassis_tunnels, binding,
                               p_ctx->chassis, &p_ctx->debug,
-                              p_ctx->if_mgr, flow_table, &ofpacts);
+                              p_ctx->if_mgr,
+                              p_ctx->n_encap_ips,
+                              p_ctx->encap_ips,
+                              flow_table, &ofpacts);
     }
 
     /* Handle output to multicast groups, in tables 40 and 41. */
diff --git a/controller/physical.h b/controller/physical.h
index 1f1ed55efa16..7fe8ee3c18ed 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -66,6 +66,8 @@ struct physical_ctx {
     struct shash *local_bindings;
     struct simap *patch_ofports;
     struct hmap *chassis_tunnels;
+    size_t n_encap_ips;
+    const char **encap_ips;
     struct physical_debug debug;
 };
 
diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
index 272277ec4fa0..d3455a462a0d 100644
--- a/include/ovn/logical-fields.h
+++ b/include/ovn/logical-fields.h
@@ -37,7 +37,9 @@ enum ovn_controller_event {
 #define MFF_LOG_SNAT_ZONE  MFF_REG12  /* conntrack snat zone for gateway router
                                        * (32 bits). */
 #define MFF_LOG_CT_ZONE    MFF_REG13  /* Logical conntrack zone for lports
-                                       * (32 bits). */
+                                       * (0..15 of the 32 bits). */
+#define MFF_LOG_ENCAP_ID   MFF_REG13  /* Encap ID for lports
+                                       * (16..31 of the 32 bits). */
 #define MFF_LOG_INPORT     MFF_REG14  /* Logical input port (32 bits). */
 #define MFF_LOG_OUTPORT    MFF_REG15  /* Logical output port (32 bits). */
 
diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml
index 96294fe2b795..bfd8680cedfc 100644
--- a/ovn-architecture.7.xml
+++ b/ovn-architecture.7.xml
@@ -1247,8 +1247,8 @@
       chassis.  This is initialized to 0 at the beginning of the logical
         <!-- Keep the following in sync with MFF_LOG_CT_ZONE in
              ovn/lib/logical-fields.h. -->
-      ingress pipeline.  OVN stores this in Open vSwitch extension register
-      number 13.
+      ingress pipeline.  OVN stores this in the lower 16 bits of the Open
+      vSwitch extension register number 13.
     </dd>
 
     <dt>conntrack zone fields for routers</dt>
@@ -1263,6 +1263,20 @@
       traffic (for SNATing) in Open vSwitch extension register number 12.
     </dd>
 
+    <dt>Encap ID for logical ports</dt>
+    <dd>
+      A field that records an ID that indicates which encapsulation IP should
+      be used when sending packets to a remote chassis, according to the
+      original input logical port. This is useful when there are multiple IPs
+      available for encapsulation. The value only has local significance and is
+      not meaningful between chassis. This is initialized to 0 at the beginning
+      of the logical
+        <!-- Keep the following in sync with MFF_LOG_ENCAP_ID in
+             ovn/lib/logical-fields.h. -->
+      ingress pipeline.  OVN stores this in the higher 16 bits of the Open
+      vSwitch extension register number 13.
+    </dd>
+
     <dt>logical flow flags</dt>
     <dd>
       The logical flags are intended to handle keeping context between
diff --git a/tests/ovn.at b/tests/ovn.at
index 243fe0b8246c..e7f0c9681f60 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -30335,19 +30335,33 @@ AT_CLEANUP
 
 
 OVN_FOR_EACH_NORTHD([
-AT_SETUP([multiple encap ips tunnel creation])
+AT_SETUP([multiple encap ips selection based on VIF's encap_ip])
+AT_SKIP_IF([test $HAVE_SCAPY = no])
 ovn_start
 net_add n1
 
+ovn-nbctl ls-add ls1
+
 # 2 HVs, each with 2 encap-ips.
+# lspXY is the Yth LSP on hvX, with encap-ip 192.168.0.XY.
 for i in 1 2; do
     sim_add hv$i
     as hv$i
     ovs-vsctl add-br br-phys-$j
     ovn_attach n1 br-phys-$j 192.168.0.${i}1
     ovs-vsctl set open . 
external_ids:ovn-encap-ip=192.168.0.${i}1,192.168.0.${i}2
+
+    for j in 1 2; do
+        ovs-vsctl add-port br-int vif$i$j -- set Interface vif$i$j \
+            external_ids:iface-id=lsp$i$j \
+            external_ids:encap-ip=192.168.0.$i$j \
+            options:tx_pcap=hv$i/vif$i$j-tx.pcap 
options:rxq_pcap=hv$i/vif$i$j-rx.pcap
+        ovn-nbctl lsp-add ls1 lsp$i$j -- lsp-set-addresses lsp$i$j 
"f0:00:00:00:00:$i$j 10.0.0.$i$j"
+
+    done
 done
 
+wait_for_ports_up
 check ovn-nbctl --wait=hv sync
 
 check_tunnel_port() {
@@ -30376,6 +30390,41 @@ check_tunnel_port hv2 br-int 
[email protected]%192.168.0.21
 check_tunnel_port hv2 br-int [email protected]%192.168.0.22
 check_tunnel_port hv2 br-int [email protected]%192.168.0.22
 
+vif_to_hv() {
+    case $1 in dnl (
+        vif[[1]]?) echo hv1 ;; dnl (
+        vif[[2]]?) echo hv2 ;; dnl (
+        *) AT_FAIL_IF([:]) ;;
+    esac
+}
+
+# check_packet_tunnel SRC_VIF DST_VIF
+# Verify that a packet from SRC_VIF to DST_VIF goes through the corresponding
+# tunnel that matches the VIFs' encap_ip configurations.
+check_packet_tunnel() {
+    local src=$1 dst=$2
+    local src_mac=f0:00:00:00:00:$src
+    local dst_mac=f0:00:00:00:00:$dst
+    local src_ip=10.0.0.$src
+    local dst_ip=10.0.0.$dst
+    local local_encap_ip=192.168.0.$src
+    local remote_encap_ip=192.168.0.$dst
+    local packet=$(fmt_pkt "Ether(dst='${dst_mac}', src='${src_mac}')/ \
+                            IP(dst='${dst_ip}', src='${src_ip}')/ \
+                            ICMP(type=8)")
+    hv=`vif_to_hv vif$src`
+    as $hv
+    echo "vif$src -> vif$dst should go through tunnel $local_encap_ip -> 
$remote_encap_ip"
+    tunnel_ofport=$(ovs-vsctl --bare --column=ofport find interface 
options:local_ip=$local_encap_ip options:remote_ip=$remote_encap_ip)
+    AT_CHECK([test $(ovs-appctl ofproto/trace br-int in_port=vif$src $packet | 
grep "output\:" | awk -F ':' '{ print $2 }') == $tunnel_ofport])
+}
+
+for i in 1 2; do
+    for j in 1 2; do
+        check_packet_tunnel 1$i 2$j
+    done
+done
+
 OVN_CLEANUP([hv1],[hv2])
 AT_CLEANUP
 ])
-- 
2.38.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to