On Fri, Oct 31, 2025 at 1:28 PM Mark Michelson <[email protected]> wrote: > > Hi Han, I was only able to have a look at about 2/3 of the patch, but > I figured I would share my feedback anyway. Most of what I have to say > is low-level nitpicky things, rather than high-level commentary about > the feature itself. > > > On Wed, Oct 29, 2025 at 2:06 PM Han Zhou <[email protected]> wrote: > > > > This patch introduces flow-based tunnels as an alternative to > > traditional port-based tunnels, significantly reducing tunnel port count > > in large deployments. > > > > Flow-based tunnels use shared ports (one per tunnel type) with > > options:local_ip=flow and options:remote_ip=flow. OpenFlow flows > > dynamically set tunnel endpoints using set_field actions, reducing port > > count to O(T) where T is the number of tunnel types. > > > > The feature is experimental, and controlled by > > external_ids:ovn-enable-flow-based-tunnels (default: false). > > > > Some known limitations: > > - IPsec is not supported > > - BFD between tunnel endpoints is not supported, thus HA chassis not > > supported. > > > > Assisted-by: Cursor, with model: Claude Sonnet 4.5 > > Signed-off-by: Han Zhou <[email protected]> > > --- > > NEWS | 4 + > > controller/encaps.c | 247 ++++++++++++++++---- > > controller/encaps.h | 15 ++ > > controller/local_data.c | 202 ++++++++++++++++- > > controller/local_data.h | 34 ++- > > controller/ovn-controller.8.xml | 30 +++ > > controller/ovn-controller.c | 23 +- > > controller/physical.c | 384 ++++++++++++++++++++++++++++---- > > controller/physical.h | 3 + > > lib/ovn-util.h | 1 + > > tests/ovn-macros.at | 31 ++- > > tests/ovn.at | 135 ++++++++--- > > tests/ovs-macros.at | 4 +- > > 13 files changed, 988 insertions(+), 125 deletions(-) > > > > diff --git a/NEWS b/NEWS > > index 06cc18b41aff..405a04747666 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -58,6 +58,10 @@ Post v25.09.0 > > specified LS. > > - Add ovn-nbctl lsp-add-localnet-port which will create localnet port on > > specified LS. > > + - Added experimental flow-based tunnel support. Enable via > > + external_ids:ovn-enable-flow-based-tunnels=true to use shared tunnel > > + ports instead of per-chassis ports, reducing port count for large scale > > + environments. Default is disabled. > > > > OVN v25.09.0 - xxx xx xxxx > > -------------------------- > > diff --git a/controller/encaps.c b/controller/encaps.c > > index 67f8c9c9c82f..288959180402 100644 > > --- a/controller/encaps.c > > +++ b/controller/encaps.c > > @@ -29,14 +29,6 @@ > > > > VLOG_DEFINE_THIS_MODULE(encaps); > > > > -/* > > - * Given there could be multiple tunnels with different IPs to the same > > - * chassis we annotate the external_ids:ovn-chassis-id in tunnel port with > > - * <chassis_name>@<remote IP>%<local IP>. The external_id key > > - * "ovn-chassis-id" is kept for backward compatibility. > > - */ > > -#define OVN_TUNNEL_ID "ovn-chassis-id" > > - > > static char *current_br_int_name = NULL; > > > > void > > @@ -547,6 +539,179 @@ create_evpn_tunnels(struct tunnel_ctx *tc) > > } > > > > > > +bool > > +is_flow_based_tunnels_enabled( > > + const struct ovsrec_open_vswitch_table *ovs_table, > > + const struct sbrec_chassis *chassis) > > +{ > > + const struct ovsrec_open_vswitch *cfg = > > + ovsrec_open_vswitch_table_first(ovs_table); > > + > > + if (cfg) { > > + const char *enable_flow_tunnels = > > + get_chassis_external_id_value( > > Nit: You can use get_chassis_external_id_bool() here to get a bool > instead of a string. This saves you a strcmp() call later. > > > > + &cfg->external_ids, chassis->name, > > + "ovn-enable-flow-based-tunnels", "false"); > > + return !strcmp(enable_flow_tunnels, "true"); > > + } > > + return false; > > +} > > + > > +static char * > > +flow_based_tunnel_name(const char *tunnel_type, const char *chassis_idx) > > +{ > > + return xasprintf("ovn%s-%s", chassis_idx, tunnel_type); > > +} > > + > > +static void > > +flow_based_tunnel_ensure(struct tunnel_ctx *tc, const char *tunnel_type, > > + const char *port_name, > > + const struct sbrec_sb_global *sbg, > > + const struct ovsrec_open_vswitch_table *ovs_table) > > +{ > > + /* Check if flow-based tunnel already exists. */ > > + const struct ovsrec_port *existing_port = NULL; > > + for (size_t i = 0; i < tc->br_int->n_ports; i++) { > > + const struct ovsrec_port *port = tc->br_int->ports[i]; > > + if (!strcmp(port->name, port_name)) { > > + existing_port = port; > > + break; > > + } > > + } > > + > > + if (existing_port) { > > + return; > > + } > > + > > + /* Create flow-based tunnel port. */ > > + struct smap options = SMAP_INITIALIZER(&options); > > + smap_add(&options, "remote_ip", "flow"); > > + smap_add(&options, "local_ip", "flow"); > > + smap_add(&options, "key", "flow"); > > + > > + if (sbg->ipsec) { > > + /* For flow-based tunnels, we can't specify remote_name since > > + * remote chassis varies. IPsec will need to handle this differently. > > + */ > > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > > + VLOG_WARN_RL(&rl, "IPsec is not supported for flow-based tunnels. " > > + "Ignoring IPsec settings."); > > + } > > + > > + /* Add other tunnel options from OVS config. */ > > + const struct ovsrec_open_vswitch *cfg = > > + ovsrec_open_vswitch_table_first(ovs_table); > > + if (cfg) { > > + const char *encap_tos = > > + get_chassis_external_id_value(&cfg->external_ids, > > + tc->this_chassis->name, > > + "ovn-encap-tos", "none"); > > + if (encap_tos && strcmp(encap_tos, "none")) { > > + smap_add(&options, "tos", encap_tos); > > + } > > + > > + const char *encap_df = > > + get_chassis_external_id_value(&cfg->external_ids, > > + tc->this_chassis->name, > > + "ovn-encap-df_default", NULL); > > + if (encap_df) { > > + smap_add(&options, "df_default", encap_df); > > + } > > + } > > + > > + /* Create interface. */ > > + struct ovsrec_interface *iface = ovsrec_interface_insert(tc->ovs_txn); > > + ovsrec_interface_set_name(iface, port_name); > > + ovsrec_interface_set_type(iface, tunnel_type); > > + ovsrec_interface_set_options(iface, &options); > > + > > + /* Create port. */ > > + struct ovsrec_port *port = ovsrec_port_insert(tc->ovs_txn); > > + ovsrec_port_set_name(port, port_name); > > + ovsrec_port_set_interfaces(port, &iface, 1); > > + > > + /* Set external IDs to mark as flow-based tunnel using unified > > + * OVN_TUNNEL_ID. */ > > + const struct smap external_ids = SMAP_CONST2(&external_ids, > > + OVN_TUNNEL_ID, "flow", > > + "ovn-tunnel-type", > > + tunnel_type); > > + ovsrec_port_set_external_ids(port, &external_ids); > > + > > + /* Add to bridge. */ > > + ovsrec_bridge_update_ports_addvalue(tc->br_int, port); > > + > > + VLOG_INFO("Created flow-based %s tunnel port: %s", tunnel_type, port_name); > > + > > + smap_destroy(&options); > > +} > > + > > +static void > > +create_flow_based_tunnels(struct tunnel_ctx *tc, > > + const struct sbrec_sb_global *sbg) > > +{ > > + struct sset tunnel_types = SSET_INITIALIZER(&tunnel_types); > > + > > + for (size_t i = 0; i < tc->this_chassis->n_encaps; i++) { > > + sset_add(&tunnel_types, tc->this_chassis->encaps[i]->type); > > + } > > + > > + const char *tunnel_type; > > + SSET_FOR_EACH (tunnel_type, &tunnel_types) { > > + char *port_name = flow_based_tunnel_name(tunnel_type, > > + get_chassis_idx(tc->ovs_table)); > > + flow_based_tunnel_ensure(tc, tunnel_type, port_name, sbg, > > + tc->ovs_table); > > + /* remove any existing tunnel with this name from tracking so it > > + * doesn't get deleted. */ > > + struct tunnel_node *existing_tunnel = shash_find_data(&tc->tunnel, > > + port_name); > > + if (existing_tunnel) { > > + shash_find_and_delete(&tc->tunnel, port_name); > > + free(existing_tunnel); > > + } > > + free(port_name); > > + } > > + > > + sset_destroy(&tunnel_types); > > +} > > + > > +static void > > +create_port_based_tunnels(struct tunnel_ctx *tc, > > + const struct sbrec_chassis_table *chassis_table, > > + const struct sbrec_sb_global *sbg, > > + const struct sset *transport_zones) > > +{ > > + const struct sbrec_chassis *chassis_rec; > > + SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) { > > + if (strcmp(chassis_rec->name, tc->this_chassis->name)) { > > + /* Create tunnels to the other Chassis belonging to the > > + * same transport zone */ > > + if (!chassis_tzones_overlap(transport_zones, chassis_rec)) { > > + VLOG_DBG("Skipping encap creation for Chassis '%s' because " > > + "it belongs to different transport zones", > > + chassis_rec->name); > > + continue; > > + } > > + > > + if (smap_get_bool(&chassis_rec->other_config, "is-remote", false) > > + && !smap_get_bool(&tc->this_chassis->other_config, > > + "is-interconn", false)) { > > + VLOG_DBG("Skipping encap creation for Chassis '%s' because " > > + "it is remote but this chassis is not interconn.", > > + chassis_rec->name); > > + continue; > > + } > > + > > + if (chassis_tunnel_add(chassis_rec, sbg, tc->ovs_table, tc, > > + tc->this_chassis) == 0) { > > + VLOG_INFO("Creating encap for '%s' failed", chassis_rec->name); > > + continue; > > + } > > + } > > + } > > +} > > + > > void > > encaps_run(struct ovsdb_idl_txn *ovs_idl_txn, > > const struct ovsrec_bridge *br_int, > > @@ -561,6 +726,11 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn, > > return; > > } > > > > + bool use_flow_based = is_flow_based_tunnels_enabled(ovs_table, > > + this_chassis); > > + VLOG_DBG("Using %s tunnels for this chassis.", > > + use_flow_based ? "flow-based" : "port-based"); > > + > > if (!current_br_int_name) { > > /* The controller has just started, we need to look through all > > * bridges for old tunnel ports. */ > > @@ -590,8 +760,6 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn, > > current_br_int_name = xstrdup(br_int->name); > > } > > > > - const struct sbrec_chassis *chassis_rec; > > - > > struct tunnel_ctx tc = { > > .tunnel = SHASH_INITIALIZER(&tc.tunnel), > > .port_names = SSET_INITIALIZER(&tc.port_names), > > @@ -606,24 +774,30 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn, > > "ovn-controller: modifying OVS tunnels '%s'", > > this_chassis->name); > > > > - /* Collect all port names into tc.port_names. > > - * > > - * Collect all the OVN-created tunnels into tc.tunnel_hmap. */ > > + /* Collect existing port names and tunnel ports for cleanup. */ > > for (size_t i = 0; i < br_int->n_ports; i++) { > > const struct ovsrec_port *port = br_int->ports[i]; > > sset_add(&tc.port_names, port->name); > > > > const char *id = smap_get(&port->external_ids, OVN_TUNNEL_ID); > > if (id) { > > - if (!shash_find(&tc.tunnel, id)) { > > - struct tunnel_node *tunnel = xzalloc(sizeof *tunnel); > > - tunnel->bridge = br_int; > > - tunnel->port = port; > > - shash_add_assert(&tc.tunnel, id, tunnel); > > + struct tunnel_node *tunnel = xzalloc(sizeof *tunnel); > > + tunnel->bridge = br_int; > > + tunnel->port = port; > > + > > + if (use_flow_based) { > > + /* Flow-based: track by port name */ > > + shash_add(&tc.tunnel, port->name, tunnel); > > } else { > > - /* Duplicate port for tunnel-id. Arbitrarily choose > > - * to delete this one. */ > > - ovsrec_bridge_update_ports_delvalue(br_int, port); > > + /* Port-based: track by tunnel ID, handle duplicates */ > > + if (!shash_find(&tc.tunnel, id)) { > > + shash_add_assert(&tc.tunnel, id, tunnel); > > + } else { > > + /* Duplicate port for tunnel-id. Arbitrarily choose > > + * to delete this one. */ > > + ovsrec_bridge_update_ports_delvalue(br_int, port); > > + free(tunnel); > > + } > > } > > } > > > > @@ -632,32 +806,11 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn, > > } > > } > > > > - SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) { > > - if (strcmp(chassis_rec->name, this_chassis->name)) { > > - /* Create tunnels to the other Chassis belonging to the > > - * same transport zone */ > > - if (!chassis_tzones_overlap(transport_zones, chassis_rec)) { > > - VLOG_DBG("Skipping encap creation for Chassis '%s' because " > > - "it belongs to different transport zones", > > - chassis_rec->name); > > - continue; > > - } > > - > > - if (smap_get_bool(&chassis_rec->other_config, "is-remote", false) > > - && !smap_get_bool(&this_chassis->other_config, "is-interconn", > > - false)) { > > - VLOG_DBG("Skipping encap creation for Chassis '%s' because " > > - "it is remote but this chassis is not interconn.", > > - chassis_rec->name); > > - continue; > > - } > > - > > - if (chassis_tunnel_add(chassis_rec, sbg, ovs_table, &tc, > > - this_chassis) == 0) { > > - VLOG_INFO("Creating encap for '%s' failed", chassis_rec->name); > > - continue; > > - } > > - } > > + /* Create OVN tunnels (mode-specific). */ > > + if (use_flow_based) { > > + create_flow_based_tunnels(&tc, sbg); > > + } else { > > + create_port_based_tunnels(&tc, chassis_table, sbg, transport_zones); > > } > > > > create_evpn_tunnels(&tc); > > diff --git a/controller/encaps.h b/controller/encaps.h > > index 6f9891ee5907..240787e8fbb7 100644 > > --- a/controller/encaps.h > > +++ b/controller/encaps.h > > @@ -18,6 +18,17 @@ > > > > #include <stdbool.h> > > > > +/* > > + * Given there could be multiple tunnels with different IPs to the same > > + * chassis we annotate the external_ids:ovn-chassis-id in tunnel port with > > + * <chassis_name>@<remote IP>%<local IP>. The external_id key > > + * "ovn-chassis-id" is kept for backward compatibility. > > + * > > + * For flow-based tunnels, we use the special value "flow" to identify > > + * shared tunnel ports that handle dynamic endpoint resolution. > > + */ > > +#define OVN_TUNNEL_ID "ovn-chassis-id" > > + > > struct ovsdb_idl; > > struct ovsdb_idl_txn; > > struct ovsrec_bridge; > > @@ -38,6 +49,10 @@ void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn, > > const struct sset *transport_zones, > > const struct ovsrec_bridge_table *bridge_table); > > > > +bool is_flow_based_tunnels_enabled( > > + const struct ovsrec_open_vswitch_table *ovs_table, > > + const struct sbrec_chassis *chassis); > > + > > bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn, > > const struct ovsrec_bridge *br_int); > > > > diff --git a/controller/local_data.c b/controller/local_data.c > > index a35d3fa5ae37..d5afc89da2cd 100644 > > --- a/controller/local_data.c > > +++ b/controller/local_data.c > > @@ -51,6 +51,10 @@ static struct tracked_datapath *tracked_datapath_create( > > enum en_tracked_resource_type tracked_type, > > struct hmap *tracked_datapaths); > > > > +static void track_flow_based_tunnel( > > + const struct ovsrec_port *, const struct sbrec_chassis *, > > + struct ovs_list *flow_tunnels); > > + > > static bool datapath_is_switch(const struct sbrec_datapath_binding *); > > static bool datapath_is_transit_switch(const struct sbrec_datapath_binding *); > > > > @@ -454,7 +458,8 @@ void > > local_nonvif_data_run(const struct ovsrec_bridge *br_int, > > const struct sbrec_chassis *chassis_rec, > > struct simap *patch_ofports, > > - struct hmap *chassis_tunnels) > > + struct hmap *chassis_tunnels, > > + struct ovs_list *flow_tunnels) > > { > > for (int i = 0; i < br_int->n_ports; i++) { > > const struct ovsrec_port *port_rec = br_int->ports[i]; > > @@ -470,6 +475,10 @@ local_nonvif_data_run(const struct ovsrec_bridge *br_int, > > continue; > > } > > > > + if (flow_tunnels) { > > + track_flow_based_tunnel(port_rec, chassis_rec, flow_tunnels); > > + } > > + > > const char *localnet = smap_get(&port_rec->external_ids, > > "ovn-localnet-port"); > > const char *l2gateway = smap_get(&port_rec->external_ids, > > @@ -757,3 +766,194 @@ lb_is_local(const struct sbrec_load_balancer *sbrec_lb, > > > > return false; > > } > > + > > +/* Flow-based tunnel management functions. */ > > + > > +struct flow_based_tunnel * > > +flow_based_tunnel_find(const struct ovs_list *flow_tunnels, > > + enum chassis_tunnel_type type) > > +{ > > + struct flow_based_tunnel *tun; > > + LIST_FOR_EACH (tun, list_node, flow_tunnels) { > > If I've read the code correctly, the flow_tunnels list contains at > most one flow per encap type. If that's the case, then since the > number of encap types is limited, you could probably just use a static > array instead of a list. Then you can retrieve the tunnel based on an > index value instead of having to traverse the list. > > > + if (tun->type == type) { > > + return tun; > > + } > > + } > > + return NULL; > > +} > > + > > +void > > +flow_based_tunnels_destroy(struct ovs_list *flow_tunnels) > > +{ > > + struct flow_based_tunnel *tun, *next; > > + LIST_FOR_EACH_SAFE (tun, next, list_node, flow_tunnels) { > > + ovs_list_remove(&tun->list_node); > > + free(tun->port_name); > > + free(tun); > > + } > > +} > > + > > +ofp_port_t > > +get_flow_based_tunnel_port(enum chassis_tunnel_type type, > > + const struct ovs_list *flow_tunnels) > > +{ > > + struct flow_based_tunnel *tun = flow_based_tunnel_find(flow_tunnels, type); > > + return tun ? tun->ofport : 0; > > +} > > + > > +/* Direct tunnel endpoint selection utilities. */ > > + > > +enum chassis_tunnel_type > > +select_preferred_tunnel_type(const struct sbrec_chassis *local_chassis, > > + const struct sbrec_chassis *remote_chassis) > > +{ > > + /* Determine what tunnel types both chassis support */ > > + bool local_supports_geneve = false; > > + bool local_supports_vxlan = false; > > + bool remote_supports_geneve = false; > > + bool remote_supports_vxlan = false; > > + > > + for (size_t i = 0; i < local_chassis->n_encaps; i++) { > > + const char *type = local_chassis->encaps[i]->type; > > + if (!strcmp(type, "geneve")) { > > + local_supports_geneve = true; > > + } else if (!strcmp(type, "vxlan")) { > > + local_supports_vxlan = true; > > + } > > + } > > + > > + for (size_t i = 0; i < remote_chassis->n_encaps; i++) { > > + const char *type = remote_chassis->encaps[i]->type; > > + if (!strcmp(type, "geneve")) { > > + remote_supports_geneve = true; > > + } else if (!strcmp(type, "vxlan")) { > > + remote_supports_vxlan = true; > > + } > > + } > > + > > + /* Return preferred common tunnel type: geneve > vxlan */ > > + if (local_supports_geneve && remote_supports_geneve) { > > + return GENEVE; > > + } else if (local_supports_vxlan && remote_supports_vxlan) { > > + return VXLAN; > > + } else { > > + return TUNNEL_TYPE_INVALID; /* No common tunnel type */ > > + } > > +} > > + > > +const char * > > +select_default_encap_ip(const struct sbrec_chassis *chassis, > > + enum chassis_tunnel_type tunnel_type) > > +{ > > + const char *default_ip = NULL; > > + const char *first_ip = NULL; > > + const char *tunnel_type_str = (tunnel_type == GENEVE) ? "geneve" : "vxlan"; > > + > > + for (size_t i = 0; i < chassis->n_encaps; i++) { > > + const struct sbrec_encap *encap = chassis->encaps[i]; > > + > > + if (strcmp(encap->type, tunnel_type_str)) { > > + continue; > > + } > > + > > + if (!first_ip) { > > + first_ip = encap->ip; > > + } > > + > > + const char *is_default = smap_get(&encap->options, "default-encap-ip"); > > Nit: You can use smap_get_bool(&encap->options, "default-encap-ip", > false) to save a strcmp(). > > > + if (is_default && !strcmp(is_default, "true")) { > > + default_ip = encap->ip; > > + break; /* Found explicit default */ > > + } > > + } > > + > > + return default_ip ? default_ip : first_ip; > > +} > > + > > +const char * > > +select_port_encap_ip(const struct sbrec_port_binding *binding, > > + enum chassis_tunnel_type tunnel_type) > > +{ > > + const char *tunnel_type_str = (tunnel_type == GENEVE) ? "geneve" : "vxlan"; > > + > > + if (binding->encap && !strcmp(binding->encap->type, tunnel_type_str)) { > > + VLOG_DBG("Using port-specific encap IP %s for binding %s", > > + binding->encap->ip, binding->logical_port); > > + return binding->encap->ip; > > + } > > + > > + /* Fall back to chassis default encap IP */ > > + return select_default_encap_ip(binding->chassis, tunnel_type); > > +} > > + > > +static void > > +track_flow_based_tunnel(const struct ovsrec_port *port_rec, > > + const struct sbrec_chassis *chassis_rec, > > + struct ovs_list *flow_tunnels) > > +{ > > + if (port_rec->n_interfaces != 1) { > > + return; > > + } > > + > > + const struct ovsrec_interface *iface_rec = port_rec->interfaces[0]; > > + > > + /* Check if this is a flow-based tunnel port using unified > > + * OVN_TUNNEL_ID. */ > > + const char *tunnel_id = smap_get(&port_rec->external_ids, OVN_TUNNEL_ID); > > + const char *tunnel_type_str = smap_get(&port_rec->external_ids, > > + "ovn-tunnel-type"); > > + > > + if (!tunnel_id || !tunnel_type_str || strcmp(tunnel_id, "flow")) { > > + return; > > + } > > + > > + /* Get tunnel type. */ > > + enum chassis_tunnel_type tunnel_type; > > + if (!strcmp(tunnel_type_str, "geneve")) { > > + tunnel_type = GENEVE; > > + } else if (!strcmp(tunnel_type_str, "vxlan")) { > > + tunnel_type = VXLAN; > > + } else { > > + return; > > + } > > + > > + /* Check if we already track this tunnel type. */ > > + if (flow_based_tunnel_find(flow_tunnels, tunnel_type)) { > > + return; > > + } > > + > > + int64_t ofport = iface_rec->ofport ? *iface_rec->ofport : 0; > > + if (ofport <= 0 || ofport > UINT16_MAX) { > > + VLOG_INFO("Invalid ofport %"PRId64" for flow-based tunnel %s", > > + ofport, port_rec->name); > > + return; > > + } > > + > > + /* Detect if this tunnel will use IPv6. > > + * For flow-based tunnels with local_ip="flow", check if any of the > > + * chassis encap IPs for this tunnel type are IPv6 addresses. > > + */ > > + bool is_ipv6 = false; > > + for (size_t i = 0; i < chassis_rec->n_encaps; i++) { > > + const struct sbrec_encap *encap = chassis_rec->encaps[i]; > > + if (!strcmp(encap->type, tunnel_type_str)) { > > + if (addr_is_ipv6(encap->ip)) { > > + is_ipv6 = true; > > + break; > > + } > > + } > > + } > > + > > + struct flow_based_tunnel *tun = xzalloc(sizeof *tun); > > + tun->type = tunnel_type; > > + tun->ofport = u16_to_ofp(ofport); > > + tun->is_ipv6 = is_ipv6; > > + tun->port_name = xstrdup(port_rec->name); > > + > > + VLOG_INFO("Tracking flow-based tunnel: port=%s, type=%s, ofport=%"PRId64 > > + ", is_ipv6=%s", port_rec->name, tunnel_type_str, ofport, > > + is_ipv6 ? "true" : "false"); > > + > > + ovs_list_push_back(flow_tunnels, &tun->list_node); > > +} > > + > > diff --git a/controller/local_data.h b/controller/local_data.h > > index 841829f2e071..28e3fb70731d 100644 > > --- a/controller/local_data.h > > +++ b/controller/local_data.h > > @@ -28,6 +28,7 @@ > > struct sbrec_datapath_binding; > > struct sbrec_port_binding; > > struct sbrec_chassis; > > +struct sbrec_chassis_table; > > struct ovsdb_idl_index; > > struct ovsrec_bridge; > > struct ovsrec_interface_table; > > @@ -147,10 +148,22 @@ struct chassis_tunnel { > > bool is_ipv6; > > }; > > > > +/* Flow-based tunnel that consolidates multiple endpoints into a single > > + * port. */ > > +struct flow_based_tunnel { > > + struct ovs_list list_node; > > + enum chassis_tunnel_type type; /* geneve, vxlan */ > > + ofp_port_t ofport; /* Single port for all endpoints */ > > + bool is_ipv6; > > + char *port_name; /* e.g., "ovn-geneve" */ > > +}; > > + > > + > > void local_nonvif_data_run(const struct ovsrec_bridge *br_int, > > - const struct sbrec_chassis *, > > + const struct sbrec_chassis *chassis, > > struct simap *patch_ofports, > > - struct hmap *chassis_tunnels); > > + struct hmap *chassis_tunnels, > > + struct ovs_list *flow_tunnels); > > > > bool local_nonvif_data_handle_ovs_iface_changes( > > const struct ovsrec_interface_table *); > > @@ -165,6 +178,23 @@ bool get_chassis_tunnel_ofport(const struct hmap *chassis_tunnels, > > ofp_port_t *ofport); > > > > void chassis_tunnels_destroy(struct hmap *chassis_tunnels); > > + > > +/* Flow-based tunnel management functions. */ > > +struct flow_based_tunnel *flow_based_tunnel_find( > > + const struct ovs_list *flow_tunnels, enum chassis_tunnel_type); > > +void flow_based_tunnels_destroy(struct ovs_list *flow_tunnels); > > +ofp_port_t get_flow_based_tunnel_port( > > + enum chassis_tunnel_type, const struct ovs_list *flow_tunnels); > > + > > +/* Direct tunnel endpoint selection utilities. */ > > +enum chassis_tunnel_type select_preferred_tunnel_type( > > + const struct sbrec_chassis *local_chassis, > > + const struct sbrec_chassis *remote_chassis); > > +const char *select_default_encap_ip(const struct sbrec_chassis *, > > + enum chassis_tunnel_type tunnel_type); > > +const char *select_port_encap_ip(const struct sbrec_port_binding *binding, > > + enum chassis_tunnel_type tunnel_type); > > + > > void local_datapath_memory_usage(struct simap *usage); > > void add_local_datapath_external_port(struct local_datapath *ld, > > char *logical_port, const void *data); > > diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml > > index 32a1f7a6f658..dfc7cc2179ea 100644 > > --- a/controller/ovn-controller.8.xml > > +++ b/controller/ovn-controller.8.xml > > @@ -219,6 +219,36 @@ > > <code>false</code> to clear the DF flag. > > </dd> > > > > + <dt><code>external_ids:ovn-enable-flow-based-tunnels</code></dt> > > + <dd> > > + <p> > > + The boolean flag indicates if <code>ovn-controller</code> should > > + use flow-based tunnels instead of port-based tunnels for overlay > > + network connectivity. Default value is <code>false</code>. > > + </p> > > + <p> > > + <em>Port-based tunnels</em> (default mode) create a dedicated tunnel > > + port for each combination of remote chassis and local encap IP, > > + while <em>Flow-based tunnels</em> create a single shared tunnel port > > + for each tunnel type, and use OpenFlow <code>set_field</code> actions > > + to dynamically set tunnel source and destination IP addresses per > > + packet. > > + </p> > > + <p> > > + This feature is experimental and is disabled by default. There are > > + some known limitations to the feature: > > + <ul> > > + <li> > > + IPSec is not supported. > > + </li> > > + <li> > > + BFD between tunnel endpoints is not supported, thus HA chassis > > + (e.g. for Distributed Gateway Port redundancy) is not supported. > > + </li> > > + </ul> > > + </p> > > + </dd> > > + > > <dt><code>external_ids:ovn-bridge-mappings</code></dt> > > <dd> > > A list of key-value pairs that map a physical network name to a local > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index ea65d3a3e8b0..e96ab8b39879 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -3516,6 +3516,9 @@ struct ed_type_non_vif_data { > > struct simap patch_ofports; /* simap of patch ovs ports. */ > > struct hmap chassis_tunnels; /* hmap of 'struct chassis_tunnel' from the > > * tunnel OVS ports. */ > > + struct ovs_list flow_tunnels; /* list of 'struct flow_based_tunnel' from > > + * flow-based tunnel ports. */ > > + bool use_flow_based_tunnels; /* Enable flow-based tunnels. */ > > }; > > > > static void * > > @@ -3525,6 +3528,8 @@ en_non_vif_data_init(struct engine_node *node OVS_UNUSED, > > struct ed_type_non_vif_data *data = xzalloc(sizeof *data); > > simap_init(&data->patch_ofports); > > hmap_init(&data->chassis_tunnels); > > + ovs_list_init(&data->flow_tunnels); > > + data->use_flow_based_tunnels = false; > > return data; > > } > > > > @@ -3534,6 +3539,7 @@ en_non_vif_data_cleanup(void *data OVS_UNUSED) > > struct ed_type_non_vif_data *ed_non_vif_data = data; > > simap_destroy(&ed_non_vif_data->patch_ofports); > > chassis_tunnels_destroy(&ed_non_vif_data->chassis_tunnels); > > + flow_based_tunnels_destroy(&ed_non_vif_data->flow_tunnels); > > } > > > > static enum engine_node_state > > @@ -3542,8 +3548,11 @@ en_non_vif_data_run(struct engine_node *node, void *data) > > struct ed_type_non_vif_data *ed_non_vif_data = data; > > simap_destroy(&ed_non_vif_data->patch_ofports); > > chassis_tunnels_destroy(&ed_non_vif_data->chassis_tunnels); > > + flow_based_tunnels_destroy(&ed_non_vif_data->flow_tunnels); > > + > > simap_init(&ed_non_vif_data->patch_ofports); > > hmap_init(&ed_non_vif_data->chassis_tunnels); > > + ovs_list_init(&ed_non_vif_data->flow_tunnels); > > > > const struct ovsrec_open_vswitch_table *ovs_table = > > EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node)); > > @@ -3563,8 +3572,15 @@ en_non_vif_data_run(struct engine_node *node, void *data) > > = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id); > > ovs_assert(chassis); > > > > - local_nonvif_data_run(br_int, chassis, &ed_non_vif_data->patch_ofports, > > - &ed_non_vif_data->chassis_tunnels); > > + ed_non_vif_data->use_flow_based_tunnels = > > + is_flow_based_tunnels_enabled(ovs_table, chassis); > > + > > + local_nonvif_data_run(br_int, chassis, > > + &ed_non_vif_data->patch_ofports, > > + &ed_non_vif_data->chassis_tunnels, > > + ed_non_vif_data->use_flow_based_tunnels ? > > + &ed_non_vif_data->flow_tunnels : NULL); > > + > > return EN_UPDATED; > > } > > > > @@ -4673,6 +4689,7 @@ static void init_physical_ctx(struct engine_node *node, > > parse_encap_ips(ovs_table, &p_ctx->n_encap_ips, &p_ctx->encap_ips); > > p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name; > > p_ctx->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath; > > + p_ctx->sbrec_chassis_by_name = sbrec_chassis_by_name; > > p_ctx->port_binding_table = port_binding_table; > > p_ctx->ovs_interface_table = ovs_interface_table; > > p_ctx->mc_group_table = multicast_group_table; > > @@ -4686,6 +4703,8 @@ static void init_physical_ctx(struct engine_node *node, > > p_ctx->local_bindings = &rt_data->lbinding_data.bindings; > > p_ctx->patch_ofports = &non_vif_data->patch_ofports; > > p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels; > > + p_ctx->flow_tunnels = &non_vif_data->flow_tunnels; > > + p_ctx->use_flow_based_tunnels = non_vif_data->use_flow_based_tunnels; > > p_ctx->always_tunnel = n_opts->always_tunnel; > > p_ctx->evpn_bindings = &eb_data->bindings; > > p_ctx->evpn_multicast_groups = &eb_data->multicast_groups; > > diff --git a/controller/physical.c b/controller/physical.c > > index 6ac5dcd3fb28..e8800fcaa1c0 100644 > > --- a/controller/physical.c > > +++ b/controller/physical.c > > @@ -225,6 +225,202 @@ match_set_chassis_flood_outport(struct match *match, > > } > > } > > > > +/* Flow-based tunnel helper functions. */ > > + > > +static void > > +put_set_tunnel_src(const char *src_ip, struct ofpbuf *ofpacts) > > This and put_set_tunnel_dst() are nearly identical. The only > difference is the MFF values used. These could be combined into a > single function and use a boolean src/dst qualifier to determine which > MFF value to use. > > > +{ > > + if (strchr(src_ip, ':')) { > > + /* IPv6 */ > > + struct in6_addr src_ipv6; > > + if (inet_pton(AF_INET6, src_ip, &src_ipv6) == 1) { > > There is rarely a need to use inet_pton() directly in OVS/OVN code. > You can use ip_parse() and ipv6_parse() as provided by OVS. > > > + union mf_value value; > > + memset(&value, 0, sizeof value); > > + memcpy(&value.ipv6, &src_ipv6, sizeof src_ipv6); > > + ofpact_put_set_field(ofpacts, mf_from_id(MFF_TUN_IPV6_SRC), > > + &value, NULL); > > + } > > + } else { > > + /* IPv4 */ > > + struct in_addr src_ipv4; > > + if (inet_pton(AF_INET, src_ip, &src_ipv4) == 1) { > > + put_load(ntohl(src_ipv4.s_addr), MFF_TUN_SRC, 0, 32, ofpacts); > > + } > > + } > > +} > > + > > +static void > > +put_set_tunnel_dst(const char *dst_ip, struct ofpbuf *ofpacts) > > +{ > > + if (strchr(dst_ip, ':')) { > > + /* IPv6 */ > > + struct in6_addr dst_ipv6; > > + if (inet_pton(AF_INET6, dst_ip, &dst_ipv6) == 1) { > > + union mf_value value; > > + memset(&value, 0, sizeof value); > > + memcpy(&value.ipv6, &dst_ipv6, sizeof dst_ipv6); > > + ofpact_put_set_field(ofpacts, mf_from_id(MFF_TUN_IPV6_DST), > > + &value, NULL); > > + } > > + } else { > > + /* IPv4 */ > > + struct in_addr dst_ipv4; > > + if (inet_pton(AF_INET, dst_ip, &dst_ipv4) == 1) { > > + put_load(ntohl(dst_ipv4.s_addr), MFF_TUN_DST, 0, 32, ofpacts); > > + } > > + } > > +} > > + > > +/* Flow-based encapsulation that sets tunnel metadata and endpoint IPs. */ > > +static void > > +put_flow_based_encapsulation(enum mf_field_id mff_ovn_geneve, > > + enum chassis_tunnel_type tunnel_type, > > + const char *local_ip, const char *remote_ip, > > + const struct sbrec_datapath_binding *datapath, > > + uint16_t outport, bool is_ramp_switch, > > + struct ofpbuf *ofpacts) > > +{ > > + struct chassis_tunnel temp_tun = { > > + .type = tunnel_type, > > + }; > > + put_encapsulation(mff_ovn_geneve, &temp_tun, datapath, > > + outport, is_ramp_switch, ofpacts); > > + > > + /* Set tunnel source and destination IPs (flow-based specific) */ > > + put_set_tunnel_src(local_ip, ofpacts); > > + put_set_tunnel_dst(remote_ip, ofpacts); > > +} > > + > > + > > +/* Generate flows for flow-based tunnel to a specific chassis. */ > > +static void > > +put_flow_based_remote_port_redirect_overlay( > > + const struct sbrec_port_binding *binding, > > + const enum en_lport_type type, > > + const struct physical_ctx *ctx, > > + uint32_t port_key, > > + struct match *match, > > + struct ofpbuf *ofpacts_p, > > + struct ovn_desired_flow_table *flow_table) > > +{ > > + if (!ctx->use_flow_based_tunnels || !binding->chassis) { > > You check ctx->use_flow_based_tunnels before calling this function and > then here during the function. You could probably avoid the > double-check. One way would be to remove the check before calling > put_flow_based_remote_port_redirect_overlay(), and changing this > function to return a value to indicate whether it was successful or > not. This way, the caller of > put_flow_based_remote_port_redirect_overlay() can call it, check the > return value, and if it's false, move on to port-based tunnel flow > installation. > > Note: this is as far as I made it in my review. I will have a closer > look at the rest some time next week. >
Thanks Mark for the review. I agree with all your comments above, and I will wait for your complete review before I send v2. Best, Han _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
