On 1/31/25 6:37 PM, Lorenzo Bianconi wrote: > Fix ovn-ic mode when vxlan is used as encapsulation mode by reducing the > maximum local dp key to ((1<<10)-1) in order to make some room for > OVN_MAX_DP_VXLAN_KEY_GLOBAL (vxlan tunnels export just 12 bits for > metadata key). > > Reported-at: https://issues.redhat.com/browse/FDP-1023 > Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> > ---
Hi Lorenzo, I have some comments inline - nothing major, I think. However, I'm curious about Vladislav's opinion of this change. Vladislav, could you please confirm that this doesn't break your existing scenarios? Thanks, Dumitru > - Changes in v4: > Introduce vxlan_mode in option column of IC_NB_Global table to enable > VXLAN protocol for cross-AZ traffic. Default value is false. > - Changes in v3: > Reduce the max local dp key to 1023 just if the cluster is running in > ovn-ic mode > - Changes in v2: > Document local datapath limitation > --- > NEWS | 2 ++ > ic/ovn-ic.c | 48 +++++++++++++++++++++++++++++------ > lib/ovn-util.h | 4 ++- > northd/en-global-config.c | 53 ++++++++++++++++++++++++++++++++++++++- > northd/en-global-config.h | 2 ++ > northd/inc-proc-northd.c | 2 ++ > northd/northd.c | 30 +++++++++++++++------- > northd/northd.h | 2 +- > ovn-ic-nb.xml | 7 ++++++ > ovn-nb.xml | 9 +++++++ > tests/ovn-ic.at | 32 +++++++++++++++++++++++ > tests/ovn-northd.at | 22 ++++++++++++++++ > 12 files changed, 194 insertions(+), 19 deletions(-) > > diff --git a/NEWS b/NEWS > index 2f0c965a7..4ee5c0a5a 100644 > --- a/NEWS > +++ b/NEWS > @@ -38,6 +38,8 @@ Post v24.09.0 > - Improved handling of IPv6 traffic by enabling address prefix tracking > in OVS for both IPv4 and IPv6 addresses, whenever possible, reducing > the amount of IPv6 datapath flows. > + - Reduce the max number of local datapath to 1024 when OVN is using VXLAN > + encapsulation type in OVN-interconnect mode. We need to mention the new configuration knob you're adding. 
> > OVN v24.09.0 - 13 Sep 2024 > -------------------------- > diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c > index 75b5d1787..3e46a518b 100644 > --- a/ic/ovn-ic.c > +++ b/ic/ovn-ic.c > @@ -181,18 +181,20 @@ az_run(struct ic_context *ctx) > } > > static uint32_t > -allocate_ts_dp_key(struct hmap *dp_tnlids) > +allocate_ts_dp_key(struct hmap *dp_tnlids, bool vxlan_mode) > { > - static uint32_t hint = OVN_MIN_DP_KEY_GLOBAL; > - return ovn_allocate_tnlid(dp_tnlids, "transit switch datapath", > - OVN_MIN_DP_KEY_GLOBAL, OVN_MAX_DP_KEY_GLOBAL, > - &hint); > + uint32_t hint = vxlan_mode ? OVN_MIN_DP_VXLAN_KEY_GLOBAL > + : OVN_MIN_DP_KEY_GLOBAL; Nit: this should be indented differently: uint32_t hint = vxlan_mode ? OVN_MIN_DP_VXLAN_KEY_GLOBAL : OVN_MIN_DP_KEY_GLOBAL; > + return ovn_allocate_tnlid(dp_tnlids, "transit switch datapath", hint, > + vxlan_mode ? OVN_MAX_DP_VXLAN_KEY_GLOBAL : OVN_MAX_DP_KEY_GLOBAL, > + &hint); > } > > static void > ts_run(struct ic_context *ctx) > { > const struct icnbrec_transit_switch *ts; > + bool dp_key_refresh = false; > > struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids); > struct shash isb_dps = SHASH_INITIALIZER(&isb_dps); > @@ -202,6 +204,20 @@ ts_run(struct ic_context *ctx) > ovn_add_tnlid(&dp_tnlids, isb_dp->tunnel_key); > } > > + bool vxlan_mode = false; > + const struct icnbrec_ic_nb_global *ic_nb = > + icnbrec_ic_nb_global_first(ctx->ovninb_idl); > + > + if (ic_nb && smap_get_bool(&ic_nb->options, "vxlan_mode", false)) { > + const struct icsbrec_encap *encap; > + ICSBREC_ENCAP_FOR_EACH (encap, ctx->ovnisb_idl) { > + if (!strcmp(encap->type, "vxlan")) { > + vxlan_mode = true; > + break; > + } > + } > + } > + > /* Sync INB TS to AZ NB */ > if (ctx->ovnnb_txn) { > struct shash nb_tses = SHASH_INITIALIZER(&nb_tses); > @@ -224,7 +240,19 @@ ts_run(struct ic_context *ctx) > nbrec_logical_switch_update_other_config_setkey(ls, > > "interconn-ts", > ts->name); > + nbrec_logical_switch_update_other_config_setkey( > + ls, "ic-vxlan_mode", 
vxlan_mode ? "true" : "false"); > + } else { > + bool _vxlan_mode = smap_get_bool(&ls->other_config, > + "ic-vxlan_mode", false); > + if (_vxlan_mode != vxlan_mode) { > + dp_key_refresh = true; > + nbrec_logical_switch_update_other_config_setkey( > + ls, "ic-vxlan_mode", > + vxlan_mode ? "true" : "false"); > + } > } > + > isb_dp = shash_find_data(&isb_dps, ts->name); > if (isb_dp) { > int64_t nb_tnl_key = smap_get_int(&ls->other_config, > @@ -260,7 +288,7 @@ ts_run(struct ic_context *ctx) > isb_dp = shash_find_and_delete(&isb_dps, ts->name); > if (!isb_dp) { > /* Allocate tunnel key */ > - int64_t dp_key = allocate_ts_dp_key(&dp_tnlids); > + int64_t dp_key = allocate_ts_dp_key(&dp_tnlids, vxlan_mode); > if (!dp_key) { > continue; > } > @@ -268,6 +296,12 @@ ts_run(struct ic_context *ctx) > isb_dp = icsbrec_datapath_binding_insert(ctx->ovnisb_txn); > icsbrec_datapath_binding_set_transit_switch(isb_dp, > ts->name); > icsbrec_datapath_binding_set_tunnel_key(isb_dp, dp_key); > + } else if (dp_key_refresh) { > + /* Refresh tunnel key since encap mode has changed. 
*/ > + int64_t dp_key = allocate_ts_dp_key(&dp_tnlids, vxlan_mode); > + if (dp_key) { > + icsbrec_datapath_binding_set_tunnel_key(isb_dp, dp_key); > + } > } > } > > @@ -1930,8 +1964,8 @@ static void > ovn_db_run(struct ic_context *ctx, > const struct icsbrec_availability_zone *az) > { > - ts_run(ctx); > gateway_run(ctx, az); > + ts_run(ctx); > port_binding_run(ctx, az); > route_run(ctx, az); > } > diff --git a/lib/ovn-util.h b/lib/ovn-util.h > index f2f70dd72..dabe72254 100644 > --- a/lib/ovn-util.h > +++ b/lib/ovn-util.h > @@ -163,7 +163,9 @@ void set_idl_probe_interval(struct ovsdb_idl *idl, const > char *remote, > #define OVN_MAX_DP_KEY_GLOBAL OVN_MAX_DP_KEY > > #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1) > -#define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM) > +#define OVN_MAX_DP_VXLAN_KEY_LOCAL ((1u << 10) - 1) > +#define OVN_MIN_DP_VXLAN_KEY_GLOBAL (OVN_MAX_DP_VXLAN_KEY_LOCAL + 1) > +#define OVN_MAX_DP_VXLAN_KEY_GLOBAL ((1u << 12) - 1) > > struct hmap; > void ovn_destroy_tnlids(struct hmap *tnlids); > diff --git a/northd/en-global-config.c b/northd/en-global-config.c > index ce16c26f2..d1d58dfc1 100644 > --- a/northd/en-global-config.c > +++ b/northd/en-global-config.c > @@ -71,6 +71,8 @@ en_global_config_run(struct engine_node *node , void *data) > > const struct nbrec_nb_global_table *nb_global_table = > EN_OVSDB_GET(engine_get_input("NB_nb_global", node)); > + const struct nbrec_logical_switch_table *nbrec_ls_table = > + EN_OVSDB_GET(engine_get_input("NB_logical_switch", node)); > const struct sbrec_sb_global_table *sb_global_table = > EN_OVSDB_GET(engine_get_input("SB_sb_global", node)); > const struct sbrec_chassis_table *sbrec_chassis_table = > @@ -121,10 +123,19 @@ en_global_config_run(struct engine_node *node , void > *data) > config_data->svc_monitor_mac); > } > > + bool ic_vxlan_mode = false; > + const struct nbrec_logical_switch *nbs; > + NBREC_LOGICAL_SWITCH_TABLE_FOR_EACH (nbs, nbrec_ls_table) { > + if 
(smap_get(&nbs->other_config, "ic-vxlan_mode")) { > + ic_vxlan_mode = true; > + break; > + } > + } > char *max_tunid = xasprintf("%d", > get_ovn_max_dp_key_local( > is_vxlan_mode(&nb->options, > - sbrec_chassis_table))); > + sbrec_chassis_table), > + ic_vxlan_mode)); I know this was already indented like this, but now it looks quite weird. I'd change it to: uint32_t max_dp_key = get_ovn_max_dp_key_local(is_vxlan_mode(&nb->options, sbrec_chassis_table), ic_vxlan_mode); char *max_tunid = xasprintf("%d", max_dp_key); > smap_replace(options, "max_tunid", max_tunid); > free(max_tunid); > > @@ -371,6 +382,46 @@ node_global_config_handler(struct engine_node *node, > void *data OVS_UNUSED) > return true; > } > > +bool > +global_config_nb_logical_switch_handler(struct engine_node *node, > + void *data) > +{ > + struct ed_type_global_config *config_data = data; > + const struct nbrec_logical_switch_table *nbrec_ls_table = > + EN_OVSDB_GET(engine_get_input("NB_logical_switch", node)); > + const struct nbrec_nb_global *nb = nbrec_nb_global_table_first( > + EN_OVSDB_GET(engine_get_input("NB_nb_global", node))); > + const struct sbrec_chassis_table *sbrec_chassis_table = > + EN_OVSDB_GET(engine_get_input("SB_chassis", node)); > + > + bool ic_vxlan_mode = false; > + const struct nbrec_logical_switch *nbs; > + NBREC_LOGICAL_SWITCH_TABLE_FOR_EACH (nbs, nbrec_ls_table) { > + if (smap_get(&nbs->other_config, "ic-vxlan_mode")) { > + ic_vxlan_mode = true; > + break; > + } > + } > + char *max_tunid = xasprintf("%d", > + get_ovn_max_dp_key_local( > + is_vxlan_mode(&nb->options, > + sbrec_chassis_table), > + ic_vxlan_mode)); Same comment here about indentation. 
> + struct smap *options = &config_data->nb_options; > + smap_replace(options, "max_tunid", max_tunid); > + free(max_tunid); > + > + if (!smap_equal(&nb->options, options)) { > + nbrec_nb_global_verify_options(nb); > + nbrec_nb_global_set_options(nb, options); > + } > + > + engine_set_node_state(node, EN_UPDATED); Shouldn't we set the state to EN_UPDATED only if the value of max_tunid actually changed, and set it to EN_UNCHANGED otherwise? It seems to me like that's the case. > + config_data->tracked = true; > + > + return true; > +} > + > /* static functions. */ > static void > northd_enable_all_features(struct ed_type_global_config *data) > diff --git a/northd/en-global-config.h b/northd/en-global-config.h > index 767810542..3660ec15b 100644 > --- a/northd/en-global-config.h > +++ b/northd/en-global-config.h > @@ -59,6 +59,8 @@ void en_global_config_clear_tracked_data(void *data); > bool global_config_nb_global_handler(struct engine_node *, void *data); > bool global_config_sb_global_handler(struct engine_node *, void *data); > bool global_config_sb_chassis_handler(struct engine_node *, void *data); > +bool global_config_nb_logical_switch_handler(struct engine_node *node, > + void *data); > > /* generic global config handler for any engine node which has global_config > * has an input node . 
*/ > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > index 1d93e72ce..b4be79200 100644 > --- a/northd/inc-proc-northd.c > +++ b/northd/inc-proc-northd.c > @@ -183,6 +183,8 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > engine_add_input(&en_global_config, &en_nb_nb_global, > global_config_nb_global_handler); > + engine_add_input(&en_global_config, &en_nb_logical_switch, > + global_config_nb_logical_switch_handler); > engine_add_input(&en_global_config, &en_sb_sb_global, > global_config_sb_global_handler); > engine_add_input(&en_global_config, &en_sb_chassis, > diff --git a/northd/northd.c b/northd/northd.c > index 3ff4326e6..c6217e959 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -826,7 +826,7 @@ join_datapaths(const struct nbrec_logical_switch_table > *nbrec_ls_table, > struct ovsdb_idl_txn *ovnsb_txn, > struct hmap *datapaths, struct ovs_list *sb_only, > struct ovs_list *nb_only, struct ovs_list *both, > - struct ovs_list *lr_list) > + struct ovs_list *lr_list, bool *vxlan_ic_mode) I'd avoid passing this around as argument and just use a global vxlan_ic_mode variable. It's not pretty but it's what we do for 'vxlan_mode' already. 
> { > ovs_list_init(sb_only); > ovs_list_init(nb_only); > @@ -862,6 +862,7 @@ join_datapaths(const struct nbrec_logical_switch_table > *nbrec_ls_table, > ovs_list_push_back(sb_only, &od->list); > } > > + *vxlan_ic_mode = false; > const struct nbrec_logical_switch *nbs; > NBREC_LOGICAL_SWITCH_TABLE_FOR_EACH (nbs, nbrec_ls_table) { > struct ovn_datapath *od = ovn_datapath_find_(datapaths, > @@ -879,6 +880,10 @@ join_datapaths(const struct nbrec_logical_switch_table > *nbrec_ls_table, > > init_ipam_info_for_datapath(od); > init_mcast_info_for_datapath(od); > + > + if (smap_get_bool(&nbs->other_config, "ic-vxlan_mode", false)) { > + *vxlan_ic_mode = true; > + } > } > > const struct nbrec_logical_router *nbr; > @@ -936,22 +941,25 @@ is_vxlan_mode(const struct smap *nb_options, > } > > uint32_t > -get_ovn_max_dp_key_local(bool _vxlan_mode) > +get_ovn_max_dp_key_local(bool _vxlan_mode, bool vxlan_ic_mode) > { > if (_vxlan_mode) { > /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for VXLAN mode. */ > - return OVN_MAX_DP_VXLAN_KEY; > + return vxlan_ic_mode ? OVN_MAX_DP_VXLAN_KEY_LOCAL > + : OVN_MAX_DP_VXLAN_KEY; > } > - return OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM; > + return vxlan_ic_mode ? 
OVN_MAX_DP_VXLAN_KEY_LOCAL : OVN_MAX_DP_KEY_LOCAL; > } > > static void > ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids, > - struct ovn_datapath *od, uint32_t *hint) > + struct ovn_datapath *od, uint32_t *hint, > + bool vxlan_ic_mode) > { > if (!od->tunnel_key) { > od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath", > - OVN_MIN_DP_KEY_LOCAL, get_ovn_max_dp_key_local(vxlan_mode), > hint); > + OVN_MIN_DP_KEY_LOCAL, > + get_ovn_max_dp_key_local(vxlan_mode, vxlan_ic_mode), hint); > if (!od->tunnel_key) { > if (od->sb) { > sbrec_datapath_binding_delete(od->sb); > @@ -1025,10 +1033,12 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn, > struct ovs_list *lr_list) > { > struct ovs_list sb_only, nb_only, both; > + bool vxlan_ic_mode; > > struct hmap *datapaths = &ls_datapaths->datapaths; > join_datapaths(nbrec_ls_table, nbrec_lr_table, sbrec_dp_table, ovnsb_txn, > - datapaths, &sb_only, &nb_only, &both, lr_list); > + datapaths, &sb_only, &nb_only, &both, lr_list, > + &vxlan_ic_mode); > > /* Assign explicitly requested tunnel ids first. */ > struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids); > @@ -1050,10 +1060,12 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn, > /* Assign new tunnel ids where needed. */ > uint32_t hint = 0; > LIST_FOR_EACH_SAFE (od, list, &both) { > - ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint); > + ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint, > + vxlan_ic_mode); > } > LIST_FOR_EACH_SAFE (od, list, &nb_only) { > - ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint); > + ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint, > + vxlan_ic_mode); > } > > /* Sync tunnel ids from nb to sb. 
*/ > diff --git a/northd/northd.h b/northd/northd.h > index 9457a7be6..f66328e72 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -872,6 +872,6 @@ bool > is_vxlan_mode(const struct smap *nb_options, > const struct sbrec_chassis_table *sbrec_chassis_table); > > -uint32_t get_ovn_max_dp_key_local(bool _vxlan_mode); > +uint32_t get_ovn_max_dp_key_local(bool _vxlan_mode, bool ic_mode); > > #endif /* NORTHD_H */ > diff --git a/ovn-ic-nb.xml b/ovn-ic-nb.xml > index 80138400d..abd8114fc 100644 > --- a/ovn-ic-nb.xml > +++ b/ovn-ic-nb.xml > @@ -78,6 +78,13 @@ > at least 1000 ms. > </p> > </column> > + > + <column name="options" key="vxlan_mode"> > + <p> > + This field allows the client to enable VXLAN as encapsulation > + protocol for cross-AZ traffic. Default value is false. > + </p> > + </column> > </group> > > <group title="Connection Options"> > diff --git a/ovn-nb.xml b/ovn-nb.xml > index d82f9872b..f13b90665 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -394,6 +394,9 @@ > support HW VTEP functionality and main encap type is GENEVE or STT, > set > this option to <code>false</code> to use default > non-<code>VXLAN mode</code> tunnel IDs allocation logic. > + Please consider when OVN is running in <code>OVN-interconnect</code> > + mode and it is using <code>VXLAN</code> encapsulation type, the max > + number of local datapath is reduced to 1024. I think the term "local datapath" is not clear. Are those "AZ-local" switches? That is, "non-transit logical switches"? Maybe we should just call them that instead? What do you think? > </column> > > <column name="options" key="always_tunnel" > @@ -888,6 +891,12 @@ > database. This kind of logical switch is created and controlled > by <code>ovn-ic</code>. > </column> > + <column name="other_config" key="ic-vxlan_mode" > + type='{"type": "boolean"}'> > + <code>ic-vxlan_mode</code> is set to true by <code>ovn-ic</code> when > + it runs <code>VXLAN</code> as encapsulation protocol for cross-AZ > + traffic. 
Default value is false. > + </column> > </group> > > <group title="Tunnel Key"> > diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at > index fbcfca2e4..a1eccb165 100644 > --- a/tests/ovn-ic.at > +++ b/tests/ovn-ic.at > @@ -94,6 +94,38 @@ OVN_CLEANUP_IC([az1]) > AT_CLEANUP > ]) > > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ovn-ic -- VXLAN tunnel key]) > +ovn_init_ic_db > +net_add n1 > + > +ovn_start az1 > +sim_add gw-az1 > +as gw-az1 > + > +check ovs-vsctl add-br br-phys > +ovn_az_attach az1 n1 br-phys 192.168.1.1 > +check ovs-vsctl set open . external-ids:ovn-is-interconn=true > + > +AT_CHECK([ovn-ic-nbctl --wait=sb ts-add ts1]) > + > +# Check ISB > +check_row_count ic-sb:Datapath_Binding 1 transit_switch=ts1 > +check_column "ts1" ic-sb:Datapath_Binding transit_switch > +check_column "ts1" nb:Logical_Switch name > + > +wait_column "ic-vxlan_mode=false interconn-ts=ts1 > requested-tnl-key=16711682" nb:Logical_Switch other_config name="ts1" > +# Check tunnel key fits in VXLAN space > +check ovn-ic-nbctl --wait=sb set IC_NB_Global . options:vxlan_mode=true > +wait_column "ic-vxlan_mode=true interconn-ts=ts1 requested-tnl-key=1025" > nb:Logical_Switch other_config name="ts1" > + > +check ovn-ic-nbctl --wait=sb set IC_NB_Global . options:vxlan_mode=false > +wait_column "ic-vxlan_mode=false interconn-ts=ts1 > requested-tnl-key=16711682" nb:Logical_Switch other_config name="ts1" > + > +OVN_CLEANUP_IC([az1]) > +AT_CLEANUP > +]) > + > OVN_FOR_EACH_NORTHD([ > AT_SETUP([ovn-ic -- port-bindings deletion upon TS deletion]) > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index df646ec68..89c9d7c13 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -2978,7 +2978,29 @@ OVS_WAIT_UNTIL([grep "all port tunnel ids exhausted" > northd/ovn-northd.log]) > AT_CLEANUP > ]) > > +OVN_FOR_EACH_NORTHD_NO_HV([ > +AT_SETUP([check VXLAN encap in IC-mode]) > +ovn_start > + > +get_max_tunid() { > + echo $(ovn-nbctl get NB_Global . 
options:max_tunid | sed s/":"//g | sed > s/\"//g) > +} > + > +check_uuid ovn-sbctl \ > + --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \ > + -- --id=@c create chassis name=hv1 encaps=@e > +check ovn-nbctl --wait=sb ls-add LS > +AT_CHECK([test "$(get_max_tunid)" -eq 4095]) > + > +check ovn-nbctl --wait=sb set logical-switch LS other-config:interconn-ts=LS > +check ovn-nbctl --wait=sb set logical-switch LS > other-config:ic-vxlan_mode=true > +AT_CHECK([test "$(get_max_tunid)" -eq 1023]) > > +check ovn-nbctl --wait=sb clear logical-switch LS other-config > +AT_CHECK([test "$(get_max_tunid)" -eq 4095]) > + > +AT_CLEANUP > +]) > > OVN_FOR_EACH_NORTHD_NO_HV([ > AT_SETUP([Logical Flow Datapath Groups]) _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev