> On 29-Nov-2023, at 2:24 PM, Dumitru Ceara <dce...@redhat.com> wrote: > > On 11/29/23 07:45, naveen.yerramneni wrote: >> This functionality can be enabled at the logical switch level: >> - "other_config:fdb_local" can be used to enable/disable this >> functionality, it is disabled by default. >> - "other_config:fdb_local_idle_timeout" sepcifies idle timeout >> for locally learned fdb flows, default timeout is 300 secs. >> >> If enabled, below lflow is added for each port that has unknown addr set. >> - table=2 (ls_in_lookup_fdb), priority=100, match=(inport == <in_port>), >> action=(commit_fdb_local(timeout=<timeout>); next; >> >> New OVN action: "commit_fdb_local". This sets following OVS action. >> - learn(table=71,idle_timeout=<timeout>,delete_learned,OXM_OF_METADATA[], >> >> NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],load:NXM_NX_REG14[]->NXM_NX_REG15[]) >> >> This is useful when OVN is managing VLAN network that has multiple ports >> set with unknown addr and localnet_learn_fdb is enabled. With this config, >> if there is east-west traffic flowing between VMs part of same VLAN >> deployed on different hypervisors then, MAC addrs of the source and >> destination VMs keeps flapping between VM port and localnet port in >> Southbound FDB table. Enabling fdb_local config makes fdb table local to >> the chassis and avoids MAC flapping. >> >> Signed-off-by: Naveen Yerramneni <naveen.yerramn...@nutanix.com> >> --- > > Hi Naveen, > > Thanks a lot for the patch! > > Just a note, we already have a fix for the east-west traffic that causes > FDB flapping when localnet is used: > > https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ovn-2Dorg_ovn_commit_2acf91e9628e9481c48e4a6cec8ad5159fdd6d2e&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=2PQjSDR7A28z1kXE1ptSm6X36oL_nCq1XxeEt7FkLmA&m=kPuq992rikXYk63APGxlIpfqY3lPpreN9f4ha9pZKpodnVgE9KfjEUNozpPUFzUu&s=LP9_zs2Rj34vMx20ntbu-A3taXqKMJNVH2TLQyOXCh0&e= > > > https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ovn-2Dorg_ovn_commit_f3a14907fe2b1ecdcfddfbed595cd097b6efbe14&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=2PQjSDR7A28z1kXE1ptSm6X36oL_nCq1XxeEt7FkLmA&m=kPuq992rikXYk63APGxlIpfqY3lPpreN9f4ha9pZKpodnVgE9KfjEUNozpPUFzUu&s=gsUGtjyf9gSOr1LkcCH0O6MB1_tjXi9fuTgwEFgbRx8&e= > > > In general, however, I think it's a very good idea to move the FDB away > from the Southbound and make it local to each hypervisor. That reduces > load on the Southbound among other things. >
Hi Dumitru, Thanks for informing about the patches. Yes, local FDB reduces load on southbound. >> include/ovn/actions.h | 7 +++ >> lib/actions.c | 94 ++++++++++++++++++++++++++++++++++++ >> northd/northd.c | 26 ++++++++++ >> ovn-nb.xml | 14 ++++++ >> tests/ovn.at | 108 ++++++++++++++++++++++++++++++++++++++++++ >> utilities/ovn-trace.c | 2 + >> 6 files changed, 251 insertions(+) >> >> diff --git a/include/ovn/actions.h b/include/ovn/actions.h >> index 49cfe0624..85ac92cd3 100644 >> --- a/include/ovn/actions.h >> +++ b/include/ovn/actions.h >> @@ -127,6 +127,7 @@ struct collector_set_ids; >> OVNACT(CHK_LB_AFF, ovnact_result) \ >> OVNACT(SAMPLE, ovnact_sample) \ >> OVNACT(MAC_CACHE_USE, ovnact_null) \ >> + OVNACT(COMMIT_FDB_LOCAL, ovnact_commit_fdb_local) \ >> >> /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */ >> enum OVS_PACKED_ENUM ovnact_type { >> @@ -514,6 +515,12 @@ struct ovnact_commit_lb_aff { >> uint16_t timeout; >> }; >> >> +/* OVNACT_COMMIT_FBD_LOCAL. */ >> +struct ovnact_commit_fdb_local{ >> + struct ovnact ovnact; >> + uint16_t timeout; /* fdb_local flow timeout */ >> +}; >> + >> /* Internal use by the helpers below. */ >> void ovnact_init(struct ovnact *, enum ovnact_type, size_t len); >> void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len); >> diff --git a/lib/actions.c b/lib/actions.c >> index a73fe1a1e..f5aa78db1 100644 >> --- a/lib/actions.c >> +++ b/lib/actions.c >> @@ -5236,6 +5236,98 @@ format_MAC_CACHE_USE(const struct ovnact_null *null >> OVS_UNUSED, struct ds *s) >> ds_put_cstr(s, "mac_cache_use;"); >> } >> >> +static void >> +parse_commit_fdb_local(struct action_context *ctx, >> + struct ovnact_commit_fdb_local *fdb_local) >> +{ >> + uint16_t timeout = 0; >> + lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. */ >> + if (!lexer_match_id(ctx->lexer, "timeout")) { >> + lexer_syntax_error(ctx->lexer, "invalid parameter"); >> + return; >> + } >> + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { >> + lexer_syntax_error(ctx->lexer, "invalid parameter"); >> + return; >> + } >> + if (!action_parse_uint16(ctx, &timeout, "fdb_local flow timeout")) { >> + return; >> + } >> + fdb_local->timeout = timeout; >> + lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */ >> +} >> + >> +static void >> +format_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local, >> + struct ds *s) >> +{ >> + ds_put_format(s, "commit_fdb_local(timeout=%u);", fdb_local->timeout); >> +} >> + >> +static void >> +ovnact_commit_fdb_local_free(struct ovnact_commit_fdb_local *fdb_local >> OVS_UNUSED) >> +{ >> +} >> + >> +static void >> +commit_fdb_local_learn_action(struct ovnact_commit_fdb_local *fdb_local, >> + struct ofpbuf *ofpacts, uint32_t cookie) >> +{ >> + struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts); >> + struct match match = MATCH_CATCHALL_INITIALIZER; >> + struct ofpact_learn_spec *ol_spec; >> + unsigned int imm_bytes; >> + uint8_t *src_imm; >> + >> + ol->flags = NX_LEARN_F_DELETE_LEARNED; >> + ol->idle_timeout = fdb_local->timeout; >> + ol->hard_timeout = OFP_FLOW_PERMANENT; >> + ol->priority = OFP_DEFAULT_PRIORITY; >> + ol->table_id = OFTABLE_GET_FDB; >> + ol->cookie = htonll(cookie); >> + >> + /* Match on metadata of the packet that created the new table. */ >> + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); >> + ol_spec->dst.field = mf_from_id(MFF_METADATA); >> + ol_spec->dst.ofs = 0; >> + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; >> + ol_spec->n_bits = ol_spec->dst.n_bits; >> + ol_spec->dst_type = NX_LEARN_DST_MATCH; >> + ol_spec->src_type = NX_LEARN_SRC_FIELD; >> + ol_spec->src.field = mf_from_id(MFF_METADATA); >> + >> + /* Match on metadata of the packet. */ >> + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); >> + ol_spec->dst.field = mf_from_id(MFF_ETH_DST); >> + ol_spec->dst.ofs = 0; >> + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; >> + ol_spec->n_bits = ol_spec->dst.n_bits; >> + ol_spec->dst_type = NX_LEARN_DST_MATCH; >> + ol_spec->src_type = NX_LEARN_SRC_FIELD; >> + ol_spec->src.field = mf_from_id(MFF_ETH_SRC); >> + >> + >> + /* Load MFF_LOG_OUTPORT from MFF_IN_PORT. */ >> + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); >> + ol_spec->dst.field = mf_from_id(MFF_LOG_OUTPORT); >> + ol_spec->dst.ofs = 0; >> + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; >> + ol_spec->n_bits = ol_spec->dst.n_bits; >> + ol_spec->dst_type = NX_LEARN_DST_LOAD; >> + ol_spec->src_type = NX_LEARN_SRC_FIELD; >> + ol_spec->src.field = mf_from_id(MFF_LOG_INPORT); >> + >> + ofpact_finish_LEARN(ofpacts, &ol); >> +} > > A difference from today's SB.FDB centralized approach is that when > ovn-controller restarts these flows will be cleared, I think. > > Are we OK with that? I think so but if not what are the options to > avoid clearing the local fdb cache on restart? > OVS has to relearn the FDB flows whenever tables are cleared. During this time, packets gets flooded. I need to think about possible options if we want to retain FDB table. Can we take this up as an enhancement in a separate patch once we identify a solution for this ? > Another difference with today's approach is that this avoids a > controller action, that's great! > >> + >> +static void >> +encode_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local, >> + const struct ovnact_encode_params *ep, >> + struct ofpbuf *ofpacts) >> +{ >> + commit_fdb_local_learn_action(fdb_local, ofpacts, >> ep->lflow_uuid.parts[0]); >> +} >> + >> static void >> encode_MAC_CACHE_USE(const struct ovnact_null *null OVS_UNUSED, >> const struct ovnact_encode_params *ep, >> @@ -5451,6 +5543,8 @@ parse_action(struct action_context *ctx) >> parse_sample(ctx); >> } else if (lexer_match_id(ctx->lexer, "mac_cache_use")) { >> ovnact_put_MAC_CACHE_USE(ctx->ovnacts); >> + } else if (lexer_match_id(ctx->lexer, "commit_fdb_local")) { >> + parse_commit_fdb_local(ctx, >> ovnact_put_COMMIT_FDB_LOCAL(ctx->ovnacts)); >> } else { >> lexer_syntax_error(ctx->lexer, "expecting action"); >> } >> diff --git a/northd/northd.c b/northd/northd.c >> index d1465ddf7..de18694a0 100644 >> --- a/northd/northd.c >> +++ b/northd/northd.c >> @@ -1834,6 +1834,12 @@ localnet_can_learn_mac(const struct >> nbrec_logical_switch_port *nbsp) >> return smap_get_bool( ->options, "localnet_learn_fdb", false); >> } >> >> +static bool >> +ls_is_fdb_local(const struct nbrec_logical_switch *nbs) >> +{ >> + return smap_get_bool(&nbs->other_config, "fdb_local", false); >> +} >> + > > Personally, I'd prefer if we don't add another config knob and we just > make this the only way FDB works. We could also document that the FDB > SB table should be deprecated. > If we want to make local FDB as default then, I think we need to handle overlay use case as well. Probably, we might have to add a new stage in logical switch egress pipeline to learn FDB entries for packets coming over tunnel (or) something similar. Can we take this up in a separate patch ? >> static bool >> lsp_is_type_changed(const struct sbrec_port_binding *sb, >> const struct nbrec_logical_switch_port *nbsp, >> @@ -7033,6 +7039,8 @@ build_lswitch_port_sec_op(struct ovn_port *op, struct >> hmap *lflows, >> } >> } >> >> +#define FDB_LOCAL_DEF_IDLE_TIMEOUT_S 300 >> + > > This, on the other hand, might be a good candidate for a config option. “fdb_local_idle_timeout” option is added in this patch to configure the timeout. Default value is 300 secs. > >> static void >> build_lswitch_learn_fdb_op( >> struct ovn_port *op, struct hmap *lflows, >> @@ -7042,6 +7050,24 @@ build_lswitch_learn_fdb_op( >> >> if (!op->n_ps_addrs && op->has_unknown && (!strcmp(op->nbsp->type, "") || >> (lsp_is_localnet(op->nbsp) && localnet_can_learn_mac(op->nbsp)))) { >> + >> + if (ls_is_fdb_local(op->od->nbs)) >> + { >> + uint32_t idle_timeout =smap_get_uint( >> + &op->od->nbs->other_config, >> "fdb_local_idle_timeout", >> + FDB_LOCAL_DEF_IDLE_TIMEOUT_S); >> + ds_clear(match); >> + ds_clear(actions); >> + ds_put_format(match, "inport == %s", op->json_key); >> + ds_put_format(actions, "commit_fdb_local(timeout=%u); next;", >> + idle_timeout); >> + ovn_lflow_add_with_lport_and_hint(lflows, op->od, >> + S_SWITCH_IN_LOOKUP_FDB, 100, >> + ds_cstr(match), >> ds_cstr(actions), >> + op->key, &op->nbsp->header_); >> + return; >> + } >> + >> ds_clear(match); >> ds_clear(actions); >> ds_put_format(match, "inport == %s", op->json_key); >> diff --git a/ovn-nb.xml b/ovn-nb.xml >> index fcb1c6ecc..3547ec4a6 100644 >> --- a/ovn-nb.xml >> +++ b/ovn-nb.xml >> @@ -803,6 +803,20 @@ >> </column> >> </group> >> >> + <group title="Local FDB options"> >> + <column name="other_config" key="fdb_local" >> + type='{"type": "boolean"}'> >> + If set to <code>true</code>, FDB flows are commited only to the >> + local chassis instead of southbound DB. Default is false. >> + </column> >> + <column name="other_config" key="fdb_local_idle_timeout" >> + type='{"type": "integer", "minInteger": 0, "maxInteger": >> 65535}'> >> + Local FDB flows <code>idle_timeout</code> value in seconds. FDB >> local >> + flows exceeding this timeout will be automatically removed. The >> value >> + defaults to 300, 0 means disabled. >> + </column> >> + </group> >> + >> <column name="copp"> >> <p> >> The control plane protection policy from table <ref table="Copp"/> >> diff --git a/tests/ovn.at b/tests/ovn.at >> index 92cf27581..dff50364a 100644 >> --- a/tests/ovn.at >> +++ b/tests/ovn.at >> @@ -34504,6 +34504,114 @@ OVN_CLEANUP([hv1]) >> AT_CLEANUP >> ]) >> >> +OVN_FOR_EACH_NORTHD([ >> +AT_SETUP([Local FDB MAC learning]) >> +ovn_start >> +net_add n1 >> + >> +AT_CHECK([ovn-nbctl ls-add ls0]) >> + >> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0]) >> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "50:54:00:00:00:03 10.0.0.3" >> "unknown"]) >> +AT_CHECK([ovn-nbctl set logical_switch_port vif0 >> options:requested-tnl-key=2]) >> + >> +AT_CHECK([ovn-nbctl lsp-add ls0 vif1]) >> +AT_CHECK([ovn-nbctl lsp-set-addresses vif1 "50:54:00:00:00:04 10.0.0.4"]) >> +AT_CHECK([ovn-nbctl set logical_switch_port vif1 >> options:requested-tnl-key=3]) >> + >> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port]) >> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown]) >> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet]) >> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1]) >> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port >> options:localnet_learn_fdb=true]) >> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port >> options:requested-tnl-key=1]) >> + >> +AT_CHECK([ovn-nbctl set logical_switch ls0 other_config:fdb_local=true]) >> + >> +sim_add hv1 >> +as hv1 >> +ovs-vsctl add-br br-phys >> +ovn_attach n1 br-phys 192.168.0.1 >> +ovs-vsctl -- add-port br-int vif0 -- \ >> + set interface vif0 external-ids:iface-id=vif0 \ >> + options:tx_pcap=hv1/vif0-tx.pcap \ >> + options:rxq_pcap=hv1/vif0-rx.pcap \ >> + ofport-request=1 >> +ovs-vsctl -- add-port br-int vif1 -- \ >> + set interface vif1 external-ids:iface-id=vif1 \ >> + options:tx_pcap=hv1/vif1-tx.pcap \ >> + options:rxq_pcap=hv1/vif1-rx.pcap \ >> + ofport-request=2 >> +ovs-vsctl -- add-port br-phys ext0 -- \ >> + set interface ext0 \ >> + options:tx_pcap=hv1/ext0-tx.pcap \ >> + options:rxq_pcap=hv1/ext0-rx.pcap \ >> + ofport-request=3 >> +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys >> + >> +wait_for_ports_up >> +AT_CHECK([ovn-nbctl --wait=hv sync]) >> + >> +send_packet() { >> + src_mac=$1 >> + src_ip=$2 >> + dst_mac=$3 >> + dst_ip=$4 >> + iface=$5 >> + >> packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000 >> + ovs-appctl netdev-dummy/receive $iface $packet >> +} >> + >> + >> +# Check that there is commit_fdb_local_fdb() flow added by ovn-northd for >> vif0 and localnet >> +ovn-sbctl dump-flows ls0 > sw0flows >> +AT_CAPTURE_FILE([sw0flows]) >> + >> +AT_CHECK([grep "ls_in_lookup_fdb" sw0flows | sort], [0], [dnl >> + table=2 (ls_in_lookup_fdb ), priority=0 , dnl >> +match=(1), action=(next;) >> + table=2 (ls_in_lookup_fdb ), priority=100 , dnl >> +match=(inport == "ln_port"), action=(commit_fdb_local(timeout=300); next;) >> + table=2 (ls_in_lookup_fdb ), priority=100 , dnl >> +match=(inport == "vif0"), action=(commit_fdb_local(timeout=300); next;) >> +]) >> + >> +AT_CHECK([grep "ls_in_put_fdb" sw0flows | sort], [0], [dnl >> + table=3 (ls_in_put_fdb ), priority=0 , dnl >> +match=(1), action=(next;) >> +]) >> + >> + >> +src_mac="505400000003" >> +src_ip=`ip_to_hex 10.0.0.3` >> +dst_mac="505400000004" >> +dst_ip=`ip_to_hex 10.0.0.4` >> + >> +# send packet from vif0(which has unknown addr set) to vif1 >> +send_packet $src_mac $src_ip $dst_mac $dst_ip vif0 >> + >> +# send packet from vif1 to vif0(which has unknown addr set) >> +send_packet $dst_mac $dst_ip $src_mac $src_ip vif1 >> + >> +# send packet from underlay to vif1 >> +src_mac="505400000064" >> +src_ip=`ip_to_hex 10.0.0.100` >> +send_packet $src_mac $src_ip $dst_mac $dst_ip ext0 >> +AT_CHECK([ovn-nbctl --wait=hv sync]) >> + >> +# Make sure that OVS table 71 is populated on hv1. >> +AS_BOX([Check that ovn-controller programs the flows for FDB]) >> +as hv1 ovs-ofctl dump-flows br-int table=71 > hv1_offlows_table71.txt >> +AT_CAPTURE_FILE([hv1_offlows_table71.txt]) >> +AT_CHECK([cat hv1_offlows_table71.txt | grep -v NXST | cut -d ' ' -f7- | >> sort], [0], [dnl >> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:03 >> actions=load:0x2->NXM_NX_REG15[[]] >> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:64 >> actions=load:0x1->NXM_NX_REG15[[]] >> +]) >> + >> +OVN_CLEANUP([hv1]) >> +AT_CLEANUP >> +]) >> + >> OVN_FOR_EACH_NORTHD([ >> AT_SETUP([MAC binding aging]) >> AT_SKIP_IF([test $HAVE_SCAPY = no]) >> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c >> index 0b86eae7b..354f84a4b 100644 >> --- a/utilities/ovn-trace.c >> +++ b/utilities/ovn-trace.c >> @@ -3355,6 +3355,8 @@ trace_actions(const struct ovnact *ovnacts, size_t >> ovnacts_len, >> break; >> case OVNACT_MAC_CACHE_USE: >> break; >> + case OVNACT_COMMIT_FDB_LOCAL: >> + break; >> } >> } >> ofpbuf_uninit(&stack); > > Regards, > Dumitru _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev