On 11/30/23 09:45, Naveen Yerramneni wrote: > > >> On 29-Nov-2023, at 2:24 PM, Dumitru Ceara <dce...@redhat.com> wrote: >> >> On 11/29/23 07:45, naveen.yerramneni wrote: >>> This functionality can be enabled at the logical switch level: >>> - "other_config:fdb_local" can be used to enable/disable this >>> functionality, it is disabled by default. >>> - "other_config:fdb_local_idle_timeout" sepcifies idle timeout >>> for locally learned fdb flows, default timeout is 300 secs. >>> >>> If enabled, below lflow is added for each port that has unknown addr set. >>> - table=2 (ls_in_lookup_fdb), priority=100, match=(inport == <in_port>), >>> action=(commit_fdb_local(timeout=<timeout>); next; >>> >>> New OVN action: "commit_fdb_local". This sets following OVS action. >>> - learn(table=71,idle_timeout=<timeout>,delete_learned,OXM_OF_METADATA[], >>> >>> NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],load:NXM_NX_REG14[]->NXM_NX_REG15[]) >>> >>> This is useful when OVN is managing VLAN network that has multiple ports >>> set with unknown addr and localnet_learn_fdb is enabled. With this config, >>> if there is east-west traffic flowing between VMs part of same VLAN >>> deployed on different hypervisors then, MAC addrs of the source and >>> destination VMs keeps flapping between VM port and localnet port in >>> Southbound FDB table. Enabling fdb_local config makes fdb table local to >>> the chassis and avoids MAC flapping. >>> >>> Signed-off-by: Naveen Yerramneni <naveen.yerramn...@nutanix.com> >>> --- >> >> Hi Naveen, >> >> Thanks a lot for the patch! 
>> >> Just a note, we already have a fix for the east-west traffic that causes >> FDB flapping when localnet is used: >> >> https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ovn-2Dorg_ovn_commit_2acf91e9628e9481c48e4a6cec8ad5159fdd6d2e&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=2PQjSDR7A28z1kXE1ptSm6X36oL_nCq1XxeEt7FkLmA&m=kPuq992rikXYk63APGxlIpfqY3lPpreN9f4ha9pZKpodnVgE9KfjEUNozpPUFzUu&s=LP9_zs2Rj34vMx20ntbu-A3taXqKMJNVH2TLQyOXCh0&e= >> >> >> https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ovn-2Dorg_ovn_commit_f3a14907fe2b1ecdcfddfbed595cd097b6efbe14&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=2PQjSDR7A28z1kXE1ptSm6X36oL_nCq1XxeEt7FkLmA&m=kPuq992rikXYk63APGxlIpfqY3lPpreN9f4ha9pZKpodnVgE9KfjEUNozpPUFzUu&s=gsUGtjyf9gSOr1LkcCH0O6MB1_tjXi9fuTgwEFgbRx8&e= >> >> >> In general, however, I think it's a very good idea to move the FDB away >> from the Southbound and make it local to each hypervisor. That reduces >> load on the Southbound among other things. >> > > Hi Dumitru, > > Thanks for informing about the patches. > Yes, local FDB reduces load on southbound. > > >>> include/ovn/actions.h | 7 +++ >>> lib/actions.c | 94 ++++++++++++++++++++++++++++++++++++ >>> northd/northd.c | 26 ++++++++++ >>> ovn-nb.xml | 14 ++++++ >>> tests/ovn.at | 108 ++++++++++++++++++++++++++++++++++++++++++ >>> utilities/ovn-trace.c | 2 + >>> 6 files changed, 251 insertions(+) >>> >>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h >>> index 49cfe0624..85ac92cd3 100644 >>> --- a/include/ovn/actions.h >>> +++ b/include/ovn/actions.h >>> @@ -127,6 +127,7 @@ struct collector_set_ids; >>> OVNACT(CHK_LB_AFF, ovnact_result) \ >>> OVNACT(SAMPLE, ovnact_sample) \ >>> OVNACT(MAC_CACHE_USE, ovnact_null) \ >>> + OVNACT(COMMIT_FDB_LOCAL, ovnact_commit_fdb_local) \ >>> >>> /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. 
*/ >>> enum OVS_PACKED_ENUM ovnact_type { >>> @@ -514,6 +515,12 @@ struct ovnact_commit_lb_aff { >>> uint16_t timeout; >>> }; >>> >>> +/* OVNACT_COMMIT_FBD_LOCAL. */ >>> +struct ovnact_commit_fdb_local{ >>> + struct ovnact ovnact; >>> + uint16_t timeout; /* fdb_local flow timeout */ >>> +}; >>> + >>> /* Internal use by the helpers below. */ >>> void ovnact_init(struct ovnact *, enum ovnact_type, size_t len); >>> void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len); >>> diff --git a/lib/actions.c b/lib/actions.c >>> index a73fe1a1e..f5aa78db1 100644 >>> --- a/lib/actions.c >>> +++ b/lib/actions.c >>> @@ -5236,6 +5236,98 @@ format_MAC_CACHE_USE(const struct ovnact_null *null >>> OVS_UNUSED, struct ds *s) >>> ds_put_cstr(s, "mac_cache_use;"); >>> } >>> >>> +static void >>> +parse_commit_fdb_local(struct action_context *ctx, >>> + struct ovnact_commit_fdb_local *fdb_local) >>> +{ >>> + uint16_t timeout = 0; >>> + lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. */ >>> + if (!lexer_match_id(ctx->lexer, "timeout")) { >>> + lexer_syntax_error(ctx->lexer, "invalid parameter"); >>> + return; >>> + } >>> + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { >>> + lexer_syntax_error(ctx->lexer, "invalid parameter"); >>> + return; >>> + } >>> + if (!action_parse_uint16(ctx, &timeout, "fdb_local flow timeout")) { >>> + return; >>> + } >>> + fdb_local->timeout = timeout; >>> + lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. 
*/ >>> +} >>> + >>> +static void >>> +format_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local, >>> + struct ds *s) >>> +{ >>> + ds_put_format(s, "commit_fdb_local(timeout=%u);", fdb_local->timeout); >>> +} >>> + >>> +static void >>> +ovnact_commit_fdb_local_free(struct ovnact_commit_fdb_local *fdb_local >>> OVS_UNUSED) >>> +{ >>> +} >>> + >>> +static void >>> +commit_fdb_local_learn_action(struct ovnact_commit_fdb_local *fdb_local, >>> + struct ofpbuf *ofpacts, uint32_t cookie) >>> +{ >>> + struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts); >>> + struct match match = MATCH_CATCHALL_INITIALIZER; >>> + struct ofpact_learn_spec *ol_spec; >>> + unsigned int imm_bytes; >>> + uint8_t *src_imm; >>> + >>> + ol->flags = NX_LEARN_F_DELETE_LEARNED; >>> + ol->idle_timeout = fdb_local->timeout; >>> + ol->hard_timeout = OFP_FLOW_PERMANENT; >>> + ol->priority = OFP_DEFAULT_PRIORITY; >>> + ol->table_id = OFTABLE_GET_FDB; >>> + ol->cookie = htonll(cookie); >>> + >>> + /* Match on metadata of the packet that created the new table. */ >>> + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); >>> + ol_spec->dst.field = mf_from_id(MFF_METADATA); >>> + ol_spec->dst.ofs = 0; >>> + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; >>> + ol_spec->n_bits = ol_spec->dst.n_bits; >>> + ol_spec->dst_type = NX_LEARN_DST_MATCH; >>> + ol_spec->src_type = NX_LEARN_SRC_FIELD; >>> + ol_spec->src.field = mf_from_id(MFF_METADATA); >>> + >>> + /* Match on metadata of the packet. */ >>> + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); >>> + ol_spec->dst.field = mf_from_id(MFF_ETH_DST); >>> + ol_spec->dst.ofs = 0; >>> + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; >>> + ol_spec->n_bits = ol_spec->dst.n_bits; >>> + ol_spec->dst_type = NX_LEARN_DST_MATCH; >>> + ol_spec->src_type = NX_LEARN_SRC_FIELD; >>> + ol_spec->src.field = mf_from_id(MFF_ETH_SRC); >>> + >>> + >>> + /* Load MFF_LOG_OUTPORT from MFF_IN_PORT. 
*/ >>> + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); >>> + ol_spec->dst.field = mf_from_id(MFF_LOG_OUTPORT); >>> + ol_spec->dst.ofs = 0; >>> + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; >>> + ol_spec->n_bits = ol_spec->dst.n_bits; >>> + ol_spec->dst_type = NX_LEARN_DST_LOAD; >>> + ol_spec->src_type = NX_LEARN_SRC_FIELD; >>> + ol_spec->src.field = mf_from_id(MFF_LOG_INPORT); >>> + >>> + ofpact_finish_LEARN(ofpacts, &ol); >>> +} >> >> A difference from today's SB.FDB centralized approach is that when >> ovn-controller restarts these flows will be cleared, I think. >> >> Are we OK with that? I think so but if not what are the options to >> avoid clearing the local fdb cache on restart? >> > > OVS has to relearn the FDB flows whenever tables are cleared. > During this time, packets gets flooded. I need to think about possible > options if we want to retain FDB table. > > Can we take this up as an enhancement in a separate patch > once we identify a solution for this ? >
Sounds good to me. >> Another difference with today's approach is that this avoids a >> controller action, that's great! >> >>> + >>> +static void >>> +encode_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local, >>> + const struct ovnact_encode_params *ep, >>> + struct ofpbuf *ofpacts) >>> +{ >>> + commit_fdb_local_learn_action(fdb_local, ofpacts, >>> ep->lflow_uuid.parts[0]); >>> +} >>> + >>> static void >>> encode_MAC_CACHE_USE(const struct ovnact_null *null OVS_UNUSED, >>> const struct ovnact_encode_params *ep, >>> @@ -5451,6 +5543,8 @@ parse_action(struct action_context *ctx) >>> parse_sample(ctx); >>> } else if (lexer_match_id(ctx->lexer, "mac_cache_use")) { >>> ovnact_put_MAC_CACHE_USE(ctx->ovnacts); >>> + } else if (lexer_match_id(ctx->lexer, "commit_fdb_local")) { >>> + parse_commit_fdb_local(ctx, >>> ovnact_put_COMMIT_FDB_LOCAL(ctx->ovnacts)); >>> } else { >>> lexer_syntax_error(ctx->lexer, "expecting action"); >>> } >>> diff --git a/northd/northd.c b/northd/northd.c >>> index d1465ddf7..de18694a0 100644 >>> --- a/northd/northd.c >>> +++ b/northd/northd.c >>> @@ -1834,6 +1834,12 @@ localnet_can_learn_mac(const struct >>> nbrec_logical_switch_port *nbsp) >>> return smap_get_bool(&nbsp->options, "localnet_learn_fdb", false); >>> } >>> >>> +static bool >>> +ls_is_fdb_local(const struct nbrec_logical_switch *nbs) >>> +{ >>> + return smap_get_bool(&nbs->other_config, "fdb_local", false); >>> +} >>> + >> >> Personally, I'd prefer if we don't add another config knob and we just >> make this the only way FDB works. We could also document that the FDB >> SB table should be deprecated. >> > > If we want to make local FDB as default then, I think we need to handle > overlay > use case as well. Probably, we might have to add a new stage in logical switch > egress pipeline to learn FDB entries for packets coming over tunnel (or) > something similar. > > Can we take this up in a separate patch ? 
> > Given that we have a fix already for the original problem you were trying to address, I would prefer that we avoid adding new config knobs and handle both the localnet and overlay cases at the same time. OTOH, why is there a difference? The learned flow loads MFF_LOG_INPORT (from the packet that triggers the learn() action) into MFF_LOG_OUTPORT. With Geneve and STT overlay MFF_LOG_INPORT is still correctly set. With VXLAN that's not the case but that's already a documented limitation, LOG_INPORT is not available after VXLAN tunneling therefore features that need it (like egress ACLs matching against ingress port identifiers) are not supported: https://github.com/ovn-org/ovn/blob/main/ovn-architecture.7.xml#L2842 One more thing we need to take care of in order to be able to make local FDB the default is "FDB refresh": 551527a5e68e ("controller: Update FDB timestamp") https://github.com/ovn-org/ovn/commit/551527a5e68e7233ad80d212d549df98f13e37bc >>> static bool >>> lsp_is_type_changed(const struct sbrec_port_binding *sb, >>> const struct nbrec_logical_switch_port *nbsp, >>> @@ -7033,6 +7039,8 @@ build_lswitch_port_sec_op(struct ovn_port *op, struct >>> hmap *lflows, >>> } >>> } >>> >>> +#define FDB_LOCAL_DEF_IDLE_TIMEOUT_S 300 >>> + >> >> This, on the other hand, might be a good candidate for a config option. > > “fdb_local_idle_timeout” option is added in this patch to configure the > timeout. > Default value is 300 secs. > True, I missed that you do that below. However, we already have "LS.other_config:fdb_age_threshold" in the NB database to control exactly the same thing. I'd just use that one. 
>> >>> static void >>> build_lswitch_learn_fdb_op( >>> struct ovn_port *op, struct hmap *lflows, >>> @@ -7042,6 +7050,24 @@ build_lswitch_learn_fdb_op( >>> >>> if (!op->n_ps_addrs && op->has_unknown && (!strcmp(op->nbsp->type, "") >>> || >>> (lsp_is_localnet(op->nbsp) && localnet_can_learn_mac(op->nbsp)))) { >>> + >>> + if (ls_is_fdb_local(op->od->nbs)) >>> + { >>> + uint32_t idle_timeout =smap_get_uint( >>> + &op->od->nbs->other_config, >>> "fdb_local_idle_timeout", >>> + FDB_LOCAL_DEF_IDLE_TIMEOUT_S); >>> + ds_clear(match); >>> + ds_clear(actions); >>> + ds_put_format(match, "inport == %s", op->json_key); >>> + ds_put_format(actions, "commit_fdb_local(timeout=%u); next;", >>> + idle_timeout); >>> + ovn_lflow_add_with_lport_and_hint(lflows, op->od, >>> + S_SWITCH_IN_LOOKUP_FDB, 100, >>> + ds_cstr(match), >>> ds_cstr(actions), >>> + op->key, &op->nbsp->header_); >>> + return; >>> + } >>> + >>> ds_clear(match); >>> ds_clear(actions); >>> ds_put_format(match, "inport == %s", op->json_key); >>> diff --git a/ovn-nb.xml b/ovn-nb.xml >>> index fcb1c6ecc..3547ec4a6 100644 >>> --- a/ovn-nb.xml >>> +++ b/ovn-nb.xml >>> @@ -803,6 +803,20 @@ >>> </column> >>> </group> >>> >>> + <group title="Local FDB options"> >>> + <column name="other_config" key="fdb_local" >>> + type='{"type": "boolean"}'> >>> + If set to <code>true</code>, FDB flows are commited only to the >>> + local chassis instead of southbound DB. Default is false. >>> + </column> >>> + <column name="other_config" key="fdb_local_idle_timeout" >>> + type='{"type": "integer", "minInteger": 0, "maxInteger": >>> 65535}'> >>> + Local FDB flows <code>idle_timeout</code> value in seconds. FDB >>> local >>> + flows exceeding this timeout will be automatically removed. The >>> value >>> + defaults to 300, 0 means disabled. 
>>> + </column> >>> + </group> >>> + >>> <column name="copp"> >>> <p> >>> The control plane protection policy from table <ref table="Copp"/> >>> diff --git a/tests/ovn.at b/tests/ovn.at >>> index 92cf27581..dff50364a 100644 >>> --- a/tests/ovn.at >>> +++ b/tests/ovn.at >>> @@ -34504,6 +34504,114 @@ OVN_CLEANUP([hv1]) >>> AT_CLEANUP >>> ]) >>> >>> +OVN_FOR_EACH_NORTHD([ >>> +AT_SETUP([Local FDB MAC learning]) >>> +ovn_start >>> +net_add n1 >>> + >>> +AT_CHECK([ovn-nbctl ls-add ls0]) >>> + >>> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0]) >>> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "50:54:00:00:00:03 10.0.0.3" >>> "unknown"]) >>> +AT_CHECK([ovn-nbctl set logical_switch_port vif0 >>> options:requested-tnl-key=2]) >>> + >>> +AT_CHECK([ovn-nbctl lsp-add ls0 vif1]) >>> +AT_CHECK([ovn-nbctl lsp-set-addresses vif1 "50:54:00:00:00:04 10.0.0.4"]) >>> +AT_CHECK([ovn-nbctl set logical_switch_port vif1 >>> options:requested-tnl-key=3]) >>> + >>> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port]) >>> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown]) >>> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet]) >>> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1]) >>> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port >>> options:localnet_learn_fdb=true]) >>> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port >>> options:requested-tnl-key=1]) >>> + >>> +AT_CHECK([ovn-nbctl set logical_switch ls0 other_config:fdb_local=true]) >>> + >>> +sim_add hv1 >>> +as hv1 >>> +ovs-vsctl add-br br-phys >>> +ovn_attach n1 br-phys 192.168.0.1 >>> +ovs-vsctl -- add-port br-int vif0 -- \ >>> + set interface vif0 external-ids:iface-id=vif0 \ >>> + options:tx_pcap=hv1/vif0-tx.pcap \ >>> + options:rxq_pcap=hv1/vif0-rx.pcap \ >>> + ofport-request=1 >>> +ovs-vsctl -- add-port br-int vif1 -- \ >>> + set interface vif1 external-ids:iface-id=vif1 \ >>> + options:tx_pcap=hv1/vif1-tx.pcap \ >>> + options:rxq_pcap=hv1/vif1-rx.pcap \ >>> + ofport-request=2 >>> +ovs-vsctl -- add-port br-phys 
ext0 -- \ >>> + set interface ext0 \ >>> + options:tx_pcap=hv1/ext0-tx.pcap \ >>> + options:rxq_pcap=hv1/ext0-rx.pcap \ >>> + ofport-request=3 >>> +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys >>> + >>> +wait_for_ports_up >>> +AT_CHECK([ovn-nbctl --wait=hv sync]) >>> + >>> +send_packet() { >>> + src_mac=$1 >>> + src_ip=$2 >>> + dst_mac=$3 >>> + dst_ip=$4 >>> + iface=$5 >>> + >>> packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000 >>> + ovs-appctl netdev-dummy/receive $iface $packet >>> +} >>> + >>> + >>> +# Check that there is commit_fdb_local_fdb() flow added by ovn-northd for >>> vif0 and localnet >>> +ovn-sbctl dump-flows ls0 > sw0flows >>> +AT_CAPTURE_FILE([sw0flows]) >>> + >>> +AT_CHECK([grep "ls_in_lookup_fdb" sw0flows | sort], [0], [dnl >>> + table=2 (ls_in_lookup_fdb ), priority=0 , dnl >>> +match=(1), action=(next;) >>> + table=2 (ls_in_lookup_fdb ), priority=100 , dnl >>> +match=(inport == "ln_port"), action=(commit_fdb_local(timeout=300); next;) >>> + table=2 (ls_in_lookup_fdb ), priority=100 , dnl >>> +match=(inport == "vif0"), action=(commit_fdb_local(timeout=300); next;) >>> +]) >>> + >>> +AT_CHECK([grep "ls_in_put_fdb" sw0flows | sort], [0], [dnl >>> + table=3 (ls_in_put_fdb ), priority=0 , dnl >>> +match=(1), action=(next;) >>> +]) >>> + >>> + >>> +src_mac="505400000003" >>> +src_ip=`ip_to_hex 10.0.0.3` >>> +dst_mac="505400000004" >>> +dst_ip=`ip_to_hex 10.0.0.4` >>> + >>> +# send packet from vif0(which has unknown addr set) to vif1 >>> +send_packet $src_mac $src_ip $dst_mac $dst_ip vif0 >>> + >>> +# send packet from vif1 to vif0(which has unknown addr set) >>> +send_packet $dst_mac $dst_ip $src_mac $src_ip vif1 >>> + >>> +# send packet from underlay to vif1 >>> +src_mac="505400000064" >>> +src_ip=`ip_to_hex 10.0.0.100` >>> +send_packet $src_mac $src_ip $dst_mac $dst_ip ext0 >>> +AT_CHECK([ovn-nbctl --wait=hv sync]) >>> + >>> +# Make sure that OVS table 71 is populated on hv1. 
>>> +AS_BOX([Check that ovn-controller programs the flows for FDB]) >>> +as hv1 ovs-ofctl dump-flows br-int table=71 > hv1_offlows_table71.txt >>> +AT_CAPTURE_FILE([hv1_offlows_table71.txt]) >>> +AT_CHECK([cat hv1_offlows_table71.txt | grep -v NXST | cut -d ' ' -f7- | >>> sort], [0], [dnl >>> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:03 >>> actions=load:0x2->NXM_NX_REG15[[]] >>> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:64 >>> actions=load:0x1->NXM_NX_REG15[[]] >>> +]) >>> + >>> +OVN_CLEANUP([hv1]) >>> +AT_CLEANUP >>> +]) >>> + >>> OVN_FOR_EACH_NORTHD([ >>> AT_SETUP([MAC binding aging]) >>> AT_SKIP_IF([test $HAVE_SCAPY = no]) >>> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c >>> index 0b86eae7b..354f84a4b 100644 >>> --- a/utilities/ovn-trace.c >>> +++ b/utilities/ovn-trace.c >>> @@ -3355,6 +3355,8 @@ trace_actions(const struct ovnact *ovnacts, size_t >>> ovnacts_len, >>> break; >>> case OVNACT_MAC_CACHE_USE: >>> break; >>> + case OVNACT_COMMIT_FDB_LOCAL: >>> + break; >>> } >>> } >>> ofpbuf_uninit(&stack); >> Regards, Dumitru _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev