> On 29-Nov-2023, at 2:24 PM, Dumitru Ceara <dce...@redhat.com> wrote:
> 
> On 11/29/23 07:45, naveen.yerramneni wrote:
>> This functionality can be enabled at the logical switch level:
>>  - "other_config:fdb_local" can be used to enable/disable this
>>    functionality, it is disabled by default.
>>  - "other_config:fdb_local_idle_timeout" specifies idle timeout
>>    for locally learned fdb flows, default timeout is 300 secs.
>> 
>> If enabled, below lflow is added for each port that has unknown addr set.
>>  - table=2 (ls_in_lookup_fdb), priority=100, match=(inport == <in_port>),
>>    action=(commit_fdb_local(timeout=<timeout>); next;)
>> 
>> New OVN action: "commit_fdb_local". This sets following OVS action.
>>  - learn(table=71,idle_timeout=<timeout>,delete_learned,OXM_OF_METADATA[],
>>          
>> NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],load:NXM_NX_REG14[]->NXM_NX_REG15[])
>> 
>> This is useful when OVN is managing a VLAN network that has multiple ports
>> set with unknown addr and localnet_learn_fdb is enabled. With this config,
>> if there is east-west traffic flowing between VMs that are part of the same
>> VLAN but deployed on different hypervisors, then the MAC addrs of the source
>> and destination VMs keep flapping between the VM port and the localnet port
>> in the Southbound FDB table. Enabling the fdb_local config makes the FDB
>> table local to the chassis and avoids MAC flapping.
>> 
>> Signed-off-by: Naveen Yerramneni <naveen.yerramn...@nutanix.com>
>> ---
> 
> Hi Naveen,
> 
> Thanks a lot for the patch!
> 
> Just a note, we already have a fix for the east-west traffic that causes
> FDB flapping when localnet is used:
> 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ovn-2Dorg_ovn_commit_2acf91e9628e9481c48e4a6cec8ad5159fdd6d2e&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=2PQjSDR7A28z1kXE1ptSm6X36oL_nCq1XxeEt7FkLmA&m=kPuq992rikXYk63APGxlIpfqY3lPpreN9f4ha9pZKpodnVgE9KfjEUNozpPUFzUu&s=LP9_zs2Rj34vMx20ntbu-A3taXqKMJNVH2TLQyOXCh0&e=
>  
> 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ovn-2Dorg_ovn_commit_f3a14907fe2b1ecdcfddfbed595cd097b6efbe14&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=2PQjSDR7A28z1kXE1ptSm6X36oL_nCq1XxeEt7FkLmA&m=kPuq992rikXYk63APGxlIpfqY3lPpreN9f4ha9pZKpodnVgE9KfjEUNozpPUFzUu&s=gsUGtjyf9gSOr1LkcCH0O6MB1_tjXi9fuTgwEFgbRx8&e=
>  
> 
> In general, however, I think it's a very good idea to move the FDB away
> from the Southbound and make it local to each hypervisor.  That reduces
> load on the Southbound among other things.
> 

Hi Dumitru,

Thanks for pointing out these patches.
Yes, a local FDB reduces the load on the Southbound.


>> include/ovn/actions.h |   7 +++
>> lib/actions.c         |  94 ++++++++++++++++++++++++++++++++++++
>> northd/northd.c       |  26 ++++++++++
>> ovn-nb.xml            |  14 ++++++
>> tests/ovn.at          | 108 ++++++++++++++++++++++++++++++++++++++++++
>> utilities/ovn-trace.c |   2 +
>> 6 files changed, 251 insertions(+)
>> 
>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
>> index 49cfe0624..85ac92cd3 100644
>> --- a/include/ovn/actions.h
>> +++ b/include/ovn/actions.h
>> @@ -127,6 +127,7 @@ struct collector_set_ids;
>>     OVNACT(CHK_LB_AFF,        ovnact_result)          \
>>     OVNACT(SAMPLE,            ovnact_sample)          \
>>     OVNACT(MAC_CACHE_USE,     ovnact_null)            \
>> +    OVNACT(COMMIT_FDB_LOCAL,  ovnact_commit_fdb_local) \
>> 
>> /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>> enum OVS_PACKED_ENUM ovnact_type {
>> @@ -514,6 +515,12 @@ struct ovnact_commit_lb_aff {
>>     uint16_t timeout;
>> };
>> 
>> +/* OVNACT_COMMIT_FDB_LOCAL. */
>> +struct ovnact_commit_fdb_local{
>> +    struct ovnact ovnact;
>> +    uint16_t timeout;  /* fdb_local flow timeout */
>> +};
>> +
>> /* Internal use by the helpers below. */
>> void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
>> void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
>> diff --git a/lib/actions.c b/lib/actions.c
>> index a73fe1a1e..f5aa78db1 100644
>> --- a/lib/actions.c
>> +++ b/lib/actions.c
>> @@ -5236,6 +5236,98 @@ format_MAC_CACHE_USE(const struct ovnact_null *null 
>> OVS_UNUSED, struct ds *s)
>>     ds_put_cstr(s, "mac_cache_use;");
>> }
>> 
>> +static void
>> +parse_commit_fdb_local(struct action_context *ctx,
>> +                     struct ovnact_commit_fdb_local *fdb_local)
>> +{
>> +    uint16_t timeout = 0;
>> +    lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. */
>> +    if (!lexer_match_id(ctx->lexer, "timeout")) {
>> +        lexer_syntax_error(ctx->lexer, "invalid parameter");
>> +        return;
>> +    }
>> +    if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
>> +        lexer_syntax_error(ctx->lexer, "invalid parameter");
>> +        return;
>> +    }
>> +    if (!action_parse_uint16(ctx, &timeout, "fdb_local flow timeout")) {
>> +        return;
>> +    }
>> +    fdb_local->timeout = timeout;
>> +    lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */
>> +}
>> +
>> +static void
>> +format_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local,
>> +                      struct ds *s)
>> +{
>> +    ds_put_format(s, "commit_fdb_local(timeout=%u);", fdb_local->timeout);
>> +}
>> +
>> +static void
>> +ovnact_commit_fdb_local_free(struct ovnact_commit_fdb_local *fdb_local 
>> OVS_UNUSED)
>> +{
>> +}
>> +
>> +static void
>> +commit_fdb_local_learn_action(struct ovnact_commit_fdb_local *fdb_local,
>> +                        struct ofpbuf *ofpacts, uint32_t cookie)
>> +{
>> +    struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts);
>> +    struct match match = MATCH_CATCHALL_INITIALIZER;
>> +    struct ofpact_learn_spec *ol_spec;
>> +    unsigned int imm_bytes;
>> +    uint8_t *src_imm;
>> +
>> +    ol->flags = NX_LEARN_F_DELETE_LEARNED;
>> +    ol->idle_timeout = fdb_local->timeout;
>> +    ol->hard_timeout = OFP_FLOW_PERMANENT;
>> +    ol->priority = OFP_DEFAULT_PRIORITY;
>> +    ol->table_id = OFTABLE_GET_FDB;
>> +    ol->cookie = htonll(cookie);
>> +
>> +    /* Match on metadata of the packet that created the new table. */
>> +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
>> +    ol_spec->dst.field = mf_from_id(MFF_METADATA);
>> +    ol_spec->dst.ofs = 0;
>> +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
>> +    ol_spec->n_bits = ol_spec->dst.n_bits;
>> +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
>> +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
>> +    ol_spec->src.field = mf_from_id(MFF_METADATA);
>> +
>> +    /* Match on Ethernet destination equal to the packet's Ethernet source. */
>> +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
>> +    ol_spec->dst.field = mf_from_id(MFF_ETH_DST);
>> +    ol_spec->dst.ofs = 0;
>> +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
>> +    ol_spec->n_bits = ol_spec->dst.n_bits;
>> +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
>> +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
>> +    ol_spec->src.field = mf_from_id(MFF_ETH_SRC);
>> +
>> +
>> +    /* Load MFF_LOG_OUTPORT from MFF_LOG_INPORT. */
>> +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
>> +    ol_spec->dst.field = mf_from_id(MFF_LOG_OUTPORT);
>> +    ol_spec->dst.ofs = 0;
>> +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
>> +    ol_spec->n_bits = ol_spec->dst.n_bits;
>> +    ol_spec->dst_type = NX_LEARN_DST_LOAD;
>> +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
>> +    ol_spec->src.field = mf_from_id(MFF_LOG_INPORT);
>> +
>> +    ofpact_finish_LEARN(ofpacts, &ol);
>> +}
> 
> A difference from today's SB.FDB centralized approach is that when
> ovn-controller restarts these flows will be cleared, I think.
> 
> Are we OK with that?  I think so but if not what are the options to
> avoid clearing the local fdb cache on restart?
> 

OVS has to relearn the FDB flows whenever the tables are cleared.
During this time, packets get flooded. I need to think about possible
options if we want to retain the FDB table.

Can we take this up as an enhancement in a separate patch
once we identify a solution for this?

> Another difference with today's approach is that this avoids a
> controller action, that's great!
> 
>> +
>> +static void
>> +encode_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local,
>> +                      const struct ovnact_encode_params *ep,
>> +                      struct ofpbuf *ofpacts)
>> +{
>> +     commit_fdb_local_learn_action(fdb_local, ofpacts, 
>> ep->lflow_uuid.parts[0]);
>> +}
>> +
>> static void
>> encode_MAC_CACHE_USE(const struct ovnact_null *null OVS_UNUSED,
>>                      const struct ovnact_encode_params *ep,
>> @@ -5451,6 +5543,8 @@ parse_action(struct action_context *ctx)
>>         parse_sample(ctx);
>>     } else if (lexer_match_id(ctx->lexer, "mac_cache_use")) {
>>         ovnact_put_MAC_CACHE_USE(ctx->ovnacts);
>> +    } else if (lexer_match_id(ctx->lexer, "commit_fdb_local")) {
>> +        parse_commit_fdb_local(ctx, 
>> ovnact_put_COMMIT_FDB_LOCAL(ctx->ovnacts));
>>     } else {
>>         lexer_syntax_error(ctx->lexer, "expecting action");
>>     }
>> diff --git a/northd/northd.c b/northd/northd.c
>> index d1465ddf7..de18694a0 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -1834,6 +1834,12 @@ localnet_can_learn_mac(const struct 
>> nbrec_logical_switch_port *nbsp)
>>     return smap_get_bool(&nbsp->options, "localnet_learn_fdb", false);
>> }
>> 
>> +static bool
>> +ls_is_fdb_local(const struct nbrec_logical_switch *nbs)
>> +{
>> +    return smap_get_bool(&nbs->other_config, "fdb_local", false);
>> +}
>> +
> 
> Personally, I'd prefer if we don't add another config knob and we just
> make this the only way FDB works.  We could also document that the FDB
> SB table should be deprecated.
> 

If we want to make the local FDB the default, then I think we need to handle the
overlay use case as well. We would probably have to add a new stage in the logical
switch egress pipeline to learn FDB entries for packets coming over a tunnel, or
something similar.

Can we take this up in a separate patch?
  

>> static bool
>> lsp_is_type_changed(const struct sbrec_port_binding *sb,
>>                 const struct nbrec_logical_switch_port *nbsp,
>> @@ -7033,6 +7039,8 @@ build_lswitch_port_sec_op(struct ovn_port *op, struct 
>> hmap *lflows,
>>     }
>> }
>> 
>> +#define FDB_LOCAL_DEF_IDLE_TIMEOUT_S 300
>> +
> 
> This, on the other hand, might be a good candidate for a config option.

The "fdb_local_idle_timeout" option is added in this patch to configure the timeout.
The default value is 300 secs.

> 
>> static void
>> build_lswitch_learn_fdb_op(
>>         struct ovn_port *op, struct hmap *lflows,
>> @@ -7042,6 +7050,24 @@ build_lswitch_learn_fdb_op(
>> 
>>     if (!op->n_ps_addrs && op->has_unknown && (!strcmp(op->nbsp->type, "") ||
>>         (lsp_is_localnet(op->nbsp) && localnet_can_learn_mac(op->nbsp)))) {
>> +
>> +        if (ls_is_fdb_local(op->od->nbs))
>> +        {
>> +            uint32_t idle_timeout =smap_get_uint(
>> +                        &op->od->nbs->other_config, 
>> "fdb_local_idle_timeout",
>> +                        FDB_LOCAL_DEF_IDLE_TIMEOUT_S);
>> +            ds_clear(match);
>> +            ds_clear(actions);
>> +            ds_put_format(match, "inport == %s", op->json_key);
>> +            ds_put_format(actions, "commit_fdb_local(timeout=%u); next;",
>> +                                    idle_timeout);
>> +            ovn_lflow_add_with_lport_and_hint(lflows, op->od,
>> +                                              S_SWITCH_IN_LOOKUP_FDB, 100,
>> +                                              ds_cstr(match), 
>> ds_cstr(actions),
>> +                                              op->key, &op->nbsp->header_);
>> +            return;
>> +        }
>> +
>>         ds_clear(match);
>>         ds_clear(actions);
>>         ds_put_format(match, "inport == %s", op->json_key);
>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>> index fcb1c6ecc..3547ec4a6 100644
>> --- a/ovn-nb.xml
>> +++ b/ovn-nb.xml
>> @@ -803,6 +803,20 @@
>>       </column>
>>     </group>
>> 
>> +    <group title="Local FDB options">
>> +      <column name="other_config" key="fdb_local"
>> +              type='{"type": "boolean"}'>
>> +        If set to <code>true</code>, FDB flows are committed only to the
>> +        local chassis instead of southbound DB. Default is false.
>> +      </column>
>> +      <column name="other_config" key="fdb_local_idle_timeout"
>> +              type='{"type": "integer", "minInteger": 0, "maxInteger": 
>> 65535}'>
>> +        Local FDB flows <code>idle_timeout</code> value in seconds. FDB 
>> local
>> +        flows exceeding this timeout will be automatically removed. The 
>> value
>> +        defaults to 300, 0 means disabled.
>> +      </column>
>> +    </group>
>> +
>>     <column name="copp">
>>       <p>
>>         The control plane protection policy from table <ref table="Copp"/>
>> diff --git a/tests/ovn.at b/tests/ovn.at
>> index 92cf27581..dff50364a 100644
>> --- a/tests/ovn.at
>> +++ b/tests/ovn.at
>> @@ -34504,6 +34504,114 @@ OVN_CLEANUP([hv1])
>> AT_CLEANUP
>> ])
>> 
>> +OVN_FOR_EACH_NORTHD([
>> +AT_SETUP([Local FDB MAC learning])
>> +ovn_start
>> +net_add n1
>> +
>> +AT_CHECK([ovn-nbctl ls-add ls0])
>> +
>> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
>> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "50:54:00:00:00:03 10.0.0.3" 
>> "unknown"])
>> +AT_CHECK([ovn-nbctl set logical_switch_port vif0 
>> options:requested-tnl-key=2])
>> +
>> +AT_CHECK([ovn-nbctl lsp-add ls0 vif1])
>> +AT_CHECK([ovn-nbctl lsp-set-addresses vif1 "50:54:00:00:00:04 10.0.0.4"])
>> +AT_CHECK([ovn-nbctl set logical_switch_port vif1 
>> options:requested-tnl-key=3])
>> +
>> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
>> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
>> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
>> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
>> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port 
>> options:localnet_learn_fdb=true])
>> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port 
>> options:requested-tnl-key=1])
>> +
>> +AT_CHECK([ovn-nbctl set logical_switch ls0 other_config:fdb_local=true])
>> +
>> +sim_add hv1
>> +as hv1
>> +ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.1
>> +ovs-vsctl -- add-port br-int vif0 -- \
>> +    set interface vif0 external-ids:iface-id=vif0 \
>> +    options:tx_pcap=hv1/vif0-tx.pcap \
>> +    options:rxq_pcap=hv1/vif0-rx.pcap \
>> +    ofport-request=1
>> +ovs-vsctl -- add-port br-int vif1 -- \
>> +    set interface vif1 external-ids:iface-id=vif1 \
>> +    options:tx_pcap=hv1/vif1-tx.pcap \
>> +    options:rxq_pcap=hv1/vif1-rx.pcap \
>> +    ofport-request=2
>> +ovs-vsctl -- add-port br-phys ext0 -- \
>> +    set interface ext0 \
>> +    options:tx_pcap=hv1/ext0-tx.pcap \
>> +    options:rxq_pcap=hv1/ext0-rx.pcap \
>> +    ofport-request=3
>> +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
>> +
>> +wait_for_ports_up
>> +AT_CHECK([ovn-nbctl --wait=hv sync])
>> +
>> +send_packet() {
>> +    src_mac=$1
>> +    src_ip=$2
>> +    dst_mac=$3
>> +    dst_ip=$4
>> +    iface=$5
>> +    
>> packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
>> +    ovs-appctl netdev-dummy/receive $iface $packet
>> +}
>> +
>> +
>> +# Check that there is a commit_fdb_local() flow added by ovn-northd for 
>> vif0 and localnet
>> +ovn-sbctl dump-flows ls0 > sw0flows
>> +AT_CAPTURE_FILE([sw0flows])
>> +
>> +AT_CHECK([grep "ls_in_lookup_fdb" sw0flows | sort], [0], [dnl
>> +  table=2 (ls_in_lookup_fdb   ), priority=0    , dnl
>> +match=(1), action=(next;)
>> +  table=2 (ls_in_lookup_fdb   ), priority=100  , dnl
>> +match=(inport == "ln_port"), action=(commit_fdb_local(timeout=300); next;)
>> +  table=2 (ls_in_lookup_fdb   ), priority=100  , dnl
>> +match=(inport == "vif0"), action=(commit_fdb_local(timeout=300); next;)
>> +])
>> +
>> +AT_CHECK([grep "ls_in_put_fdb" sw0flows | sort], [0], [dnl
>> +  table=3 (ls_in_put_fdb      ), priority=0    , dnl
>> +match=(1), action=(next;)
>> +])
>> +
>> +
>> +src_mac="505400000003"
>> +src_ip=`ip_to_hex 10.0.0.3`
>> +dst_mac="505400000004"
>> +dst_ip=`ip_to_hex 10.0.0.4`
>> +
>> +# send packet from vif0(which has unknown addr set) to vif1
>> +send_packet $src_mac $src_ip $dst_mac $dst_ip vif0
>> +
>> +# send packet from vif1 to vif0(which has unknown addr set)
>> +send_packet $dst_mac $dst_ip $src_mac $src_ip vif1
>> +
>> +# send packet from underlay to vif1
>> +src_mac="505400000064"
>> +src_ip=`ip_to_hex 10.0.0.100`
>> +send_packet $src_mac $src_ip $dst_mac $dst_ip ext0
>> +AT_CHECK([ovn-nbctl --wait=hv sync])
>> +
>> +# Make sure that OVS table 71 is populated on hv1.
>> +AS_BOX([Check that ovn-controller programs the flows for FDB])
>> +as hv1 ovs-ofctl dump-flows br-int table=71 > hv1_offlows_table71.txt
>> +AT_CAPTURE_FILE([hv1_offlows_table71.txt])
>> +AT_CHECK([cat hv1_offlows_table71.txt | grep -v NXST | cut -d ' ' -f7- | 
>> sort], [0], [dnl
>> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:03 
>> actions=load:0x2->NXM_NX_REG15[[]]
>> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:64 
>> actions=load:0x1->NXM_NX_REG15[[]]
>> +])
>> +
>> +OVN_CLEANUP([hv1])
>> +AT_CLEANUP
>> +])
>> +
>> OVN_FOR_EACH_NORTHD([
>> AT_SETUP([MAC binding aging])
>> AT_SKIP_IF([test $HAVE_SCAPY = no])
>> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
>> index 0b86eae7b..354f84a4b 100644
>> --- a/utilities/ovn-trace.c
>> +++ b/utilities/ovn-trace.c
>> @@ -3355,6 +3355,8 @@ trace_actions(const struct ovnact *ovnacts, size_t 
>> ovnacts_len,
>>             break;
>>         case OVNACT_MAC_CACHE_USE:
>>             break;
>> +        case OVNACT_COMMIT_FDB_LOCAL:
>> +            break;
>>         }
>>     }
>>     ofpbuf_uninit(&stack);
> 
> Regards,
> Dumitru

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to