On 11/29/23 07:45, naveen.yerramneni wrote:
> This functionality can be enabled at the logical switch level:
>   - "other_config:fdb_local" can be used to enable/disable this
>     functionality, it is disabled by default.
>   - "other_config:fdb_local_idle_timeout" sepcifies idle timeout
>     for locally learned fdb flows, default timeout is 300 secs.
> 
> If enabled, below lflow is added for each port that has unknown addr set.
>   - table=2 (ls_in_lookup_fdb), priority=100, match=(inport == <in_port>),
>     action=(commit_fdb_local(timeout=<timeout>); next;
> 
> New OVN action: "commit_fdb_local". This sets following OVS action.
>   - learn(table=71,idle_timeout=<timeout>,delete_learned,OXM_OF_METADATA[],
>           
> NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],load:NXM_NX_REG14[]->NXM_NX_REG15[])
> 
> This is useful when OVN is managing VLAN network that has multiple ports
> set with unknown addr and localnet_learn_fdb is enabled. With this config,
> if there is east-west traffic flowing between VMs part of same VLAN
> deployed on different hypervisors then, MAC addrs of the source and
> destination VMs keeps flapping between VM port and localnet port in 
> Southbound FDB table. Enabling fdb_local config makes fdb table local to
> the chassis and avoids MAC flapping.
> 
> Signed-off-by: Naveen Yerramneni <naveen.yerramn...@nutanix.com>
> ---

Hi Naveen,

Thanks a lot for the patch!

Just a note, we already have a fix for the east-west traffic that causes
FDB flapping when localnet is used:

https://github.com/ovn-org/ovn/commit/2acf91e9628e9481c48e4a6cec8ad5159fdd6d2e

https://github.com/ovn-org/ovn/commit/f3a14907fe2b1ecdcfddfbed595cd097b6efbe14

In general, however, I think it's a very good idea to move the FDB away
from the Southbound and make it local to each hypervisor.  That reduces
load on the Southbound among other things.

>  include/ovn/actions.h |   7 +++
>  lib/actions.c         |  94 ++++++++++++++++++++++++++++++++++++
>  northd/northd.c       |  26 ++++++++++
>  ovn-nb.xml            |  14 ++++++
>  tests/ovn.at          | 108 ++++++++++++++++++++++++++++++++++++++++++
>  utilities/ovn-trace.c |   2 +
>  6 files changed, 251 insertions(+)
> 
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 49cfe0624..85ac92cd3 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -127,6 +127,7 @@ struct collector_set_ids;
>      OVNACT(CHK_LB_AFF,        ovnact_result)          \
>      OVNACT(SAMPLE,            ovnact_sample)          \
>      OVNACT(MAC_CACHE_USE,     ovnact_null)            \
> +    OVNACT(COMMIT_FDB_LOCAL,  ovnact_commit_fdb_local) \
>  
>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -514,6 +515,12 @@ struct ovnact_commit_lb_aff {
>      uint16_t timeout;
>  };
>  
> +/* OVNACT_COMMIT_FBD_LOCAL. */
> +struct ovnact_commit_fdb_local{
> +    struct ovnact ovnact;
> +    uint16_t timeout;  /* fdb_local flow timeout */
> +};
> +
>  /* Internal use by the helpers below. */
>  void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
>  void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
> diff --git a/lib/actions.c b/lib/actions.c
> index a73fe1a1e..f5aa78db1 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -5236,6 +5236,98 @@ format_MAC_CACHE_USE(const struct ovnact_null *null 
> OVS_UNUSED, struct ds *s)
>      ds_put_cstr(s, "mac_cache_use;");
>  }
>  
> +static void
> +parse_commit_fdb_local(struct action_context *ctx,
> +                     struct ovnact_commit_fdb_local *fdb_local)
> +{
> +    uint16_t timeout = 0;
> +    lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. */
> +    if (!lexer_match_id(ctx->lexer, "timeout")) {
> +        lexer_syntax_error(ctx->lexer, "invalid parameter");
> +        return;
> +    }
> +    if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
> +        lexer_syntax_error(ctx->lexer, "invalid parameter");
> +        return;
> +    }
> +    if (!action_parse_uint16(ctx, &timeout, "fdb_local flow timeout")) {
> +        return;
> +    }
> +    fdb_local->timeout = timeout;
> +    lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */
> +}
> +
> +static void
> +format_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local,
> +                      struct ds *s)
> +{
> +    ds_put_format(s, "commit_fdb_local(timeout=%u);", fdb_local->timeout);
> +}
> +
> +static void
> +ovnact_commit_fdb_local_free(struct ovnact_commit_fdb_local *fdb_local 
> OVS_UNUSED)
> +{
> +}
> +
> +static void
> +commit_fdb_local_learn_action(struct ovnact_commit_fdb_local *fdb_local,
> +                        struct ofpbuf *ofpacts, uint32_t cookie)
> +{
> +    struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts);
> +    struct match match = MATCH_CATCHALL_INITIALIZER;
> +    struct ofpact_learn_spec *ol_spec;
> +    unsigned int imm_bytes;
> +    uint8_t *src_imm;
> +
> +    ol->flags = NX_LEARN_F_DELETE_LEARNED;
> +    ol->idle_timeout = fdb_local->timeout;
> +    ol->hard_timeout = OFP_FLOW_PERMANENT;
> +    ol->priority = OFP_DEFAULT_PRIORITY;
> +    ol->table_id = OFTABLE_GET_FDB;
> +    ol->cookie = htonll(cookie);
> +
> +    /* Match on metadata of the packet that created the new table. */
> +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> +    ol_spec->dst.field = mf_from_id(MFF_METADATA);
> +    ol_spec->dst.ofs = 0;
> +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> +    ol_spec->n_bits = ol_spec->dst.n_bits;
> +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
> +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
> +    ol_spec->src.field = mf_from_id(MFF_METADATA);
> +
> +    /* Match on metadata of the packet. */
> +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> +    ol_spec->dst.field = mf_from_id(MFF_ETH_DST);
> +    ol_spec->dst.ofs = 0;
> +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> +    ol_spec->n_bits = ol_spec->dst.n_bits;
> +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
> +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
> +    ol_spec->src.field = mf_from_id(MFF_ETH_SRC);
> +
> +
> +    /* Load MFF_LOG_OUTPORT from MFF_IN_PORT. */
> +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> +    ol_spec->dst.field = mf_from_id(MFF_LOG_OUTPORT);
> +    ol_spec->dst.ofs = 0;
> +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> +    ol_spec->n_bits = ol_spec->dst.n_bits;
> +    ol_spec->dst_type = NX_LEARN_DST_LOAD;
> +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
> +    ol_spec->src.field = mf_from_id(MFF_LOG_INPORT);
> +
> +    ofpact_finish_LEARN(ofpacts, &ol);
> +}

A difference from today's SB.FDB centralized approach is that when
ovn-controller restarts these flows will be cleared, I think.

Are we OK with that?  I think so but if not what are the options to
avoid clearing the local fdb cache on restart?

Another difference with today's approach is that this avoids a
controller action, that's great!

> +
> +static void
> +encode_COMMIT_FDB_LOCAL(const struct ovnact_commit_fdb_local *fdb_local,
> +                      const struct ovnact_encode_params *ep,
> +                      struct ofpbuf *ofpacts)
> +{
> +     commit_fdb_local_learn_action(fdb_local, ofpacts, 
> ep->lflow_uuid.parts[0]);
> +}
> +
>  static void
>  encode_MAC_CACHE_USE(const struct ovnact_null *null OVS_UNUSED,
>                       const struct ovnact_encode_params *ep,
> @@ -5451,6 +5543,8 @@ parse_action(struct action_context *ctx)
>          parse_sample(ctx);
>      } else if (lexer_match_id(ctx->lexer, "mac_cache_use")) {
>          ovnact_put_MAC_CACHE_USE(ctx->ovnacts);
> +    } else if (lexer_match_id(ctx->lexer, "commit_fdb_local")) {
> +        parse_commit_fdb_local(ctx, 
> ovnact_put_COMMIT_FDB_LOCAL(ctx->ovnacts));
>      } else {
>          lexer_syntax_error(ctx->lexer, "expecting action");
>      }
> diff --git a/northd/northd.c b/northd/northd.c
> index d1465ddf7..de18694a0 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -1834,6 +1834,12 @@ localnet_can_learn_mac(const struct 
> nbrec_logical_switch_port *nbsp)
>      return smap_get_bool(&nbsp->options, "localnet_learn_fdb", false);
>  }
>  
> +static bool
> +ls_is_fdb_local(const struct nbrec_logical_switch *nbs)
> +{
> +    return smap_get_bool(&nbs->other_config, "fdb_local", false);
> +}
> +

Personally, I'd prefer if we don't add another config knob and we just
make this the only way FDB works.  We could also document that the FDB
SB table should be deprecated.

>  static bool
>  lsp_is_type_changed(const struct sbrec_port_binding *sb,
>                  const struct nbrec_logical_switch_port *nbsp,
> @@ -7033,6 +7039,8 @@ build_lswitch_port_sec_op(struct ovn_port *op, struct 
> hmap *lflows,
>      }
>  }
>  
> +#define FDB_LOCAL_DEF_IDLE_TIMEOUT_S 300
> +

This, on the other hand, might be a good candidate for a config option.

>  static void
>  build_lswitch_learn_fdb_op(
>          struct ovn_port *op, struct hmap *lflows,
> @@ -7042,6 +7050,24 @@ build_lswitch_learn_fdb_op(
>  
>      if (!op->n_ps_addrs && op->has_unknown && (!strcmp(op->nbsp->type, "") ||
>          (lsp_is_localnet(op->nbsp) && localnet_can_learn_mac(op->nbsp)))) {
> +
> +        if (ls_is_fdb_local(op->od->nbs))
> +        {
> +            uint32_t idle_timeout =smap_get_uint(
> +                        &op->od->nbs->other_config, "fdb_local_idle_timeout",
> +                        FDB_LOCAL_DEF_IDLE_TIMEOUT_S);
> +            ds_clear(match);
> +            ds_clear(actions);
> +            ds_put_format(match, "inport == %s", op->json_key);
> +            ds_put_format(actions, "commit_fdb_local(timeout=%u); next;",
> +                                    idle_timeout);
> +            ovn_lflow_add_with_lport_and_hint(lflows, op->od,
> +                                              S_SWITCH_IN_LOOKUP_FDB, 100,
> +                                              ds_cstr(match), 
> ds_cstr(actions),
> +                                              op->key, &op->nbsp->header_);
> +            return;
> +        }
> +
>          ds_clear(match);
>          ds_clear(actions);
>          ds_put_format(match, "inport == %s", op->json_key);
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index fcb1c6ecc..3547ec4a6 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -803,6 +803,20 @@
>        </column>
>      </group>
>  
> +    <group title="Local FDB options">
> +      <column name="other_config" key="fdb_local"
> +              type='{"type": "boolean"}'>
> +        If set to <code>true</code>, FDB flows are commited only to the
> +        local chassis instead of southbound DB. Default is false.
> +      </column>
> +      <column name="other_config" key="fdb_local_idle_timeout"
> +              type='{"type": "integer", "minInteger": 0, "maxInteger": 
> 65535}'>
> +        Local FDB flows <code>idle_timeout</code> value in seconds. FDB local
> +        flows exceeding this timeout will be automatically removed. The value
> +        defaults to 300, 0 means disabled.
> +      </column>
> +    </group>
> +
>      <column name="copp">
>        <p>
>          The control plane protection policy from table <ref table="Copp"/>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 92cf27581..dff50364a 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -34504,6 +34504,114 @@ OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
>  
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([Local FDB MAC learning])
> +ovn_start
> +net_add n1
> +
> +AT_CHECK([ovn-nbctl ls-add ls0])
> +
> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "50:54:00:00:00:03 10.0.0.3" 
> "unknown"])
> +AT_CHECK([ovn-nbctl set logical_switch_port vif0 
> options:requested-tnl-key=2])
> +
> +AT_CHECK([ovn-nbctl lsp-add ls0 vif1])
> +AT_CHECK([ovn-nbctl lsp-set-addresses vif1 "50:54:00:00:00:04 10.0.0.4"])
> +AT_CHECK([ovn-nbctl set logical_switch_port vif1 
> options:requested-tnl-key=3])
> +
> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port 
> options:localnet_learn_fdb=true])
> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port 
> options:requested-tnl-key=1])
> +
> +AT_CHECK([ovn-nbctl set logical_switch ls0 other_config:fdb_local=true])
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +ovs-vsctl -- add-port br-int vif0 -- \
> +    set interface vif0 external-ids:iface-id=vif0 \
> +    options:tx_pcap=hv1/vif0-tx.pcap \
> +    options:rxq_pcap=hv1/vif0-rx.pcap \
> +    ofport-request=1
> +ovs-vsctl -- add-port br-int vif1 -- \
> +    set interface vif1 external-ids:iface-id=vif1 \
> +    options:tx_pcap=hv1/vif1-tx.pcap \
> +    options:rxq_pcap=hv1/vif1-rx.pcap \
> +    ofport-request=2
> +ovs-vsctl -- add-port br-phys ext0 -- \
> +    set interface ext0 \
> +    options:tx_pcap=hv1/ext0-tx.pcap \
> +    options:rxq_pcap=hv1/ext0-rx.pcap \
> +    ofport-request=3
> +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
> +
> +wait_for_ports_up
> +AT_CHECK([ovn-nbctl --wait=hv sync])
> +
> +send_packet() {
> +    src_mac=$1
> +    src_ip=$2
> +    dst_mac=$3
> +    dst_ip=$4
> +    iface=$5
> +    
> packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
> +    ovs-appctl netdev-dummy/receive $iface $packet
> +}
> +
> +
> +# Check that there is commit_fdb_local_fdb() flow added by ovn-northd for 
> vif0 and localnet
> +ovn-sbctl dump-flows ls0 > sw0flows
> +AT_CAPTURE_FILE([sw0flows])
> +
> +AT_CHECK([grep "ls_in_lookup_fdb" sw0flows | sort], [0], [dnl
> +  table=2 (ls_in_lookup_fdb   ), priority=0    , dnl
> +match=(1), action=(next;)
> +  table=2 (ls_in_lookup_fdb   ), priority=100  , dnl
> +match=(inport == "ln_port"), action=(commit_fdb_local(timeout=300); next;)
> +  table=2 (ls_in_lookup_fdb   ), priority=100  , dnl
> +match=(inport == "vif0"), action=(commit_fdb_local(timeout=300); next;)
> +])
> +
> +AT_CHECK([grep "ls_in_put_fdb" sw0flows | sort], [0], [dnl
> +  table=3 (ls_in_put_fdb      ), priority=0    , dnl
> +match=(1), action=(next;)
> +])
> +
> +
> +src_mac="505400000003"
> +src_ip=`ip_to_hex 10.0.0.3`
> +dst_mac="505400000004"
> +dst_ip=`ip_to_hex 10.0.0.4`
> +
> +# send packet from vif0(which has unknown addr set) to vif1
> +send_packet $src_mac $src_ip $dst_mac $dst_ip vif0
> +
> +# send packet from vif1 to vif0(which has unknown addr set)
> +send_packet $dst_mac $dst_ip $src_mac $src_ip vif1
> +
> +# send packet from underlay to vif1
> +src_mac="505400000064"
> +src_ip=`ip_to_hex 10.0.0.100`
> +send_packet $src_mac $src_ip $dst_mac $dst_ip ext0
> +AT_CHECK([ovn-nbctl --wait=hv sync])
> +
> +# Make sure that OVS table 71 is populated on hv1.
> +AS_BOX([Check that ovn-controller programs the flows for FDB])
> +as hv1 ovs-ofctl dump-flows br-int table=71 > hv1_offlows_table71.txt
> +AT_CAPTURE_FILE([hv1_offlows_table71.txt])
> +AT_CHECK([cat hv1_offlows_table71.txt | grep -v NXST | cut -d ' ' -f7- | 
> sort], [0], [dnl
> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:03 
> actions=load:0x2->NXM_NX_REG15[[]]
> +idle_timeout=300, idle_age=0, metadata=0x1,dl_dst=50:54:00:00:00:64 
> actions=load:0x1->NXM_NX_REG15[[]]
> +])
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
> +
>  OVN_FOR_EACH_NORTHD([
>  AT_SETUP([MAC binding aging])
>  AT_SKIP_IF([test $HAVE_SCAPY = no])
> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> index 0b86eae7b..354f84a4b 100644
> --- a/utilities/ovn-trace.c
> +++ b/utilities/ovn-trace.c
> @@ -3355,6 +3355,8 @@ trace_actions(const struct ovnact *ovnacts, size_t 
> ovnacts_len,
>              break;
>          case OVNACT_MAC_CACHE_USE:
>              break;
> +        case OVNACT_COMMIT_FDB_LOCAL:
> +            break;
>          }
>      }
>      ofpbuf_uninit(&stack);

Regards,
Dumitru

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to