> Hi Lorenzo, I have a few comments below.
> 
> On 7/13/22 08:44, Lorenzo Bianconi wrote:
> > commit_ecmp_nh action translates to an openflow "learn" action that
> > inserts two new flows in the OFTABLE_ECMP_NH_MAC and OFTABLE_ECMP_NH
> > tables. These new flows are used to match on the the 5-tuple and the
> > expected next-hop mac address and set REGBIT_KNOWN_ECMP_NH.
> > commit_ecmp_nh action will be used to improve ECMP symmetric routing
> > reliability.
> > 
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> >   controller/lflow.h           |   2 +
> >   include/ovn/actions.h        |   7 ++
> >   include/ovn/logical-fields.h |   3 +
> >   lib/actions.c                | 176 +++++++++++++++++++++++++++++++++++
> >   ovn-sb.xml                   |  34 +++++++
> >   utilities/ovn-trace.c        |   2 +
> >   6 files changed, 224 insertions(+)
> > 
> > diff --git a/controller/lflow.h b/controller/lflow.h
> > index 342967917..543d3cd96 100644
> > --- a/controller/lflow.h
> > +++ b/controller/lflow.h
> > @@ -79,6 +79,8 @@ struct uuid;
> >   #define OFTABLE_CHK_IN_PORT_SEC      73
> >   #define OFTABLE_CHK_IN_PORT_SEC_ND   74
> >   #define OFTABLE_CHK_OUT_PORT_SEC     75
> > +#define OFTABLE_ECMP_NH_MAC          76
> > +#define OFTABLE_ECMP_NH              77
> >   enum ref_type {
> >       REF_TYPE_ADDRSET,
> > diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> > index 33c319f1c..cef930e1a 100644
> > --- a/include/ovn/actions.h
> > +++ b/include/ovn/actions.h
> > @@ -118,6 +118,7 @@ struct ovn_extend_table;
> >       OVNACT(LOOKUP_FDB,        ovnact_lookup_fdb)      \
> >       OVNACT(CHECK_IN_PORT_SEC,  ovnact_result)         \
> >       OVNACT(CHECK_OUT_PORT_SEC, ovnact_result)         \
> > +    OVNACT(COMMIT_ECMP_NH,    ovnact_commit_ecmp_nh)  \
> >   /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
> >   enum OVS_PACKED_ENUM ovnact_type {
> > @@ -453,6 +454,12 @@ struct ovnact_lookup_fdb {
> >       struct expr_field dst;     /* 1-bit destination field. */
> >   };
> > +/* OVNACT_COMMIT_ECMP_NH. */
> > +struct ovnact_commit_ecmp_nh {
> > +    struct ovnact ovnact;
> > +    bool ipv6;
> > +};
> > +
> >   /* Internal use by the helpers below. */
> >   void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
> >   void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
> > diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> > index bfb07ebef..3db7265e4 100644
> > --- a/include/ovn/logical-fields.h
> > +++ b/include/ovn/logical-fields.h
> > @@ -70,6 +70,7 @@ enum mff_log_flags_bits {
> >       MLF_LOCALPORT_BIT = 10,
> >       MLF_USE_SNAT_ZONE = 11,
> >       MLF_CHECK_PORT_SEC_BIT = 12,
> > +    MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13,
> >   };
> >   /* MFF_LOG_FLAGS_REG flag assignments */
> > @@ -113,6 +114,8 @@ enum mff_log_flags {
> >       /* Indicate the packet has been received from a localport */
> >       MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT),
> > +
> > +    MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT),
> >   };
> >   /* OVN logical fields
> > diff --git a/lib/actions.c b/lib/actions.c
> > index aab044306..132c63228 100644
> > --- a/lib/actions.c
> > +++ b/lib/actions.c
> > @@ -41,6 +41,7 @@
> >   #include "uuid.h"
> >   #include "socket-util.h"
> >   #include "lib/ovn-util.h"
> > +#include "controller/lflow.h"
> >   VLOG_DEFINE_THIS_MODULE(actions);
> >   
> > @@ -4278,6 +4279,179 @@ encode_CHECK_OUT_PORT_SEC(const struct 
> > ovnact_result *dl,
> >                              MLF_CHECK_PORT_SEC_BIT, ofpacts);
> >   }
> > +static void
> > +parse_commit_ecmp_nh(struct action_context *ctx,
> > +                     struct ovnact_commit_ecmp_nh *ecmp_nh)
> > +{
> > +    bool ipv6;
> > +
> > +    lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. */
> > +    if (!lexer_match_id(ctx->lexer, "ipv6")) {
> > +        lexer_syntax_error(ctx->lexer, "invalid parameter");
> > +        return;
> > +    }
> > +    if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
> > +        lexer_syntax_error(ctx->lexer, "invalid parameter");
> > +        return;
> > +    }
> > +    if (lexer_match_string(ctx->lexer, "true") ||
> > +        lexer_match_id(ctx->lexer, "true")) {
> > +        ipv6 = true;
> > +    } else if (lexer_match_string(ctx->lexer, "false") ||
> > +               lexer_match_id(ctx->lexer, "false")) {
> > +        ipv6 = false;
> > +    } else {
> > +        lexer_syntax_error(ctx->lexer,
> > +                           "expecting true or false");
> > +        return;
> > +    }
> > +    lexer_force_match(ctx->lexer, LEX_T_SEMICOLON);
> > +
> > +    lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */
> > +
> > +    ecmp_nh->ipv6 = ipv6;
> > +}
> > +
> > +static void
> > +format_COMMIT_ECMP_NH(const struct ovnact_commit_ecmp_nh *ecmp_nh,
> > +                      struct ds *s)
> > +{
> > +    ds_put_format(s, "commit_ecmp_nh(ipv6=%s;);",
> > +                  ecmp_nh->ipv6 ? "true" : "false");
> > +}
> > +
> > +static void
> > +ovnact_commit_ecmp_nh_free(struct ovnact_commit_ecmp_nh *ecmp_nh 
> > OVS_UNUSED)
> > +{
> > +}
> > +
> > +static void
> > +commit_ecmp_learn_action(struct ofpbuf *ofpacts, bool nw_conn, bool ipv6)
> > +{
> 
> In the bugzilla issue, Dumitru suggests checking the src and dst L4 port as
> part of the flows generated by the learn() action. Is there a reason we're
> not doing it here? For that matter, would it make sense to check the L4
> protocol as well? I'm thinking of a potential scenario where we have UDP and
> TCP traffic between the same IP addresses, but for whatever reason, the UDP
> traffic was received over one ECMP route and the TCP traffic was received
> over a different one.

Hi Mark,

I discussed this point with Dumitru and we agreed to match just on L3 info
(addresses and ip protocol). Doing so we will take care even of not L4 IP
traffic (e.g. ICMP).

> 
> > +    struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts);
> > +    struct match match = MATCH_CATCHALL_INITIALIZER;
> > +    struct ofpact_learn_spec *ol_spec;
> > +    unsigned int imm_bytes;
> > +    uint8_t *src_imm;
> > +
> > +    ol->flags = NX_LEARN_F_DELETE_LEARNED;
> > +    ol->idle_timeout = 20; /* seconds. */
> > +    ol->hard_timeout = 30; /* seconds. */
> > +    ol->priority = OFP_DEFAULT_PRIORITY;
> > +    ol->table_id = nw_conn ? OFTABLE_ECMP_NH_MAC : OFTABLE_ECMP_NH;
> > +
> > +    /* Match on metadata of the packet that created the new table. */
> > +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> > +    ol_spec->dst.field = mf_from_id(MFF_METADATA);
> > +    ol_spec->dst.ofs = 0;
> > +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> > +    ol_spec->n_bits = ol_spec->dst.n_bits;
> > +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
> > +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
> > +    ol_spec->src.field = mf_from_id(MFF_METADATA);
> > +
> > +    if (nw_conn) {
> > +        ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> > +        ol_spec->dst.field = mf_from_id(MFF_ETH_SRC);
> > +        ol_spec->src.field = mf_from_id(MFF_ETH_SRC);
> > +        ol_spec->dst.ofs = 0;
> > +        ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> > +        ol_spec->n_bits = ol_spec->dst.n_bits;
> > +        ol_spec->dst_type = NX_LEARN_DST_MATCH;
> > +        ol_spec->src_type = NX_LEARN_SRC_FIELD;
> > +    }
> > +
> > +    /* Match on the same ETH type as the packet that created the new 
> > table. */
> > +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> > +    ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE);
> > +    ol_spec->dst.ofs = 0;
> > +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> > +    ol_spec->n_bits = ol_spec->dst.n_bits;
> > +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
> > +    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
> > +    union mf_value imm_eth_type = {
> > +        .be16 = !ipv6 ? htons(ETH_TYPE_IP) : htons(ETH_TYPE_IPV6)
> 
> It's much easier to understand the code if the condition on ipv6 is
> reversed, i.e.:
> 
> .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP)
> 
> This same pattern is repeated many times in this function.

ack, I will fix it.

> 
> > +    };
> > +    mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match);
> > +    /* Push value last, as this may reallocate 'ol_spec'. */
> > +    imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
> > +    src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
> > +    memcpy(src_imm, &imm_eth_type, imm_bytes);
> > +
> > +    /* IP src. */
> > +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> > +    ol_spec->dst.field =
> > +        !ipv6 ? mf_from_id(MFF_IPV4_SRC) : mf_from_id(MFF_IPV6_SRC);
> > +    if (nw_conn) {
> > +        ol_spec->src.field =
> > +            !ipv6 ? mf_from_id(MFF_IPV4_SRC) : mf_from_id(MFF_IPV6_SRC);
> > +    } else {
> > +        ol_spec->src.field =
> > +            !ipv6 ? mf_from_id(MFF_IPV4_DST) : mf_from_id(MFF_IPV6_DST);
> > +    }
> > +    ol_spec->dst.ofs = 0;
> > +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> > +    ol_spec->n_bits = ol_spec->dst.n_bits;
> > +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
> > +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
> > +
> > +    /* IP dst. */
> > +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> > +    ol_spec->dst.field =
> > +        !ipv6 ? mf_from_id(MFF_IPV4_DST) : mf_from_id(MFF_IPV6_DST);
> > +    if (nw_conn) {
> > +        ol_spec->src.field =
> > +            !ipv6 ? mf_from_id(MFF_IPV4_DST) : mf_from_id(MFF_IPV6_DST);
> > +    } else {
> > +        ol_spec->src.field =
> > +            !ipv6 ? mf_from_id(MFF_IPV4_SRC) : mf_from_id(MFF_IPV6_SRC);
> > +    }
> > +    ol_spec->dst.ofs = 0;
> > +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> > +    ol_spec->n_bits = ol_spec->dst.n_bits;
> > +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
> > +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
> > +
> > +    /* IP proto. */
> > +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> > +    ol_spec->dst.field = mf_from_id(MFF_IP_PROTO);
> > +    ol_spec->src.field = mf_from_id(MFF_IP_PROTO);
> > +    ol_spec->dst.ofs = 0;
> > +    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
> > +    ol_spec->n_bits = ol_spec->dst.n_bits;
> > +    ol_spec->dst_type = NX_LEARN_DST_MATCH;
> > +    ol_spec->src_type = NX_LEARN_SRC_FIELD;
> > +
> > +    /* Set MLF_LOOKUP_COMMIT_ECMP_NH_BIT for ecmp replies. */
> > +    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
> > +    ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS);
> > +    ol_spec->dst.ofs = MLF_LOOKUP_COMMIT_ECMP_NH_BIT;
> > +    ol_spec->dst.n_bits = 1;
> > +    ol_spec->n_bits = ol_spec->dst.n_bits;
> > +    ol_spec->dst_type = NX_LEARN_DST_LOAD;
> > +    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
> > +    union mf_value imm_reg_value = {
> > +        .u8 = 1
> > +    };
> > +    mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match);
> > +
> > +    /* Push value last, as this may reallocate 'ol_spec' */
> > +    imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
> > +    src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
> > +    memcpy(src_imm, &imm_reg_value, imm_bytes);
> > +
> > +    ofpact_finish_LEARN(ofpacts, &ol);
> > +}
> > +
> > +static void
> > +encode_COMMIT_ECMP_NH(const struct ovnact_commit_ecmp_nh *ecmp_nh,
> > +                      const struct ovnact_encode_params *ep OVS_UNUSED,
> > +                      struct ofpbuf *ofpacts)
> > +{
> > +     commit_ecmp_learn_action(ofpacts, true, ecmp_nh->ipv6);
> > +     commit_ecmp_learn_action(ofpacts, false, ecmp_nh->ipv6);
> > +}
> > +
> >   /* Parses an assignment or exchange or put_dhcp_opts action. */
> >   static void
> >   parse_set_action(struct action_context *ctx)
> > @@ -4458,6 +4632,8 @@ parse_action(struct action_context *ctx)
> >           ovnact_put_CT_SNAT_TO_VIP(ctx->ovnacts);
> >       } else if (lexer_match_id(ctx->lexer, "put_fdb")) {
> >           parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts));
> > +    } else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) {
> > +        parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts));
> >       } else {
> >           lexer_syntax_error(ctx->lexer, "expecting action");
> >       }
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index 59ad3aa2d..ef5586020 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -2573,6 +2573,40 @@ tcp.flags = RST;
> >               <b>Example:</b> <code>reg8[0..7] = 
> > check_out_port_sec();</code>
> >             </p>
> >           </dd>
> > +
> > +        <dt><code>commit_ecmp_nh(<var>ipv6</var>);</code></dt>
> > +        <dd>
> > +          <p>
> > +              <b>Parameters</b>: IPv4/IPv6 traffic.
> > +          </p>
> > +
> > +          <p>
> > +            This action translates to an openflow "learn" action that 
> > inserts
> > +            two new flows in the <code>OFTABLE_ECMP_NH_MAC</code> and in 
> > the
> > +            <code>OFTABLE_ECMP_NH</code> tables.
> 
> "OFTABLE_ECMP_NH_MAC" and "OFTABLE_ECMP_NH" will not mean anything to the
> users who read this. You should probably refer to them by their table
> numbers (76 and 77) instead. This is done in other patches in this series as
> well.

ack, I will fix it

Regards,
Lorenzo

> 
> > +          </p>
> > +
> > +          <ul>
> > +            <li>
> > +              Match on the the 5-tuple and the expected next-hop mac 
> > address
> > +              in <code>OFTABLE_ECMP_NH_MAC</code> table:
> > +              <code>nw_src=ip0</code>, <code>nw_dst=ip1</code>,
> > +              <code>ip_proto</code>,<code>dl_src=ethaddr</code> and
> > +              set <code>REGBIT_KNOWN_ECMP_NH</code>.
> > +            </li>
> > +            <li>
> > +              Match on the 5-tuple in <code>OFTABLE_ECMP_NH</code> table:
> > +              <code>nw_src=ip1</code>, <code>nw_dst=ip0</code>,
> > +              <code>ip_proto</code> and set 
> > <code>REGBIT_KNOWN_ECMP_NH</code>
> > +              to 1
> > +            </li>
> > +          </ul>
> > +
> > +          <p>
> > +            This action is applied if the packet arrives via ECMP route or
> > +            if it is routed via an ECMP route
> > +          </p>
> > +        </dd>
> >         </dl>
> >       </column>
> > diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> > index c4110de0a..fd84a1a5e 100644
> > --- a/utilities/ovn-trace.c
> > +++ b/utilities/ovn-trace.c
> > @@ -3224,6 +3224,8 @@ trace_actions(const struct ovnact *ovnacts, size_t 
> > ovnacts_len,
> >               execute_check_out_port_sec(ovnact_get_CHECK_OUT_PORT_SEC(a),
> >                                          dp, uflow);
> >               break;
> > +        case OVNACT_COMMIT_ECMP_NH:
> > +            break;
> >           }
> >       }
> >       ofpbuf_uninit(&stack);
> > 
> 
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to