Hi Lorenzo, I have a few comments below.
On 7/13/22 08:44, Lorenzo Bianconi wrote:
commit_ecmp_nh action translates to an openflow "learn" action that
inserts two new flows in the OFTABLE_ECMP_NH_MAC and OFTABLE_ECMP_NH
tables. These new flows are used to match on the the 5-tuple and the
expected next-hop mac address and set REGBIT_KNOWN_ECMP_NH.
commit_ecmp_nh action will be used to improve ECMP symmetric routing
reliability.
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
controller/lflow.h | 2 +
include/ovn/actions.h | 7 ++
include/ovn/logical-fields.h | 3 +
lib/actions.c | 176 +++++++++++++++++++++++++++++++++++
ovn-sb.xml | 34 +++++++
utilities/ovn-trace.c | 2 +
6 files changed, 224 insertions(+)
diff --git a/controller/lflow.h b/controller/lflow.h
index 342967917..543d3cd96 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -79,6 +79,8 @@ struct uuid;
#define OFTABLE_CHK_IN_PORT_SEC 73
#define OFTABLE_CHK_IN_PORT_SEC_ND 74
#define OFTABLE_CHK_OUT_PORT_SEC 75
+#define OFTABLE_ECMP_NH_MAC 76
+#define OFTABLE_ECMP_NH 77
enum ref_type {
REF_TYPE_ADDRSET,
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 33c319f1c..cef930e1a 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -118,6 +118,7 @@ struct ovn_extend_table;
OVNACT(LOOKUP_FDB, ovnact_lookup_fdb) \
OVNACT(CHECK_IN_PORT_SEC, ovnact_result) \
OVNACT(CHECK_OUT_PORT_SEC, ovnact_result) \
+ OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \
/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
@@ -453,6 +454,12 @@ struct ovnact_lookup_fdb {
struct expr_field dst; /* 1-bit destination field. */
};
+/* OVNACT_COMMIT_ECMP_NH. */
+struct ovnact_commit_ecmp_nh {
+ struct ovnact ovnact;
+ bool ipv6;
+};
+
/* Internal use by the helpers below. */
void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
index bfb07ebef..3db7265e4 100644
--- a/include/ovn/logical-fields.h
+++ b/include/ovn/logical-fields.h
@@ -70,6 +70,7 @@ enum mff_log_flags_bits {
MLF_LOCALPORT_BIT = 10,
MLF_USE_SNAT_ZONE = 11,
MLF_CHECK_PORT_SEC_BIT = 12,
+ MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -113,6 +114,8 @@ enum mff_log_flags {
/* Indicate the packet has been received from a localport */
MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT),
+
+ MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT),
};
/* OVN logical fields
diff --git a/lib/actions.c b/lib/actions.c
index aab044306..132c63228 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -41,6 +41,7 @@
#include "uuid.h"
#include "socket-util.h"
#include "lib/ovn-util.h"
+#include "controller/lflow.h"
VLOG_DEFINE_THIS_MODULE(actions);
@@ -4278,6 +4279,179 @@ encode_CHECK_OUT_PORT_SEC(const struct ovnact_result
*dl,
MLF_CHECK_PORT_SEC_BIT, ofpacts);
}
+static void
+parse_commit_ecmp_nh(struct action_context *ctx,
+ struct ovnact_commit_ecmp_nh *ecmp_nh)
+{
+ bool ipv6;
+
+ lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. */
+ if (!lexer_match_id(ctx->lexer, "ipv6")) {
+ lexer_syntax_error(ctx->lexer, "invalid parameter");
+ return;
+ }
+ if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+ lexer_syntax_error(ctx->lexer, "invalid parameter");
+ return;
+ }
+ if (lexer_match_string(ctx->lexer, "true") ||
+ lexer_match_id(ctx->lexer, "true")) {
+ ipv6 = true;
+ } else if (lexer_match_string(ctx->lexer, "false") ||
+ lexer_match_id(ctx->lexer, "false")) {
+ ipv6 = false;
+ } else {
+ lexer_syntax_error(ctx->lexer,
+ "expecting true or false");
+ return;
+ }
+ lexer_force_match(ctx->lexer, LEX_T_SEMICOLON);
+
+ lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */
+
+ ecmp_nh->ipv6 = ipv6;
+}
+
+static void
+format_COMMIT_ECMP_NH(const struct ovnact_commit_ecmp_nh *ecmp_nh,
+ struct ds *s)
+{
+ ds_put_format(s, "commit_ecmp_nh(ipv6=%s;);",
+ ecmp_nh->ipv6 ? "true" : "false");
+}
+
+static void
+ovnact_commit_ecmp_nh_free(struct ovnact_commit_ecmp_nh *ecmp_nh OVS_UNUSED)
+{
+}
+
+static void
+commit_ecmp_learn_action(struct ofpbuf *ofpacts, bool nw_conn, bool ipv6)
+{
In the bugzilla issue, Dumitru suggests checking the src and dst L4 port
as part of the flows generated by the learn() action. Is there a reason
we're not doing it here? For that matter, would it make sense to check
the L4 protocol as well? I'm thinking of a potential scenario where we
have UDP and TCP traffic between the same IP addresses, but for whatever
reason, the UDP traffic was received over one ECMP route and the TCP
traffic was received over a different one.
+ struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts);
+ struct match match = MATCH_CATCHALL_INITIALIZER;
+ struct ofpact_learn_spec *ol_spec;
+ unsigned int imm_bytes;
+ uint8_t *src_imm;
+
+ ol->flags = NX_LEARN_F_DELETE_LEARNED;
+ ol->idle_timeout = 20; /* seconds. */
+ ol->hard_timeout = 30; /* seconds. */
+ ol->priority = OFP_DEFAULT_PRIORITY;
+ ol->table_id = nw_conn ? OFTABLE_ECMP_NH_MAC : OFTABLE_ECMP_NH;
+
+ /* Match on metadata of the packet that created the new table. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_METADATA);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+ ol_spec->src.field = mf_from_id(MFF_METADATA);
+
+ if (nw_conn) {
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_ETH_SRC);
+ ol_spec->src.field = mf_from_id(MFF_ETH_SRC);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+ }
+
+ /* Match on the same ETH type as the packet that created the new table. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ union mf_value imm_eth_type = {
+ .be16 = !ipv6 ? htons(ETH_TYPE_IP) : htons(ETH_TYPE_IPV6)
It's much easier to understand the code if the condition on ipv6 is
reversed, i.e.:
.be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP)
This same pattern is repeated many times in this function.
+ };
+ mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match);
+ /* Push value last, as this may reallocate 'ol_spec'. */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_eth_type, imm_bytes);
+
+ /* IP src. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field =
+ !ipv6 ? mf_from_id(MFF_IPV4_SRC) : mf_from_id(MFF_IPV6_SRC);
+ if (nw_conn) {
+ ol_spec->src.field =
+ !ipv6 ? mf_from_id(MFF_IPV4_SRC) : mf_from_id(MFF_IPV6_SRC);
+ } else {
+ ol_spec->src.field =
+ !ipv6 ? mf_from_id(MFF_IPV4_DST) : mf_from_id(MFF_IPV6_DST);
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* IP dst. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field =
+ !ipv6 ? mf_from_id(MFF_IPV4_DST) : mf_from_id(MFF_IPV6_DST);
+ if (nw_conn) {
+ ol_spec->src.field =
+ !ipv6 ? mf_from_id(MFF_IPV4_DST) : mf_from_id(MFF_IPV6_DST);
+ } else {
+ ol_spec->src.field =
+ !ipv6 ? mf_from_id(MFF_IPV4_SRC) : mf_from_id(MFF_IPV6_SRC);
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* IP proto. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_IP_PROTO);
+ ol_spec->src.field = mf_from_id(MFF_IP_PROTO);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* Set MLF_LOOKUP_COMMIT_ECMP_NH_BIT for ecmp replies. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS);
+ ol_spec->dst.ofs = MLF_LOOKUP_COMMIT_ECMP_NH_BIT;
+ ol_spec->dst.n_bits = 1;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_LOAD;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ union mf_value imm_reg_value = {
+ .u8 = 1
+ };
+ mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match);
+
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_reg_value, imm_bytes);
+
+ ofpact_finish_LEARN(ofpacts, &ol);
+}
+
+static void
+encode_COMMIT_ECMP_NH(const struct ovnact_commit_ecmp_nh *ecmp_nh,
+ const struct ovnact_encode_params *ep OVS_UNUSED,
+ struct ofpbuf *ofpacts)
+{
+ commit_ecmp_learn_action(ofpacts, true, ecmp_nh->ipv6);
+ commit_ecmp_learn_action(ofpacts, false, ecmp_nh->ipv6);
+}
+
/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
@@ -4458,6 +4632,8 @@ parse_action(struct action_context *ctx)
ovnact_put_CT_SNAT_TO_VIP(ctx->ovnacts);
} else if (lexer_match_id(ctx->lexer, "put_fdb")) {
parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts));
+ } else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) {
+ parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts));
} else {
lexer_syntax_error(ctx->lexer, "expecting action");
}
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 59ad3aa2d..ef5586020 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -2573,6 +2573,40 @@ tcp.flags = RST;
<b>Example:</b> <code>reg8[0..7] = check_out_port_sec();</code>
</p>
</dd>
+
+ <dt><code>commit_ecmp_nh(<var>ipv6</var>);</code></dt>
+ <dd>
+ <p>
+ <b>Parameters</b>: IPv4/IPv6 traffic.
+ </p>
+
+ <p>
+ This action translates to an openflow "learn" action that inserts
+ two new flows in the <code>OFTABLE_ECMP_NH_MAC</code> and in the
+ <code>OFTABLE_ECMP_NH</code> tables.
"OFTABLE_ECMP_NH_MAC" and "OFTABLE_ECMP_NH" will not mean anything to
the users who read this. You should probably refer to them by their
table numbers (76 and 77) instead. This is done in other patches in this
series as well.
+ </p>
+
+ <ul>
+ <li>
+ Match on the the 5-tuple and the expected next-hop mac address
+ in <code>OFTABLE_ECMP_NH_MAC</code> table:
+ <code>nw_src=ip0</code>, <code>nw_dst=ip1</code>,
+ <code>ip_proto</code>,<code>dl_src=ethaddr</code> and
+ set <code>REGBIT_KNOWN_ECMP_NH</code>.
+ </li>
+ <li>
+ Match on the 5-tuple in <code>OFTABLE_ECMP_NH</code> table:
+ <code>nw_src=ip1</code>, <code>nw_dst=ip0</code>,
+ <code>ip_proto</code> and set <code>REGBIT_KNOWN_ECMP_NH</code>
+ to 1
+ </li>
+ </ul>
+
+ <p>
+ This action is applied if the packet arrives via ECMP route or
+ if it is routed via an ECMP route
+ </p>
+ </dd>
</dl>
</column>
diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
index c4110de0a..fd84a1a5e 100644
--- a/utilities/ovn-trace.c
+++ b/utilities/ovn-trace.c
@@ -3224,6 +3224,8 @@ trace_actions(const struct ovnact *ovnacts, size_t
ovnacts_len,
execute_check_out_port_sec(ovnact_get_CHECK_OUT_PORT_SEC(a),
dp, uflow);
break;
+ case OVNACT_COMMIT_ECMP_NH:
+ break;
}
}
ofpbuf_uninit(&stack);
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev