commit_lb_aff action translates to an openflow "learn" action that
inserts a new flow in the OFTABLE_CHK_LB_AFFINITY table. The new flow is
used to match on the 5-tuple and set REGBIT_KNOWN_LB_SESSION bit.
Moreover the new flow stores backend IP and port in register REG4 and
REG8[0..15] respectively.
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
controller/lflow.h | 1 +
include/ovn/actions.h | 15 ++
include/ovn/logical-fields.h | 3 +
lib/actions.c | 362 +++++++++++++++++++++++++++++++++++
ovn-sb.xml | 35 ++++
tests/ovn.at | 7 +
utilities/ovn-trace.c | 2 +
7 files changed, 425 insertions(+)
diff --git a/controller/lflow.h b/controller/lflow.h
index 8cbe312ca..4be079555 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -79,6 +79,7 @@ struct uuid;
#define OFTABLE_CHK_OUT_PORT_SEC 75
#define OFTABLE_ECMP_NH_MAC 76
#define OFTABLE_ECMP_NH 77
+#define OFTABLE_CHK_LB_AFFINITY 78
enum ref_type {
REF_TYPE_ADDRSET,
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index d7ee84dac..597cbb8e3 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -121,6 +121,7 @@ struct ovn_extend_table;
OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \
OVNACT(CHK_ECMP_NH_MAC, ovnact_result) \
OVNACT(CHK_ECMP_NH, ovnact_result) \
+ OVNACT(COMMIT_LB_AFF, ovnact_commit_lb_aff) \
/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
@@ -463,6 +464,20 @@ struct ovnact_commit_ecmp_nh {
uint8_t proto;
};
+/* OVNACT_COMMIT_LB_AFF.
+ *
+ * Parsed arguments of the commit_lb_aff action.  Both addresses are kept as
+ * 'struct in6_addr'; an IPv4 address is stored in its IPv4-mapped form. */
+struct ovnact_commit_lb_aff {
+ struct ovnact ovnact;
+
+ struct in6_addr vip; /* Load-balancer virtual IP. */
+ uint16_t vip_port; /* VIP L4 port; 0 is formatted as "no port". */
+ uint8_t proto; /* IPPROTO_TCP, IPPROTO_UDP or IPPROTO_SCTP. */
+
+ struct in6_addr backend; /* Backend IP to stick to. */
+ uint16_t backend_port; /* Backend L4 port, host byte order. */
+
+ uint16_t timeout; /* Idle timeout of the learned flow, in seconds. */
+};
+
/* Internal use by the helpers below. */
void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
index 3db7265e4..52f40de38 100644
--- a/include/ovn/logical-fields.h
+++ b/include/ovn/logical-fields.h
@@ -71,6 +71,7 @@ enum mff_log_flags_bits {
MLF_USE_SNAT_ZONE = 11,
MLF_CHECK_PORT_SEC_BIT = 12,
MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13,
+ MLF_COMMIT_LB_AFF_BIT = 14,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -116,6 +117,8 @@ enum mff_log_flags {
MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT),
MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT),
+
+ MLF_COMMIT_LB_AFF = (1 << MLF_COMMIT_LB_AFF_BIT),
};
/* OVN logical fields
diff --git a/lib/actions.c b/lib/actions.c
index adbb42db4..5c68e5c3a 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -4600,6 +4600,366 @@ encode_CHK_ECMP_NH(const struct ovnact_result *res,
MLF_LOOKUP_COMMIT_ECMP_NH_BIT, ofpacts);
}
+/* Parses one '<name> = "<ip>[:<port>]"' argument of commit_lb_aff, followed
+ * by the ',' that separates it from the next argument.
+ *
+ * On success stores the address in '*addr' (IPv4 addresses are stored in
+ * IPv4-mapped form), the port in '*port', the address family in '*family',
+ * and returns true.  On failure a syntax error is recorded on the lexer (if
+ * none was recorded yet) and false is returned. */
+static bool
+parse_commit_lb_aff_endpoint(struct action_context *ctx, const char *name,
+                             struct in6_addr *addr, uint16_t *port,
+                             int *family)
+{
+    uint16_t parsed_port = 0;
+    char *ip_str = NULL;
+    bool ok;
+
+    if (!lexer_match_id(ctx->lexer, name)
+        || !lexer_force_match(ctx->lexer, LEX_T_EQUALS)
+        || ctx->lexer->token.type != LEX_T_STRING
+        || !ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str,
+                                            &parsed_port, family)) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return false;
+    }
+
+    if (*family == AF_INET) {
+        ovs_be32 ip4;
+
+        ok = ip_parse(ip_str, &ip4);
+        if (ok) {
+            in6_addr_set_mapped_ipv4(addr, ip4);
+        }
+    } else {
+        ok = ipv6_parse(ip_str, addr);
+    }
+    free(ip_str);
+
+    if (!ok) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return false;
+    }
+    *port = parsed_port;
+
+    /* Consume the string token, then require the separating comma. */
+    lexer_get(ctx->lexer);
+    return lexer_force_match(ctx->lexer, LEX_T_COMMA);
+}
+
+/* Parses the argument list of
+ *
+ *     commit_lb_aff(vip = "IP[:PORT]", backend = "IP[:PORT]",
+ *                   proto = tcp|udp|sctp, timeout = N);
+ *
+ * into 'lb_aff'.  Arguments must appear in exactly this order.  Parsing stops
+ * at the first failed token match; the error is left on 'ctx->lexer'.
+ *
+ * The VIP and the backend must belong to the same address family, because
+ * the encoder derives IPv4 vs. IPv6 handling of both from the VIP alone. */
+static void
+parse_commit_lb_aff(struct action_context *ctx,
+                    struct ovnact_commit_lb_aff *lb_aff)
+{
+    int vip_family, backend_family;
+    uint16_t timeout;
+
+    if (!lexer_force_match(ctx->lexer, LEX_T_LPAREN)) { /* Skip '('. */
+        return;
+    }
+
+    if (!parse_commit_lb_aff_endpoint(ctx, "vip", &lb_aff->vip,
+                                      &lb_aff->vip_port, &vip_family)) {
+        return;
+    }
+
+    if (!parse_commit_lb_aff_endpoint(ctx, "backend", &lb_aff->backend,
+                                      &lb_aff->backend_port,
+                                      &backend_family)) {
+        return;
+    }
+
+    if (backend_family != vip_family) {
+        lexer_syntax_error(ctx->lexer, "invalid protocol family");
+        return;
+    }
+
+    if (!lexer_match_id(ctx->lexer, "proto")
+        || !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+
+    if (lexer_match_id(ctx->lexer, "tcp")) {
+        lb_aff->proto = IPPROTO_TCP;
+    } else if (lexer_match_id(ctx->lexer, "udp")) {
+        lb_aff->proto = IPPROTO_UDP;
+    } else if (lexer_match_id(ctx->lexer, "sctp")) {
+        lb_aff->proto = IPPROTO_SCTP;
+    } else {
+        lexer_syntax_error(ctx->lexer, "invalid protocol");
+        return;
+    }
+
+    if (!lexer_force_match(ctx->lexer, LEX_T_COMMA)) {
+        return;
+    }
+
+    if (!lexer_match_id(ctx->lexer, "timeout")
+        || !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+    if (!action_parse_uint16(ctx, &timeout, "affinity timeout")) {
+        return;
+    }
+    lb_aff->timeout = timeout;
+
+    lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */
+}
+
+/* Appends to 's' the textual form of 'lb_aff':
+ *
+ *     commit_lb_aff(vip = "...", backend = "...", proto = P, timeout = N);
+ *
+ * IPv4-mapped addresses are printed as dotted quads, anything else as a
+ * bracketed IPv6 address.  A port is appended as ":PORT" only when it is
+ * nonzero.  Any protocol other than UDP or SCTP is printed as "tcp". */
+static void
+format_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, struct ds *s)
+{
+    /* VIP. */
+    if (IN6_IS_ADDR_V4MAPPED(&lb_aff->vip)) {
+        ovs_be32 vip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip);
+        char *vip_str = xasprintf(IP_FMT, IP_ARGS(vip4));
+
+        ds_put_format(s, "commit_lb_aff(vip = \"%s", vip_str);
+        free(vip_str);
+    } else {
+        char vip_str[INET6_ADDRSTRLEN] = {};
+
+        inet_ntop(AF_INET6, &lb_aff->vip, vip_str, sizeof vip_str);
+        ds_put_format(s, "commit_lb_aff(vip = \"[%s]", vip_str);
+    }
+    if (lb_aff->vip_port != 0) {
+        ds_put_format(s, ":%d", lb_aff->vip_port);
+    }
+    ds_put_cstr(s, "\"");
+
+    /* Backend. */
+    if (IN6_IS_ADDR_V4MAPPED(&lb_aff->backend)) {
+        ovs_be32 backend4 = in6_addr_get_mapped_ipv4(&lb_aff->backend);
+        char *backend_str = xasprintf(IP_FMT, IP_ARGS(backend4));
+
+        ds_put_format(s, ", backend = \"%s", backend_str);
+        free(backend_str);
+    } else {
+        char backend_str[INET6_ADDRSTRLEN] = {};
+
+        inet_ntop(AF_INET6, &lb_aff->backend, backend_str,
+                  sizeof backend_str);
+        ds_put_format(s, ", backend = \"[%s]", backend_str);
+    }
+    if (lb_aff->backend_port != 0) {
+        ds_put_format(s, ":%d", lb_aff->backend_port);
+    }
+    ds_put_cstr(s, "\"");
+
+    /* Protocol name; TCP doubles as the fallback. */
+    const char *proto_name = "tcp";
+    if (lb_aff->proto == IPPROTO_UDP) {
+        proto_name = "udp";
+    } else if (lb_aff->proto == IPPROTO_SCTP) {
+        proto_name = "sctp";
+    }
+
+    ds_put_format(s, ", proto = %s, timeout = %d);",
+                  proto_name, lb_aff->timeout);
+}
+/* Encodes 'lb_aff' as an OpenFlow "learn" action appended to 'ofpacts'.
+ *
+ * The learned flow goes to table OFTABLE_CHK_LB_AFFINITY with priority
+ * OFP_DEFAULT_PRIORITY and an idle timeout of 'lb_aff->timeout'.  Its match
+ * is built from, in order: the Ethernet type (IPv4 or IPv6, chosen from the
+ * VIP address family), the IP source (copied from a field), the IP
+ * destination (the VIP), the IP protocol and the L4 destination port
+ * (copied from a field).  Its actions load 1 into bit MLF_COMMIT_LB_AFF_BIT
+ * of MFF_LOG_FLAGS, the backend IP into REG4 (IPv4) or XXREG1 (IPv6), and
+ * the backend port into the low 16 bits of REG8.
+ *
+ * 'ep' is unused. */
+static void
+encode_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff,
+ const struct ovnact_encode_params *ep OVS_UNUSED,
+ struct ofpbuf *ofpacts)
+{
+ bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_aff->vip);
+ size_t ol_offset = ofpacts->size; /* Used to re-fetch 'ol' at the end. */
+ struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts);
+ struct match match = MATCH_CATCHALL_INITIALIZER;
+ struct ofpact_learn_spec *ol_spec;
+ unsigned int imm_bytes;
+ uint8_t *src_imm;
+
+ ol->flags = NX_LEARN_F_DELETE_LEARNED;
+ ol->idle_timeout = lb_aff->timeout; /* seconds. */
+ ol->priority = OFP_DEFAULT_PRIORITY;
+ ol->table_id = OFTABLE_CHK_LB_AFFINITY;
+
+ /* Match on the Ethernet type implied by the VIP address family: an
+  * immediate ETH_TYPE_IPV6 or ETH_TYPE_IP value. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ union mf_value imm_eth_type = {
+ .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP)
+ };
+ mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match);
+ /* Push value last, as this may reallocate 'ol_spec'. */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_eth_type, imm_bytes);
+
+ /* IP src: match on the IPv4/IPv6 source address, with the value taken
+  * from the same field (NX_LEARN_SRC_FIELD) rather than an immediate. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field =
+ ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC);
+ ol_spec->src.field =
+ ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* IP dst: match on the VIP address, supplied as an immediate. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field =
+ ipv6 ? mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST);
+ union mf_value imm_ip;
+ if (ipv6) {
+ imm_ip = (union mf_value) {
+ .ipv6 = lb_aff->vip,
+ };
+ } else {
+ ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip);
+ imm_ip = (union mf_value) {
+ .be32 = ip4,
+ };
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ mf_write_subfield_value(&ol_spec->dst, &imm_ip, &match);
+
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_ip, imm_bytes);
+
+ /* IP proto: match on 'lb_aff->proto', supplied as an immediate.
+  * NOTE(review): 'src.field' is also set below although the source type
+  * is NX_LEARN_SRC_IMMEDIATE; presumably it is unused -- confirm. */
+ union mf_value imm_proto = {
+ .u8 = lb_aff->proto,
+ };
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_IP_PROTO);
+ ol_spec->src.field = mf_from_id(MFF_IP_PROTO);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match);
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_proto, imm_bytes);
+
+ /* L4 dst port: match on the TCP/UDP/SCTP destination port selected by
+  * 'lb_aff->proto'.  The value is taken from the same field
+  * (NX_LEARN_SRC_FIELD); 'lb_aff->vip_port' is not used here. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ switch (lb_aff->proto) {
+ case IPPROTO_TCP:
+ ol_spec->dst.field = mf_from_id(MFF_TCP_DST);
+ ol_spec->src.field = mf_from_id(MFF_TCP_DST);
+ break;
+ case IPPROTO_UDP:
+ ol_spec->dst.field = mf_from_id(MFF_UDP_DST);
+ ol_spec->src.field = mf_from_id(MFF_UDP_DST);
+ break;
+ case IPPROTO_SCTP:
+ ol_spec->dst.field = mf_from_id(MFF_SCTP_DST);
+ ol_spec->src.field = mf_from_id(MFF_SCTP_DST);
+ break;
+ default:
+ OVS_NOT_REACHED();
+ break;
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* Load 1 into bit MLF_COMMIT_LB_AFF_BIT of MFF_LOG_FLAGS. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS);
+ ol_spec->dst.ofs = MLF_COMMIT_LB_AFF_BIT;
+ ol_spec->dst.n_bits = 1;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_LOAD;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ union mf_value imm_reg_value = {
+ .u8 = 1
+ };
+ mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match);
+
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ ol = ofpacts->header; /* NOTE(review): 'ol' is reassigned from
+                        * 'ol_offset' before its next use below. */
+ memcpy(src_imm, &imm_reg_value, imm_bytes);
+
+ /* Load backend IP in REG4 (IPv4) or XXREG1 (IPv6). */
+ union mf_value imm_backend_ip;
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+
+ if (ipv6) {
+ imm_backend_ip = (union mf_value) {
+ .ipv6 = lb_aff->backend,
+ };
+ ol_spec->dst.field = mf_from_id(MFF_XXREG1);
+ } else {
+ ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->backend);
+ imm_backend_ip = (union mf_value) {
+ .be32 = ip4,
+ };
+ ol_spec->dst.field = mf_from_id(MFF_REG4);
+ }
+
+ ol_spec->dst_type = NX_LEARN_DST_LOAD;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ mf_write_subfield_value(&ol_spec->dst, &imm_backend_ip, &match);
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_backend_ip, imm_bytes);
+
+ /* Load backend port (network byte order) in the low 16 bits of REG8. */
+ union mf_value imm_backend_port;
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ imm_backend_port = (union mf_value) {
+ .be16 = htons(lb_aff->backend_port),
+ };
+
+ ol_spec->dst.field = mf_from_id(MFF_REG8);
+ ol_spec->dst_type = NX_LEARN_DST_LOAD;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = 8 * sizeof(lb_aff->backend_port);
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ mf_write_subfield_value(&ol_spec->dst, &imm_backend_port, &match);
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_backend_port, imm_bytes);
+
+ ol = ofpbuf_at_assert(ofpacts, ol_offset, sizeof *ol);
+ ofpact_finish_LEARN(ofpacts, &ol);
+}
+
+/* Releases resources owned by 'lb_aff'.  struct ovnact_commit_lb_aff has no
+ * dynamically allocated members, so there is nothing to free. */
+static void
+ovnact_commit_lb_aff_free(struct ovnact_commit_lb_aff *lb_aff OVS_UNUSED)
+{
+}
+
/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
@@ -4790,6 +5150,8 @@ parse_action(struct action_context *ctx)
parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts));
} else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) {
parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts));
+ } else if (lexer_match_id(ctx->lexer, "commit_lb_aff")) {
+ parse_commit_lb_aff(ctx, ovnact_put_COMMIT_LB_AFF(ctx->ovnacts));
} else {
lexer_syntax_error(ctx->lexer, "expecting action");
}
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 315d60853..fae62c09d 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -2624,6 +2624,41 @@ tcp.flags = RST;
register <var>R</var> is set to 1.
</p>
</dd>
+
+ <dt>
+ <code>
+ commit_lb_aff(<var>vip</var>, <var>backend</var>,
+ <var>proto</var>, <var>timeout</var>);
+ </code>
+ </dt>
+ <dd>
+ <p>
+ <b>Parameters</b>: load-balancer virtual ip:port <var>vip</var>,
+ load-balancer backend ip:port <var>backend</var>, load-balancer
+ protocol <var>proto</var>, affinity timeout <var>timeout</var>.
+ </p>
+
+ <p>
+ This action translates to an openflow "learn" action that inserts
+ a new flow in table 78.