From: Numan Siddique <[email protected]>
When a container port is created inside a VM, the below kernel message
is seen and IPv6 doesn't work on that interface.
[ 138.000753] IPv6: vlan4: IPv6 duplicate address <IPv6 LLA> detected!
When a container port sends a ethernet broadcast packet, OVN delivers the same
packet back to the child port (and hence the DAD check fails).
This is because
- 'MLF_ALLOW_LOOPBACK_BIT' is set in REG10 in table 0 for the packets received
from any child port.
- for ethernet broadcast packets, Table 33 (OFTABLE_LOCAL_OUTPUT) clones the
packet for every local port 'P' which belongs to the same datapath i.e
'P'->REG15, resubmit(,34)
- If REG14 and REG15 are same, Table 34 (OFTABLE_CHECK_LOOPBACK) drops the
packet
if 'MLF_ALLOW_LOOPBACK_BIT' is not set.
- But in the case of container ports, this bit will be set and hence doesn't
gets
dropped and eventually gets delivered to the source container port.
- The VM's kernel thinks its a DAD packet. The latest kernels (4.19) implements
the RFC -7527 (enhanced DAD), but it is still a problem for older kernels.
This patch fixes the issue by using a new register bit
(MLF_NESTED_CONTAINER_BIT)
instead of 'MLF_ALLOW_LOOPBACK_BIT' and sets it in REG10 for the packets
received
from child ports so that Table 34 drops the packet for the source port.
(cherry picked from 22e506d3b686d654239c381a8c4166803fd00692)
Conflicts:
ovn/controller/physical.c
Branch 2.9 doesn't have indexing support and the function lport_lookup_by_name()
is not available. To resolve this conflict, I had to use
SBREC_PORT_BINDING_FOR_EACH
instead.
Signed-off-by: Numan Siddique <[email protected]>
CC: Gurucharan Shetty <[email protected]>
---
ovn/controller/ofctrl.c | 30 +++++++++++------
ovn/controller/ofctrl.h | 5 +++
ovn/controller/physical.c | 70 ++++++++++++++++++++++++++++++++++-----
ovn/lib/logical-fields.h | 4 +++
tests/ovn.at | 11 ++++++
5 files changed, 102 insertions(+), 18 deletions(-)
diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index fc4d4d928..70852e790 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -624,10 +624,11 @@ ofctrl_recv(const struct ofp_header *oh, enum ofptype
type)
*
* The caller should initialize its own hmap to hold the flows. */
void
-ofctrl_add_flow(struct hmap *desired_flows,
- uint8_t table_id, uint16_t priority, uint64_t cookie,
- const struct match *match,
- const struct ofpbuf *actions)
+ofctrl_check_and_add_flow(struct hmap *desired_flows,
+ uint8_t table_id, uint16_t priority, uint64_t cookie,
+ const struct match *match,
+ const struct ofpbuf *actions,
+ bool log_duplicate_flow)
{
struct ovn_flow *f = xmalloc(sizeof *f);
f->table_id = table_id;
@@ -639,13 +640,14 @@ ofctrl_add_flow(struct hmap *desired_flows,
f->cookie = cookie;
if (ovn_flow_lookup(desired_flows, f)) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
- if (!VLOG_DROP_DBG(&rl)) {
- char *s = ovn_flow_to_string(f);
- VLOG_DBG("dropping duplicate flow: %s", s);
- free(s);
+ if (log_duplicate_flow) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+ if (!VLOG_DROP_DBG(&rl)) {
+ char *s = ovn_flow_to_string(f);
+ VLOG_DBG("dropping duplicate flow: %s", s);
+ free(s);
+ }
}
-
ovn_flow_destroy(f);
return;
}
@@ -653,6 +655,14 @@ ofctrl_add_flow(struct hmap *desired_flows,
hmap_insert(desired_flows, &f->hmap_node, f->hmap_node.hash);
}
+void
+ofctrl_add_flow(struct hmap *desired_flows,
+ uint8_t table_id, uint16_t priority, uint64_t cookie,
+ const struct match *match, const struct ofpbuf *actions)
+{
+ ofctrl_check_and_add_flow(desired_flows, table_id, priority, cookie,
+ match, actions, true);
+}
/* ovn_flow. */
diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
index 125f9a4c2..f1f8a29e3 100644
--- a/ovn/controller/ofctrl.h
+++ b/ovn/controller/ofctrl.h
@@ -56,4 +56,9 @@ void ofctrl_add_flow(struct hmap *desired_flows, uint8_t
table_id,
void ofctrl_flow_table_clear(void);
+void ofctrl_check_and_add_flow(struct hmap *desired_flows, uint8_t table_id,
+ uint16_t priority, uint64_t cookie,
+ const struct match *,
+ const struct ofpbuf *ofpacts,
+ bool log_duplicate_flow);
#endif /* ovn/ofctrl.h */
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 8c92c1d9b..d64659543 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -189,7 +189,8 @@ get_zone_ids(const struct sbrec_port_binding *binding,
static void
put_local_common_flows(uint32_t dp_key, uint32_t port_key,
- bool nested_container, const struct zone_ids *zone_ids,
+ uint32_t parent_port_key,
+ const struct zone_ids *zone_ids,
struct ofpbuf *ofpacts_p, struct hmap *flow_table)
{
struct match match;
@@ -244,11 +245,19 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key,
/* Table 64, Priority 100.
* =======================
*
- * If the packet is supposed to hair-pin because the "loopback"
- * flag is set (or if the destination is a nested container),
+ * If the packet is supposed to hair-pin because the
+ * - "loopback" flag is set
+ * - or if the destination is a nested container
+ * - or if "nested_container" flag is set and the destination is the
+ * parent port,
* temporarily set the in_port to zero, resubmit to
* table 65 for logical-to-physical translation, then restore
- * the port number. */
+ * the port number.
+ *
+ * If 'parent_port_key' is set, then the 'port_key' represents a nested
+ * container. */
+
+ bool nested_container = parent_port_key ? true: false;
match_init_catchall(&match);
ofpbuf_clear(ofpacts_p);
match_set_metadata(&match, htonll(dp_key));
@@ -264,6 +273,38 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key,
put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0,
&match, ofpacts_p);
+
+ if (nested_container) {
+ /* It's a nested container and when the packet from the nested
+ * container is to be sent to the parent port, "nested_container"
+ * flag will be set. We need to temporarily set the in_port to zero
+ * as mentioned in the comment above.
+ *
+ * If a parent port has multiple child ports, then this if condition
+ * will be hit multiple times, but we want to add only one flow.
+ * ofctrl_add_flow() logs a warning message for duplicate flows.
+ * So use the function 'ofctrl_check_and_add_flow' which doesn't
+ * log a warning.
+ *
+ * Other option is to add this flow for all the ports which are not
+ * nested containers. In which case we will add this flow for all the
+ * ports even if they don't have any child ports which is
+ * unnecessary.
+ */
+ match_init_catchall(&match);
+ ofpbuf_clear(ofpacts_p);
+ match_set_metadata(&match, htonll(dp_key));
+ match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, parent_port_key);
+ match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
+ MLF_NESTED_CONTAINER, MLF_NESTED_CONTAINER);
+
+ put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(ofpacts_p));
+ put_load(0, MFF_IN_PORT, 0, 16, ofpacts_p);
+ put_resubmit(OFTABLE_LOG_TO_PHY, ofpacts_p);
+ put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
+ ofctrl_check_and_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0,
+ &match, ofpacts_p, false);
+ }
}
static void
@@ -328,7 +369,7 @@ consider_port_binding(struct controller_ctx *ctx,
}
struct zone_ids binding_zones = get_zone_ids(binding, ct_zones);
- put_local_common_flows(dp_key, port_key, false, &binding_zones,
+ put_local_common_flows(dp_key, port_key, 0, &binding_zones,
ofpacts_p, flow_table);
match_init_catchall(&match);
@@ -451,6 +492,7 @@ consider_port_binding(struct controller_ctx *ctx,
int tag = 0;
bool nested_container = false;
+ const struct sbrec_port_binding *parent_port = NULL;
ofp_port_t ofport;
bool is_remote = false;
if (binding->parent_port && *binding->parent_port) {
@@ -462,6 +504,13 @@ consider_port_binding(struct controller_ctx *ctx,
if (ofport) {
tag = *binding->tag;
nested_container = true;
+ const struct sbrec_port_binding *pb = NULL;
+ SBREC_PORT_BINDING_FOR_EACH (pb, ctx->ovnsb_idl) {
+ if (!strcmp(binding->parent_port, pb->logical_port)) {
+ parent_port = pb;
+ break;
+ }
+ }
}
} else {
ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
@@ -522,7 +571,10 @@ consider_port_binding(struct controller_ctx *ctx,
*/
struct zone_ids zone_ids = get_zone_ids(binding, ct_zones);
- put_local_common_flows(dp_key, port_key, nested_container, &zone_ids,
+ uint32_t parent_port_key = parent_port ? parent_port->tunnel_key : 0;
+ /* Pass the parent port tunnel key if the port is a nested
+ * container. */
+ put_local_common_flows(dp_key, port_key, parent_port_key, &zone_ids,
ofpacts_p, flow_table);
/* Table 0, Priority 150 and 100.
@@ -552,8 +604,10 @@ consider_port_binding(struct controller_ctx *ctx,
if (nested_container) {
/* When a packet comes from a container sitting behind a
* parent_port, we should let it loopback to other containers
- * or the parent_port itself. */
- put_load(MLF_ALLOW_LOOPBACK, MFF_LOG_FLAGS, 0, 1, ofpacts_p);
+ * or the parent_port itself. Indicate this by setting the
+ * MLF_NESTED_CONTAINER_BIT in MFF_LOG_FLAGS.*/
+ put_load(1, MFF_LOG_FLAGS, MLF_NESTED_CONTAINER_BIT, 1,
+ ofpacts_p);
}
ofpact_put_STRIP_VLAN(ofpacts_p);
}
diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h
index b1dbb035a..95759a8bb 100644
--- a/ovn/lib/logical-fields.h
+++ b/ovn/lib/logical-fields.h
@@ -50,6 +50,7 @@ enum mff_log_flags_bits {
MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
MLF_FORCE_SNAT_FOR_LB_BIT = 3,
MLF_LOCAL_ONLY_BIT = 4,
+ MLF_NESTED_CONTAINER_BIT = 5,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -75,6 +76,9 @@ enum mff_log_flags {
* hypervisors should instead only be output to local targets
*/
MLF_LOCAL_ONLY = (1 << MLF_LOCAL_ONLY_BIT),
+
+ /* Indicate that a packet was received from a nested container. */
+ MLF_NESTED_CONTAINER = (1 << MLF_NESTED_CONTAINER_BIT),
};
#endif /* ovn/lib/logical-fields.h */
diff --git a/tests/ovn.at b/tests/ovn.at
index 2478bddd6..1d21c3d02 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -6713,6 +6713,17 @@
packet=${dst_mac}${src_mac}8100000208004500001c000000003f110100${src_ip}${dst_ip
echo $packet >> expected1
OVN_CHECK_PACKETS([hv1/vm1-tx.pcap], [expected1])
+# Send broadcast packet from foo1. foo1 should not receive the same packet.
+src_mac="f00000010205"
+dst_mac="ffffffffffff"
+src_ip=`ip_to_hex 192 168 1 2`
+dst_ip=`ip_to_hex 255 255 255 255`
+packet=${dst_mac}${src_mac}8100000108004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
+as hv1 ovs-appctl netdev-dummy/receive vm1 $packet
+
+# expected packet at VM1
+OVN_CHECK_PACKETS([hv1/vm1-tx.pcap], [expected1])
+
OVN_CLEANUP([hv1],[hv2])
AT_CLEANUP
--
2.17.2
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev