On 8/25/22 12:16, Xavier Simonart wrote:
> active_v4_flows count was initialized when the northd node was computed.
> However, neither sb_multicast_group nor en_sb_igmp_group causes northd
> updates. Hence this count could keep increasing while processing igmp groups.
>
> This issue was sometimes 'hidden' by northd recomputes triggered when lflow
> changes could not be incrementally processed (SB busy).
>
> active_v4_flows is now reinitialized right before building flows
> (i.e. as part of the lflow node, which is computed on igmp group changes).
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2094710
>
> Signed-off-by: Xavier Simonart <[email protected]>
>
Hi Xavier,
Thanks for fixing this!
> ---
> v2: - rebased on main
> - use platform independent print format
> ---
> northd/northd.c | 18 +++++--
> tests/system-ovn.at | 125 ++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 140 insertions(+), 3 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 7e2681865..d8a9ae769 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -1051,7 +1051,12 @@ init_mcast_info_for_switch_datapath(struct ovn_datapath *od)
>      mcast_sw_info->query_max_response =
>          smap_get_ullong(&od->nbs->other_config, "mcast_query_max_response",
>                          OVN_MCAST_DEFAULT_QUERY_MAX_RESPONSE_S);
> +}
>
> +static void
> +init_mcast_flow_count(struct ovn_datapath *od)
> +{
Calling this for an 'od' that corresponds to a logical router is
actually undefined behavior, I think.
What do you think about adding a check here, something like below?

    if (od->nbr) {
        return;
    }
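
Just to make the suggestion concrete, the helper could then look roughly
like this (a sketch only, assuming the rest of the patch stays as is):

    static void
    init_mcast_flow_count(struct ovn_datapath *od)
    {
        if (od->nbr) {
            /* Flow count limits only apply to logical switches; the 'sw'
             * member of od->mcast_info is not meaningful for router
             * datapaths, so bail out early. */
            return;
        }

        struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
        mcast_sw_info->active_v4_flows = ATOMIC_VAR_INIT(0);
        mcast_sw_info->active_v6_flows = ATOMIC_VAR_INIT(0);
    }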
> +    struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
>      mcast_sw_info->active_v4_flows = ATOMIC_VAR_INIT(0);
>      mcast_sw_info->active_v6_flows = ATOMIC_VAR_INIT(0);
> }
> @@ -8368,6 +8373,10 @@ build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group,
>          if (atomic_compare_exchange_strong(
>                  &mcast_sw_info->active_v4_flows, &table_size,
>                  mcast_sw_info->table_size)) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +
> +            VLOG_INFO_RL(&rl, "Too many active mcast flows: %"PRIu64,
> +                         mcast_sw_info->active_v4_flows);
>              return;
>          }
>          atomic_add(&mcast_sw_info->active_v4_flows, 1, &dummy);
> @@ -15069,6 +15078,11 @@ build_mcast_groups(struct lflow_input *input_data,
>
>      hmap_init(mcast_groups);
>      hmap_init(igmp_groups);
> +    struct ovn_datapath *od;
> +
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        init_mcast_flow_count(od);
> +    }
>
>      HMAP_FOR_EACH (op, key_node, ports) {
>          if (op->nbrp && lrport_is_enabled(op->nbrp)) {
> @@ -15126,8 +15140,7 @@ build_mcast_groups(struct lflow_input *input_data,
>          }
>
>          /* If the datapath value is stale, purge the group. */
> -        struct ovn_datapath *od =
> -            ovn_datapath_from_sbrec(datapaths, sb_igmp->datapath);
> +        od = ovn_datapath_from_sbrec(datapaths, sb_igmp->datapath);
>
>          if (!od || ovn_datapath_is_stale(od)) {
>              sbrec_igmp_group_delete(sb_igmp);
> @@ -15172,7 +15185,6 @@ build_mcast_groups(struct lflow_input *input_data,
>       * IGMP groups are based on the groups learnt by their multicast enabled
>       * peers.
>       */
> -    struct ovn_datapath *od;
>      HMAP_FOR_EACH (od, key_node, datapaths) {
>
>          if (ovs_list_is_empty(&od->mcast_info.groups)) {
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 992813614..87093fbcc 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -8266,3 +8266,128 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
>
> AT_CLEANUP
> ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([mcast flow count])
To match the other IGMP/IP multicast system tests we should probably add:
AT_KEYWORDS([ovnigmp IP-multicast])
Other tests that use tcpdump have an additional check:
AT_SKIP_IF([test $HAVE_TCPDUMP = no])
Not a problem of this patch, but I wonder what's best: to almost silently
skip the test when tcpdump is not installed or to fail it instead.
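For reference, the top of the test would then look something like this
(placement mirrors the other IP multicast tests):

    AT_SETUP([mcast flow count])
    AT_KEYWORDS([ovnigmp IP-multicast])
    AT_SKIP_IF([test $HAVE_TCPDUMP = no])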
> +
> +ovn_start
> +
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_BR([br-int])
> +
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +    -- set Open_vSwitch . external-ids:system-id=hv1 \
> +    -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +    -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +    -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +    -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +check ovn-nbctl ls-add ls
> +check ovn-nbctl lsp-add ls vm1
> +check ovn-nbctl lsp-set-addresses vm1 00:00:00:00:00:01
> +check ovn-nbctl lsp-add ls vm2
> +check ovn-nbctl lsp-set-addresses vm2 00:00:00:00:00:02
> +check ovn-nbctl lsp-add ls vm3
> +check ovn-nbctl lsp-set-addresses vm3 00:00:00:00:00:03
> +
> +check ovn-nbctl set logical_switch ls other_config:mcast_querier=false other_config:mcast_snoop=true other_config:mcast_query_interval=30 other_config:mcast_eth_src=00:00:00:00:00:05 other_config:mcast_ip4_src=42.42.42.5 other_config:mcast_ip6_src=fe80::1 other_config:mcast_idle_timeout=3000
> +ovn-sbctl list ip_multicast
> +
> +wait_igmp_flows_installed()
> +{
> +    OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int table=31 | \
> +                    grep 'priority=90' | grep "nw_dst=$1"])
> +}
> +
> +ADD_NAMESPACES(vm1)
> +ADD_INT([vm1], [vm1], [br-int], [42.42.42.1/24])
> +NS_CHECK_EXEC([vm1], [ip link set vm1 address 00:00:00:00:00:01], [0])
> +NS_CHECK_EXEC([vm1], [ip route add default via 42.42.42.5], [0])
> +NS_CHECK_EXEC([vm1], [ip -6 addr add 2000::1/24 dev vm1], [0])
> +NS_CHECK_EXEC([vm1], [ip -6 route add default via 2000::5], [0])
> +check ovs-vsctl set Interface vm1 external_ids:iface-id=vm1
> +
> +ADD_NAMESPACES(vm2)
> +ADD_INT([vm2], [vm2], [br-int], [42.42.42.2/24])
> +NS_CHECK_EXEC([vm2], [ip link set vm2 address 00:00:00:00:00:02], [0])
> +NS_CHECK_EXEC([vm2], [ip -6 addr add 2000::2/64 dev vm2], [0])
> +NS_CHECK_EXEC([vm2], [ip link set lo up], [0])
> +check ovs-vsctl set Interface vm2 external_ids:iface-id=vm2
> +
> +ADD_NAMESPACES(vm3)
> +NS_CHECK_EXEC([vm3], [tcpdump -n -i any -nnle > vm3.pcap 2>/dev/null &], [ignore], [ignore])
I know we used tcpdump like this before in this file, but we should
probably use NETNS_DAEMONIZE() instead.
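Something along these lines (the pidfile name is just an example); it also
lets the test framework clean the daemon up on exit instead of the explicit
'kill $(pidof tcpdump)' below:

    NETNS_DAEMONIZE([vm3], [tcpdump -n -i any -nnle > vm3.pcap 2>/dev/null], [tcpdump0.pid])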
> +
> +ADD_INT([vm3], [vm3], [br-int], [42.42.42.3/24])
> +NS_CHECK_EXEC([vm3], [ip link set vm3 address 00:00:00:00:00:03], [0])
> +NS_CHECK_EXEC([vm3], [ip -6 addr add 2000::3/64 dev vm3], [0])
There are no v6 checks later on; were you planning to add some? If not,
this line can probably be removed.
> +NS_CHECK_EXEC([vm3], [ip link set lo up], [0])
> +NS_CHECK_EXEC([vm3], [ip route add default via 42.42.42.5], [0])
> +NS_CHECK_EXEC([vm3], [ip -6 route add default via 2000::5], [0])
Same here.
> +check ovs-vsctl set Interface vm3 external_ids:iface-id=vm3
> +
> +NS_CHECK_EXEC([vm2], [sysctl -w net.ipv4.igmp_max_memberships=100], [ignore], [ignore])
> +NS_CHECK_EXEC([vm3], [sysctl -w net.ipv4.igmp_max_memberships=100], [ignore], [ignore])
> +wait_for_ports_up
> +
> +NS_CHECK_EXEC([vm3], [ip addr add 228.0.0.1 dev vm3 autojoin], [0])
> +wait_igmp_flows_installed 228.0.0.1
> +
> +NS_CHECK_EXEC([vm1], [ping -q -c 3 -i 0.3 -w 2 228.0.0.1], [ignore], [ignore])
> +
> +OVS_WAIT_UNTIL([
> +    requests=`grep "ICMP echo request" -c vm3.pcap`
> +    test "${requests}" -ge "3"
> +])
> +kill $(pidof tcpdump)
> +
> +NS_CHECK_EXEC([vm3], [tcpdump -n -i any -nnleX > vm3.pcap 2>/dev/null &], [ignore], [ignore])
> +NS_CHECK_EXEC([vm2], [tcpdump -n -i any -nnleX > vm2.pcap 2>/dev/null &], [ignore], [ignore])
> +NS_CHECK_EXEC([vm1], [tcpdump -n -i any -nnleX > vm1.pcap 2>/dev/null &], [ignore], [ignore])
Same comment about using NETNS_DAEMONIZE() instead.
> +
> +for i in `seq 1 40`;do
> +    NS_CHECK_EXEC([vm2], [ip addr add 228.1.$i.1 dev vm2 autojoin &], [0])
> +    NS_CHECK_EXEC([vm3], [ip addr add 229.1.$i.1 dev vm3 autojoin &], [0])
> +    # Do not go too fast: going fast increases the chance of the SB being
> +    # busy, causing a full recompute (engine has not run). In this test we
> +    # do not want too many recomputes as they might hide incremental
> +    # processing related errors.
> +    sleep 0.2
> +done
> +
> +for i in `seq 1 40`;do
> +    wait_igmp_flows_installed 228.1.$i.1
> +    wait_igmp_flows_installed 229.1.$i.1
> +done
> +ovn-sbctl list multicast_group
> +
> +NS_CHECK_EXEC([vm1], [ping -q -c 3 -i 0.3 -w 2 228.1.1.1], [ignore], [ignore])
> +
> +OVS_WAIT_UNTIL([
> +    requests=`grep "ICMP echo request" -c vm2.pcap`
> +    test "${requests}" -ge "3"
> +])
> +kill $(pidof tcpdump)
> +
> +# The test could succeed thanks to a lucky northd recompute after hitting
> +# too many flows. Double check we never hit the error condition.
> +AT_CHECK([grep -qE 'Too many active mcast flows' northd/ovn-northd.log], [1])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
> +/connection dropped.*/d
> +/removing policing failed: No such device/d"])
> +AT_CLEANUP
> +])
Regards,
Dumitru