If BFD goes down between the highest-priority and a lower-priority gw, we expect a GARP to be emitted by the highest-priority gw, and traffic to keep flowing through it. If the highest-priority gw dies, then the lower-priority gw should take over. Finally, if a lower-priority gw dies, traffic should not be affected.
Signed-off-by: Xavier Simonart <xsimo...@redhat.com> Signed-off-by: Ales Musil <amu...@redhat.com> (cherry picked from commit 0d2560be1fcc32e5fd8fe953df90a3b15349e731) --- v2: - Added missing cherry-pick and Acked-by/Signed-off-by from origin commit. --- tests/multinode.at | 309 +++++++++++++++++++++++++++++++++------------ 1 file changed, 229 insertions(+), 80 deletions(-) diff --git a/tests/multinode.at b/tests/multinode.at index b3fc5c60c..2302ed195 100644 --- a/tests/multinode.at +++ b/tests/multinode.at @@ -2997,13 +2997,18 @@ AT_SETUP([HA: Check for missing garp on leader when BFD goes back up]) # └────────────────────────────────────────────────────────────────────────────────────────────────────────┘ # The goal of this test is the check that GARP are properly generated by higest priority traffic when -# BFD comes back up. +# BFD goes down, and back up, and this whether the BFD event is due either to some bfd packet lost +# or by gw death. +# gw1 is the highest priority gw; gw2 the second priority and gw3 is configured as the lowest priority gw. +# So gw3 should in this test neither send garp or receive packets. # # Enable vconn so we can check the GARP from a log perspective. 
m_as ovn-gw-1 ovn-appctl vlog/set vconn:dbg m_as ovn-gw-2 ovn-appctl vlog/set vconn:dbg +m_as ovn-gw-3 ovn-appctl vlog/set vconn:dbg m_as ovn-gw-1 ovn-appctl vlog/disable-rate-limit m_as ovn-gw-2 ovn-appctl vlog/disable-rate-limit +m_as ovn-gw-3 ovn-appctl vlog/disable-rate-limit check_fake_multinode_setup @@ -3013,10 +3018,13 @@ cleanup_multinode_resources ip_ch1=$(m_as ovn-chassis-1 ip a show dev eth1 | grep "inet " | awk '{print $2}'| cut -d '/' -f1) ip_gw1=$(m_as ovn-gw-1 ip a show dev eth1 | grep "inet " | awk '{print $2}'| cut -d '/' -f1) ip_gw2=$(m_as ovn-gw-2 ip a show dev eth1 | grep "inet " | awk '{print $2}'| cut -d '/' -f1) +ip_gw3=$(m_as ovn-gw-3 ip a show dev eth1 | grep "inet " | awk '{print $2}'| cut -d '/' -f1) from_gw1_to_gw2=$(m_as ovn-gw-1 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_gw2) +from_gw1_to_gw3=$(m_as ovn-gw-1 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_gw3) from_gw1_to_ch1=$(m_as ovn-gw-1 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_ch1) from_gw2_to_gw1=$(m_as ovn-gw-2 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_gw1) +from_gw2_to_gw3=$(m_as ovn-gw-2 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_gw3) from_gw2_to_ch1=$(m_as ovn-gw-2 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_ch1) from_ch1_to_gw1=$(m_as ovn-chassis-1 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_gw1) from_ch1_to_gw2=$(m_as ovn-chassis-1 ovs-vsctl --bare --columns=name find interface options:remote_ip=$ip_gw2) @@ -3028,6 +3036,7 @@ OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys]) OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys]) OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys]) OVS_WAIT_UNTIL([m_as ovn-gw-2 ip link show | grep -q genev_sys]) +OVS_WAIT_UNTIL([m_as ovn-gw-3 ip link show | grep -q genev_sys]) check multinode_nbctl ls-add inside check 
multinode_nbctl ls-add outside @@ -3047,8 +3056,9 @@ check multinode_nbctl -- lsp-add outside outside_R1 \ -- lsp-set-addresses outside_R1 router multinode_nbctl -- --id=@gc0 create Gateway_Chassis name=outside_gw1 chassis_name=ovn-gw-1 priority=20 \ - -- --id=@gc1 create Gateway_Chassis name=outside_gw2 chassis_name=ovn-gw-2 priority=10 \ - -- set Logical_Router_Port R1_outside 'gateway_chassis=[@gc0,@gc1]' + -- --id=@gc1 create Gateway_Chassis name=outside_gw2 chassis_name=ovn-gw-2 priority=10 \ + -- --id=@gc2 create Gateway_Chassis name=outside_gw3 chassis_name=ovn-gw-3 priority=5 \ + -- set Logical_Router_Port R1_outside 'gateway_chassis=[@gc0,@gc1,@gc2]' # Create localnet port in outside check multinode_nbctl lsp-add outside ln-outside @@ -3065,11 +3075,13 @@ check multinode_nbctl lsp-set-options ln-ext1 network_name=public # Make sure garp-max-timeout-sec is not set m_as ovn-gw-1 ovs-vsctl remove open . external_ids garp-max-timeout-sec m_as ovn-gw-2 ovs-vsctl remove open . external_ids garp-max-timeout-sec +m_as ovn-gw-3 ovs-vsctl remove open . external_ids garp-max-timeout-sec m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex m_as ovn-gw-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex +m_as ovn-gw-3 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex m_as ovn-chassis-1 /data/create_fake_vm.sh inside1 hv1-vif1 f0:00:c0:a8:01:01 1500 192.168.1.1 24 192.168.1.254 2000::1/64 2000::a m_as ovn-chassis-2 /data/create_fake_vm.sh ext1 ext1 00:00:c0:a8:00:01 1500 192.168.0.1 24 192.168.0.254 1000::3/64 1000::a @@ -3078,13 +3090,14 @@ m_as ovn-chassis-2 /data/create_fake_vm.sh ext1 ext1 00:00:c0:a8:00:01 1500 192. m_check_row_count HA_Chassis_Group 1 name=R1_outside # There should be 2 ha_chassis rows in SB DB. 
-m_check_row_count HA_Chassis 2 'chassis!=[[]]' +m_check_row_count HA_Chassis 3 'chassis!=[[]]' ha_ch=$(m_fetch_column HA_Chassis_Group ha_chassis) m_check_column "$ha_ch" HA_Chassis _uuid gw1_chassis=$(m_fetch_column Chassis _uuid name=ovn-gw-1) gw2_chassis=$(m_fetch_column Chassis _uuid name=ovn-gw-2) +gw3_chassis=$(m_fetch_column Chassis _uuid name=ovn-gw-3) wait_bfd_enabled() { chassis=$1 @@ -3109,13 +3122,13 @@ wait_bfd_up() { } # check BFD enablement on tunnel ports from ovn-gw-1 ########## -for chassis in $from_gw1_to_gw2 $from_gw1_to_ch1; do +for chassis in $from_gw1_to_gw2 $from_gw1_to_gw3 $from_gw1_to_ch1; do echo "checking ovn-gw-1 -> $chassis" wait_bfd_enabled ovn-gw-1 $chassis done # check BFD enablement on tunnel ports from ovn-gw-2 ########## -for chassis in $from_gw2_to_gw1 $from_gw2_to_ch1; do +for chassis in $from_gw2_to_gw1 $from_gw2_to_gw3 $from_gw2_to_ch1; do echo "checking ovn-gw-2 -> $chassis" wait_bfd_enabled ovn-gw-2 $chassis done @@ -3131,12 +3144,15 @@ gw1_pid=$(podman inspect -f '{{.State.Pid}}' ovn-gw-1) nsenter --net=/proc/$gw1_pid/ns/net nft list tables | grep ovn-test && nsenter --net=/proc/$gw1_pid/ns/net nft delete table ip ovn-test on_exit "nsenter --net=/proc/$gw1_pid/ns/net nft list tables | grep ovn-test && nsenter --net=/proc/$gw1_pid/ns/net nft delete table ip ovn-test" -wait_bfd_up ovn-gw-1 $from_gw1_to_gw2 -wait_bfd_up ovn-gw-1 $from_gw1_to_ch1 -wait_bfd_up ovn-gw-2 $from_gw2_to_gw1 -wait_bfd_up ovn-gw-2 $from_gw2_to_ch1 -wait_bfd_up ovn-chassis-1 $from_ch1_to_gw1 -wait_bfd_up ovn-chassis-1 $from_ch1_to_gw2 +for chassis in $from_gw1_to_gw2 $from_gw1_to_gw3 $from_gw1_to_ch1; do + wait_bfd_up ovn-gw-1 $chassis +done +for chassis in $from_gw2_to_gw1 $from_gw2_to_gw3 $from_gw2_to_ch1; do + wait_bfd_up ovn-gw-2 $chassis +done +for chassis in $from_ch1_to_gw1 $from_ch1_to_gw2; do + wait_bfd_up ovn-chassis-1 $chassis +done m_wait_row_count Port_Binding 1 logical_port=cr-R1_outside chassis=$gw1_chassis check multinode_nbctl 
--wait=hv sync @@ -3149,11 +3165,13 @@ start_tcpdump() { M_START_TCPDUMP([ovn-gw-1], [-neei eth2 -Q out], [gw1_out]) M_START_TCPDUMP([ovn-gw-2], [-neei eth2], [gw2]) M_START_TCPDUMP([ovn-gw-2], [-neei eth2 -Q out], [gw2_out]) + M_START_TCPDUMP([ovn-gw-3], [-neei eth2], [gw3]) + M_START_TCPDUMP([ovn-gw-3], [-neei eth2 -Q out], [gw3_out]) } stop_tcpdump() { echo "$(date +%H:%M:%S.%03N) Stopping tcpdump" - m_kill 'ovn-gw-1 ovn-gw-2 ovn-chassis-1 ovn-chassis-2' tcpdump + m_kill 'ovn-gw-1 ovn-gw-2 ovn-gw-3 ovn-chassis-1 ovn-chassis-2' tcpdump } # Send packets from chassis2 (ext1) to chassis1 @@ -3172,79 +3190,216 @@ stop_sending_background_packets() { stop_tcpdump } +check_for_new_garps() { + hv=$1 + expecting_garp=$2 + n_new_garps=$(cat ${hv}_out.tcpdump | grep -c "f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, length 28") + + if [ "$expecting_garp" == "true" ]; then + AS_BOX([$(date +%H:%M:%S.%03N) Waiting/checking for garp from $hv - Starting with $n_new_garps]) + OVS_WAIT_UNTIL([ + n_garps=$n_new_garps + n_new_garps=$(cat ${hv}_out.tcpdump | grep -c "f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, length 28") + echo "We saw $n_new_garps so far on ${hv}." + test "$n_garps" -ne "$n_new_garps" + ]) + else + AS_BOX([$(date +%H:%M:%S.%03N) Checking no garp from ${hv}]) + # Waiting a few seconds to get a chance to see unexpected garps. 
+ sleep 3 + n_garps=$(cat ${hv}_out.tcpdump | grep -c "f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, length 28") + AT_CHECK([test "$n_garps" -eq "$n_new_garps"]) + fi +} + +check_for_new_echo_pkts() { + hv=$1 + mac_src=$2 + mac_dst=$3 + expecting_pkts=$4 + n_new_echo_req=$(cat ${hv}.tcpdump | grep -c "$mac_src > $mac_dst, ethertype IPv4 (0x0800), length 98: 192.168.0.1 > 192.168.1.1: ICMP echo request") + n_new_echo_rep=$(cat ${hv}.tcpdump | grep -c "$mac_dst > $mac_src, ethertype IPv4 (0x0800), length 98: 192.168.1.1 > 192.168.0.1: ICMP echo reply") + + if [ "$expecting_pkts" == "true" ]; then + AS_BOX([$(date +%H:%M:%S.%03N) Waiting/checking for echo pkts through ${hv}]) + echo "Starting with $n_new_echo_req requests and $n_new_echo_rep replies so far on ${hv}." + OVS_WAIT_UNTIL([ + n_echo_req=$n_new_echo_req + n_echo_rep=$n_new_echo_rep + n_new_echo_req=$(cat ${hv}.tcpdump | grep -c "$mac_src > $mac_dst, ethertype IPv4 (0x0800), length 98: 192.168.0.1 > 192.168.1.1: ICMP echo request") + n_new_echo_rep=$(cat ${hv}.tcpdump | grep -c "$mac_dst > $mac_src, ethertype IPv4 (0x0800), length 98: 192.168.1.1 > 192.168.0.1: ICMP echo reply") + echo "We saw $n_new_echo_req requests and $n_new_echo_rep replies so far on ${hv}." + test "$n_echo_req" -ne "$n_new_echo_req" && test "$n_echo_rep" -ne "$n_new_echo_rep" + ]) + else + AS_BOX([$(date +%H:%M:%S.%03N) Checking no pkts from ${hv}]) + # Waiting a few seconds to get a chance to see unexpected pkts. + sleep 3 + n_echo_req=$(cat ${hv}.tcpdump | grep -c "$mac_src > $mac_dst, ethertype IPv4 (0x0800), length 98: 192.168.0.1 > 192.168.1.1: ICMP echo request") + n_echo_rep=$(cat ${hv}.tcpdump | grep -c "$mac_dst > $mac_src, ethertype IPv4 (0x0800), length 98: 192.168.1.1 > 192.168.0.1: ICMP echo reply") + echo "We saw $n_new_echo_req requests and $n_new_echo_rep replies on ${hv}." 
+ AT_CHECK([test "$n_echo_req" -eq "$n_new_echo_req" && test "$n_echo_rep" -eq "$n_new_echo_rep"]) + fi +} + +dump_statistics() { + n1=$(m_as ovn-gw-1 grep -c Changing /var/log/ovn/ovn-controller.log) + n2=$(m_as ovn-gw-2 grep -c Changing /var/log/ovn/ovn-controller.log) + n3=$(m_as ovn-gw-3 grep -c Changing /var/log/ovn/ovn-controller.log) + ch1_req=$(grep -c "ICMP echo request" ch1.tcpdump) + ch1_rep=$(grep -c "ICMP echo reply" ch1.tcpdump) + ch2_req=$(grep -c "ICMP echo request" ch2.tcpdump) + ch2_rep=$(grep -c "ICMP echo reply" ch2.tcpdump) + gw1_req=$(grep -c "ICMP echo request" gw1.tcpdump) + gw1_rep=$(grep -c "ICMP echo reply" gw1.tcpdump) + gw2_req=$(grep -c "ICMP echo request" gw2.tcpdump) + gw2_rep=$(grep -c "ICMP echo reply" gw2.tcpdump) + gw3_req=$(grep -c "ICMP echo request" gw3.tcpdump) + gw3_rep=$(grep -c "ICMP echo reply" gw3.tcpdump) + echo "$n1 claims in gw1, $n2 in gw2 and $n3 on gw3" + echo "ch2_request=$ch2_req gw1_request=$gw1_req gw2_request=$gw2_req gw3_request=$gw3_req ch1_request=$ch1_req ch1_reply=$ch1_rep gw1_reply=$gw1_rep gw2_reply=$gw2_rep gw3_reply=$gw3_rep ch2_reply=$ch2_rep" +} + check_migration_between_gw1_and_gw2() { + action=$1 send_background_packets - # We do not check whether GARP have been generated on gw1: the test might have been start already quite some time ago. # We make sure gw1 is leader since enough time that it generated all its garps. AS_BOX([$(date +%H:%M:%S.%03N) Waiting all garps sent by gw1]) - new_n_garps=$(cat gw1_out.tcpdump | grep -c "f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, length 28") + n_new_garps=$(cat gw1_out.tcpdump | grep -c "f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, length 28") OVS_WAIT_UNTIL([ - n_garps=$new_n_garps + n_garps=$n_new_garps echo "We saw $n_garps so far." # Garp delay might be up to 8 seconds. 
sleep 10 - new_n_garps=$(cat gw1_out.tcpdump | grep -c "f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, length 28") - test "$n_garps" == "$new_n_garps" -]) + n_new_garps=$(cat gw1_out.tcpdump | grep -c "f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, length 28") + test "$n_garps" -eq "$n_new_garps" + ]) - # It should go through gw1 - AS_BOX([$(date +%H:%M:%S.%03N) Waiting/checking it went through gw1]) - OVS_WAIT_FOR_OUTPUT([cat gw1.tcpdump | grep "ICMP echo" | cut -d ' ' -f2-15 | sort | uniq], [0], [dnl -00:00:c0:a8:00:01 > f0:00:c0:a8:00:fe, ethertype IPv4 (0x0800), length 98: 192.168.0.1 > 192.168.1.1: ICMP echo request, -f0:00:c0:a8:00:fe > 00:00:c0:a8:00:01, ethertype IPv4 (0x0800), length 98: 192.168.1.1 > 192.168.0.1: ICMP echo reply, -]) + # All packets should go through gw1, and none through gw2 or gw3. + check_for_new_echo_pkts gw1 "00:00:c0:a8:00:01" "f0:00:c0:a8:00:fe" "true" + check_for_new_echo_pkts gw2 "00:00:c0:a8:00:01" "f0:00:c0:a8:00:fe" "false" + check_for_new_echo_pkts gw3 "00:00:c0:a8:00:01" "f0:00:c0:a8:00:fe" "false" - flap_count=$(m_as ovn-gw-1 ovs-vsctl get interface $from_gw1_to_gw2 bfd_status | sed 's/.*flap_count=\"\([[0-9]]*\).*/\1/g') + flap_count_gw_1=$(m_as ovn-gw-1 ovs-vsctl get interface $from_gw1_to_gw2 bfd_status | sed 's/.*flap_count=\"\([[0-9]]*\).*/\1/g') + flap_count_gw_2=$(m_as ovn-gw-2 ovs-vsctl get interface $from_gw2_to_gw1 bfd_status | sed 's/.*flap_count=\"\([[0-9]]*\).*/\1/g') - AS_BOX([$(date +%H:%M:%S.%03N) Blocking bfd on gw1 (from $ip_gw1 to $ip_gw2)]) - nsenter --net=/proc/$gw1_pid/ns/net nft add table ip ovn-test - nsenter --net=/proc/$gw1_pid/ns/net nft 'add chain ip ovn-test INPUT { type filter hook input priority 0; policy accept; }' - # Drop BFD from gw-1 to gw-2: geneve port (6081), inner port 3784 (0xec8), Session state Up, Init, Down. 
- nsenter --net=/proc/$gw1_pid/ns/net nft add rule ip ovn-test INPUT ip daddr $ip_gw1 ip saddr $ip_gw2 udp dport 6081 '@th,416,16 == 0x0ec8 @th,472,8 == 0xc0 counter drop' - nsenter --net=/proc/$gw1_pid/ns/net nft add rule ip ovn-test INPUT ip daddr $ip_gw1 ip saddr $ip_gw2 udp dport 6081 '@th,416,16 == 0x0ec8 @th,472,8 == 0x80 counter drop' - nsenter --net=/proc/$gw1_pid/ns/net nft add rule ip ovn-test INPUT ip daddr $ip_gw1 ip saddr $ip_gw2 udp dport 6081 '@th,416,16 == 0x0ec8 @th,472,8 == 0x40 counter drop' + if [ test "$action" == "stop_bfd" ]; then + AS_BOX([$(date +%H:%M:%S.%03N) Blocking bfd on gw1 (from $ip_gw1 to $ip_gw2)]) + nsenter --net=/proc/$gw1_pid/ns/net nft add table ip ovn-test + nsenter --net=/proc/$gw1_pid/ns/net nft 'add chain ip ovn-test INPUT { type filter hook input priority 0; policy accept; }' + # Drop BFD from gw-1 to gw-2: geneve port (6081), inner port 3784 (0xec8), Session state Up, Init, Down. + nsenter --net=/proc/$gw1_pid/ns/net nft add rule ip ovn-test INPUT ip daddr $ip_gw1 ip saddr $ip_gw2 udp dport 6081 '@th,416,16 == 0x0ec8 @th,472,8 == 0xc0 counter drop' + nsenter --net=/proc/$gw1_pid/ns/net nft add rule ip ovn-test INPUT ip daddr $ip_gw1 ip saddr $ip_gw2 udp dport 6081 '@th,416,16 == 0x0ec8 @th,472,8 == 0x80 counter drop' + nsenter --net=/proc/$gw1_pid/ns/net nft add rule ip ovn-test INPUT ip daddr $ip_gw1 ip saddr $ip_gw2 udp dport 6081 '@th,416,16 == 0x0ec8 @th,472,8 == 0x40 counter drop' - # We do not check that packets go through gw2 as BFD between chassis-2 and gw1 is still up - AS_BOX([$(date +%H:%M:%S.%03N) Waiting for flap count between gw1 and gw2 to increase]) - OVS_WAIT_UNTIL([ - new_flap_count=$(m_as ovn-gw-1 ovs-vsctl get interfac $from_gw1_to_gw2 bfd_status | sed 's/.*flap_count=\"\([[0-9]]*\).*/\1/g') - echo "Comparing $new_flap_count versus $flap_count" - test "$new_flap_count" -gt "$((flap_count))" - ]) + # We do not check that packets go through gw2 as BFD between chassis-2 and gw1 is still up + fi - 
AS_BOX([$(date +%H:%M:%S.%03N) Flapped!]) - stop_sending_background_packets - cp gw1.tcpdump gw1.1.tcpdump - cp gw2.tcpdump gw2.1.tcpdump - cp ch1.tcpdump ch1.1.tcpdump - cp ch2.tcpdump ch2.1.tcpdump + if [ test "$action" == "kill_gw2" ]; then + AS_BOX([$(date +%H:%M:%S.%03N) Killing gw2 ovn-controller]) + on_exit 'm_as ovn-gw-2 /usr/share/openvswitch/scripts/ovs-ctl status || + m_as ovn-gw-2 /usr/share/openvswitch/scripts/ovs-ctl start --system-id=ovn-gw-1' + on_exit 'm_as ovn-gw-2 /usr/share/ovn/scripts/ovn-ctl status_controller || + m_as ovn-gw-2 /usr/share/ovn/scripts/ovn-ctl start_controller ${CONTROLLER_SSL_ARGS}' + + m_as ovn-gw-2 kill -9 $(m_as ovn-gw-2 cat /run/ovn/ovn-controller.pid) + m_as ovn-gw-2 kill -9 $(m_as ovn-gw-2 cat /run/openvswitch/ovs-vswitchd.pid) + m_as ovn-gw-2 kill -9 $(m_as ovn-gw-2 cat /run/openvswitch/ovsdb-server.pid) + # Also delete datapath (flows) + m_as ovn-gw-2 ovs-dpctl del-dp system@ovs-system + fi - send_background_packets - # Give some time for gw1 and gw2 to fight. 
- sleep 5 - nsenter --net=/proc/$gw1_pid/ns/net nft -a list ruleset + if [ test "$action" == "kill_gw1" ]; then + AS_BOX([$(date +%H:%M:%S.%03N) Killing gw1 ovn-controller]) + on_exit 'm_as ovn-gw-1 /usr/share/openvswitch/scripts/ovs-ctl status || + m_as ovn-gw-1 /usr/share/openvswitch/scripts/ovs-ctl start --system-id=ovn-gw-1' + on_exit 'm_as ovn-gw-1 /usr/share/ovn/scripts/ovn-ctl status_controller || + m_as ovn-gw-1 /usr/share/ovn/scripts/ovn-ctl start_controller ${CONTROLLER_SSL_ARGS}' + + m_as ovn-gw-1 kill -9 $(m_as ovn-gw-1 cat /run/ovn/ovn-controller.pid) + m_as ovn-gw-1 kill -9 $(m_as ovn-gw-1 cat /run/openvswitch/ovs-vswitchd.pid) + m_as ovn-gw-1 kill -9 $(m_as ovn-gw-1 cat /run/openvswitch/ovsdb-server.pid) + # Also delete datapath (flows) + m_as ovn-gw-1 ovs-dpctl del-dp system@ovs-system + fi - AS_BOX([$(date +%H:%M:%S.%03N) Unblocking bfd on gw1]) - nsenter --net=/proc/$gw1_pid/ns/net nft delete table ip ovn-test + if [ test "$action" == "kill_gw2" ]; then + AS_BOX([$(date +%H:%M:%S.%03N) Waiting for flap count between gw1 and gw2 to increase]) + OVS_WAIT_UNTIL([ + new_flap_count=$(m_as ovn-gw-1 ovs-vsctl get interfac $from_gw1_to_gw2 bfd_status | sed 's/.*flap_count=\"\([[0-9]]*\).*/\1/g') + echo "Comparing $new_flap_count versus $flap_count_gw_1" + test "$new_flap_count" -gt "$((flap_count_gw_1))" + ]) + else + AS_BOX([$(date +%H:%M:%S.%03N) Waiting for flap count between gw2 and gw1 to increase]) + OVS_WAIT_UNTIL([ + new_flap_count=$(m_as ovn-gw-2 ovs-vsctl get interfac $from_gw2_to_gw1 bfd_status | sed 's/.*flap_count=\"\([[0-9]]*\).*/\1/g') + echo "Comparing $new_flap_count versus $flap_count_gw_2" + test "$new_flap_count" -gt "$((flap_count_gw_2))" + ]) - AS_BOX([$(date +%H:%M:%S.%03N) Waiting/checking for garp from gw1]) - OVS_WAIT_FOR_OUTPUT([cat gw1_out.tcpdump | grep "Broadcast" | cut -d ' ' -f2-16 | sort | uniq], [0], [dnl -f0:00:c0:a8:00:fe > Broadcast, ethertype ARP (0x0806), length 42: Request who-has 192.168.0.254 tell 192.168.0.254, 
length 28 -]) + fi + AS_BOX([$(date +%H:%M:%S.%03N) Flapped!]) - AS_BOX([$(date +%H:%M:%S.%03N) Waiting traffic went through gw1]) - OVS_WAIT_FOR_OUTPUT([cat gw1.tcpdump| grep "ICMP echo" | cut -d ' ' -f2-15 | sort | uniq], [0], [dnl -00:00:c0:a8:00:01 > f0:00:c0:a8:00:fe, ethertype IPv4 (0x0800), length 98: 192.168.0.1 > 192.168.1.1: ICMP echo request, -f0:00:c0:a8:00:fe > 00:00:c0:a8:00:01, ethertype IPv4 (0x0800), length 98: 192.168.1.1 > 192.168.0.1: ICMP echo reply, -]) + # Wait a few more second for the fight. + sleep 2 + AS_BOX([$(date +%H:%M:%S.%03N) Statistics after flapping]) + dump_statistics + + if [ test "$action" == "stop_bfd" ]; then + # gw1 still alive and gw2 tried to claim => gw1 should restart generating garps. + check_for_new_garps gw1 "true" + check_for_new_garps gw2 "false" + check_for_new_garps gw3 "false" + check_for_new_echo_pkts gw1 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "true" + check_for_new_echo_pkts gw2 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts gw3 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts ch1 f0:00:c0:a8:01:fe f0:00:c0:a8:01:01 "true" + AS_BOX([$(date +%H:%M:%S.%03N) Unblocking bfd on gw1]) + nsenter --net=/proc/$gw1_pid/ns/net nft -a list ruleset + nsenter --net=/proc/$gw1_pid/ns/net nft delete table ip ovn-test + fi - AS_BOX([$(date +%H:%M:%S.%03N) Waiting it reaches ch1]) - OVS_WAIT_FOR_OUTPUT([cat ch1.tcpdump | grep "ICMP echo" | cut -d ' ' -f2-15 | sort | uniq], [0], [dnl -f0:00:c0:a8:01:01 > f0:00:c0:a8:01:fe, ethertype IPv4 (0x0800), length 98: 192.168.1.1 > 192.168.0.1: ICMP echo reply, -f0:00:c0:a8:01:fe > f0:00:c0:a8:01:01, ethertype IPv4 (0x0800), length 98: 192.168.0.1 > 192.168.1.1: ICMP echo request, -]) + if [ test "$action" == "kill_gw2" ]; then + # gw1 still alive, but gw2 did not try to claim => gw1 should not generate new garps. 
+ check_for_new_garps gw1 "false" + check_for_new_garps gw2 "false" + check_for_new_garps gw3 "false" + check_for_new_echo_pkts gw1 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "true" + check_for_new_echo_pkts gw2 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts gw3 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts ch1 f0:00:c0:a8:01:fe f0:00:c0:a8:01:01 "true" + AS_BOX([$(date +%H:%M:%S.%03N) Restarting gw2 ovn-vswitchd]) + m_as ovn-gw-2 /usr/share/openvswitch/scripts/ovs-ctl start --system-id=ovn-gw-2 + + AS_BOX([$(date +%H:%M:%S.%03N) Restarting gw2 ovn-controller]) + m_as ovn-gw-2 /usr/share/ovn/scripts/ovn-ctl start_controller ${CONTROLLER_SSL_ARGS} + fi + if [ test "$action" == "kill_gw1" ]; then + # gw1 died => gw2 should generate garps. + check_for_new_garps gw1 "false" + check_for_new_garps gw2 "true" + check_for_new_garps gw3 "false" + check_for_new_echo_pkts gw1 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts gw2 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "true" + check_for_new_echo_pkts gw3 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts ch1 f0:00:c0:a8:01:fe f0:00:c0:a8:01:01 "true" + AS_BOX([$(date +%H:%M:%S.%03N) Restarting gw1 ovn-vswitchd]) + m_as ovn-gw-1 /usr/share/openvswitch/scripts/ovs-ctl start --system-id=ovn-gw-1 + + AS_BOX([$(date +%H:%M:%S.%03N) Restarting gw1 ovn-controller]) + m_as ovn-gw-1 /usr/share/ovn/scripts/ovn-ctl start_controller ${CONTROLLER_SSL_ARGS} + fi + + # The network is now restored => packets should go through gw1 and reach chassis-1. 
+ check_for_new_echo_pkts gw1 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "true" + check_for_new_echo_pkts gw2 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts gw3 00:00:c0:a8:00:01 f0:00:c0:a8:00:fe "false" + check_for_new_echo_pkts ch1 f0:00:c0:a8:01:fe f0:00:c0:a8:01:01 "true" + AS_BOX([$(date +%H:%M:%S.%03N) Statistics after network restored]) + dump_statistics stop_sending_background_packets } @@ -3270,20 +3425,14 @@ f0:00:c0:a8:00:fe > 00:00:c0:a8:00:01, ethertype IPv4 (0x0800), length 98: 192.1 00:00:c0:a8:00:01 > f0:00:c0:a8:00:fe, ethertype IPv4 (0x0800), length 98: 192.168.0.1 > 192.168.1.1: ICMP echo reply, ]) -check_migration_between_gw1_and_gw2 - -n1=$(m_as ovn-gw-1 grep -c Changing /var/log/ovn//ovn-controller.log) -n2=$(m_as ovn-gw-2 grep -c Changing /var/log/ovn//ovn-controller.log) -ch1_req=$(grep -c "ICMP echo request" ch1.tcpdump) -ch1_rep=$(grep -c "ICMP echo reply" ch1.tcpdump) -ch2_req=$(grep -c "ICMP echo request" ch2.tcpdump) -ch2_rep=$(grep -c "ICMP echo reply" ch2.tcpdump) -gw1_req=$(grep -c "ICMP echo request" gw1.tcpdump) -gw1_rep=$(grep -c "ICMP echo reply" gw1.tcpdump) -gw2_req=$(grep -c "ICMP echo request" gw2.tcpdump) -gw2_rep=$(grep -c "ICMP echo reply" gw2.tcpdump) -echo "$n1 claims in gw1 and $n2 in gw2" -echo "ch2_request=$ch2_req gw1_request=$gw1_req gw2_request=$gw2_req ch1_request=$ch1_req ch1_reply=$ch1_rep gw1_reply=$gw1_rep gw2_reply=$gw2_rep ch2_reply=$ch2_rep" +# We stop bfd between gw1 & gw2, but keep gw1 & gw2 running. +check_migration_between_gw1_and_gw2 "stop_bfd" + +# We simulate death of gw2. It should not have any effect. +check_migration_between_gw1_and_gw2 "kill_gw2" + +# We simulate death of gw1. gw2 should take over. +check_migration_between_gw1_and_gw2 "kill_gw1" AT_CLEANUP ]) -- 2.47.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev