UDP health checks rely on receiving either an ICMP error to indicate
that the service is down, or no response as a way to indicate that the
service is live.

If a particularly dumb service (for instance a UDP "echo" service) is
configured, it will send UDP responses to our health check probes. In
this case, we don't want to send the response to our health check
processing, because the health check does not expect to get a UDP
response. Instead, ignore the response and let it get dropped eventually
by the logical router.

Fixes: ceea1d7cccad ("northd: Allow LB health checks from router IPs.")
Reported-at: https://redhat.atlassian.net/browse/FDP-3435
Co-authored-by: Dumitru Ceara <[email protected]>
Signed-off-by: Dumitru Ceara <[email protected]>
Signed-off-by: Mark Michelson <[email protected]>
---
 northd/northd.c     | 32 +++++++++--------
 tests/ovn.at        | 26 +++-----------
 tests/system-ovn.at | 87 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 36 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 734efea65..89d58074c 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -8872,25 +8872,29 @@ build_lb_health_check_response_lflows(
             /* icmp6 type 1 and icmp4 type 3 are included in the match, because
              * the controller is using them to detect unreachable ports. */
             if (addr_is_ipv6(backend_nb->svc_mon_src_ip)) {
-                ds_put_format(match,
-                              "inport == \"%s\" && ip6.dst == %s && "
-                              "eth.dst == %s && "
-                              "(%s.src == %s || icmp6.type == 1)",
+                ds_put_format(match, "inport == \"%s\" && ip6.dst == %s && "
+                              "ip6.src == %s && eth.dst == %s && ",
                               backend_nb->logical_port,
                               backend_nb->svc_mon_src_ip,
-                              backend_nb->svc_mon_lrp->lrp_networks.ea_s,
-                              protocol,
-                              backend->port_str);
+                              backend->ip_str,
+                              backend_nb->svc_mon_lrp->lrp_networks.ea_s);
+                if (!strcmp(protocol, "tcp")) {
+                    ds_put_format(match, "tcp.src == %s", backend->port_str);
+                } else {
+                    ds_put_cstr(match, "icmp6.type == 1");
+                }
             } else {
-                ds_put_format(match,
-                              "inport == \"%s\" && ip4.dst == %s && "
-                              "eth.dst == %s && "
-                              "(%s.src == %s || icmp4.type == 3)",
+                ds_put_format(match, "inport == \"%s\" && ip4.dst == %s && "
+                              "ip4.src == %s && eth.dst == %s && ",
                               backend_nb->logical_port,
                               backend_nb->svc_mon_src_ip,
-                              backend_nb->svc_mon_lrp->lrp_networks.ea_s,
-                              protocol,
-                              backend->port_str);
+                              backend->ip_str,
+                              backend_nb->svc_mon_lrp->lrp_networks.ea_s);
+                if (!strcmp(protocol, "tcp")) {
+                    ds_put_format(match, "tcp.src == %s", backend->port_str);
+                } else {
+                    ds_put_cstr(match, "icmp4.type == 3");
+                }
             }
 
             /* ovn-controller expects health check responses from the LS
diff --git a/tests/ovn.at b/tests/ovn.at
index 6580de6c2..750cff7c2 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -27208,20 +27208,11 @@ AT_CHECK([grep "priority=110" sbflows2 | grep 
"handle_svc_check" | \
           grep 'inport == "sw1-p1"' | grep "ip4.dst == 20.0.0.1" | \
           grep -q "tcp.src == 80" ])
 
-# Verify flow also matches ICMP unreachable (type 3 for IPv4).
-AT_CHECK([grep "priority=110" sbflows1 | grep "handle_svc_check" | \
-          grep 'inport == "sw0-p1"' | grep "ip4.dst == 10.0.0.1" | \
-          grep -q "icmp4.type == 3"])
-
-AT_CHECK([grep "priority=110" sbflows2 | grep "handle_svc_check" | \
-          grep 'inport == "sw1-p1"' | grep "ip4.dst == 20.0.0.1" | \
-          grep -q "icmp4.type == 3"])
-
 ## UDP Load balancer health check flows.
-# UDP backend - verify flow uses UDP instead of TCP.
+# UDP backend - verify flow uses ICMP type 3 instead of TCP.
 AT_CHECK([grep "priority=110" sbflows1 | grep "handle_svc_check" | \
           grep 'inport == "sw0-p2"' | grep "ip4.dst == 10.0.0.1" | \
-          grep -q "udp.src == 53"])
+          grep -q "icmp4.type == 3"])
 
 # Verify NO TCP match in the UDP backend flow.
 AT_CHECK([grep "priority=110" sbflows1 | grep "handle_svc_check" | \
@@ -27370,20 +27361,11 @@ AT_CHECK([grep "priority=110" sbflows2 | grep 
"handle_svc_check" | \
           grep 'inport == "sw1-p1"' | grep "ip6.dst == 2002::1" | \
           grep -q "tcp.src == 80" ])
 
-# Verify flow also matches ICMP unreachable (type 1 for IPv6).
-AT_CHECK([grep "priority=110" sbflows1 | grep "handle_svc_check" | \
-          grep 'inport == "sw0-p1"' | grep "ip6.dst == 2001::1" | \
-          grep -q "icmp6.type == 1"])
-
-AT_CHECK([grep "priority=110" sbflows2 | grep "handle_svc_check" | \
-          grep 'inport == "sw1-p1"' | grep "ip6.dst == 2002::1" | \
-          grep -q "icmp6.type == 1"])
-
 ## UDP Load balancer health check flows.
-# UDP backend - verify flow uses UDP instead of TCP.
+# UDP backend - verify flow uses ICMP instead of TCP.
 AT_CHECK([grep "priority=110" sbflows1 | grep "handle_svc_check" | \
           grep 'inport == "sw0-p2"' | grep "ip6.dst == 2001::1" | \
-          grep -q "udp.src == 53"])
+          grep -q "icmp6.type == 1"])
 
 # Verify NO TCP match in the UDP backend flow.
 AT_CHECK([grep "priority=110" sbflows1 | grep "handle_svc_check" | \
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 7edb8a45b..79bad55a9 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -21496,3 +21496,90 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port 
patch-.*/d
 /connection dropped.*/d"])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Unsupported protocol message])
+AT_SKIP_IF([test $HAVE_NC = no])
+
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller.
+check ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . 
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller.
+start_daemon ovn-controller
+
+check ovn-nbctl ls-add ls1
+check ovn-nbctl lsp-add ls1 ls1p1
+check ovn-nbctl lsp-set-addresses ls1p1 "00:00:00:01:01:01 192.168.1.1"
+check ovn-nbctl lsp-add ls1 ls1p2
+check ovn-nbctl lsp-set-addresses ls1p2 "00:00:00:01:01:02 192.168.1.2"
+
+check ovn-nbctl lr-add lr1
+check ovn-nbctl lrp-add lr1 lr1-ls1 00:00:00:00:00:01 192.168.1.254/24
+check ovn-nbctl lsp-add ls1 ls1-lr1
+check ovn-nbctl lsp-set-addresses ls1-lr1 "00:00:00:00:00:01 192.168.1.254"
+check ovn-nbctl lsp-set-type ls1-lr1 router
+check ovn-nbctl lsp-set-options ls1-lr1 router-port=lr1-ls1
+
+check ovn-nbctl lrp-add lr1 lr1-ls2 00:00:00:00:00:02 192.168.2.254/24
+
+check ovn-nbctl ls-add ls2
+check ovn-nbctl lsp-add ls2 ls2-lr1
+check ovn-nbctl lsp-set-addresses ls2-lr1 "00:00:00:00:00:02 192.168.2.254"
+check ovn-nbctl lsp-set-type ls2-lr1 router
+check ovn-nbctl lsp-set-options ls2-lr1 router-port=lr1-ls2
+
+check ovn-nbctl lsp-add ls2 ls2p1
+check ovn-nbctl lsp-set-addresses ls2p1 "00:00:00:01:02:01 192.168.2.1"
+
+ADD_NAMESPACES(ls1p1)
+ADD_VETH(ls1p1, ls1p1, br-int, "192.168.1.1/24", "00:00:00:01:01:01",
+         "192.168.1.254")
+
+ADD_NAMESPACES(ls2p1)
+ADD_VETH(ls2p1, ls2p1, br-int, "192.168.2.1/24", "00:00:00:01:02:01",
+         "192.168.2.254")
+
+ADD_NAMESPACES(ls1p2)
+ADD_VETH(ls1p2, ls1p2, br-int, "192.168.1.2/24", "00:00:00:01:01:02",
+         "192.168.1.254")
+
+check ovn-nbctl lb-add lb0 192.168.5.1:12345 
192.168.1.1:12345,192.168.1.2:12345
+check ovn-nbctl ls-lb-add ls1 lb0
+check ovn-nbctl lr-lb-add lr1 lb0
+lb_uuid=$(fetch_column nb:Load_Balancer _uuid name=lb0)
+check ovn-nbctl set Load_Balancer $lb_uuid protocol=udp
+check ovn-nbctl --wait=hv set Logical_Router lr1 options:chassis="hv1"
+
+wait_for_ports_up ls1p1 ls1p2 ls2p1
+
+NETNS_DAEMONIZE([ls1p1], [nc -l 12345 --udp -k --sh-exec ls], [nc1.pid])
+NETNS_DAEMONIZE([ls1p2], [nc -l 12345 --udp -k --sh-exec ls], [nc2.pid])
+
+hc_uuid=$(ovn-nbctl --id=@hc create Load_Balancer_Health_Check 
vip="192.168.5.1\:12345" -- \
+          add Load_Balancer $lb_uuid health_check @hc)
+check ovn-nbctl set Load_Balancer_Health_Check $hc_uuid options:timeout=20 
options:success_count=3 options:failure_count=3
+check ovn-nbctl --wait=sb set load_balancer $lb_uuid 
ip_port_mappings:192.168.1.1=ls1p1:192.168.1.254
+
+NS_EXEC([ls2p1], [nc --udp 192.168.5.1 12345 <<< h])
+
+# It may not seem like we're actually testing anything in this test.
+# If there is a warning or error in the ovn-controller log about
+# an unsupported health check protocol, it will cause a test failure
+# when we stop ovn-controller.
+OVN_CLEANUP_CONTROLLER([hv1])
+OVN_CLEANUP_NORTHD
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])
-- 
2.52.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to