Expand the NxN test with the network connectivity check between all the
nodes.  Unfortunately, we can't really run this test with Libreswan 4.x,
since, due to internal issues in these versions, we are getting into
states where everything is loaded and active, but no traffic can pass.
This is an internal issue in Libreswan that we can't work around from
the outside.  So, the fix is required in Libreswan itself.  4.5 and
earlier versions seem to not be affected by this problem, at least not
severely affected, but it's easier to just cut off all the 4.x versions
from the test.

3.32 version from Ubuntu 22.04 and Libreswan 5.1 work just fine with
this test.

Test is relatively long, but it is very valuable, IMO.  Besides
stressing ovs-monitor-ipsec with various failure and asynchronous
connection establishment conditions, which are important for OVS, it
also was used to reproduce and fix several bugs in Libreswan 4.x.
Unfortunately, not all the issues are understood and fixed yet.

Signed-off-by: Ilya Maximets <[email protected]>
---
 tests/system-ipsec.at | 84 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 76 insertions(+), 8 deletions(-)

diff --git a/tests/system-ipsec.at b/tests/system-ipsec.at
index de459804b..4ab384d89 100644
--- a/tests/system-ipsec.at
+++ b/tests/system-ipsec.at
@@ -71,7 +71,9 @@ m4_define([IPSEC_ADD_NODE],
   on_exit "kill `cat $ovs_base/$1/ovs-monitor-ipsec.pid`"
 
   dnl Set up OVS bridge
-  NS_EXEC([$1], [ovs-vsctl --db unix:$ovs_base/$1/db.sock add-br br-ipsec])]
+  NS_CHECK_EXEC([$1],
+    [ovs-vsctl --db unix:$ovs_base/$1/db.sock add-br br-ipsec \
+               -- set-controller br-ipsec punix:$ovs_base/br-ipsec.$1.mgmt])]
 )
 m4_define([IPSEC_ADD_NODE_LEFT], [IPSEC_ADD_NODE(left, p0, $1, $2)])
 m4_define([IPSEC_ADD_NODE_RIGHT], [IPSEC_ADD_NODE(right, p1, $1, $2)])
@@ -429,7 +431,8 @@ m4_for([id], [1], NODES, [1], [
                 self-sign node-id], [0], [stdout])
   AT_CHECK(OVS_VSCTL([node-id], set Open_vSwitch . \
       other_config:certificate=${ovs_base}/node-id-cert.pem \
-      other_config:private_key=${ovs_base}/node-id-privkey.pem),
+      other_config:private_key=${ovs_base}/node-id-privkey.pem \
+      -- set bridge br-ipsec other-config:hwaddr=f2:ff:00:00:00:id),
       [0], [ignore], [ignore])
   on_exit "ipsec --rundir $ovs_base/node-id status > $ovs_base/node-id/status"
 ])
@@ -445,11 +448,18 @@ m4_for([LEFT], [1], NODES, [1], [
     fi
 ])])
 
+dnl These are not necessary, but nice to have in the test log in
+dnl order to spot pluto failures during the test.
+on_exit "grep -E 'Timed out|outdated|half-loaded|defunct' \
+            $ovs_base/node-*/ovs-monitor-ipsec.log"
+on_exit "grep -E 'ABORT|ERROR' $ovs_base/node-*/pluto.log"
+
 m4_define([WAIT_FOR_LOADED_CONNS], [
   m4_for([id], [1], NODES, [1], [
     echo "================== node-id ========================="
     iterations=0
     loaded=0
+    active=0
     dnl Using a custom loop instead of OVS_WAIT_UNTIL, because it may take
     dnl much longer than a default timeout.  The default retransmit timeout
     dnl for pluto is 60 seconds.  Also, we need to make sure pluto didn't
@@ -463,8 +473,11 @@ m4_define([WAIT_FOR_LOADED_CONNS], [
         START_PLUTO([node-id])
       else
         loaded=$(IPSEC_STATUS_LOADED(node-id))
+        m4_if([$1], [active],
+              [active=$(IPSEC_STATUS_ACTIVE(node-id))], [active=$loaded])
       fi
-      if test "$loaded" -ne $(( (NODES - 1) * 2 )); then
+      if test "$loaded" -ne "$(( (NODES - 1) * 2 ))" -o \
+              "$loaded" -ne "$active"; then
         sleep 3
       else
         break
@@ -505,11 +518,66 @@ OVS_WAIT_UNTIL([grep -q 'tun-2.*need to reconcile' \
 dnl Wait for all the connections to be loaded back.
 WAIT_FOR_LOADED_CONNS()
 
-dnl These are not necessary, but nice to have in the test log in
-dnl order to spot pluto failures during the test.
-grep -E 'Timed out|outdated|half-loaded|defunct' \
-            $ovs_base/node-*/ovs-monitor-ipsec.log
-grep -E 'ABORT|ERROR' $ovs_base/node-*/pluto.log
+dnl Next section will check connectivity between all the nodes.
+dnl Different versions of Libreswan 4.x have issues where connections
+dnl are not being correctly established or never become active in a
+dnl way that can not be mitigated from ovs-monitor-ipsec or the test.
+dnl So, only checking connectivity for Libreswan 3- or 5+.
+dnl Skipping in the middle of the test, so test can still fail while
+dnl testing with Libreswan 4, if the first half fails.
+AT_SKIP_IF([ipsec --version 2>&1 | grep -q 'Libreswan 4\.'])
+
+dnl Turn off IPv6 and add static ARP entries for all namespaces to avoid
+dnl any broadcast / multicast traffic that would otherwise be multiplied
+dnl by each node creating a traffic storm.  Add specific OpenFlow rules
+dnl to forward traffic to exact destinations without any MAC learning.
+m4_for([LEFT], [1], NODES, [1], [
+  NS_CHECK_EXEC([node-LEFT], [sysctl -w net.ipv6.conf.all.disable_ipv6=1],
+                [0], [ignore])
+  AT_CHECK([ovs-ofctl del-flows unix:$ovs_base/br-ipsec.node-LEFT.mgmt])
+  AT_CHECK([ovs-ofctl add-flow unix:$ovs_base/br-ipsec.node-LEFT.mgmt \
+                  "dl_dst=f2:ff:00:00:00:LEFT actions=LOCAL"])
+  m4_for([RIGHT], [1], NODES, [1], [
+    if test LEFT -ne RIGHT; then
+      NS_CHECK_EXEC([node-LEFT],
+        [ip neigh add 192.0.0.RIGHT lladdr f2:ff:00:00:00:RIGHT dev br-ipsec])
+      AT_CHECK([ovs-ofctl add-flow unix:$ovs_base/br-ipsec.node-LEFT.mgmt \
+                  "dl_dst=f2:ff:00:00:00:RIGHT actions=tun-RIGHT"])
+    fi
+  ])
+])
+
+dnl Bring up and add IP addresses for br-ipsec interface.
+m4_for([id], [1], NODES, [1], [
+  echo "================== node-id ========================="
+  NS_CHECK_EXEC([node-id], [ip addr add 192.0.0.id/24 dev br-ipsec])
+  NS_CHECK_EXEC([node-id], [ip link set dev br-ipsec up])
+])
+
+dnl Wait for all the connections to be loaded and active.  In case one of
+dnl the pluto processes crashed some of the connections may never become
+dnl active.  But we did run this loop with a pluto reviving logic twice
+dnl already, so the chances for pluto to be down here are much lower.
+WAIT_FOR_LOADED_CONNS([active])
+
+dnl Check the full mesh ping.
+m4_for([LEFT], [1], NODES, [1], [
+  m4_for([RIGHT], [1], NODES, [1], [
+    if test LEFT -ne RIGHT; then
+      echo "====== ping: node-LEFT --> node-RIGHT =========="
+      dnl Ping without checking in case connection will recover after the
+      dnl first packet.
+      NS_CHECK_EXEC([node-LEFT],
+                    [ping -q -c 1 -W 2 192.0.0.RIGHT | FORMAT_PING],
+                    [ignore], [stdout])
+      dnl Now check.  If this one fails, there is no actual connectivity.
+      NS_CHECK_EXEC([node-LEFT],
+                    [ping -q -c 3 -i 0.1 -W 2 192.0.0.RIGHT | FORMAT_PING],
+                    [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+    fi
+])])
 
 OVS_TRAFFIC_VSWITCHD_STOP()
 AT_CLEANUP
-- 
2.46.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to