On 01/11/2024 3:23, Ilya Maximets wrote:
> Add a test to check establishment of IPsec connections among multiple
> nodes and check the reconciliation logic along the way.
>
> The test:
> - Creates 20 network namespaces.
> - Starts Libreswan, OVS and ovs-monitor-ipsec in each of them.
> - Adds a geneve tunnel from each namespace to every other namespace.
> - Checks that each namespace has all the IPsec connections loaded.
> - Removes a few connections manually.
> - Checks that these connections are added back.
>
> Unfortunately, pluto crashes frequently in many widely used versions
> of Libreswan. For that reason, the test tries to bring pluto back
> online whenever it finds a dead one.
>
> Also, since retransmit-timeout is 60 seconds and our command timeout
> is 120, we can't actually use the OVS_WAIT_UNTIL macro most of the
> time, so the checks are done in the custom loop that waits up to
> 300 seconds.
>
> Acked-by: Eelco Chaudron <[email protected]>
> Signed-off-by: Ilya Maximets <[email protected]>
> ---
> tests/system-ipsec.at | 138 ++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 125 insertions(+), 13 deletions(-)
>
> diff --git a/tests/system-ipsec.at b/tests/system-ipsec.at
> index 1e155fece..de459804b 100644
> --- a/tests/system-ipsec.at
> +++ b/tests/system-ipsec.at
> @@ -8,6 +8,18 @@ m4_define([IPSEC_SETUP_UNDERLAY],
> dnl Set up the underlay switch
> AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])])
>
> +m4_define([START_PLUTO], [
> + rm -f $ovs_base/$1/pluto.pid
> + mkdir -p $ovs_base/$1/ipsec.d
> + touch $ovs_base/$1/ipsec.conf
> + touch $ovs_base/$1/secrets
> + ipsec initnss --nssdir $ovs_base/$1/ipsec.d
> + NS_CHECK_EXEC([$1], [ipsec pluto --config $ovs_base/$1/ipsec.conf \
> + --ipsecdir $ovs_base/$1 --nssdir $ovs_base/$1/ipsec.d \
> + --logfile $ovs_base/$1/pluto.log --secretsfile $ovs_base/$1/secrets \
> + --rundir $ovs_base/$1], [0], [], [stderr])
> +])
> +
> dnl IPSEC_ADD_NODE([namespace], [device], [address], [peer address]))
> dnl
> dnl Creates a dummy host that acts as an IPsec endpoint. Creates host in
> @@ -45,15 +57,8 @@ m4_define([IPSEC_ADD_NODE],
> on_exit "kill_ovs_vswitchd `cat $ovs_base/$1/vswitchd.pid`"
>
> dnl Start pluto
> - mkdir -p $ovs_base/$1/ipsec.d
> - touch $ovs_base/$1/ipsec.conf
> - touch $ovs_base/$1/secrets
> - ipsec initnss --nssdir $ovs_base/$1/ipsec.d
> - NS_CHECK_EXEC([$1], [ipsec pluto --config $ovs_base/$1/ipsec.conf \
> - --ipsecdir $ovs_base/$1 --nssdir $ovs_base/$1/ipsec.d \
> - --logfile $ovs_base/$1/pluto.log --secretsfile $ovs_base/$1/secrets \
> - --rundir $ovs_base/$1], [0], [], [stderr])
> - on_exit "kill `cat $ovs_base/$1/pluto.pid`"
> + START_PLUTO([$1])
> + on_exit 'kill $(cat $ovs_base/$1/pluto.pid)'
>
> dnl Start ovs-monitor-ipsec
> NS_CHECK_EXEC([$1], [ovs-monitor-ipsec unix:${OVS_RUNDIR}/$1/db.sock\
> @@ -110,16 +115,18 @@ m4_define([CHECK_LIBRESWAN],
> dnl IPSEC_STATUS_LOADED([])
> dnl
> dnl Get number of loaded connections from ipsec status
> -m4_define([IPSEC_STATUS_LOADED], [ipsec --rundir $ovs_base/$1 status | \
> +m4_define([IPSEC_STATUS_LOADED], [
> + ipsec --rundir $ovs_base/$1 status | \
> grep "Total IPsec connections" | \
> - sed 's/[[0-9]]* *Total IPsec connections: loaded \([[0-2]]\), active \([[0-2]]\).*/\1/m'])
> + sed 's/[[0-9]]* *Total IPsec connections: loaded \([[0-9]]*\), active \([[0-9]]*\).*/\1/m'])
>
> dnl IPSEC_STATUS_ACTIVE([])
> dnl
> dnl Get number of active connections from ipsec status
> -m4_define([IPSEC_STATUS_ACTIVE], [ipsec --rundir $ovs_base/$1 status | \
> +m4_define([IPSEC_STATUS_ACTIVE], [
> + ipsec --rundir $ovs_base/$1 status | \
> grep "Total IPsec connections" | \
> - sed 's/[[0-9]]* *Total IPsec connections: loaded \([[0-2]]\), active \([[0-2]]\).*/\2/m'])
> + sed 's/[[0-9]]* *Total IPsec connections: loaded \([[0-9]]*\), active \([[0-9]]*\).*/\2/m'])
>
> dnl CHECK_ESP_TRAFFIC()
> dnl
> @@ -401,3 +408,108 @@ CHECK_ESP_TRAFFIC
>
> OVS_TRAFFIC_VSWITCHD_STOP()
> AT_CLEANUP
> +
> +AT_SETUP([IPsec -- Libreswan NxN geneve tunnels + reconciliation])
> +AT_KEYWORDS([ipsec libreswan scale reconciliation])
> +dnl Note: Geneve test may not work on older kernels due to CVE-2020-25645
> +dnl https://bugzilla.redhat.com/show_bug.cgi?id=1883988
> +
> +CHECK_LIBRESWAN()
> +OVS_TRAFFIC_VSWITCHD_START()
> +IPSEC_SETUP_UNDERLAY()
> +
> +m4_define([NODES], [20])
> +
> +dnl Set up fake hosts.
> +m4_for([id], [1], NODES, [1], [
> + IPSEC_ADD_NODE([node-id], [p-id], 10.1.1.id, 10.1.1.254)
> + AT_CHECK([ovs-pki -b -d ${ovs_base} -l ${ovs_base}/ovs-pki.log \
> + req -u node-id], [0], [stdout])
> + AT_CHECK([ovs-pki -b -d ${ovs_base} -l ${ovs_base}/ovs-pki.log \
> + self-sign node-id], [0], [stdout])
> + AT_CHECK(OVS_VSCTL([node-id], set Open_vSwitch . \
> + other_config:certificate=${ovs_base}/node-id-cert.pem \
> + other_config:private_key=${ovs_base}/node-id-privkey.pem),
> + [0], [ignore], [ignore])
> + on_exit "ipsec --rundir $ovs_base/node-id status > $ovs_base/node-id/status"
> +])
> +
> +dnl Create a full mesh of tunnels.
> +m4_for([LEFT], [1], NODES, [1], [
> + m4_for([RIGHT], [1], NODES, [1], [
> + if test LEFT -ne RIGHT; then
> + AT_CHECK(OVS_VSCTL(node-LEFT, add-port br-ipsec tun-RIGHT \
> + -- set Interface tun-RIGHT type=geneve options:remote_ip=10.1.1.RIGHT \
> + options:remote_cert=${ovs_base}/node-RIGHT-cert.pem),
> + [0], [ignore], [ignore])
> + fi
> +])])
> +
> +m4_define([WAIT_FOR_LOADED_CONNS], [
> + m4_for([id], [1], NODES, [1], [
> + echo "================== node-id ========================="
> + iterations=0
> + loaded=0
> + dnl Using a custom loop instead of OVS_WAIT_UNTIL, because it may take
> + dnl much longer than a default timeout. The default retransmit timeout
> + dnl for pluto is 60 seconds. Also, we need to make sure pluto didn't
> + dnl crash in the process and revive it if it did, unfortunately.
> + while true; do
> + date
> + AT_CHECK([ipsec --rundir $ovs_base/node-id status 2>&1 \
> + | grep -E "whack|Total"], [ignore], [stdout])
> + if grep -E 'is Pluto running?|refused' stdout; then
> + echo "node-id: Pluto died, restarting..."
> + START_PLUTO([node-id])
> + else
> + loaded=$(IPSEC_STATUS_LOADED(node-id))
> + fi
> + if test "$loaded" -ne $(( (NODES - 1) * 2 )); then
> + sleep 3
> + else
> + break
> + fi
> + let iterations=$iterations+1
> + AT_CHECK([test $iterations -lt 100])
> + done
> + ])
> +])
> +
> +dnl Wait for all the connections to be loaded to pluto. Not waiting for
> +dnl them to become active, because if pluto is down on one of the nodes,
> +dnl some connections may not become active until we revive it. Some
> +dnl connections may also never become active due to bugs in libreswan 4.x.
> +WAIT_FOR_LOADED_CONNS()
> +
> +AT_CHECK([ipsec auto --help], [ignore], [ignore], [stderr])
> +auto=auto
> +if test -s stderr; then
> + auto=
> +fi
> +
> +dnl Remove connections for two tunnels. One fully and one partially.
> +AT_CHECK([ipsec $auto --ctlsocket $ovs_base/node-1/pluto.ctl \
> + --config $ovs_base/node-1/ipsec.conf \
> + --delete tun-5-out-1], [0], [stdout])
> +AT_CHECK([ipsec $auto --ctlsocket $ovs_base/node-1/pluto.ctl \
> + --config $ovs_base/node-1/ipsec.conf \
> + --delete tun-2-in-1], [0], [stdout])
> +AT_CHECK([ipsec $auto --ctlsocket $ovs_base/node-1/pluto.ctl \
> + --config $ovs_base/node-1/ipsec.conf \
> + --delete tun-2-out-1], [0], [stdout])
> +
> +dnl Wait for the monitor to notice the missing connections.
> +OVS_WAIT_UNTIL([grep -q 'tun-2.*need to reconcile' \
> + $ovs_base/node-1/ovs-monitor-ipsec.log])
> +
> +dnl Wait for all the connections to be loaded back.
> +WAIT_FOR_LOADED_CONNS()
> +
> +dnl These are not necessary, but nice to have in the test log in
> +dnl order to spot pluto failures during the test.
> +grep -E 'Timed out|outdated|half-loaded|defunct' \
> + $ovs_base/node-*/ovs-monitor-ipsec.log
> +grep -E 'ABORT|ERROR' $ovs_base/node-*/pluto.log
> +
> +OVS_TRAFFIC_VSWITCHD_STOP()
> +AT_CLEANUP
Acked-by: Roi Dayan <[email protected]>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev