> diff --git a/tools/testing/selftests/net/ecmp_rehash.sh
> b/tools/testing/selftests/net/ecmp_rehash.sh
> new file mode 100755
> index 000000000000..ffaacf1d9513
> --- /dev/null
> +++ b/tools/testing/selftests/net/ecmp_rehash.sh
> @@ -0,0 +1,1066 @@
[ ... ]
> +tc_filter_pkt_count()
> +{
> + local ns=$1; shift
> + local dev=$1; shift
> +
> + ip netns exec "$ns" tc -s filter show dev "$dev" parent 1: 2>/dev/null |
> + awk '/Sent .* pkt/ {
> + for (i=1; i<=NF; i++)
> + if ($i == "pkt") { print $(i-1); exit }
> + }'
> +}
These per-device tc filter counters match all TCP IPv6 traffic on the
device, with no isolation by port.
[ ... ]
> +test_ecmp_syncookie_path_consistency()
> +{
[ ... ]
> + for r in $(seq 1 "$ECMP_REBUILD_ROUNDS"); do
> + local port=$((PORT + 3 * ECMP_REBUILD_ROUNDS + r))
> +
> + ip netns exec "$NS2" socat -u \
> + "TCP6-LISTEN:$port,bind=[fd00:ff::2],reuseaddr" \
> + - >/dev/null &
> + local server_pid=$!
> +
> + wait_local_port_listen "$NS2" "$port" tcp
> +
> + local srv_base0 srv_base1
> + srv_base0=$(tc_filter_pkt_count "$NS2" veth0b)
> + srv_base1=$(tc_filter_pkt_count "$NS2" veth1b)
The baseline is snapshotted here, after the new listener starts, which
handles leakage that arrives before this point.
[ ... ]
> + local srv_tcp0 srv_tcp1
> + srv_tcp0=$(tc_filter_pkt_count "$NS2" veth0b)
> + srv_tcp1=$(tc_filter_pkt_count "$NS2" veth1b)
> + local srv_delta0=$(( ${srv_tcp0:-0} - ${srv_base0:-0} ))
> + local srv_delta1=$(( ${srv_tcp1:-0} - ${srv_base1:-0} ))
> +
> + if [ "$srv_delta0" -gt 0 ] && [ "$srv_delta1" -gt 0 ]; then
> + path_splits=$((path_splits + 1))
> + fi
> +
> + kill "$client_pid" 2>/dev/null
> + wait "$client_pid" 2>/dev/null
> + kill "$server_pid" 2>/dev/null
> + wait "$server_pid" 2>/dev/null
> + done
The client and server from each round are killed at the end of the
round, but the loop moves straight on to the next round.
Can asynchronous teardown traffic (FIN/RST/ACK) from the killed client
or server of a previous round still be in flight after srv_base0 and
srv_base1 are snapshotted for the next round? Since the counters match
all TCP IPv6 traffic on the device rather than the current round's port,
would a late teardown packet on one path plus the new round's traffic on
the other path increment both srv_delta0 and srv_delta1, satisfying the
srv_delta0 > 0 && srv_delta1 > 0 check and recording a spurious split?
Would a short quiescence wait or filtering the tc counters by the
current round's port make this check more robust against late packets?
> + if [ "$path_splits" -gt 0 ]; then
> + check_err 1 "$path_splits/$ECMP_REBUILD_ROUNDS had split server
> path"
> + fi
The same pattern appears in ecmp_dst_rebuild_loop(), which also relies
on these global per-device counters across rounds.
This was raised by an automated review on an earlier version
(v9, https://lore.kernel.org/r/[email protected])
and the counter structure is unchanged in the current version. It
affects test reliability rather than kernel correctness.
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26839485409