From: Geliang Tang <[email protected]>
Add NVMe iopolicy testing to mptcp_nvme.sh, with the default set to
"numa". It can be set to "round-robin" or "queue-depth".
Test results with 4 NVMe multipath paths and round-robin iopolicy show
that TCP and MPTCP achieve similar bandwidth:
# ./mptcp_nvme.sh tcp 4 round-robin
READ: bw=455MiB/s (478MB/s), 455MiB/s-455MiB/s (478MB/s-478MB/s),
io=4665MiB (4891MB), run=10242-10242msec
WRITE: bw=455MiB/s (477MB/s), 455MiB/s-455MiB/s (477MB/s-477MB/s),
io=4633MiB (4858MB), run=10184-10184msec
# ./mptcp_nvme.sh mptcp 4 round-robin
READ: bw=445MiB/s (466MB/s), 445MiB/s-445MiB/s (466MB/s-466MB/s),
io=4575MiB (4797MB), run=10287-10287msec
WRITE: bw=445MiB/s (467MB/s), 445MiB/s-445MiB/s (467MB/s-467MB/s),
io=4572MiB (4794MB), run=10267-10267msec
A "loss" argument is added to simulate network packet loss. When loss=1,
each veth interface is configured with "delay 5ms loss 0.5%" using tc
qdisc. Under this scenario, TCP achieves only about a third (read) to a
quarter (write) of the bandwidth that MPTCP sustains:
# ./mptcp_nvme.sh tcp 4 round-robin 1
READ: bw=144MiB/s (151MB/s), 144MiB/s-144MiB/s (151MB/s-151MB/s),
io=1909MiB (2001MB), run=13231-13231msec
WRITE: bw=100.0MiB/s (105MB/s), 100.0MiB/s-100.0MiB/s (105MB/s-105MB/s),
io=1397MiB (1465MB), run=13980-13980msec
# ./mptcp_nvme.sh mptcp 4 round-robin 1
READ: bw=428MiB/s (449MB/s), 428MiB/s-428MiB/s (449MB/s-449MB/s),
io=4524MiB (4743MB), run=10564-10564msec
WRITE: bw=431MiB/s (452MB/s), 431MiB/s-431MiB/s (452MB/s-452MB/s),
io=4513MiB (4732MB), run=10481-10481msec
These results demonstrate that MPTCP is more resilient to packet loss
than TCP, as it can leverage multiple subflows to mitigate network
degradation.
Cc: Hannes Reinecke <[email protected]>
Cc: John Meneghini <[email protected]>
Cc: Randy Jennings <[email protected]>
Cc: Nilay Shroff <[email protected]>
Co-developed-by: zhenwei pi <[email protected]>
Signed-off-by: zhenwei pi <[email protected]>
Co-developed-by: Hui Zhu <[email protected]>
Signed-off-by: Hui Zhu <[email protected]>
Co-developed-by: Gang Yan <[email protected]>
Signed-off-by: Gang Yan <[email protected]>
Signed-off-by: Geliang Tang <[email protected]>
---
.../testing/selftests/net/mptcp/mptcp_nvme.sh | 70 ++++++++++++++++++-
1 file changed, 69 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_nvme.sh
b/tools/testing/selftests/net/mptcp/mptcp_nvme.sh
index 5b1133dbc2d5..3ab04be05dff 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_nvme.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_nvme.sh
@@ -6,6 +6,8 @@
ret=0
trtype="${1:-mptcp}"
path="${2:-1}"
+iopolicy=${3:-"numa"} # round-robin, queue-depth
+loss=${4:-0}
nqn="nqn.2014-08.org.nvmexpress.${trtype}dev.$$.${RANDOM}"
ns=1
port=$((RANDOM % 10000 + 20000))
@@ -17,6 +19,7 @@ loop_dev=""
export trtype path nqn ns port trsvcid
export loop_dev temp_file
+export iopolicy loss
usage()
{
@@ -24,10 +27,12 @@ usage()
Usage:
- $(basename "$0") [trtype] [path]
+ $(basename "$0") [trtype] [path] [iopolicy] [loss]
trtype Transport type (tcp|mptcp) - default: mptcp
path Number of multipath (1-4) - default: 1
+ iopolicy I/O policy (numa|round-robin|queue-depth) - default: numa
+ loss Enable packet loss (0|1) - default: 0
EOF
exit ${KSFT_FAIL}
@@ -44,6 +49,16 @@ validate_params()
echo "Invalid path count ${path}. Must be between 1 and 4"
usage
fi
+
+ if [[ ! "${iopolicy}" =~ ^(numa|round-robin|queue-depth)$ ]]; then
+ echo "Invalid iopolicy ${iopolicy}."
+ usage
+ fi
+
+ if [[ ! "${loss}" =~ ^[01]$ ]]; then
+ echo "Invalid loss value ${loss}. Must be 0 or 1"
+ usage
+ fi
}
# This function is invoked indirectly
@@ -105,6 +120,7 @@ cleanup()
unset -v trtype path nqn ns port trsvcid
unset -v loop_dev temp_file
+ unset -v iopolicy loss
}
# $tc_args needs word splitting to pass multiple arguments to netem
@@ -113,6 +129,10 @@ init()
{
local tc_args="rate 1000mbit"
+ if [ "${loss}" -eq 1 ]; then
+ tc_args+=" delay 5ms loss 0.5%"
+ fi
+
mptcp_lib_ns_init ns1 ns2
# ns1 ns2
@@ -193,6 +213,48 @@ run_target()
done
}
+# This function is invoked indirectly
+#shellcheck disable=SC2317,SC2329
+set_io_policy()
+{
+ local nqn="$1"
+ local iopolicy="$2"
+ local subname
+ local policy
+ local current
+ # Match the NQN literally (-F): its dots must not act as regex wildcards.
+ subname=$(nvme list-subsys 2>/dev/null | grep -F -- "${nqn}" |
+ grep -o 'nvme-subsys[0-9]*' | head -1)
+ if [ -z "$subname" ]; then
+ return 1
+ fi
+
+ policy="/sys/class/nvme-subsystem/${subname}/iopolicy"
+ if [ ! -e "$policy" ]; then
+ # NVMe multipath not supported, skip iopolicy setting
+ return 0
+ fi
+
+ if [ ! -w "$policy" ]; then
+ return 1
+ fi
+
+ if ! echo "${iopolicy}" > "$policy" 2>/dev/null; then
+ return 1
+ fi
+ # Read the attribute back to verify that the write took effect.
+ current=$(cat "$policy" 2>/dev/null)
+ if [ -z "$current" ]; then
+ return 1
+ fi
+
+ if [[ "$current" != *"${iopolicy}"* ]]; then
+ return 1
+ fi
+
+ return 0
+}
+
# This function is invoked indirectly
#shellcheck disable=SC2317,SC2329
run_host()
@@ -242,6 +304,11 @@ run_host()
return 1
fi
+ if ! set_io_policy "${nqn}" "${iopolicy}"; then
+ echo "Failed to set I/O policy to ${iopolicy}"
+ return 1
+ fi
+
sleep 1
echo "fio randread /dev/${devname}"
@@ -306,6 +373,7 @@ run_test()
fi
if ! ip netns exec "$ns2" bash <<- EOF
+ $(declare -f set_io_policy)
$(declare -f run_host)
run_host
exit \$?
--
2.53.0