The txonly multi-flow mode generates 64 unique UDP source ports per
lcore by cycling the high byte from 0xC0 to 0xFF. On SmartNICs with
limited hardware flow table caching, this fixed count can exhaust the
flow cache and degrade receive-side performance.

Add --txonly-flows=N command line parameter and 'set txonly-flows'
runtime command to limit the number of unique source ports per lcore
to between 1 and 64. The default remains 64 to preserve existing
behavior.

The source port encoding is unchanged: the low byte carries the lcore
ID (avoiding atomics) and the high byte cycles through N values
starting at 0xC0. Total unique flows = txonly_flows * active_lcores.

Reported-by: Long Li <[email protected]>
Signed-off-by: Stephen Hemminger <[email protected]>
Tested-by: Long Li <[email protected]>

---
v2 - rename to shorter command line option

 app/test-pmd/cmdline.c                      | 49 +++++++++++++++++++++
 app/test-pmd/parameters.c                   | 12 +++++
 app/test-pmd/testpmd.c                      |  3 ++
 app/test-pmd/testpmd.h                      |  1 +
 app/test-pmd/txonly.c                       | 19 ++++----
 doc/guides/testpmd_app_ug/run_app.rst       |  6 +++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 17 +++++++
 7 files changed, 97 insertions(+), 10 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index c33c66f327..a0d6ee37aa 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -384,6 +384,10 @@ static void cmd_help_long_parsed(void *parsed_result,
                        "    Set the scheduling on timestamps"
                        " timings for the TXONLY mode\n\n"
 
+                       "set txonly-flows (N)\n"
+                       "    Set the number of flows per lcore in"
+                       " txonly multi-flow mode (1-64)\n\n"
+
                        "set corelist (x[,y]*)\n"
                        "    Set the list of forwarding cores.\n\n"
 
@@ -4612,6 +4616,50 @@ static cmdline_parse_inst_t cmd_set_txtimes = {
        },
 };
 
+/* *** SET NUMBER OF FLOWS IN TXONLY MULTI-FLOW MODE *** */
+
+struct cmd_set_txflows_result {
+       cmdline_fixed_string_t cmd_keyword;
+       cmdline_fixed_string_t name;
+       uint16_t value;
+};
+
+static void
+cmd_set_txflows_parsed(void *parsed_result,
+                              __rte_unused struct cmdline *cl,
+                              __rte_unused void *data)
+{
+       struct cmd_set_txflows_result *res = parsed_result;
+
+       if (res->value < 1 || res->value > 64) {
+               fprintf(stderr, "txonly-flows must be >= 1 and <= 64\n");
+               return;
+       }
+       txonly_flows = res->value;
+}
+
+static cmdline_parse_token_string_t cmd_set_txflows_keyword =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_txflows_result,
+                                cmd_keyword, "set");
+static cmdline_parse_token_string_t cmd_set_txflows_name =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_txflows_result,
+                                name, "txonly-flows");
+static cmdline_parse_token_num_t cmd_set_txflows_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_txflows_result,
+                             value, RTE_UINT16);
+
+static cmdline_parse_inst_t cmd_set_txflows = {
+       .f = cmd_set_txflows_parsed,
+       .data = NULL,
+       .help_str = "set txonly-flows <N>",
+       .tokens = {
+               (void *)&cmd_set_txflows_keyword,
+               (void *)&cmd_set_txflows_name,
+               (void *)&cmd_set_txflows_value,
+               NULL,
+       },
+};
+
 /* *** ADD/REMOVE ALL VLAN IDENTIFIERS TO/FROM A PORT VLAN RX FILTER *** */
 struct cmd_rx_vlan_filter_all_result {
        cmdline_fixed_string_t rx_vlan;
@@ -14099,6 +14147,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
        &cmd_set_rxoffs,
        &cmd_set_rxpkts,
        &cmd_set_rxhdrs,
+       &cmd_set_txflows,
        &cmd_set_txpkts,
        &cmd_set_txsplit,
        &cmd_set_txtimes,
diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index f2037925c2..3617860830 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -193,6 +193,8 @@ enum {
        TESTPMD_OPT_MULTI_RX_MEMPOOL_NUM,
 #define TESTPMD_OPT_TXONLY_MULTI_FLOW "txonly-multi-flow"
        TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM,
+#define TESTPMD_OPT_TXONLY_FLOWS "txonly-flows"
+       TESTPMD_OPT_TXONLY_FLOWS_NUM,
 #define TESTPMD_OPT_RXQ_SHARE "rxq-share"
        TESTPMD_OPT_RXQ_SHARE_NUM,
 #define TESTPMD_OPT_ETH_LINK_SPEED "eth-link-speed"
@@ -348,6 +350,7 @@ static const struct option long_options[] = {
        REQUIRED_ARG(TESTPMD_OPT_TXPKTS),
        NO_ARG(TESTPMD_OPT_MULTI_RX_MEMPOOL),
        NO_ARG(TESTPMD_OPT_TXONLY_MULTI_FLOW),
+       REQUIRED_ARG(TESTPMD_OPT_TXONLY_FLOWS),
        OPTIONAL_ARG(TESTPMD_OPT_RXQ_SHARE),
        REQUIRED_ARG(TESTPMD_OPT_ETH_LINK_SPEED),
        NO_ARG(TESTPMD_OPT_DISABLE_LINK_CHECK),
@@ -499,6 +502,8 @@ usage(char* progname)
                " or total packet length.\n");
        printf("  --multi-rx-mempool: enable multi-rx-mempool support\n");
        printf("  --txonly-multi-flow: generate multiple flows in txonly mode\n");
+       printf("  --txonly-flows=N: number of flows per lcore in txonly"
+              " multi-flow mode (1-64, default 64)\n");
        printf("  --tx-ip=src,dst: IP addresses in Tx-only mode\n");
        printf("  --tx-udp=src[,dst]: UDP ports in Tx-only mode\n");
        printf("  --eth-link-speed: force link speed.\n");
@@ -1566,6 +1571,13 @@ launch_args_parse(int argc, char** argv)
                case TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM:
                        txonly_multi_flow = 1;
                        break;
+               case TESTPMD_OPT_TXONLY_FLOWS_NUM:
+                       n = atoi(optarg);
+                       if (n >= 1 && n <= 64)
+                               txonly_flows = (uint16_t)n;
+                       else
+                               rte_exit(EXIT_FAILURE, "txonly-flows must be >= 1 and <= 64\n");
+                       break;
                case TESTPMD_OPT_RXQ_SHARE_NUM:
                        if (optarg == NULL) {
                                rxq_share = UINT32_MAX;
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index fbacee89ea..aad880aa34 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -287,6 +287,9 @@ enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
 uint8_t txonly_multi_flow;
 /**< Whether multiple flows are generated in TXONLY mode. */
 
+uint16_t txonly_flows = 64;
+/**< Number of unique flows per lcore in TXONLY multi-flow mode. */
+
 uint32_t tx_pkt_times_inter;
 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
 
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index f319471c73..af185540c3 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -673,6 +673,7 @@ enum tx_pkt_split {
 extern enum tx_pkt_split tx_pkt_split;
 
 extern uint8_t txonly_multi_flow;
+extern uint16_t txonly_flows;
 
 extern uint32_t rxq_share;
 
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index bdcf6ea660..64893fa205 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -223,18 +223,17 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
                                sizeof(struct rte_ether_hdr) +
                                sizeof(struct rte_ipv4_hdr));
                /*
-                * Generate multiple flows by varying UDP source port.
-                * This enables packets are well distributed by RSS in
-                * receiver side if any and txonly mode can be a decent
-                * packet generator for developer's quick performance
-                * regression test.
+                * Generate a configurable number of flows per lcore by
+                * varying the UDP source port. The low byte is the lcore
+                * ID, ensuring each lcore produces unique ports without
+                * atomic operations. The high byte cycles through
+                * txonly_flows values starting at 0xC0, keeping ports
+                * in the ephemeral range 49152-65535 (RFC 6335).
                 *
-                * Only ports in the range 49152 (0xC000) and 65535 (0xFFFF)
-                * will be used, with the least significant byte representing
-                * the lcore ID. As such, the most significant byte will cycle
-                * through 0xC0 and 0xFF.
+                * Total unique flows = txonly_flows * active_lcores.
                 */
-               src_port = ((src_var++ | 0xC0) << 8) + rte_lcore_id();
+               src_port = (((src_var++ % txonly_flows) + 0xC0) << 8)
+                          + rte_lcore_id();
                udp_hdr->src_port = rte_cpu_to_be_16(src_port);
                RTE_PER_LCORE(_src_port_var) = src_var;
        }
diff --git a/doc/guides/testpmd_app_ug/run_app.rst b/doc/guides/testpmd_app_ug/run_app.rst
index 97d6c75716..0340b3e2b3 100644
--- a/doc/guides/testpmd_app_ug/run_app.rst
+++ b/doc/guides/testpmd_app_ug/run_app.rst
@@ -386,6 +386,12 @@ The command line options are:
 
     Generate multiple flows in txonly mode.
 
+*   ``--txonly-flows=N``
+
+    Set the number of unique flows per lcore when txonly multi-flow mode
+    is enabled. Valid range is 1 to 64. Default is 64.
+    Reducing this value limits the number of unique UDP source ports generated.
+
 *   ``--rxq-share=[X]``
 
     Create queues in shared Rx queue mode if device supports.
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index 62bb167d56..6695940dd2 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -924,6 +924,23 @@ Where:
 
 * ``rand`` same as 'on', but number of segments per each packet is a random value between 1 and total number of segments.
 
+set txonly-flows
+~~~~~~~~~~~~~~~~
+
+Set the number of unique flows per lcore in txonly multi-flow mode::
+
+   testpmd> set txonly-flows <N>
+
+Where ``N`` is the number of unique UDP source port values each lcore will
+cycle through, in the range 1 to 64. Default is 64. The UDP port value
+is in the ephemeral range 49152-65535 (RFC 6335). The least significant byte
+of the port is the lcore ID, and the most significant byte cycles through
+``N`` values starting at 0xC0.
+
+Each lcore generates unique flows by combining the flow index with its lcore
+ID, so the total number of unique flows across the system is
+``txonly-flows * active_lcores``.
+
 set corelist
 ~~~~~~~~~~~~
 
-- 
2.51.0

Reply via email to