> Subject: [EXTERNAL] [RFC] app/testpmd: add configurable flow count for txonly multi-flow > > Started with Long's patch to change port and added a parameter. > This is a suggestion only *DO NOT MERGE* > Not sure what a good name for the parameter is; this is just a quick hack. > > The txonly multi-flow mode generates 64 unique UDP source ports per lcore by > cycling the high byte from 0xC0 to 0xFF. On SmartNICs with limited hardware > flow table caching, this fixed count can exhaust the flow cache and degrade > receive-side performance. > > Add a --txonly-nb-flows=N command line parameter and a 'set txonly-nb-flows' > runtime command to limit the number of unique source ports per lcore to > between 1 and 64. The default remains 64 to preserve existing behavior. > > The source port encoding is unchanged: the low byte carries the lcore ID > (avoiding atomics) and the high byte cycles through N values starting at 0xC0. > Total unique flows = txonly_nb_flows * active_lcores. > > Reported-by: Long Li <[email protected]> > Signed-off-by: Stephen Hemminger <[email protected]>
Thank you, worked well for Azure VM at 200Gb/s. Tested-by: Long Li <[email protected]> > --- > > app/test-pmd/cmdline.c | 49 +++++++++++++++++++++ > app/test-pmd/parameters.c | 13 ++++++ > app/test-pmd/testpmd.c | 3 ++ > app/test-pmd/testpmd.h | 1 + > app/test-pmd/txonly.c | 24 +++++----- > doc/guides/testpmd_app_ug/run_app.rst | 8 ++++ > doc/guides/testpmd_app_ug/testpmd_funcs.rst | 16 +++++++ > 7 files changed, 104 insertions(+), 10 deletions(-) > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index > c33c66f327..debd226762 100644 > --- a/app/test-pmd/cmdline.c > +++ b/app/test-pmd/cmdline.c > @@ -384,6 +384,10 @@ static void cmd_help_long_parsed(void *parsed_result, > " Set the scheduling on timestamps" > " timings for the TXONLY mode\n\n" > > + "set txonly-nb-flows (N)\n" > + " Set the number of flows per lcore in" > + " txonly multi-flow mode (1-64)\n\n" > + > "set corelist (x[,y]*)\n" > " Set the list of forwarding cores.\n\n" > > @@ -4612,6 +4616,50 @@ static cmdline_parse_inst_t cmd_set_txtimes = { > }, > }; > > +/* *** SET NUMBER OF FLOWS IN TXONLY MULTI-FLOW MODE *** */ > + > +struct cmd_set_txonly_nb_flows_result { > + cmdline_fixed_string_t cmd_keyword; > + cmdline_fixed_string_t name; > + uint16_t value; > +}; > + > +static void > +cmd_set_txonly_nb_flows_parsed(void *parsed_result, > + __rte_unused struct cmdline *cl, > + __rte_unused void *data) > +{ > + struct cmd_set_txonly_nb_flows_result *res = parsed_result; > + > + if (res->value < 1 || res->value > 64) { > + fprintf(stderr, "txonly-nb-flows must be >= 1 and <= 64\n"); > + return; > + } > + txonly_nb_flows = res->value; > +} > + > +static cmdline_parse_token_string_t cmd_set_txonly_nb_flows_keyword = > + TOKEN_STRING_INITIALIZER(struct cmd_set_txonly_nb_flows_result, > + cmd_keyword, "set"); > +static cmdline_parse_token_string_t cmd_set_txonly_nb_flows_name = > + TOKEN_STRING_INITIALIZER(struct cmd_set_txonly_nb_flows_result, > + name, "txonly-nb-flows"); > +static 
cmdline_parse_token_num_t cmd_set_txonly_nb_flows_value = > + TOKEN_NUM_INITIALIZER(struct cmd_set_txonly_nb_flows_result, > + value, RTE_UINT16); > + > +static cmdline_parse_inst_t cmd_set_txonly_nb_flows = { > + .f = cmd_set_txonly_nb_flows_parsed, > + .data = NULL, > + .help_str = "set txonly-nb-flows <N>", > + .tokens = { > + (void *)&cmd_set_txonly_nb_flows_keyword, > + (void *)&cmd_set_txonly_nb_flows_name, > + (void *)&cmd_set_txonly_nb_flows_value, > + NULL, > + }, > +}; > + > /* *** ADD/REMOVE ALL VLAN IDENTIFIERS TO/FROM A PORT VLAN RX FILTER > *** */ struct cmd_rx_vlan_filter_all_result { > cmdline_fixed_string_t rx_vlan; > @@ -14102,6 +14150,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = { > &cmd_set_txpkts, > &cmd_set_txsplit, > &cmd_set_txtimes, > + &cmd_set_txonly_nb_flows, > &cmd_set_fwd_list, > &cmd_set_fwd_mask, > &cmd_set_fwd_mode, > diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c index > f2037925c2..2432d720fb 100644 > --- a/app/test-pmd/parameters.c > +++ b/app/test-pmd/parameters.c > @@ -193,6 +193,8 @@ enum { > TESTPMD_OPT_MULTI_RX_MEMPOOL_NUM, > #define TESTPMD_OPT_TXONLY_MULTI_FLOW "txonly-multi-flow" > TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM, > +#define TESTPMD_OPT_TXONLY_NB_FLOWS "txonly-nb-flows" > + TESTPMD_OPT_TXONLY_NB_FLOWS_NUM, > #define TESTPMD_OPT_RXQ_SHARE "rxq-share" > TESTPMD_OPT_RXQ_SHARE_NUM, > #define TESTPMD_OPT_ETH_LINK_SPEED "eth-link-speed" > @@ -348,6 +350,7 @@ static const struct option long_options[] = { > REQUIRED_ARG(TESTPMD_OPT_TXPKTS), > NO_ARG(TESTPMD_OPT_MULTI_RX_MEMPOOL), > NO_ARG(TESTPMD_OPT_TXONLY_MULTI_FLOW), > + REQUIRED_ARG(TESTPMD_OPT_TXONLY_NB_FLOWS), > OPTIONAL_ARG(TESTPMD_OPT_RXQ_SHARE), > REQUIRED_ARG(TESTPMD_OPT_ETH_LINK_SPEED), > NO_ARG(TESTPMD_OPT_DISABLE_LINK_CHECK), > @@ -499,6 +502,8 @@ usage(char* progname) > " or total packet length.\n"); > printf(" --multi-rx-mempool: enable multi-rx-mempool support\n"); > printf(" --txonly-multi-flow: generate multiple flows in txonly > 
mode\n"); > + printf(" --txonly-nb-flows=N: number of flows per lcore in txonly" > + " multi-flow mode (1-64, default 64)\n"); > printf(" --tx-ip=src,dst: IP addresses in Tx-only mode\n"); > printf(" --tx-udp=src[,dst]: UDP ports in Tx-only mode\n"); > printf(" --eth-link-speed: force link speed.\n"); @@ -1566,6 +1571,14 > @@ launch_args_parse(int argc, char** argv) > case TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM: > txonly_multi_flow = 1; > break; > + case TESTPMD_OPT_TXONLY_NB_FLOWS_NUM: > + n = atoi(optarg); > + if (n >= 1 && n <= 64) > + txonly_nb_flows = (uint16_t)n; > + else > + rte_exit(EXIT_FAILURE, > + "txonly-nb-flows must be >= 1 and <= > 64\n"); > + break; > case TESTPMD_OPT_RXQ_SHARE_NUM: > if (optarg == NULL) { > rxq_share = UINT32_MAX; > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index > fbacee89ea..6661bf16cd 100644 > --- a/app/test-pmd/testpmd.c > +++ b/app/test-pmd/testpmd.c > @@ -287,6 +287,9 @@ enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF; > uint8_t txonly_multi_flow; /**< Whether multiple flows are generated in > TXONLY mode. */ > > +uint16_t txonly_nb_flows = 64; > +/**< Number of unique flows per lcore in TXONLY multi-flow mode. */ > + > uint32_t tx_pkt_times_inter; > /**< Timings for send scheduling in TXONLY mode, time between bursts. 
*/ > > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index > f319471c73..13c3915848 100644 > --- a/app/test-pmd/testpmd.h > +++ b/app/test-pmd/testpmd.h > @@ -673,6 +673,7 @@ enum tx_pkt_split { > extern enum tx_pkt_split tx_pkt_split; > > extern uint8_t txonly_multi_flow; > +extern uint16_t txonly_nb_flows; /**< Number of flows in txonly > +multi-flow */ > > extern uint32_t rxq_share; > > diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index > bdcf6ea660..7ba9abf656 100644 > --- a/app/test-pmd/txonly.c > +++ b/app/test-pmd/txonly.c > @@ -223,18 +223,22 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct > rte_mempool *mbp, > sizeof(struct rte_ether_hdr) + > sizeof(struct rte_ipv4_hdr)); > /* > - * Generate multiple flows by varying UDP source port. > - * This enables packets are well distributed by RSS in > - * receiver side if any and txonly mode can be a decent > - * packet generator for developer's quick performance > - * regression test. > + * Generate a configurable number of flows per lcore by > + * varying the UDP source port. The low byte is the lcore > + * ID, ensuring each lcore produces unique ports without > + * atomic operations. The high byte cycles through > + * txonly_nb_flows values starting at 0xC0, keeping ports > + * in the ephemeral range 49152-65535 (RFC 6335). > * > - * Only ports in the range 49152 (0xC000) and 65535 (0xFFFF) > - * will be used, with the least significant byte representing > - * the lcore ID. As such, the most significant byte will cycle > - * through 0xC0 and 0xFF. > + * Total unique flows = txonly_nb_flows * active_lcores. > + * > + * Note: lcore IDs above 255 will alias in the low byte, > + * causing flow overlap between those lcores. This is > + * acceptable as the total flow count at that scale > + * already exceeds typical hardware flow table sizes. 
> */ > - src_port = ((src_var++ | 0xC0) << 8) + rte_lcore_id(); > + src_port = (((src_var++ % txonly_nb_flows) + 0xC0) << 8) > + + rte_lcore_id(); > udp_hdr->src_port = rte_cpu_to_be_16(src_port); > RTE_PER_LCORE(_src_port_var) = src_var; > } > diff --git a/doc/guides/testpmd_app_ug/run_app.rst > b/doc/guides/testpmd_app_ug/run_app.rst > index 97d6c75716..a4a57ea383 100644 > --- a/doc/guides/testpmd_app_ug/run_app.rst > +++ b/doc/guides/testpmd_app_ug/run_app.rst > @@ -386,6 +386,14 @@ The command line options are: > > Generate multiple flows in txonly mode. > > +* ``--txonly-nb-flows=N`` > + > + Set the number of unique flows per lcore when txonly multi-flow mode > + is enabled. Valid range is 1 to 64. Default is 64, which preserves > + the original behavior. Reducing this value limits the number of unique > + UDP source ports generated, which can prevent exhaustion of hardware > + flow table entries on SmartNICs. > + > * ``--rxq-share=[X]`` > > Create queues in shared Rx queue mode if device supports. > diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst > b/doc/guides/testpmd_app_ug/testpmd_funcs.rst > index 62bb167d56..ff1c8a444d 100644 > --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst > +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst > @@ -924,6 +924,22 @@ Where: > > * ``rand`` same as 'on', but number of segments per each packet is a random > value between 1 and total number of segments. > > +set txonly-nb-flows > +~~~~~~~~~~~~~~~~~~~ > + > +Set the number of unique flows per lcore in txonly multi-flow mode:: > + > + testpmd> set txonly-nb-flows <N> > + > +Where ``N`` is the number of unique UDP source port values each lcore > +will cycle through, in the range 1 to 64. Default is 64. > + > +Each lcore generates unique flows by combining the flow index with its > +lcore ID, so the total number of unique flows across the system is > +``txonly-nb-flows * active_lcores``. 
Reducing this value can prevent > +exhaustion of hardware flow table entries on SmartNICs that have > +limited flow caching capacity. > + > set corelist > ~~~~~~~~~~~~ > > -- > 2.51.0

