Hi Aaron, thanks for the review. I'll address these in the next version.
Am Fri, Jun 26, 2026 at 05:58:50PM -0400 schrieb Aaron Conole via dev: > Felix Huettner via dev <[email protected]> writes: > > > All previous benchmarks did not test individual connections that > > are quickly opened and closed across multiple zones. This allows us to > > better evaluate how much conntrack is optimized for multithreading. > > > > Signed-off-by: Felix Huettner <[email protected]> > > --- > > tests/test-conntrack.c | 269 ++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 264 insertions(+), 5 deletions(-) > > > > diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c > > index 22db95f91..ae88538cf 100644 > > --- a/tests/test-conntrack.c > > +++ b/tests/test-conntrack.c > > @@ -176,10 +176,14 @@ destroy_packets(struct dp_packet_batch *pkt_batch) > > struct thread_aux { > > pthread_t thread; > > unsigned tid; > > + bool first; > > + uint64_t sleep_counter; > > + uint64_t conn_lookup_errors; > > }; > > > > static struct conntrack *ct; > > -static unsigned long n_threads, n_pkts, batch_size; > > +static unsigned long n_threads, n_pkts, batch_size, n_conns, > > + n_zones, n_data_pkts, n_iterations; > > Can we keep these local instead of promoting to global scope? This > seems like they could be kept local. I need to passt hem to the created threads, but i can move them to thread_aux. > > > static bool change_conn = false; > > static struct ovs_barrier barrier; > > > > @@ -193,6 +197,7 @@ ct_thread_main(void *aux_) > > size_t i; > > long long now = time_msec(); > > > > + ovs_assert(batch_size > 0); > > pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, > > &dl_type); > > ovs_barrier_block(&barrier); > > for (i = 0; i < n_pkts; i += batch_size) { > > @@ -259,10 +264,257 @@ test_benchmark(struct ovs_cmdl_context *ctx) > > free(threads); > > } > > > > +static struct dp_packet_batch *** > > +gen_tcp_connections(size_t n_packets_per_conn, unsigned tid) > > +{ > > + struct eth_addr eth_src, eth_dst; > > + uint16_t sport, dport; > > + ovs_be32 ip_src, ip_dst; > > + struct dp_packet *pkt; > > + size_t i, j; > > + > > + /* Some generic stuff we use for all conns */ > > + struct eth_addr glob_eth_src = ETH_ADDR_C(00, 01, 02, 03, 04, 05); > > + struct eth_addr glob_eth_dst = ETH_ADDR_C(00, 06, 07, 08, 09, 0a); > > + ovs_be32 glob_ip_dst = inet_addr("192.168.0.1"); > > + uint16_t glob_sport = 12345; > > + uint16_t glob_dport = 80; > > + > > + struct dp_packet_batch ***pkt_batches_per_pkt = xzalloc( > > + n_packets_per_conn * sizeof(*pkt_batches_per_pkt)); > > + for (i = 0; i < n_packets_per_conn; i++) { > > + struct dp_packet_batch **pkt_batches = xzalloc( > > + n_conns * sizeof(*pkt_batches)); > > + pkt_batches_per_pkt[i] = pkt_batches; > > + > > + for (j = 0; j < n_conns; j++) { > > + /* We just define 10.X.Y.Y for source ips. > > + * X is the thread id and YY is the connection id. */ > > + ovs_be32 glob_ip_src = htonl( > > + 0x0A000000 | ((tid & 0xFF) << 16) | (j & 0xFFFF)); > > while it isn't common, there may be machines with enough cores to go > beyond 255, and this will cause a weird kind of wrap. Maybe limit it? I added a comment and an assert. > > > + bool forward = true; > > + uint16_t tcp_flags = TCP_ACK; > > + > > + if (i == 0) { > > + tcp_flags = TCP_SYN; > > + } else if (i == 1) { > > + tcp_flags |= TCP_SYN; > > + forward = false; > > + } else if (i == n_packets_per_conn - 3) { > > + tcp_flags |= TCP_FIN; > > + } else if (i == n_packets_per_conn - 2) { > > + tcp_flags |= TCP_FIN; > > + forward = false; > > + } else if (i % 2 == 1) { > > + /* Data. */ > > + tcp_flags |= TCP_PSH; > > + } else { > > + /* ACK for Data. */ > > + forward = false; > > + } > > Am I misunderstanding the counter here? > > Let's say n_packets_per_conn == 15, then we get: > > [0] = tcp_flags=tcp_syn > [1] = tcp_flags=tcp_syn|tcp_ack > [2,4,6,8,10] = tcp_flags=tcp_ack > [3,5,7,9,11] = tcp_flags=tcp_ack|tcp_psh > > [12] = tcp_flags=tcp_ack|tcp_fin > [13] = tcp_flags=tcp_ack|tcp_fin > [14] = tcp_flags=tcp_ack > > Now questions. > > There seems to be a weird ending packet here. > > For i=2, forward will be false - but shouldn't it be in the forward > direction (same as i==0). Yes you are right, fixed. > > For n_packets_per_conn == 16, it looks like the last packet will contain > a tcp_psh flag (and maybe be in the wrong direction)? Actually we can guarantee that n_packets_per_conn is always even as it is calculated from `n_data_pkts * 2 + 6`. But you are right with that. Based on some testing and taking a look at conntrack-tcp.c i would conclude that it does not affect the actual test (even though it is wrong and i'll fix it). conntrack-tcp.c only seems to be concered about TCP_PSH to check for invalid flag combinations. All other code seems to completely ignore it otherwise. So it should not have an effect on the timing of the testresults. > > > + > > + if (forward) { > > + eth_src = glob_eth_src; > > + eth_dst = glob_eth_dst; > > + ip_src = glob_ip_src; > > + ip_dst = glob_ip_dst; > > + sport = glob_sport; > > + dport = glob_dport; > > + } else { > > + eth_src = glob_eth_dst; > > + eth_dst = glob_eth_src; > > + ip_src = glob_ip_dst; > > + ip_dst = glob_ip_src; > > + sport = glob_dport; > > + dport = glob_sport; > > + } > > + > > + pkt = build_eth_ip_packet(NULL, eth_src, eth_dst, > > + ip_src, ip_dst, > > + IPPROTO_TCP, 0); > > + build_tcp_packet(pkt, sport, dport, tcp_flags, NULL, 0); > > + pkt_batches[j] = xzalloc(sizeof(struct dp_packet_batch)); > > + dp_packet_batch_init_packet(pkt_batches[j], pkt); > > + } > > + } > > + return pkt_batches_per_pkt; > > +} > > + > > +static void * > > +ct_thread_tcp_main(void *aux_) > > +{ > > + struct thread_aux *aux = aux_; > > + size_t i, j; > > + > > + /* Prepare packets to run complete tcp connections. > > + * n_data_pkts defines how many normal packets come as part of the > > + * connection. > > + * So we need to generate: > > + * src->dst: SYN > > + * dst->src: SYN+ACK > > + * src->dst: ACK > > + * for each n_data_pkts: > > + * src->dst: Data > > + * dst->src: ACK > > + * src->dst: FIN+ACK > > + * dst->src: FIN+ACK > > + * src->dst: ACK > > + * > > + * So we need 'n_data_pkts * 2 + 6' packets for each connection.*/ > > + size_t n_packets_per_conn = n_data_pkts * 2 + 6; > > + struct dp_packet_batch ***pkt_batches_per_pkt = gen_tcp_connections( > > + n_packets_per_conn, aux->tid); > > + > > + ovs_barrier_block(&barrier); > > + > > + /* Each thread has a zone_offset, so that not all threads try to access > > + * the same zone at the same time and thereby move in lockstep. That > > would > > + * be unrealistic in real cases. We therefor distribute them as much as > > + * possible over all zones. > > + * The thread id will generally be only different by 1 between > > different > > + * threads as they are started directly afterwards. So we need to > > equally > > + * distribute it. */ > > + uint16_t zone_offset = aux->tid * n_zones / n_threads; > > + > > + for (i = 0; i < n_packets_per_conn * n_iterations; i++) { > > + size_t packet_offset = i % n_packets_per_conn; > > + if (packet_offset == 0 && aux->first) { > > + printf("Iteration: %"PRIuSIZE"/%lu\n", > > + i / n_packets_per_conn, n_iterations); > > + } > > + > > + struct dp_packet_batch **pkt_batches = pkt_batches_per_pkt[ > > + i % n_packets_per_conn]; > > + for (j = 0; j < n_conns; j++) { > > + long long now = time_msec(); > > + uint16_t zone = (j + zone_offset) % n_zones; > > + /* We set mark here to some value since it allows us to detect > > if > > + * conntrack actually inserted the connection or not. > > + * If e.g. the conntrack table is full then mark will not be > > set. > > + * We can not use CT_TRACKED, since it is also set on a full > > + * table. */ > > + uint32_t mark[2] = {aux->tid + 1, 0xFFFF}; > > + > > + pkt_metadata_init(&pkt_batches[j]->packets[0]->md, 0); > > + bool commit = packet_offset == 0; > > + conntrack_execute(ct, pkt_batches[j], htons(ETH_TYPE_IP), > > + false, commit, zone, mark, NULL, > > + NULL, NULL, now, 0); > > + > > + uint8_t ct_state = pkt_batches[j]->packets[0]->md.ct_state; > > + > > + if (packet_offset == 0) { > > + /* Check if something prevented a new connection to be > > added to > > + * conntrack we should detect it here. This can generally > > + * happen if the ct_sweep is too slow for all the > > connections > > + * we generate. In this case we will just sleep a little > > and > > + * then try again. */ > > + if (pkt_batches[j]->packets[0]->md.ct_mark == 0) { > > Is it possible that this never converges? We should have a bail on the > number of times through this, I think. Otherwise, we'll keep iterating > on the same element over and over. Honestly the only case i had this was during development. I broke the ct_sweep bad enough that it did not make any progress at all. But other than that i have never seen that happening ever again. > > > + xnanosleep(1000000); > > + aux->sleep_counter++; > > + j--; > > + continue; > > + } > > + ovs_assert(ct_state & CS_NEW); > > + } > > + > > + if (packet_offset != 0 && (ct_state & CS_ESTABLISHED) == 0) { > > + aux->conn_lookup_errors++; > > + continue; > > + } > > + > > + ovs_assert((ct_state & CS_INVALID) == 0); > > + ovs_assert(pkt_batches[j]->packets[0]->md.ct_mark != 0); > > + > > + } > > + ovsrcu_quiesce(); > > + } > > + > > + ovs_barrier_block(&barrier); > > + > > + for (i = 0; i < n_packets_per_conn; i++) { > > + for (j = 0; j < n_conns; j++) { > > + destroy_packets(pkt_batches_per_pkt[i][j]); > > + } > > + free(pkt_batches_per_pkt[i]); > > + } > > + free(pkt_batches_per_pkt); > > + > > + return NULL; > > +} > > + > > +static void > > +test_benchmark_tcp(struct ovs_cmdl_context *ctx) > > +{ > > + struct thread_aux *threads; > > + uint64_t sleep_counter = 0; > > + uint64_t lookup_errors = 0; > > + long long start; > > + unsigned i; > > + > > + fatal_signal_init(); > > + > > + /* Parse arguments */ > > + n_threads = strtoul(ctx->argv[1], NULL, 0); > > + if (!n_threads) { > > + ovs_fatal(0, "n_threads must be at least one"); > > + } > > + n_conns = strtoul(ctx->argv[2], NULL, 0); > > + n_zones = strtoul(ctx->argv[3], NULL, 0); > > + n_data_pkts = strtoul(ctx->argv[4], NULL, 0); > > + n_iterations = strtoul(ctx->argv[5], NULL, 0); > > maybe use str_to_ullong here so it gets validation? ok, fixed. > > > + if (n_conns == 0 || n_zones == 0) { > > + ovs_fatal(0, "n_conns and n_zones must be at least 1"); > > + } > > + if (n_conns > UINT16_MAX) { > > NOTE: n_conns can be larger than uint16_max in test_benchmark_zones, so > having it as a global looks even stranger since the semantic is changing > based on the test. > > > + ovs_fatal(0, "n_conns may be at most %u", UINT16_MAX); > > + } > > + > > + threads = xcalloc(n_threads, sizeof *threads); > > + ovs_barrier_init(&barrier, n_threads + 1); > > + ct = conntrack_init(); > > + conntrack_set_sweep_interval(ct, 10); > > + > > + /* Create threads */ > > + start = time_msec(); > > + for (i = 0; i < n_threads; i++) { > > + threads[i].first = i == 0; > > + threads[i].tid = i; > > + threads[i].sleep_counter = 0; > > + threads[i].conn_lookup_errors = 0; > > + threads[i].thread = ovs_thread_create("ct_thread", > > ct_thread_tcp_main, > > + &threads[i]); > > + } > > + /* Starts the work inside the threads */ > > + ovs_barrier_block(&barrier); > > + printf("initial packet generation time: %lld ms\n", time_msec() - > > start); > > + start = time_msec(); > > + > > + /* Wait for the threads to finish the work */ > > + ovs_barrier_block(&barrier); > > + printf("conntrack: %5lld ms\n", time_msec() - start); > > + > > + for (i = 0; i < n_threads; i++) { > > + xpthread_join(threads[i].thread, NULL); > > + sleep_counter += threads[i].sleep_counter; > > + lookup_errors += threads[i].conn_lookup_errors; > > + } > > + > > + printf("total sleep to wait for conntrack space " > > + "(sum over all threads): %" PRIu64 " ms\n", sleep_counter); > > + if (lookup_errors) { > > + printf("total conntrack lookup errors. Probably the connection > > expired" > > + "because of a timeout policy: %" PRIu64 "\n", > > lookup_errors); > > + } > > + > > + conntrack_destroy(ct); > > + ovs_barrier_destroy(&barrier); > > + free(threads); > > +} > > + > > static void > > test_benchmark_zones(struct ovs_cmdl_context *ctx) > > { > > - unsigned long n_conns, n_zones, iterations; > > long long start; > > unsigned i, j; > > ovs_be16 dl_type; > > @@ -279,8 +531,8 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx) > > if (n_zones == 0 || n_zones >= UINT16_MAX) { > > ovs_fatal(0, "n_zones must be between 1 and 2^16"); > > } > > - iterations = strtoul(ctx->argv[3], NULL, 0); > > - if (iterations == 0) { > > + n_iterations = strtoul(ctx->argv[3], NULL, 0); > > + if (n_iterations == 0) { > > ovs_fatal(0, "iterations must be greater than 0"); > > } > > > > @@ -289,6 +541,7 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx) > > /* Create initial connection entries */ > > start = time_msec(); > > struct dp_packet_batch **pkt_batch = xzalloc(n_conns * sizeof > > *pkt_batch); > > + ovs_assert(n_conns > 0); > > This looks dead. Earlier in this function, we validate: > > if (n_conns == 0 || n_conns >= UINT32_MAX) { > ovs_fatal(0, "n_conns must be between 1 and 2^32"); > } I'll remove it. Thanks for all the comments, Felix > > > for (i = 0; i < n_conns; i++) { > > pkt_batch[i] = xzalloc(sizeof(struct dp_packet_batch)); > > dp_packet_batch_init(pkt_batch[i]); > > @@ -322,7 +575,7 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx) > > stopwatch_create(STOPWATCH_FLUSH_FULL_ZONE, SW_US); > > stopwatch_create(STOPWATCH_FLUSH_EMPTY_ZONE, SW_US); > > start = time_msec(); > > - for (i = 0; i < iterations; i++) { > > + for (i = 0; i < n_iterations; i++) { > > /* Testing flushing a full zone */ > > stopwatch_start(STOPWATCH_FLUSH_FULL_ZONE, time_usec()); > > uint16_t zone = 1; > > @@ -597,6 +850,12 @@ static const struct ovs_cmdl_command commands[] = { > > * is rewritten to the SNAT target rather than causing a crash. */ > > {"ftp-alg-large-payload", "", 0, 0, > > test_ftp_alg_large_payload, OVS_RO}, > > + /* Starts 'n_threads' threads. Each thread will open 'n_conns' tcp > > + * connections across 'n_zones' separate conntrack zones. Each conn > > will > > + * send 'n_data_pkts' data packets within each connection. > > + * Each conn will run through conntrack 'iterations' times. */ > > + {"benchmark-tcp", "n_threads n_conns n_zones n_data_pkts iterations", > > 5, 5, > > + test_benchmark_tcp, OVS_RO}, > > > > {NULL, NULL, 0, 0, NULL, OVS_RO}, > > }; > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
