All previous benchmarks did not test individual connections that
are quickly opened and closed across multiple zones. This allows us to
better evaluate how much conntrack is optimized for multithreading.

Signed-off-by: Felix Huettner <[email protected]>
---
 tests/test-conntrack.c | 269 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 264 insertions(+), 5 deletions(-)

diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index 22db95f91..ae88538cf 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -176,10 +176,14 @@ destroy_packets(struct dp_packet_batch *pkt_batch)
 struct thread_aux {
     pthread_t thread;
     unsigned tid;
+    bool first;
+    uint64_t sleep_counter;
+    uint64_t conn_lookup_errors;
 };
 
 static struct conntrack *ct;
-static unsigned long n_threads, n_pkts, batch_size;
+static unsigned long n_threads, n_pkts, batch_size, n_conns,
+                     n_zones, n_data_pkts, n_iterations;
 static bool change_conn = false;
 static struct ovs_barrier barrier;
 
@@ -193,6 +197,7 @@ ct_thread_main(void *aux_)
     size_t i;
     long long now = time_msec();
 
+    ovs_assert(batch_size > 0);
     pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, &dl_type);
     ovs_barrier_block(&barrier);
     for (i = 0; i < n_pkts; i += batch_size) {
@@ -259,10 +264,257 @@ test_benchmark(struct ovs_cmdl_context *ctx)
     free(threads);
 }
 
+static struct dp_packet_batch ***
+gen_tcp_connections(size_t n_packets_per_conn, unsigned tid)
+{
+    struct eth_addr eth_src, eth_dst;
+    uint16_t sport, dport;
+    ovs_be32 ip_src, ip_dst;
+    struct dp_packet *pkt;
+    size_t i, j;
+
+    /* Some generic stuff we use for all conns */
+    struct eth_addr glob_eth_src = ETH_ADDR_C(00, 01, 02, 03, 04, 05);
+    struct eth_addr glob_eth_dst = ETH_ADDR_C(00, 06, 07, 08, 09, 0a);
+    ovs_be32 glob_ip_dst = inet_addr("192.168.0.1");
+    uint16_t glob_sport = 12345;
+    uint16_t glob_dport = 80;
+
+    struct dp_packet_batch ***pkt_batches_per_pkt = xzalloc(
+        n_packets_per_conn * sizeof(*pkt_batches_per_pkt));
+    for (i = 0; i < n_packets_per_conn; i++) {
+        struct dp_packet_batch **pkt_batches = xzalloc(
+            n_conns * sizeof(*pkt_batches));
+        pkt_batches_per_pkt[i] = pkt_batches;
+
+        for (j = 0; j < n_conns; j++) {
+            /* We just define 10.X.Y.Y for source ips.
+             * X is the thread id and YY is the connection id. */
+            ovs_be32 glob_ip_src = htonl(
+                    0x0A000000 | ((tid & 0xFF) << 16) | (j & 0xFFFF));
+            bool forward = true;
+            uint16_t tcp_flags = TCP_ACK;
+
+            if (i == 0) {
+                tcp_flags = TCP_SYN;
+            } else if (i == 1) {
+                tcp_flags |= TCP_SYN;
+                forward = false;
+            } else if (i == n_packets_per_conn - 3) {
+                tcp_flags |= TCP_FIN;
+            } else if (i == n_packets_per_conn - 2) {
+                tcp_flags |= TCP_FIN;
+                forward = false;
+            } else if (i % 2 == 1) {
+                /* Data. */
+                tcp_flags |= TCP_PSH;
+            } else {
+                /* ACK for Data. */
+                forward = false;
+            }
+
+            if (forward) {
+                eth_src = glob_eth_src;
+                eth_dst = glob_eth_dst;
+                ip_src = glob_ip_src;
+                ip_dst = glob_ip_dst;
+                sport = glob_sport;
+                dport = glob_dport;
+            } else {
+                eth_src = glob_eth_dst;
+                eth_dst = glob_eth_src;
+                ip_src = glob_ip_dst;
+                ip_dst = glob_ip_src;
+                sport = glob_dport;
+                dport = glob_sport;
+            }
+
+            pkt = build_eth_ip_packet(NULL, eth_src, eth_dst,
+                                      ip_src, ip_dst,
+                                      IPPROTO_TCP, 0);
+            build_tcp_packet(pkt, sport, dport, tcp_flags, NULL, 0);
+            pkt_batches[j] = xzalloc(sizeof(struct dp_packet_batch));
+            dp_packet_batch_init_packet(pkt_batches[j], pkt);
+        }
+    }
+    return pkt_batches_per_pkt;
+}
+
+static void *
+ct_thread_tcp_main(void *aux_)
+{
+    struct thread_aux *aux = aux_;
+    size_t i, j;
+
+    /* Prepare packets to run complete tcp connections.
+     * n_data_pkts defines how many normal packets come as part of the
+     * connection.
+     * So we need to generate:
+     * src->dst: SYN
+     * dst->src: SYN+ACK
+     * src->dst: ACK
+     * for each n_data_pkts:
+     *   src->dst: Data
+     *   dst->src: ACK
+     * src->dst: FIN+ACK
+     * dst->src: FIN+ACK
+     * src->dst: ACK
+     *
+     * So we need 'n_data_pkts * 2 + 6' packets for each connection.*/
+    size_t n_packets_per_conn = n_data_pkts * 2 + 6;
+    struct dp_packet_batch ***pkt_batches_per_pkt = gen_tcp_connections(
+            n_packets_per_conn, aux->tid);
+
+    ovs_barrier_block(&barrier);
+
+    /* Each thread has a zone_offset, so that not all threads try to access
+     * the same zone at the same time and thereby move in lockstep. That would
+     * be unrealistic in real cases. We therefor distribute them as much as
+     * possible over all zones.
+     * The thread id will generally be only different by 1 between different
+     * threads as they are started directly afterwards. So we need to equally
+     * distribute it. */
+    uint16_t zone_offset = aux->tid * n_zones / n_threads;
+
+    for (i = 0; i < n_packets_per_conn * n_iterations; i++) {
+        size_t packet_offset = i % n_packets_per_conn;
+        if (packet_offset == 0 && aux->first) {
+            printf("Iteration: %"PRIuSIZE"/%lu\n",
+                i / n_packets_per_conn, n_iterations);
+        }
+
+        struct dp_packet_batch **pkt_batches = pkt_batches_per_pkt[
+            i % n_packets_per_conn];
+        for (j = 0; j < n_conns; j++) {
+            long long now = time_msec();
+            uint16_t zone = (j + zone_offset) % n_zones;
+            /* We set mark here to some value since it allows us to detect if
+             * conntrack actually inserted the connection or not.
+             * If e.g. the conntrack table is full then mark will not be set.
+             * We can not use CT_TRACKED, since it is also set on a full
+             * table. */
+            uint32_t mark[2] = {aux->tid + 1, 0xFFFF};
+
+            pkt_metadata_init(&pkt_batches[j]->packets[0]->md, 0);
+            bool commit = packet_offset == 0;
+            conntrack_execute(ct, pkt_batches[j], htons(ETH_TYPE_IP),
+                              false, commit, zone, mark, NULL,
+                              NULL, NULL, now, 0);
+
+            uint8_t ct_state = pkt_batches[j]->packets[0]->md.ct_state;
+
+            if (packet_offset == 0) {
+                /* Check if something prevented a new connection to be added to
+                 * conntrack we should detect it here. This can generally
+                 * happen if the ct_sweep is too slow for all the connections
+                 * we generate. In this case we will just sleep a little and
+                 * then try again. */
+                if (pkt_batches[j]->packets[0]->md.ct_mark == 0) {
+                    xnanosleep(1000000);
+                    aux->sleep_counter++;
+                    j--;
+                    continue;
+                }
+                ovs_assert(ct_state & CS_NEW);
+            }
+
+            if (packet_offset != 0 && (ct_state & CS_ESTABLISHED) == 0) {
+                aux->conn_lookup_errors++;
+                continue;
+            }
+
+            ovs_assert((ct_state & CS_INVALID) == 0);
+            ovs_assert(pkt_batches[j]->packets[0]->md.ct_mark != 0);
+
+        }
+        ovsrcu_quiesce();
+    }
+
+    ovs_barrier_block(&barrier);
+
+    for (i = 0; i < n_packets_per_conn; i++) {
+        for (j = 0; j < n_conns; j++) {
+            destroy_packets(pkt_batches_per_pkt[i][j]);
+        }
+        free(pkt_batches_per_pkt[i]);
+    }
+    free(pkt_batches_per_pkt);
+
+    return NULL;
+}
+
+static void
+test_benchmark_tcp(struct ovs_cmdl_context *ctx)
+{
+    struct thread_aux *threads;
+    uint64_t sleep_counter = 0;
+    uint64_t lookup_errors = 0;
+    long long start;
+    unsigned i;
+
+    fatal_signal_init();
+
+    /* Parse arguments */
+    n_threads = strtoul(ctx->argv[1], NULL, 0);
+    if (!n_threads) {
+        ovs_fatal(0, "n_threads must be at least one");
+    }
+    n_conns = strtoul(ctx->argv[2], NULL, 0);
+    n_zones = strtoul(ctx->argv[3], NULL, 0);
+    n_data_pkts = strtoul(ctx->argv[4], NULL, 0);
+    n_iterations = strtoul(ctx->argv[5], NULL, 0);
+    if (n_conns == 0 || n_zones == 0) {
+        ovs_fatal(0, "n_conns and n_zones must be at least 1");
+    }
+    if (n_conns > UINT16_MAX) {
+        ovs_fatal(0, "n_conns may be at most %u", UINT16_MAX);
+    }
+
+    threads = xcalloc(n_threads, sizeof *threads);
+    ovs_barrier_init(&barrier, n_threads + 1);
+    ct = conntrack_init();
+    conntrack_set_sweep_interval(ct, 10);
+
+    /* Create threads */
+    start = time_msec();
+    for (i = 0; i < n_threads; i++) {
+        threads[i].first = i == 0;
+        threads[i].tid = i;
+        threads[i].sleep_counter = 0;
+        threads[i].conn_lookup_errors = 0;
+        threads[i].thread = ovs_thread_create("ct_thread", ct_thread_tcp_main,
+                                              &threads[i]);
+    }
+    /* Starts the work inside the threads */
+    ovs_barrier_block(&barrier);
+    printf("initial packet generation time: %lld ms\n", time_msec() - start);
+    start = time_msec();
+
+    /* Wait for the threads to finish the work */
+    ovs_barrier_block(&barrier);
+    printf("conntrack:  %5lld ms\n", time_msec() - start);
+
+    for (i = 0; i < n_threads; i++) {
+        xpthread_join(threads[i].thread, NULL);
+        sleep_counter += threads[i].sleep_counter;
+        lookup_errors += threads[i].conn_lookup_errors;
+    }
+
+    printf("total sleep to wait for conntrack space "
+           "(sum over all threads): %" PRIu64 " ms\n", sleep_counter);
+    if (lookup_errors) {
+        printf("total conntrack lookup errors. Probably the connection expired"
+               "because of a timeout policy: %" PRIu64 "\n", lookup_errors);
+    }
+
+    conntrack_destroy(ct);
+    ovs_barrier_destroy(&barrier);
+    free(threads);
+}
+
 static void
 test_benchmark_zones(struct ovs_cmdl_context *ctx)
 {
-    unsigned long n_conns, n_zones, iterations;
     long long start;
     unsigned i, j;
     ovs_be16 dl_type;
@@ -279,8 +531,8 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx)
     if (n_zones == 0 || n_zones >= UINT16_MAX) {
         ovs_fatal(0, "n_zones must be between 1 and 2^16");
     }
-    iterations = strtoul(ctx->argv[3], NULL, 0);
-    if (iterations == 0) {
+    n_iterations = strtoul(ctx->argv[3], NULL, 0);
+    if (n_iterations == 0) {
         ovs_fatal(0, "iterations must be greater than 0");
     }
 
@@ -289,6 +541,7 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx)
     /* Create initial connection entries */
     start = time_msec();
     struct dp_packet_batch **pkt_batch = xzalloc(n_conns * sizeof *pkt_batch);
+    ovs_assert(n_conns > 0);
     for (i = 0; i < n_conns; i++) {
         pkt_batch[i] = xzalloc(sizeof(struct dp_packet_batch));
         dp_packet_batch_init(pkt_batch[i]);
@@ -322,7 +575,7 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx)
     stopwatch_create(STOPWATCH_FLUSH_FULL_ZONE, SW_US);
     stopwatch_create(STOPWATCH_FLUSH_EMPTY_ZONE, SW_US);
     start = time_msec();
-    for (i = 0; i < iterations; i++) {
+    for (i = 0; i < n_iterations; i++) {
         /* Testing flushing a full zone */
         stopwatch_start(STOPWATCH_FLUSH_FULL_ZONE, time_usec());
         uint16_t zone = 1;
@@ -597,6 +850,12 @@ static const struct ovs_cmdl_command commands[] = {
      * is rewritten to the SNAT target rather than causing a crash. */
     {"ftp-alg-large-payload", "", 0, 0,
         test_ftp_alg_large_payload, OVS_RO},
+    /* Starts 'n_threads' threads. Each thread will open 'n_conns' tcp
+     * connections across 'n_zones' separate conntrack zones. Each conn will
+     * send 'n_data_pkts' data packets within each connection.
+     * Each conn will run through conntrack 'iterations' times. */
+    {"benchmark-tcp", "n_threads n_conns n_zones n_data_pkts iterations", 5, 5,
+    test_benchmark_tcp, OVS_RO},
 
     {NULL, NULL, 0, 0, NULL, OVS_RO},
 };
-- 
2.43.0


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to