(CC'ing Felix)

Aaron Conole <[email protected]> writes:

> Felix Huettner via dev <[email protected]> writes:
>
>> All previous benchmarks did not test individual connections that
>> are quickly opened and closed across multiple zones. This allows us to
>> better evaluate how much conntrack is optimized for multithreading.
>>
>> Signed-off-by: Felix Huettner <[email protected]>
>> ---
>>  tests/test-conntrack.c | 269 ++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 264 insertions(+), 5 deletions(-)
>>
>> diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
>> index 22db95f91..ae88538cf 100644
>> --- a/tests/test-conntrack.c
>> +++ b/tests/test-conntrack.c
>> @@ -176,10 +176,14 @@ destroy_packets(struct dp_packet_batch *pkt_batch)
>>  struct thread_aux {
>>      pthread_t thread;
>>      unsigned tid;
>> +    bool first;
>> +    uint64_t sleep_counter;
>> +    uint64_t conn_lookup_errors;
>>  };
>>  
>>  static struct conntrack *ct;
>> -static unsigned long n_threads, n_pkts, batch_size;
>> +static unsigned long n_threads, n_pkts, batch_size, n_conns,
>> +                     n_zones, n_data_pkts, n_iterations;
>
> Can we keep these local instead of promoting to global scope?  This
> seems like they could be kept local.
>
>>  static bool change_conn = false;
>>  static struct ovs_barrier barrier;
>>  
>> @@ -193,6 +197,7 @@ ct_thread_main(void *aux_)
>>      size_t i;
>>      long long now = time_msec();
>>  
>> +    ovs_assert(batch_size > 0);
>>      pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, 
>> &dl_type);
>>      ovs_barrier_block(&barrier);
>>      for (i = 0; i < n_pkts; i += batch_size) {
>> @@ -259,10 +264,257 @@ test_benchmark(struct ovs_cmdl_context *ctx)
>>      free(threads);
>>  }
>>  
>> +static struct dp_packet_batch ***
>> +gen_tcp_connections(size_t n_packets_per_conn, unsigned tid)
>> +{
>> +    struct eth_addr eth_src, eth_dst;
>> +    uint16_t sport, dport;
>> +    ovs_be32 ip_src, ip_dst;
>> +    struct dp_packet *pkt;
>> +    size_t i, j;
>> +
>> +    /* Some generic stuff we use for all conns */
>> +    struct eth_addr glob_eth_src = ETH_ADDR_C(00, 01, 02, 03, 04, 05);
>> +    struct eth_addr glob_eth_dst = ETH_ADDR_C(00, 06, 07, 08, 09, 0a);
>> +    ovs_be32 glob_ip_dst = inet_addr("192.168.0.1");
>> +    uint16_t glob_sport = 12345;
>> +    uint16_t glob_dport = 80;
>> +
>> +    struct dp_packet_batch ***pkt_batches_per_pkt = xzalloc(
>> +        n_packets_per_conn * sizeof(*pkt_batches_per_pkt));
>> +    for (i = 0; i < n_packets_per_conn; i++) {
>> +        struct dp_packet_batch **pkt_batches = xzalloc(
>> +            n_conns * sizeof(*pkt_batches));
>> +        pkt_batches_per_pkt[i] = pkt_batches;
>> +
>> +        for (j = 0; j < n_conns; j++) {
>> +            /* We just define 10.X.Y.Y for source ips.
>> +             * X is the thread id and YY is the connection id. */
>> +            ovs_be32 glob_ip_src = htonl(
>> +                    0x0A000000 | ((tid & 0xFF) << 16) | (j & 0xFFFF));
>
> while it isn't common, there may be machines with enough cores to go
> beyond 255, and this will cause a weird kind of wrap.  Maybe limit it?
>
>> +            bool forward = true;
>> +            uint16_t tcp_flags = TCP_ACK;
>> +
>> +            if (i == 0) {
>> +                tcp_flags = TCP_SYN;
>> +            } else if (i == 1) {
>> +                tcp_flags |= TCP_SYN;
>> +                forward = false;
>> +            } else if (i == n_packets_per_conn - 3) {
>> +                tcp_flags |= TCP_FIN;
>> +            } else if (i == n_packets_per_conn - 2) {
>> +                tcp_flags |= TCP_FIN;
>> +                forward = false;
>> +            } else if (i % 2 == 1) {
>> +                /* Data. */
>> +                tcp_flags |= TCP_PSH;
>> +            } else {
>> +                /* ACK for Data. */
>> +                forward = false;
>> +            }
>
> Am I misunderstanding the counter here?
>
> Let's say n_packets_per_conn == 15, then we get:
>
>   [0]          = tcp_flags=tcp_syn
>   [1]          = tcp_flags=tcp_syn|tcp_ack
>   [2,4,6,8,10] = tcp_flags=tcp_ack
>   [3,5,7,9,11] = tcp_flags=tcp_ack|tcp_psh
>
>   [12]         = tcp_flags=tcp_ack|tcp_fin
>   [13]         = tcp_flags=tcp_ack|tcp_fin
>   [14]         = tcp_flags=tcp_ack
>
> Now questions.
>
> There seems to be a weird ending packet here.
>
> For i=2, forward will be false - but shouldn't it be in the forward
> direction (same as i==0).
>
> For n_packets_per_conn == 16, it looks like the last packet will contain
> a tcp_psh flag (and maybe be in the wrong direction)?
>
>> +
>> +            if (forward) {
>> +                eth_src = glob_eth_src;
>> +                eth_dst = glob_eth_dst;
>> +                ip_src = glob_ip_src;
>> +                ip_dst = glob_ip_dst;
>> +                sport = glob_sport;
>> +                dport = glob_dport;
>> +            } else {
>> +                eth_src = glob_eth_dst;
>> +                eth_dst = glob_eth_src;
>> +                ip_src = glob_ip_dst;
>> +                ip_dst = glob_ip_src;
>> +                sport = glob_dport;
>> +                dport = glob_sport;
>> +            }
>> +
>> +            pkt = build_eth_ip_packet(NULL, eth_src, eth_dst,
>> +                                      ip_src, ip_dst,
>> +                                      IPPROTO_TCP, 0);
>> +            build_tcp_packet(pkt, sport, dport, tcp_flags, NULL, 0);
>> +            pkt_batches[j] = xzalloc(sizeof(struct dp_packet_batch));
>> +            dp_packet_batch_init_packet(pkt_batches[j], pkt);
>> +        }
>> +    }
>> +    return pkt_batches_per_pkt;
>> +}
>> +
>> +static void *
>> +ct_thread_tcp_main(void *aux_)
>> +{
>> +    struct thread_aux *aux = aux_;
>> +    size_t i, j;
>> +
>> +    /* Prepare packets to run complete tcp connections.
>> +     * n_data_pkts defines how many normal packets come as part of the
>> +     * connection.
>> +     * So we need to generate:
>> +     * src->dst: SYN
>> +     * dst->src: SYN+ACK
>> +     * src->dst: ACK
>> +     * for each n_data_pkts:
>> +     *   src->dst: Data
>> +     *   dst->src: ACK
>> +     * src->dst: FIN+ACK
>> +     * dst->src: FIN+ACK
>> +     * src->dst: ACK
>> +     *
>> +     * So we need 'n_data_pkts * 2 + 6' packets for each connection.*/
>> +    size_t n_packets_per_conn = n_data_pkts * 2 + 6;
>> +    struct dp_packet_batch ***pkt_batches_per_pkt = gen_tcp_connections(
>> +            n_packets_per_conn, aux->tid);
>> +
>> +    ovs_barrier_block(&barrier);
>> +
>> +    /* Each thread has a zone_offset, so that not all threads try to access
>> + * the same zone at the same time and thereby move in
>> lockstep. That would
>> +     * be unrealistic in real cases. We therefor distribute them as much as
>> +     * possible over all zones.
>> +     * The thread id will generally be only different by 1 between different
>> +     * threads as they are started directly afterwards. So we need to 
>> equally
>> +     * distribute it. */
>> +    uint16_t zone_offset = aux->tid * n_zones / n_threads;
>> +
>> +    for (i = 0; i < n_packets_per_conn * n_iterations; i++) {
>> +        size_t packet_offset = i % n_packets_per_conn;
>> +        if (packet_offset == 0 && aux->first) {
>> +            printf("Iteration: %"PRIuSIZE"/%lu\n",
>> +                i / n_packets_per_conn, n_iterations);
>> +        }
>> +
>> +        struct dp_packet_batch **pkt_batches = pkt_batches_per_pkt[
>> +            i % n_packets_per_conn];
>> +        for (j = 0; j < n_conns; j++) {
>> +            long long now = time_msec();
>> +            uint16_t zone = (j + zone_offset) % n_zones;
>> +            /* We set mark here to some value since it allows us to detect 
>> if
>> +             * conntrack actually inserted the connection or not.
>> +             * If e.g. the conntrack table is full then mark will not be 
>> set.
>> +             * We can not use CT_TRACKED, since it is also set on a full
>> +             * table. */
>> +            uint32_t mark[2] = {aux->tid + 1, 0xFFFF};
>> +
>> +            pkt_metadata_init(&pkt_batches[j]->packets[0]->md, 0);
>> +            bool commit = packet_offset == 0;
>> +            conntrack_execute(ct, pkt_batches[j], htons(ETH_TYPE_IP),
>> +                              false, commit, zone, mark, NULL,
>> +                              NULL, NULL, now, 0);
>> +
>> +            uint8_t ct_state = pkt_batches[j]->packets[0]->md.ct_state;
>> +
>> +            if (packet_offset == 0) {
>> + /* Check if something prevented a new connection to be added to
>> +                 * conntrack we should detect it here. This can generally
>> +                 * happen if the ct_sweep is too slow for all the 
>> connections
>> +                 * we generate. In this case we will just sleep a little and
>> +                 * then try again. */
>> +                if (pkt_batches[j]->packets[0]->md.ct_mark == 0) {
>
> Is it possible that this never converges?  We should have a bail on the
> number of times through this, I think.  Otherwise, we'll keep iterating
> on the same element over and over.
>
>> +                    xnanosleep(1000000);
>> +                    aux->sleep_counter++;
>> +                    j--;
>> +                    continue;
>> +                }
>> +                ovs_assert(ct_state & CS_NEW);
>> +            }
>> +
>> +            if (packet_offset != 0 && (ct_state & CS_ESTABLISHED) == 0) {
>> +                aux->conn_lookup_errors++;
>> +                continue;
>> +            }
>> +
>> +            ovs_assert((ct_state & CS_INVALID) == 0);
>> +            ovs_assert(pkt_batches[j]->packets[0]->md.ct_mark != 0);
>> +
>> +        }
>> +        ovsrcu_quiesce();
>> +    }
>> +
>> +    ovs_barrier_block(&barrier);
>> +
>> +    for (i = 0; i < n_packets_per_conn; i++) {
>> +        for (j = 0; j < n_conns; j++) {
>> +            destroy_packets(pkt_batches_per_pkt[i][j]);
>> +        }
>> +        free(pkt_batches_per_pkt[i]);
>> +    }
>> +    free(pkt_batches_per_pkt);
>> +
>> +    return NULL;
>> +}
>> +
>> +static void
>> +test_benchmark_tcp(struct ovs_cmdl_context *ctx)
>> +{
>> +    struct thread_aux *threads;
>> +    uint64_t sleep_counter = 0;
>> +    uint64_t lookup_errors = 0;
>> +    long long start;
>> +    unsigned i;
>> +
>> +    fatal_signal_init();
>> +
>> +    /* Parse arguments */
>> +    n_threads = strtoul(ctx->argv[1], NULL, 0);
>> +    if (!n_threads) {
>> +        ovs_fatal(0, "n_threads must be at least one");
>> +    }
>> +    n_conns = strtoul(ctx->argv[2], NULL, 0);
>> +    n_zones = strtoul(ctx->argv[3], NULL, 0);
>> +    n_data_pkts = strtoul(ctx->argv[4], NULL, 0);
>> +    n_iterations = strtoul(ctx->argv[5], NULL, 0);
>
> maybe use str_to_ullong here so it gets validation?
>
>> +    if (n_conns == 0 || n_zones == 0) {
>> +        ovs_fatal(0, "n_conns and n_zones must be at least 1");
>> +    }
>> +    if (n_conns > UINT16_MAX) {
>
> NOTE: n_conns can be larger than uint16_max in test_benchmark_zones, so
> having it as a global looks even stranger since the semantic is changing
> based on the test.
>
>> +        ovs_fatal(0, "n_conns may be at most %u", UINT16_MAX);
>> +    }
>> +
>> +    threads = xcalloc(n_threads, sizeof *threads);
>> +    ovs_barrier_init(&barrier, n_threads + 1);
>> +    ct = conntrack_init();
>> +    conntrack_set_sweep_interval(ct, 10);
>> +
>> +    /* Create threads */
>> +    start = time_msec();
>> +    for (i = 0; i < n_threads; i++) {
>> +        threads[i].first = i == 0;
>> +        threads[i].tid = i;
>> +        threads[i].sleep_counter = 0;
>> +        threads[i].conn_lookup_errors = 0;
>> +        threads[i].thread = ovs_thread_create("ct_thread", 
>> ct_thread_tcp_main,
>> +                                              &threads[i]);
>> +    }
>> +    /* Starts the work inside the threads */
>> +    ovs_barrier_block(&barrier);
>> +    printf("initial packet generation time: %lld ms\n", time_msec() - 
>> start);
>> +    start = time_msec();
>> +
>> +    /* Wait for the threads to finish the work */
>> +    ovs_barrier_block(&barrier);
>> +    printf("conntrack:  %5lld ms\n", time_msec() - start);
>> +
>> +    for (i = 0; i < n_threads; i++) {
>> +        xpthread_join(threads[i].thread, NULL);
>> +        sleep_counter += threads[i].sleep_counter;
>> +        lookup_errors += threads[i].conn_lookup_errors;
>> +    }
>> +
>> +    printf("total sleep to wait for conntrack space "
>> +           "(sum over all threads): %" PRIu64 " ms\n", sleep_counter);
>> +    if (lookup_errors) {
>> +        printf("total conntrack lookup errors. Probably the connection 
>> expired"
>> +               "because of a timeout policy: %" PRIu64 "\n", lookup_errors);
>> +    }
>> +
>> +    conntrack_destroy(ct);
>> +    ovs_barrier_destroy(&barrier);
>> +    free(threads);
>> +}
>> +
>>  static void
>>  test_benchmark_zones(struct ovs_cmdl_context *ctx)
>>  {
>> -    unsigned long n_conns, n_zones, iterations;
>>      long long start;
>>      unsigned i, j;
>>      ovs_be16 dl_type;
>> @@ -279,8 +531,8 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx)
>>      if (n_zones == 0 || n_zones >= UINT16_MAX) {
>>          ovs_fatal(0, "n_zones must be between 1 and 2^16");
>>      }
>> -    iterations = strtoul(ctx->argv[3], NULL, 0);
>> -    if (iterations == 0) {
>> +    n_iterations = strtoul(ctx->argv[3], NULL, 0);
>> +    if (n_iterations == 0) {
>>          ovs_fatal(0, "iterations must be greater than 0");
>>      }
>>  
>> @@ -289,6 +541,7 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx)
>>      /* Create initial connection entries */
>>      start = time_msec();
>>      struct dp_packet_batch **pkt_batch = xzalloc(n_conns * sizeof 
>> *pkt_batch);
>> +    ovs_assert(n_conns > 0);
>
> This looks dead.  Earlier in this function, we validate:
>
>     if (n_conns == 0 || n_conns >= UINT32_MAX) {
>         ovs_fatal(0, "n_conns must be between 1 and 2^32");
>     }
>
>>      for (i = 0; i < n_conns; i++) {
>>          pkt_batch[i] = xzalloc(sizeof(struct dp_packet_batch));
>>          dp_packet_batch_init(pkt_batch[i]);
>> @@ -322,7 +575,7 @@ test_benchmark_zones(struct ovs_cmdl_context *ctx)
>>      stopwatch_create(STOPWATCH_FLUSH_FULL_ZONE, SW_US);
>>      stopwatch_create(STOPWATCH_FLUSH_EMPTY_ZONE, SW_US);
>>      start = time_msec();
>> -    for (i = 0; i < iterations; i++) {
>> +    for (i = 0; i < n_iterations; i++) {
>>          /* Testing flushing a full zone */
>>          stopwatch_start(STOPWATCH_FLUSH_FULL_ZONE, time_usec());
>>          uint16_t zone = 1;
>> @@ -597,6 +850,12 @@ static const struct ovs_cmdl_command commands[] = {
>>       * is rewritten to the SNAT target rather than causing a crash. */
>>      {"ftp-alg-large-payload", "", 0, 0,
>>          test_ftp_alg_large_payload, OVS_RO},
>> +    /* Starts 'n_threads' threads. Each thread will open 'n_conns' tcp
>> +     * connections across 'n_zones' separate conntrack zones. Each conn will
>> +     * send 'n_data_pkts' data packets within each connection.
>> +     * Each conn will run through conntrack 'iterations' times. */
>> +    {"benchmark-tcp", "n_threads n_conns n_zones n_data_pkts iterations", 
>> 5, 5,
>> +    test_benchmark_tcp, OVS_RO},
>>  
>>      {NULL, NULL, 0, 0, NULL, OVS_RO},
>>  };

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to