Re: [ovs-dev] [ovs-dev, v5, 2/3] dpif-netdev: Detailed performance stats for PMDs

Ilya Maximets Tue, 09 Jan 2018 01:35:58 -0800

On 09.01.2018 12:00, Ilya Maximets wrote:
> +Kevin, +Ian
> 
> On 09.01.2018 11:53, Ilya Maximets wrote:
> Comments inline.
> 
> On 04.01.2018 15:07, Jan Scheurich wrote:> This patch instruments the 
> dpif-netdev datapath to record detailed
>> statistics of what is happening in every iteration of a PMD thread.
>>
>> The collection of detailed statistics can be controlled by a new
>> configuration parameter "other_config:pmd-perf-metrics". By default it
>> is disabled. The run-time overhead, when enabled, is in the order of 1%.
>>
>> The covered metrics per iteration are:
>>   - cycles
>>   - packets
>>   - (rx) batches
>>   - packets/batch
>>   - max. vhostuser qlen
>>   - upcalls
>>   - cycles spent in upcalls
>>
>> This raw recorded data is used threefold:
>>
>> 1. In histograms for each of the following metrics:
>>    - cycles/iteration (log.)
>>    - packets/iteration (log.)
>>    - cycles/packet
>>    - packets/batch
>>    - max. vhostuser qlen (log.)
>>    - upcalls
>>    - cycles/upcall (log)
>>    The histograms bins are divided linear or logarithmic.
>>
>> 2. A cyclic history of the above statistics for 1024 iterations
>>
>> 3. A cyclic history of the cummulative/average values per millisecond
>>    wall clock for the last 1024 milliseconds:
>>    - number of iterations
>>    - avg. cycles/iteration
>>    - packets (Kpps)
>>    - avg. packets/batch
>>    - avg. max vhost qlen
>>    - upcalls
>>    - avg. cycles/upcall
>>
>> The gathered performance statists can be printed at any time with the
>> new CLI command
>>
>> ovs-appctl dpif-netdev/pmd-perf-show [-nh] [-it iter_len] [-ms ms_len]
>>      [-pmd core] [dp]
>>
>> The options are
>>
>> -nh:            Suppress the histograms
>> -it iter_len:   Display the last iter_len iteration stats
>> -ms ms_len:     Display the last ms_len millisecond stats
>> -pmd core:      Display only
>>
>> The performance statistics are reset with the existing
>> dpif-netdev/pmd-stats-clear command.
>>
>> The output always contains the following global PMD statistics,
>> similar to the pmd-stats-show command:
>>
>> Time: 15:24:55.270
>> Measurement duration: 1.008 s
>>
>> pmd thread numa_id 0 core_id 1:
>>
>>   Cycles:            2419034712  (2.40 GHz)
>>   Iterations:            572817  (1.76 us/it)
>>   - idle:                486808  (15.9 % cycles)
>>   - busy:                 86009  (84.1 % cycles)
>>   Packets:              2399607  (2381 Kpps, 848 cycles/pkt)
>>   Datapath passes:      3599415  (1.50 passes/pkt)
>>   - EMC hits:            336472  ( 9.3 %)
>>   - Megaflow hits:      3262943  (90.7 %, 1.00 subtbl lookups/hit)
>>   - Upcalls:                  0  ( 0.0 %, 0.0 us/upcall)
>>   - Lost upcalls:             0  ( 0.0 %)
>>
>> Signed-off-by: Jan Scheurich <[email protected]>
>> ---
>>  lib/dp-packet.h        |   2 +
>>  lib/dpif-netdev-perf.c | 309 
>> ++++++++++++++++++++++++++++++++++++++++++++++++-
>>  lib/dpif-netdev-perf.h | 173 ++++++++++++++++++++++++++-
>>  lib/dpif-netdev.c      | 158 +++++++++++++++++++++++--
>>  lib/netdev-dpdk.c      |  23 +++-
>>  lib/netdev-dpdk.h      |  14 +++
>>  ofproto/ofproto-dpif.c |   3 +-
>>  7 files changed, 664 insertions(+), 18 deletions(-)
>>
>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>> index b4b721c..7950247 100644
>> --- a/lib/dp-packet.h
>> +++ b/lib/dp-packet.h
>> @@ -695,8 +695,10 @@ enum { NETDEV_MAX_BURST = 32 }; /* Maximum number 
>> packets in a batch. */
>>  
>>  struct dp_packet_batch {
>>      size_t count;
>> +    size_t qfill; /* Number of packets in queue when reading rx burst. */
> This change looks completely wrog for me. 'qfill' should not be here.
> This and all the related stuff in further code turns internatls of netdev-dpdk
> inside out and pushes them to the higher level. I don't like this.
> 
> Also, currently there are at least 3 patch-sets in OVS mail list targeted to 
> get
> some statisics and performance counters.
> Can we just use approach similar to "netdev: Custom statistics." ?
> (https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/342724.html)
> 
> We may collect drops or count high values of qfill using coverage counters or
> in some other way.
> 
> 'qfill', IMHO, should not belong to generic dp_packet_batch structure.
> Butches are not for this.
> 
>>      bool trunc; /* true if the batch needs truncate. */
>>      struct dp_packet *packets[NETDEV_MAX_BURST];
>> +
> Not needed empty line.
> 
>>  };
>>  
>>  static inline void
>> diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
>> index 7d8b7b2..a66a48c 100644
>> --- a/lib/dpif-netdev-perf.c
>> +++ b/lib/dpif-netdev-perf.c
>> @@ -15,6 +15,7 @@
>>   */
>>  
>>  #include <config.h>
>> +#include <stdint.h>
>>  
>>  #ifdef DPDK_NETDEV
>>  #include <rte_cycles.h>
>> @@ -27,13 +28,307 @@
>>  
>>  VLOG_DEFINE_THIS_MODULE(pmd_perf);
>>  
>> +#ifdef DPDK_NETDEV
>> +static uint64_t
>> +get_tsc_hz(void)
>> +{
>> +    return rte_get_tsc_hz();
>> +}
>> +#else
>> +static uint64_t
>> +read_tsc(void)
>> +{
>> +    register uint64_t tsc asm("eax");
>> +    asm volatile (".byte 15, 49" : : : "eax", "edx");
>> +    return tsc;
>> +}
> This breaks non-x86 build:
> 
> lib/dpif-netdev-perf.c:58:31: error: unknown register name 'eax' in asm
>     register uint64_t tsc asm("eax");
>                               ^
> lib/dpif-netdev-perf.c:59:40: error: unknown register name 'eax' in asm
>     asm volatile (".byte 15, 49" : : : "eax", "edx");
> 
>> +
>> +static uint64_t
>> +get_tsc_hz(void)
>> +{
>> +    uint64_t tsc_start = read_tsc();
>> +    xsleep(1);
>> +    return read_tsc() - tsc_start;
>> +}
>> +#endif
>> +
>> +static void
>> +histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max)
>> +{
>> +    int i;
>> +
>> +    ovs_assert(min < max);
>> +    for (i = 0; i < NUM_BINS-1; i++) {
>> +        hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2);
>> +    }
>> +    hist->wall[NUM_BINS-1] = UINT32_MAX;
>> +}
>> +
>> +static void
>> +histogram_walls_set_log(struct histogram *hist, uint32_t min, uint32_t max)
>> +{
>> +    int i, start, bins, wall;
>> +    double log_min, log_max;
>> +
>> +    ovs_assert(min < max);
>> +    if (min > 0) {
>> +        log_min = log(min);
>> +        log_max = log(max);
>> +        start = 0;
>> +        bins = NUM_BINS - 1;
>> +    } else {
>> +        hist->wall[0] = 0;
>> +        log_min = log(1);
>> +        log_max = log(max);
>> +        start = 1;
>> +        bins = NUM_BINS - 2;
>> +    }
>> +    wall = start;
>> +    for (i = 0; i < bins; i++) {
>> +        /* Make sure each wall is monotonically increasing. */
>> +        wall = MAX(wall, exp(log_min + (i * (log_max - log_min)) / 
>> (bins-1)));
>> +        hist->wall[start + i] = wall++;
>> +    }
>> +    if (hist->wall[NUM_BINS-2] < max) {
>> +        hist->wall[NUM_BINS-2] = max;
>> +    }
>> +    hist->wall[NUM_BINS-1] = UINT32_MAX;
>> +}
>> +
>> +uint64_t
>> +histogram_samples(const struct histogram *hist)
>> +{
>> +    uint64_t samples = 0;
>> +
>> +    for (int i = 0; i < NUM_BINS; i++) {
>> +        samples += hist->bin[i];
>> +    }
>> +    return samples;
>> +}
>> +
>> +static void
>> +histogram_clear(struct histogram *hist)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < NUM_BINS; i++) {
>> +        hist->bin[i] = 0;
>> +    }
>> +}
>> +
>> +static void
>> +history_init(struct history *h)
>> +{
>> +    memset(h, 0, sizeof(*h));
>> +}
>> +
>>  void
>>  pmd_perf_stats_init(struct pmd_perf_stats *s) {
>> -    memset(s, 0 , sizeof(*s));
>> +    /* The struct has been zeroed at allocation. */
>> +    histogram_walls_set_log(&s->cycles, 500, 24000000);
>> +    histogram_walls_set_log(&s->pkts, 0, 1000);
>> +    histogram_walls_set_lin(&s->cycles_per_pkt, 100, 30000);
>> +    histogram_walls_set_lin(&s->pkts_per_batch, 0, 32);
>> +    histogram_walls_set_lin(&s->upcalls, 0, 30);
>> +    histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000);
>> +    histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
>>      s->start_ms = time_msec();
>>  }
>>  
>>  void
>> +pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
>> +                              double duration)
>> +{
>> +    uint64_t stats[PMD_N_STATS];
>> +    double us_per_cycle = 1000000.0 / get_tsc_hz();
>> +
>> +    if (duration == 0) {
>> +        return;
>> +    }
>> +
>> +    pmd_perf_read_counters(s, stats);
>> +    uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] +
>> +                          stats[PMD_CYCLES_ITER_BUSY];
>> +    uint64_t packets = stats[PMD_STAT_RECV];
>> +    uint64_t passes = stats[PMD_STAT_RECV] +
>> +                      stats[PMD_STAT_RECIRC];
>> +    uint64_t upcalls = stats[PMD_STAT_MISS];
>> +    uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL];
>> +    uint64_t tot_iter = histogram_samples(&s->pkts);
>> +    uint64_t idle_iter = s->pkts.bin[0];
>> +    uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0;
>> +
>> +    ds_put_format(str,
>> +            "  Cycles:          %12"PRIu64"  (%.2f GHz)\n"
>> +            "  Iterations:      %12"PRIu64"  (%.2f us/it)\n"
>> +            "  - idle:          %12"PRIu64"  (%4.1f %% cycles)\n"
>> +            "  - busy:          %12"PRIu64"  (%4.1f %% cycles)\n",
>> +            tot_cycles, (tot_cycles / duration) / 1E9,
>> +            tot_iter, tot_cycles * us_per_cycle / tot_iter,
>> +            idle_iter,
>> +            100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
>> +            busy_iter,
>> +            100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
>> +    if (packets > 0) {
>> +        ds_put_format(str,
>> +            "  Packets:         %12"PRIu64"  (%.0f Kpps, %.0f cycles/pkt)\n"
>> +            "  Datapath passes: %12"PRIu64"  (%.2f passes/pkt)\n"
>> +            "  - EMC hits:      %12"PRIu64"  (%4.1f %%)\n"
>> +            "  - Megaflow hits: %12"PRIu64"  (%4.1f %%, %.2f subtbl 
>> lookups/"
>> +                                                                     
>> "hit)\n"
>> +            "  - Upcalls:       %12"PRIu64"  (%4.1f %%, %.1f us/upcall)\n"
>> +            "  - Lost upcalls:  %12"PRIu64"  (%4.1f %%)\n"
>> +            "\n",
>> +            packets, (packets / duration) / 1000,
>> +            1.0 * stats[PMD_CYCLES_ITER_BUSY] / packets,
>> +            passes, packets ? 1.0 * passes / packets : 0,
>> +            stats[PMD_STAT_EXACT_HIT],
>> +            100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
>> +            stats[PMD_STAT_MASKED_HIT],
>> +            100.0 * stats[PMD_STAT_MASKED_HIT] / passes,
>> +            stats[PMD_STAT_MASKED_HIT]
>> +            ? 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / 
>> stats[PMD_STAT_MASKED_HIT]
>> +            : 0,
>> +            upcalls, 100.0 * upcalls / passes,
>> +            upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0,
>> +            stats[PMD_STAT_LOST],
>> +            100.0 * stats[PMD_STAT_LOST] / passes);
>> +    } else {
>> +        ds_put_format(str,
>> +                "  Packets:         %12"PRIu64"\n"
>> +                "\n",
>> +                0UL);
>> +    }
>> +}
>> +
>> +void
>> +pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
>> +{
>> +    int i;
>> +
>> +    ds_put_cstr(str, "Histograms\n");
>> +    ds_put_format(str,
>> +                  "   %-21s  %-21s  %-21s  %-21s  %-21s  %-21s  %-21s\n",
>> +                  "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
>> +                  "max vhost qlen", "upcalls/it", "cycles/upcall");
>> +    for (i = 0; i < NUM_BINS-1; i++) {
>> +        ds_put_format(str,
>> +            "   %-9d %-11"PRIu64"  %-9d %-11"PRIu64"  %-9d %-11"PRIu64
>> +            "  %-9d %-11"PRIu64"  %-9d %-11"PRIu64"  %-9d %-11"PRIu64
>> +            "  %-9d %-11"PRIu64"\n",
>> +            s->cycles.wall[i], s->cycles.bin[i],
>> +            s->pkts.wall[i],s->pkts.bin[i],
>> +            s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i],
>> +            s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
>> +            s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
>> +            s->upcalls.wall[i], s->upcalls.bin[i],
>> +            s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
>> +    }
>> +    ds_put_format(str,
>> +                  "   %-9s %-11"PRIu64"  %-9s %-11"PRIu64"  %-9s %-11"PRIu64
>> +                  "  %-9s %-11"PRIu64"  %-9s %-11"PRIu64"  %-9s %-11"PRIu64
>> +                  "  %-9s %-11"PRIu64"\n",
>> +                  ">", s->cycles.bin[i],
>> +                  ">", s->pkts.bin[i],
>> +                  ">", s->cycles_per_pkt.bin[i],
>> +                  ">", s->pkts_per_batch.bin[i],
>> +                  ">", s->max_vhost_qfill.bin[i],
>> +                  ">", s->upcalls.bin[i],
>> +                  ">", s->cycles_per_upcall.bin[i]);
>> +    if (s->totals.iterations > 0) {
>> +        ds_put_cstr(str,
>> +                    "-----------------------------------------------------"
>> +                    "-----------------------------------------------------"
>> +                    "------------------------------------------------\n");
>> +        ds_put_format(str,
>> +                      "   %-21s  %-21s  %-21s  %-21s  %-21s  %-21s  
>> %-21s\n",
>> +                      "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
>> +                      "vhost qlen", "upcalls/it", "cycles/upcall");
>> +        ds_put_format(str,
>> +                      "   %-21"PRIu64"  %-21.5f  %-21"PRIu64
>> +                      "  %-21.5f  %-21.5f  %-21.5f  %-21"PRIu32"\n",
>> +                      s->totals.cycles / s->totals.iterations,
>> +                      1.0 * s->totals.pkts / s->totals.iterations,
>> +                      s->totals.pkts
>> +                          ? s->totals.busy_cycles / s->totals.pkts : 0,
>> +                      s->totals.batches
>> +                          ? 1.0 * s->totals.pkts / s->totals.batches : 0,
>> +                      1.0 * s->totals.max_vhost_qfill / 
>> s->totals.iterations,
>> +                      1.0 * s->totals.upcalls / s->totals.iterations,
>> +                      s->totals.upcalls
>> +                          ? s->totals.upcall_cycles / s->totals.upcalls : 
>> 0);
>> +    }
>> +}
>> +
>> +void
>> +pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
>> +                                  int n_iter)
>> +{
>> +    struct iter_stats *is;
>> +    size_t index;
>> +    int i;
>> +
>> +    if (n_iter == 0) {
>> +        return;
>> +    }
>> +    ds_put_format(str, "   %-17s   %-10s   %-10s   %-10s   %-10s   "
>> +                  "%-10s   %-10s   %-10s\n",
>> +                  "tsc", "cycles", "packets", "cycles/pkt", "pkts/batch",
>> +                  "vhost qlen", "upcalls", "cycles/upcall");
>> +    for (i = 1; i <= n_iter; i++) {
>> +        index = (s->iterations.idx + HISTORY_LEN - i) % HISTORY_LEN;
>> +        is = &s->iterations.sample[index];
>> +        ds_put_format(str,
>> +                      "   %-17"PRIu64"   %-11"PRIu64"  %-11"PRIu32
>> +                      "  %-11"PRIu64"  %-11"PRIu32"  %-11"PRIu32
>> +                      "  %-11"PRIu32"  %-11"PRIu32"\n",
>> +                      is->timestamp,
>> +                      is->cycles,
>> +                      is->pkts,
>> +                      is->pkts ? is->cycles / is->pkts : 0,
>> +                      is->batches ? is->pkts / is->batches : 0,
>> +                      is->max_vhost_qfill,
>> +                      is->upcalls,
>> +                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
>> +    }
>> +}
>> +
>> +void
>> +pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int 
>> n_ms)
>> +{
>> +    struct iter_stats *is;
>> +    size_t index;
>> +    int i;
>> +
>> +    if (n_ms == 0) {
>> +        return;
>> +    }
>> +    ds_put_format(str,
>> +                  "   %-12s   %-10s   %-10s   %-10s   %-10s"
>> +                  "   %-10s   %-10s   %-10s   %-10s\n",
>> +                  "ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
>> +                  "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
>> +    for (i = 1; i <= n_ms; i++) {
>> +        index = (s->milliseconds.idx + HISTORY_LEN - i) % HISTORY_LEN;
>> +        is = &s->milliseconds.sample[index];
>> +        ds_put_format(str,
>> +                      "   %-12"PRIu64"   %-11"PRIu32"  %-11"PRIu64
>> +                      "  %-11"PRIu32"  %-11"PRIu64"  %-11"PRIu32
>> +                      "  %-11"PRIu32"  %-11"PRIu32"  %-11"PRIu32"\n",
>> +                      is->timestamp,
>> +                      is->iterations,
>> +                      is->iterations ? is->cycles / is->iterations : 0,
>> +                      is->pkts,
>> +                      is->pkts ? is->busy_cycles / is->pkts : 0,
>> +                      is->batches ? is->pkts / is->batches : 0,
>> +                      is->iterations
>> +                          ? is->max_vhost_qfill / is->iterations : 0,
>> +                      is->upcalls,
>> +                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
>> +    }
>> +}
>> +
>> +void
>>  pmd_perf_read_counters(struct pmd_perf_stats *s,
>>                         uint64_t stats[PMD_N_STATS])
>>  {
>> @@ -59,8 +354,20 @@ void
>>  pmd_perf_stats_clear(struct pmd_perf_stats *s)
>>  {
>>      s->start_ms = 0; /* Marks start of clearing. */
>> +    memset(&s->current, 0 , sizeof(struct iter_stats));
>> +    memset(&s->totals, 0 , sizeof(struct iter_stats));
>>      for (int i = 0; i < PMD_N_STATS; i++) {
>>          atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
>>      }
>> +    histogram_clear(&s->cycles);
>> +    histogram_clear(&s->pkts);
>> +    histogram_clear(&s->cycles_per_pkt);
>> +    histogram_clear(&s->upcalls);
>> +    histogram_clear(&s->cycles_per_upcall);
>> +    histogram_clear(&s->pkts_per_batch);
>> +    histogram_clear(&s->max_vhost_qfill);
>> +    history_init(&s->iterations);
>> +    history_init(&s->milliseconds);
>>      s->start_ms = time_msec(); /* Clearing finished. */
>> +    s->milliseconds.sample[0].timestamp = s->start_ms;
>>  }
>> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
>> index fa5cf37..3611723 100644
>> --- a/lib/dpif-netdev-perf.h
>> +++ b/lib/dpif-netdev-perf.h
>> @@ -33,6 +33,11 @@
>>  extern "C" {
>>  #endif
>>  
>> +#define NUM_BINS 32             /* Number of histogram bins. */
>> +#define HISTORY_LEN 1000        /* Length of recorded history
>> +                                   (iterations and ms). */
>> +#define DEF_HIST_SHOW 20        /* Default number of history samples. */
>> +
>>  enum pmd_stat_type {
>>      PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
>>      PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
>> @@ -62,6 +67,28 @@ enum pmd_stat_type {
>>      PMD_N_STATS
>>  };
>>  
>> +struct histogram {
>> +    uint32_t wall[NUM_BINS];
>> +    uint64_t bin[NUM_BINS];
>> +};
>> +
>> +struct iter_stats {
>> +    uint64_t timestamp;
>> +    uint64_t cycles;
>> +    uint64_t busy_cycles;
>> +    uint32_t iterations;
>> +    uint32_t pkts;
>> +    uint32_t upcalls;
>> +    uint32_t upcall_cycles;
>> +    uint32_t batches;
>> +    uint32_t max_vhost_qfill;
>> +};
>> +
>> +struct history {
>> +    uint64_t idx;
>> +    struct iter_stats sample[HISTORY_LEN];
>> +};
>> +
>>  struct pmd_counters {
>>      atomic_uint64_t n[PMD_N_STATS];     /* Indexed by PMD_STAT_*. */
>>      uint64_t zero[PMD_N_STATS];
>> @@ -71,6 +98,17 @@ struct pmd_perf_stats {
>>      uint64_t start_ms;
>>      uint64_t last_tsc;
>>      struct pmd_counters counters;
>> +    struct iter_stats current;
>> +    struct iter_stats totals;
>> +    struct histogram cycles;
>> +    struct histogram pkts;
>> +    struct histogram cycles_per_pkt;
>> +    struct histogram upcalls;
>> +    struct histogram cycles_per_upcall;
>> +    struct histogram pkts_per_batch;
>> +    struct histogram max_vhost_qfill;
>> +    struct history iterations;
>> +    struct history milliseconds;
>>  };
>>  
>>  enum pmd_info_type;
>> @@ -98,6 +136,63 @@ pmd_perf_update_counter(struct pmd_perf_stats *s,
>>      atomic_store_relaxed(&s->counters.n[counter], tmp);
>>  }
>>  
>> +void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
>> +                                   double duration);
>> +void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
>> +void pmd_perf_format_iteration_history(struct ds *str,
>> +                                       struct pmd_perf_stats *s,
>> +                                       int n_iter);
>> +void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
>> +                                int n_ms);
>> +
>> +void pmd_perf_show(struct unixctl_conn *conn, int argc,
>> +                   const char *argv[],
>> +                   void *aux OVS_UNUSED);
>> +static inline void
>> +histogram_add_sample(struct histogram *hist, uint32_t val)
>> +{
>> +    /* TODO: Can do better with binary search? */
>> +    for (int i = 0; i < NUM_BINS-1; i++) {
>> +        if (val <= hist->wall[i]) {
>> +            hist->bin[i]++;
>> +            return;
>> +        }
>> +    }
>> +    hist->bin[NUM_BINS-1]++;
>> +}
>> +
>> +uint64_t histogram_samples(const struct histogram *hist);
>> +
>> +static inline struct iter_stats *
>> +history_current(struct history *h)
>> +{
>> +    return &h->sample[h->idx];
>> +}
>> +
>> +static inline struct iter_stats *
>> +history_next(struct history *h)
>> +{
>> +    struct iter_stats *next;
>> +
>> +    h->idx++;
>> +    if (h->idx == HISTORY_LEN) {
>> +        h->idx = 0;
>> +    }
>> +    next = &h->sample[h->idx];
>> +    memset(next, 0, sizeof(*next));
>> +    return next;
>> +}
>> +
>> +static inline struct iter_stats *
>> +history_store(struct history *h, struct iter_stats *is)
>> +{
>> +    if (is) {
>> +        h->sample[h->idx] = *is;
>> +    }
>> +    /* Advance the history pointer */
>> +    return history_next(h);
>> +}
>> +
>>  static inline void
>>  pmd_perf_start_iteration(struct pmd_perf_stats *s, uint64_t now_tsc)
>>  {
>> @@ -105,14 +200,25 @@ pmd_perf_start_iteration(struct pmd_perf_stats *s, 
>> uint64_t now_tsc)
>>          /* Stats not yet fully initialised. */
>>          return;
>>      }
>> -    s->last_tsc = now_tsc;
>> +    memset(&s->current, 0, sizeof(struct iter_stats));
>> +    s->current.timestamp = now_tsc;
>>  }
>>  
>>  static inline void
>>  pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t now_tsc,
>> -                       int packets)
>> +                       int packets, bool full_metrics)
>>  {
>> -    uint64_t cycles = now_tsc - s->last_tsc;
>> +    struct iter_stats *cum_ms;
>> +    uint64_t cycles, cycles_per_pkt = 0;
>> +
>> +    if (OVS_UNLIKELY(s->current.timestamp == 0)) {
>> +        /* Stats were cleared during the ongoing iteration. */
>> +        return;
>> +    }
>> +
>> +    cycles = now_tsc - s->current.timestamp;
>> +    s->current.cycles = cycles;
>> +    s->current.pkts = packets;
>>  
>>      /* No need for atomic updates as only invoked within PMD thread. */
>>      if (packets > 0) {
>> @@ -120,7 +226,68 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, 
>> uint64_t now_tsc,
>>      } else {
>>          s->counters.n[PMD_CYCLES_ITER_IDLE] += cycles;
>>      }
>> +    /* Add iteration sample to histograms. */
>> +    histogram_add_sample(&s->cycles, cycles);
>> +    histogram_add_sample(&s->pkts, packets);
>> +
>> +    if (!full_metrics) {
>> +        return;
>> +    }
>> +
>> +    s->counters.n[PMD_CYCLES_UPCALL] += s->current.upcall_cycles;
>> +
>> +    if (packets > 0) {
>> +        cycles_per_pkt = cycles / packets;
>> +        histogram_add_sample(&s->cycles_per_pkt, cycles_per_pkt);
>> +    }
>> +    if (s->current.batches > 0) {
>> +        histogram_add_sample(&s->pkts_per_batch, packets / 
>> s->current.batches);
>> +    }
>> +    histogram_add_sample(&s->upcalls, s->current.upcalls);
>> +    if (s->current.upcalls > 0) {
>> +        histogram_add_sample(&s->cycles_per_upcall,
>> +                             s->current.upcall_cycles / s->current.upcalls);
>> +    }
>> +    histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);
>> +
>> +    /* Add iteration samples to millisecond stats. */
>> +    cum_ms = history_current(&s->milliseconds);
>> +    cum_ms->iterations++;
>> +    cum_ms->cycles += cycles;
>> +    if (packets > 0) {
>> +        cum_ms->busy_cycles += cycles;
>> +    }
>> +    cum_ms->pkts += s->current.pkts;
>> +    cum_ms->upcalls += s->current.upcalls;
>> +    cum_ms->upcall_cycles += s->current.upcall_cycles;
>> +    cum_ms->batches += s->current.batches;
>> +    cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;
>> +
>> +    /* Store in iteration history. */
>> +    history_store(&s->iterations, &s->current);
>> +    if (now_tsc - s->last_tsc > 10000) {
>> +        /* Check if ms is completed and store in milliseconds history. */
>> +        uint64_t now = time_msec();
>> +        if (now != cum_ms->timestamp) {
>> +            /* Add ms stats to totals. */
>> +            s->totals.iterations += cum_ms->iterations;
>> +            s->totals.cycles += cum_ms->cycles;
>> +            s->totals.busy_cycles += cum_ms->busy_cycles;
>> +            s->totals.pkts += cum_ms->pkts;
>> +            s->totals.upcalls += cum_ms->upcalls;
>> +            s->totals.upcall_cycles += cum_ms->upcall_cycles;
>> +            s->totals.batches += cum_ms->batches;
>> +            s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
>> +            cum_ms = history_next(&s->milliseconds);
>> +            cum_ms->timestamp = now;
>> +        }
>> +        s->last_tsc = now_tsc;
>> +    }
>> +}
>> +
>> +#ifdef  __cplusplus
>>  }
>> +#endif
>>  
>>  #ifdef  __cplusplus
>>  }
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 8b9698a..8f64df3 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -53,6 +53,7 @@
>>  #include "id-pool.h"
>>  #include "latch.h"
>>  #include "netdev.h"
>> +#include "netdev-provider.h"
>>  #include "netdev-vport.h"
>>  #include "netlink.h"
>>  #include "odp-execute.h"
>> @@ -278,6 +279,8 @@ struct dp_netdev {
>>  
>>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
>> +    /* Enable collection of PMD performance metrics. */
>> +    ATOMIC(bool) pmd_perf_metrics;
>>  
>>      /* Protects access to ofproto-dpif-upcall interface during revalidator
>>       * thread synchronization. */
>> @@ -703,6 +706,7 @@ static inline bool emc_entry_alive(struct emc_entry *ce);
>>  static void emc_clear_entry(struct emc_entry *ce);
>>  
>>  static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
>> +static inline bool pmd_perf_metrics_enabled(const struct 
>> dp_netdev_pmd_thread *pmd);
>>  
>>  static void
>>  emc_cache_init(struct emc_cache *flow_cache)
>> @@ -786,7 +790,8 @@ get_dp_netdev(const struct dpif *dpif)
>>  enum pmd_info_type {
>>      PMD_INFO_SHOW_STATS,  /* Show how cpu cycles are spent. */
>>      PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
>> -    PMD_INFO_SHOW_RXQ     /* Show poll-lists of pmd threads. */
>> +    PMD_INFO_SHOW_RXQ,    /* Show poll-lists of pmd threads. */
>> +    PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
>>  };
>>  
>>  static void
>> @@ -873,8 +878,46 @@ pmd_info_show_stats(struct ds *reply,
>>      ds_put_format(reply,
>>                    "\tavg processing cycles per packet: "
>>                    "%.02f (%"PRIu64"/%"PRIu64")\n",
>> -                  stats[PMD_CYCLES_POLL_BUSY] / (double)total_packets,
>> -                  stats[PMD_CYCLES_POLL_BUSY], total_packets);
>> +                  stats[PMD_CYCLES_ITER_BUSY] / (double)total_packets,
>> +                  stats[PMD_CYCLES_ITER_BUSY], total_packets);
>> +}
>> +
>> +static void
>> +pmd_info_show_perf(struct ds *reply,
>> +                   struct dp_netdev_pmd_thread *pmd,
>> +                   struct pmd_perf_params *par)
>> +{
>> +    if (pmd->core_id != NON_PMD_CORE_ID) {
>> +        char *time_str =
>> +                xastrftime_msec("%H:%M:%S.###", time_wall_msec(), true);
>> +        long long now = time_msec();
>> +        double duration = (now - pmd->perf_stats.start_ms) / 1000.0;
>> +
>> +        ds_put_cstr(reply, "\n");
>> +        ds_put_format(reply, "Time: %s\n", time_str);
>> +        ds_put_format(reply, "Measurement duration: %.3f s\n", duration);
>> +        ds_put_cstr(reply, "\n");
>> +        format_pmd_thread(reply, pmd);
>> +        ds_put_cstr(reply, "\n");
>> +        pmd_perf_format_overall_stats(reply, &pmd->perf_stats, duration);
>> +        if (pmd_perf_metrics_enabled(pmd)) {
>> +            if (par->histograms) {
>> +                ds_put_cstr(reply, "\n");
>> +                pmd_perf_format_histograms(reply, &pmd->perf_stats);
>> +            }
>> +            if (par->iter_hist_len > 0) {
>> +                ds_put_cstr(reply, "\n");
>> +                pmd_perf_format_iteration_history(reply, &pmd->perf_stats,
>> +                        par->iter_hist_len);
>> +            }
>> +            if (par->ms_hist_len > 0) {
>> +                ds_put_cstr(reply, "\n");
>> +                pmd_perf_format_ms_history(reply, &pmd->perf_stats,
>> +                        par->ms_hist_len);
>> +            }
>> +        }
>> +        free(time_str);
>> +    }
>>  }
>>  
>>  static int
>> @@ -1079,6 +1122,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int 
>> argc, const char *argv[],
>>              pmd_perf_stats_clear(&pmd->perf_stats);
>>          } else if (type == PMD_INFO_SHOW_STATS) {
>>              pmd_info_show_stats(&reply, pmd);
>> +        } else if (type == PMD_INFO_PERF_SHOW) {
>> +            pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
>>          }
>>      }
>>      free(pmd_list);
>> @@ -1088,6 +1133,48 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int 
>> argc, const char *argv[],
>>      unixctl_command_reply(conn, ds_cstr(&reply));
>>      ds_destroy(&reply);
>>  }
>> +
>> +static void
>> +pmd_perf_show_cmd(struct unixctl_conn *conn, int argc,
>> +                          const char *argv[],
>> +                          void *aux OVS_UNUSED)
>> +{
>> +    struct pmd_perf_params par;
>> +    long int it_hist = 0, ms_hist = 0;
>> +    par.histograms = true;
>> +
>> +    while (argc > 1) {
>> +        if (!strcmp(argv[1], "-nh")) {
>> +            par.histograms = false;
>> +            argc -= 1;
>> +            argv += 1;
>> +        } else if (!strcmp(argv[1], "-it") && argc > 2) {
>> +            it_hist = strtol(argv[2], NULL, 10);
>> +            if (it_hist < 0) {
>> +                it_hist = 0;
>> +            } else if (it_hist > HISTORY_LEN) {
>> +                it_hist = HISTORY_LEN;
>> +            }
>> +            argc -= 2;
>> +            argv += 2;
>> +        } else if (!strcmp(argv[1], "-ms") && argc > 2) {
>> +            ms_hist = strtol(argv[2], NULL, 10);
>> +            if (ms_hist < 0) {
>> +                ms_hist = 0;
>> +            } else if (ms_hist > HISTORY_LEN) {
>> +                ms_hist = HISTORY_LEN;
>> +            }
>> +            argc -= 2;
>> +            argv += 2;
>> +        } else {
>> +            break;
>> +        }
>> +    }
>> +    par.iter_hist_len = it_hist;
>> +    par.ms_hist_len = ms_hist;
>> +    par.command_type = PMD_INFO_PERF_SHOW;
>> +    dpif_netdev_pmd_info(conn, argc, argv, &par);
>> +}
>>  
>>  static int
>>  dpif_netdev_init(void)
>> @@ -1105,6 +1192,12 @@ dpif_netdev_init(void)
>>      unixctl_command_register("dpif-netdev/pmd-rxq-show", "[-pmd core | dp]",
>>                               0, 2, dpif_netdev_pmd_info,
>>                               (void *)&poll_aux);
>> +    unixctl_command_register("dpif-netdev/pmd-perf-show",
>> +                             "[-nh] [-it iter-history-len]"
>> +                             " [-ms ms-history-len]"
>> +                             " [-pmd core | dp]",
>> +                             0, 7, pmd_perf_show_cmd,
>> +                             NULL);
>>      unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]",
>>                               0, 1, dpif_netdev_pmd_rebalance,
>>                               NULL);
>> @@ -2980,6 +3073,18 @@ dpif_netdev_set_config(struct dpif *dpif, const 
>> struct smap *other_config)
>>          }
>>      }
>>  
>> +    bool perf_enabled = smap_get_bool(other_config, "pmd-perf-metrics", 
>> false);
>> +    bool cur_perf_enabled;
>> +    atomic_read_relaxed(&dp->pmd_perf_metrics, &cur_perf_enabled);
>> +    if (perf_enabled != cur_perf_enabled) {
>> +        atomic_store_relaxed(&dp->pmd_perf_metrics, perf_enabled);
>> +        if (perf_enabled) {
>> +            VLOG_INFO("PMD performance metrics collection enabled");
>> +        } else {
>> +            VLOG_INFO("PMD performance metrics collection disabled");
>> +        }
>> +    }
>> +
>>      return 0;
>>  }
>>  
>> @@ -3160,6 +3265,14 @@ cycles_count_intermediate(struct dp_netdev_pmd_thread 
>> *pmd,
>>      }
>>  }
>>  
>> +static inline bool
>> +pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd)
>> +{
>> +    bool pmd_perf_enabled;
>> +    atomic_read_relaxed(&pmd->dp->pmd_perf_metrics, &pmd_perf_enabled);
>> +    return pmd_perf_enabled;
>> +}
>> +
>>  static void
>>  dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
>>                           enum rxq_cycles_counter_type type,
>> @@ -3236,6 +3349,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread 
>> *pmd,
>>                             struct netdev_rxq *rx,
>>                             odp_port_t port_no)
>>  {
>> +    struct pmd_perf_stats *s = &pmd->perf_stats;
>>      struct dp_packet_batch batch;
>>      int error;
>>      int batch_cnt = 0;
>> @@ -3245,8 +3359,25 @@ dp_netdev_process_rxq_port(struct 
>> dp_netdev_pmd_thread *pmd,
>>      if (!error) {
>>          *recirc_depth_get() = 0;
>>          pmd_thread_ctx_time_update(pmd);
>> -
>>          batch_cnt = batch.count;
>> +        if (pmd_perf_metrics_enabled(pmd)) {
>> +            /* Update batch histogram. */
>> +            s->current.batches++;
>> +            histogram_add_sample(&s->pkts_per_batch, batch_cnt);
>> +            /* Update the maximum Rx queue fill level. */
>> +            uint32_t qfill = batch.qfill;
>> +            switch (netdev_dpdk_get_type(netdev_rxq_get_netdev(rx))) {
>> +            case DPDK_DEV_VHOST:
>> +                if (qfill > s->current.max_vhost_qfill) {
>> +                    s->current.max_vhost_qfill = qfill;
>> +                }
>> +                break;
>> +            case DPDK_DEV_ETH:
>> +            default:
>> +                break;
>> +            }
> I'm sorry, but I'll vote to not accept changes like this. This is contraversal
> to all the modular object-oriented architecure of OVS.
> 
> In the worst case you may pass something from this layer to be filled by 
> netdev.
> But I don't really like such approach too.
> 
>> +        }
>> +        /* Process packet batch. */
>>          dp_netdev_input(pmd, &batch, port_no);
>>          dp_netdev_pmd_flush_output_packets(pmd);
>>      } else if (error != EAGAIN && error != EOPNOTSUPP) {
>> @@ -4084,7 +4215,9 @@ reload:
>>      cycles_count_start(pmd);
>>      for (;;) {
>>          uint64_t iter_packets = 0;
>> +
>>          pmd_perf_start_iteration(s, pmd->ctx.last_cycles);
>> +
>>          for (i = 0; i < poll_cnt; i++) {
>>              process_packets =
>>                  dp_netdev_process_rxq_port(pmd, poll_list[i].rxq->rx,
>> @@ -4116,7 +4249,9 @@ reload:
>>              }
>>              cycles_count_intermediate(pmd, NULL, PMD_CYCLES_OVERHEAD);
>>          }
>> -        pmd_perf_end_iteration(s, pmd->ctx.last_cycles, iter_packets);
>> +
>> +        pmd_perf_end_iteration(s, pmd->ctx.last_cycles, iter_packets,
>> +                               pmd_perf_metrics_enabled(pmd));
>>      }
>>  
>>      cycles_count_end(pmd, PMD_CYCLES_OVERHEAD);
>> @@ -5012,6 +5147,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
>>      struct match match;
>>      ovs_u128 ufid;
>>      int error;
>> +    uint64_t cycles = cycles_counter();
>>  
>>      match.tun_md.valid = false;
>>      miniflow_expand(&key->mf, &match.flow);
>> @@ -5065,9 +5201,15 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
>>          ovs_mutex_unlock(&pmd->flow_mutex);
>>          emc_probabilistic_insert(pmd, key, netdev_flow);
>>      }
>> -    /* Only error ENOSPC can reach here. We process the packet but do not
>> -     * install a datapath flow. Treat as successful. */
>> -    return 0;
>> +    if (pmd_perf_metrics_enabled(pmd)) {
>> +        /* Update upcall stats. */
>> +        cycles = cycles_counter() - cycles;
>> +        struct pmd_perf_stats *s = &pmd->perf_stats;
>> +        s->current.upcalls++;
>> +        s->current.upcall_cycles += cycles;
>> +        histogram_add_sample(&s->cycles_per_upcall, cycles);
> Hmm. Why this is done manually? Maybe PMD_CYCLES_UPCALL should be used here?
> 
>> +    }
>> +    return error;
>>  }
>>  
>>  static inline void
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 364f545..d701621 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -36,6 +36,7 @@
>>  #include <rte_mbuf.h>
>>  #include <rte_meter.h>
>>  #include <rte_pci.h>
>> +#include <rte_version.h>
>>  #include <rte_vhost.h>
>>  #include <rte_version.h>
>>  
>> @@ -188,11 +189,6 @@ enum { DPDK_RING_SIZE = 256 };
>>  BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
>>  enum { DRAIN_TSC = 200000ULL };
>>  
>> -enum dpdk_dev_type {
>> -    DPDK_DEV_ETH = 0,
>> -    DPDK_DEV_VHOST = 1,
>> -};
>> -
>>  /* Quality of Service */
>>  
>>  /* An instance of a QoS configuration.  Always associated with a particular
>> @@ -835,6 +831,13 @@ netdev_dpdk_cast(const struct netdev *netdev)
>>      return CONTAINER_OF(netdev, struct netdev_dpdk, up);
>>  }
>>  
>> +enum dpdk_dev_type
>> +netdev_dpdk_get_type(const struct netdev *netdev)
>> +{
>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> +    return dev->type;
>> +}
>> +
>>  static struct netdev *
>>  netdev_dpdk_alloc(void)
>>  {
>> @@ -1644,6 +1647,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>>          return EAGAIN;
>>      }
>>  
>> +    batch->qfill = nb_rx;
>> +
>> +    if (OVS_UNLIKELY(nb_rx == NETDEV_MAX_BURST)) {
>> +        batch->qfill += rte_vhost_rx_queue_count(netdev_dpdk_get_vid(dev),
>> +                                                 qid * VIRTIO_QNUM
>> +                                                     + VIRTIO_TXQ);
> This looks strange.
> Isn't it illogical to spend addtitional time requesting queue count if
> we're already late in packet processing?


And the most important thing at this point:
'netdev_dpdk_get_vid(dev)' without checking may lead to segmentation fault.
See daf22bf7a826 ("netdev-dpdk: Fix calling vhost API with negative vid.")
for details.

> 
>> +    }
>> +
>>      if (policer) {
>>          dropped = nb_rx;
>>          nb_rx = ingress_policer_run(policer,
>> @@ -1683,6 +1694,8 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct 
>> dp_packet_batch *batch)
>>          return EAGAIN;
>>      }
>>  
>> +    batch->qfill = nb_rx;
>> +
>>      if (policer) {
>>          dropped = nb_rx;
>>          nb_rx = ingress_policer_run(policer,
>> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
>> index b7d02a7..2b357db 100644
>> --- a/lib/netdev-dpdk.h
>> +++ b/lib/netdev-dpdk.h
>> @@ -22,11 +22,18 @@
>>  #include "openvswitch/compiler.h"
>>  
>>  struct dp_packet;
>> +struct netdev;
>> +
>> +enum dpdk_dev_type {
>> +    DPDK_DEV_ETH = 0,
>> +    DPDK_DEV_VHOST = 1,
>> +};
>>  
>>  #ifdef DPDK_NETDEV
>>  
>>  void netdev_dpdk_register(void);
>>  void free_dpdk_buf(struct dp_packet *);
>> +enum dpdk_dev_type netdev_dpdk_get_type(const struct netdev *netdev);
>>  
>>  #else
>>  
>> @@ -41,6 +48,13 @@ free_dpdk_buf(struct dp_packet *buf OVS_UNUSED)
>>      /* Nothing */
>>  }
>>  
>> +static inline enum dpdk_dev_type
>> +netdev_dpdk_get_type(const struct netdev *netdev OVS_UNUSED)
>> +{
>> +    /* Nothing to do. Return value zero to make compiler happy. */
>> +    return DPDK_DEV_ETH;
>> +}
>> +
>>  #endif
>>  
>>  #endif /* netdev-dpdk.h */
>> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
>> index 43b7b89..3845b12 100644
>> --- a/ofproto/ofproto-dpif.c
>> +++ b/ofproto/ofproto-dpif.c
>> @@ -5317,7 +5317,8 @@ dpif_show_backer(const struct dpif_backer *backer, 
>> struct ds *ds)
>>  
>>      dpif_get_dp_stats(backer->dpif, &dp_stats);
>>      ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
>> -                  dpif_name(backer->dpif), dp_stats.n_hit, 
>> dp_stats.n_missed);
>> +                  dpif_name(backer->dpif),
>> +                  dp_stats.n_hit + dp_stats.n_mask_hit, dp_stats.n_missed);
>>  
>>      shash_init(&ofproto_shash);
>>      ofprotos = get_ofprotos(&ofproto_shash);
>>
> 
> 
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [ovs-dev, v5, 2/3] dpif-netdev: Detailed performance stats for PMDs

Reply via email to