Acked-by: Ilya Maximets <[email protected]>

On 15.01.2018 14:27, Jan Scheurich wrote:
> Simplify the historically grown TSC cycle counting in PMD threads.
> Cycles are currently counted for the following purposes:
>
> 1. Measure PMD utilization
>
> PMD utilization is defined as the ratio of cycles spent in busy iterations
> (at least one packet received or sent) over the total number of cycles.
>
> This is already done in pmd_perf_start_iteration() and
> pmd_perf_end_iteration() based on a TSC timestamp saved in the current
> iteration at start_iteration() and the actual TSC at end_iteration().
> No dependency on intermediate cycle accounting.
>
> 2. Measure the processing load per RX queue
>
> This comprises the cycles spent on polling and processing packets received
> from the rx queue and the cycles spent on delayed sending of these packets
> to tx queues (with time-based batching).
>
> The previous scheme using cycles_count_start(), cycles_count_intermediate()
> and cycles_count_end(), originally introduced to simplify cycle counting
> and save calls to rte_get_tsc_cycles(), was rather obscuring things.
>
> Replace it by a nestable cycle_timer with start and stop functions to
> embrace a code segment to be timed. The timed code may contain arbitrary
> nested cycle_timers. The duration of nested timers is excluded from the
> outer timer.
>
> The caller must ensure that each call to cycle_timer_start() is
> followed by a call to cycle_timer_stop(). Failure to do so will lead to
> an assertion failure or a memory leak.
>
> The new cycle_timer is used to measure the processing cycles per rx queue.
> This is not yet strictly necessary but will be made use of in a subsequent
> commit.
>
> All cycle count functions and data are relocated to the module
> dpif-netdev-perf.
>
> Signed-off-by: Jan Scheurich <[email protected]>
> ---
>  lib/dpif-netdev-perf.h | 110 +++++++++++++++++++++++++++++++++++++++++----
>  lib/dpif-netdev.c      | 118 ++++++++++++++-----------------------------------
>  2 files changed, 134 insertions(+), 94 deletions(-)
>
> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> index 53d60d3..5993c25 100644
> --- a/lib/dpif-netdev-perf.h
> +++ b/lib/dpif-netdev-perf.h
> @@ -23,6 +23,11 @@
>  #include <string.h>
>  #include <math.h>
>  
> +#ifdef DPDK_NETDEV
> +#include <rte_config.h>
> +#include <rte_cycles.h>
> +#endif
> +
>  #include "openvswitch/vlog.h"
>  #include "ovs-atomic.h"
>  #include "timeval.h"
> @@ -59,10 +64,6 @@ enum pmd_stat_type {
>                               * recirculation. */
>      PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
>      PMD_STAT_SENT_BATCHES,  /* Number of batches sent. */
> -    PMD_CYCLES_POLL_IDLE,   /* Cycles spent unsuccessful polling. */
> -    PMD_CYCLES_POLL_BUSY,   /* Cycles spent successfully polling and
> -                             * processing polled packets. */
> -    PMD_CYCLES_OVERHEAD,    /* Cycles spent for other tasks. */
>      PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
>      PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
>      PMD_N_STATS
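
(Not part of the patch, just to illustrate point 1 above: with only the two
iteration counters left, PMD utilization reduces to busy cycles over total
cycles. A minimal sketch, assuming the counters have already been read out
into a plain array; the helper name is made up for illustration only.)

    static double
    pmd_utilization_sketch(const uint64_t stats[PMD_N_STATS])
    {
        uint64_t busy = stats[PMD_CYCLES_ITER_BUSY];
        uint64_t idle = stats[PMD_CYCLES_ITER_IDLE];
        uint64_t total = busy + idle;

        /* Share of TSC cycles spent in iterations that did real work. */
        return total ? (double) busy / (double) total : 0.0;
    }
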
> @@ -85,11 +86,95 @@ struct pmd_counters {
>  
>  struct pmd_perf_stats {
>      /* Start of the current PMD iteration in TSC cycles.*/
> +    uint64_t start_it_tsc;
> +    /* Latest TSC time stamp taken in PMD. */
>      uint64_t last_tsc;
> +    /* If non-NULL, outermost cycle timer currently running in PMD. */
> +    struct cycle_timer *cur_timer;
>      /* Set of PMD counters with their zero offsets. */
>      struct pmd_counters counters;
>  };
>  
> +/* Support for accurate timing of PMD execution on TSC clock cycle level.
> + * These functions are intended to be invoked in the context of pmd threads.
> + */
> +
> +/* Read the TSC cycle register and cache it. Any function not requiring clock
> + * cycle accuracy should read the cached value using cycles_counter_get() to
> + * avoid the overhead of reading the TSC register. */
> +
> +static inline uint64_t
> +cycles_counter_update(struct pmd_perf_stats *s)
> +{
> +#ifdef DPDK_NETDEV
> +    return s->last_tsc = rte_get_tsc_cycles();
> +#else
> +    return s->last_tsc = 0;
> +#endif
> +}
> +
> +static inline uint64_t
> +cycles_counter_get(struct pmd_perf_stats *s)
> +{
> +    return s->last_tsc;
> +}
> +
> +/* A nestable timer for measuring execution time in TSC cycles.
> + *
> + * Usage:
> + * struct cycle_timer timer;
> + *
> + * cycle_timer_start(pmd, &timer);
> + * <Timed execution>
> + * uint64_t cycles = cycle_timer_stop(pmd, &timer);
> + *
> + * The caller must guarantee that a call to cycle_timer_start() is always
> + * paired with a call to cycle_timer_stop().
> + *
> + * It is possible to have nested cycle timers within the timed code. The
> + * execution time measured by the nested timers is excluded from the time
> + * measured by the embracing timer.
> + */
> +
> +struct cycle_timer {
> +    uint64_t start;
> +    uint64_t suspended;
> +    struct cycle_timer *interrupted;
> +};
> +
> +static inline void
> +cycle_timer_start(struct pmd_perf_stats *s,
> +                  struct cycle_timer *timer)
> +{
> +    struct cycle_timer *cur_timer = s->cur_timer;
> +    uint64_t now = cycles_counter_update(s);
> +
> +    if (cur_timer) {
> +        cur_timer->suspended = now;
> +    }
> +    timer->interrupted = cur_timer;
> +    timer->start = now;
> +    timer->suspended = 0;
> +    s->cur_timer = timer;
> +}
> +
> +static inline uint64_t
> +cycle_timer_stop(struct pmd_perf_stats *s,
> +                 struct cycle_timer *timer)
> +{
> +    /* Assert that this is the current cycle timer. */
> +    ovs_assert(s->cur_timer == timer);
> +    uint64_t now = cycles_counter_update(s);
> +    struct cycle_timer *intr_timer = timer->interrupted;
> +
> +    if (intr_timer) {
> +        /* Adjust the start offset by the suspended cycles. */
> +        intr_timer->start += now - intr_timer->suspended;
> +    }
> +    /* Restore suspended timer, if any. */
> +    s->cur_timer = intr_timer;
> +    return now - timer->start;
> +}
> +
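
(Not part of the patch: to illustrate the nesting semantics implemented
above, stopping a nested timer advances the interrupted outer timer's start
offset by the suspended interval, so the inner cycles are excluded from the
outer result. Minimal sketch, reusing the pmd->perf_stats handle as in
dpif-netdev.c below.)

    struct cycle_timer outer, inner;
    uint64_t outer_cycles, inner_cycles;

    cycle_timer_start(&pmd->perf_stats, &outer);
    /* ... work attributed to the outer measurement ... */

    cycle_timer_start(&pmd->perf_stats, &inner);   /* Suspends 'outer'. */
    /* ... work attributed only to the inner measurement ... */
    inner_cycles = cycle_timer_stop(&pmd->perf_stats, &inner);

    /* ... more work attributed to the outer measurement ... */
    outer_cycles = cycle_timer_stop(&pmd->perf_stats, &outer);
    /* 'outer_cycles' excludes 'inner_cycles'. */
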
>  void pmd_perf_stats_init(struct pmd_perf_stats *s);
>  void pmd_perf_stats_clear(struct pmd_perf_stats *s);
>  void pmd_perf_read_counters(struct pmd_perf_stats *s,
> @@ -115,16 +200,23 @@ pmd_perf_update_counter(struct pmd_perf_stats *s,
>  }
>  
>  static inline void
> -pmd_perf_start_iteration(struct pmd_perf_stats *s, uint64_t now_tsc)
> +pmd_perf_start_iteration(struct pmd_perf_stats *s)
>  {
> -    s->last_tsc = now_tsc;
> +    if (OVS_LIKELY(s->last_tsc)) {
> +        /* We assume here that last_tsc was updated immediately prior at
> +         * the end of the previous iteration, or just before the first
> +         * iteration. */
> +        s->start_it_tsc = s->last_tsc;
> +    } else {
> +        /* In case last_tsc has never been set before. */
> +        s->start_it_tsc = cycles_counter_update(s);
> +    }
>  }
>  
>  static inline void
> -pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t now_tsc,
> -                       int rx_packets)
> +pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets)
>  {
> -    uint64_t cycles = now_tsc - s->last_tsc;
> +    uint64_t cycles = cycles_counter_update(s) - s->start_it_tsc;
>  
>      if (rx_packets > 0) {
>          pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
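
(Not part of the patch: the resulting per-iteration pattern, simplified from
pmd_thread_main() further down; output flushing and PMD reload handling are
omitted here.)

    for (;;) {
        int rx_packets = 0;

        /* Start timestamp is taken from the cached last_tsc written at the
         * end of the previous iteration, so no extra TSC read is needed. */
        pmd_perf_start_iteration(s);

        /* Poll all rx queues, adding received packets to rx_packets. */

        /* One TSC read; the elapsed cycles are booked as ITER_BUSY when
         * rx_packets > 0, otherwise as ITER_IDLE. */
        pmd_perf_end_iteration(s, rx_packets);
    }
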
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index deb63c6..e371d11 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -32,10 +32,6 @@
>  #include <sys/stat.h>
>  #include <unistd.h>
>  
> -#ifdef DPDK_NETDEV
> -#include <rte_cycles.h>
> -#endif
> -
>  #include "bitmap.h"
>  #include "cmap.h"
>  #include "conntrack.h"
> @@ -509,8 +505,6 @@ struct tx_port {
>  struct dp_netdev_pmd_thread_ctx {
>      /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
>      long long now;
> -    /* Used to count cycles. See 'cycles_count_end()' */
> -    unsigned long long last_cycles;
>  };
>  
>  /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
> @@ -3111,64 +3105,20 @@ dp_netdev_actions_free(struct dp_netdev_actions *actions)
>      free(actions);
>  }
>  
> -static inline unsigned long long
> -cycles_counter(void)
> -{
> -#ifdef DPDK_NETDEV
> -    return rte_get_tsc_cycles();
> -#else
> -    return 0;
> -#endif
> -}
> -
> -/* Fake mutex to make sure that the calls to cycles_count_* are balanced */
> -extern struct ovs_mutex cycles_counter_fake_mutex;
> -
> -/* Start counting cycles. Must be followed by 'cycles_count_end()' */
> -static inline void
> -cycles_count_start(struct dp_netdev_pmd_thread *pmd)
> -    OVS_ACQUIRES(&cycles_counter_fake_mutex)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> -{
> -    pmd->ctx.last_cycles = cycles_counter();
> -}
> -
> -/* Stop counting cycles and add them to the counter 'type' */
> -static inline void
> -cycles_count_end(struct dp_netdev_pmd_thread *pmd,
> -                 enum pmd_stat_type type)
> -    OVS_RELEASES(&cycles_counter_fake_mutex)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> -{
> -    unsigned long long interval = cycles_counter() - pmd->ctx.last_cycles;
> -
> -    pmd_perf_update_counter(&pmd->perf_stats, type, interval);
> -}
> -
> -/* Calculate the intermediate cycle result and add to the counter 'type' */
> -static inline void
> -cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
> -                          struct dp_netdev_rxq *rxq,
> -                          enum pmd_stat_type type)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> +static void
> +dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
> +                         enum rxq_cycles_counter_type type,
> +                         unsigned long long cycles)
>  {
> -    unsigned long long new_cycles = cycles_counter();
> -    unsigned long long interval = new_cycles - pmd->ctx.last_cycles;
> -    pmd->ctx.last_cycles = new_cycles;
> -
> -    pmd_perf_update_counter(&pmd->perf_stats, type, interval);
> -    if (rxq && (type == PMD_CYCLES_POLL_BUSY)) {
> -        /* Add to the amount of current processing cycles. */
> -        non_atomic_ullong_add(&rxq->cycles[RXQ_CYCLES_PROC_CURR], interval);
> -    }
> +    atomic_store_relaxed(&rx->cycles[type], cycles);
>  }
>  
>  static void
> -dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
> +dp_netdev_rxq_add_cycles(struct dp_netdev_rxq *rx,
>                           enum rxq_cycles_counter_type type,
>                           unsigned long long cycles)
>  {
> -    atomic_store_relaxed(&rx->cycles[type], cycles);
> +    non_atomic_ullong_add(&rx->cycles[type], cycles);
>  }
>  
>  static uint64_t
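
(Not part of the patch: with the rename above, the two helpers now separate
cleanly into overwrite and accumulate semantics. Sketch only, with 'rxq' and
'cycles' as used in dp_netdev_process_rxq_port() below; the zero reset at
interval rollover is outside this patch.)

    /* Accumulate the cycles measured for one polled rx burst. */
    dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);

    /* Overwrite a counter wholesale, e.g. to reset the current interval. */
    dp_netdev_rxq_set_cycles(rxq, RXQ_CYCLES_PROC_CURR, 0);
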
> @@ -3234,27 +3184,40 @@ dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd)
>  
>  static int
>  dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
> -                           struct netdev_rxq *rx,
> +                           struct dp_netdev_rxq *rxq,
>                             odp_port_t port_no)
>  {
>      struct dp_packet_batch batch;
> +    struct cycle_timer timer;
>      int error;
>      int batch_cnt = 0;
>  
> +    /* Measure duration for polling and processing rx burst. */
> +    cycle_timer_start(&pmd->perf_stats, &timer);
>      dp_packet_batch_init(&batch);
> -    error = netdev_rxq_recv(rx, &batch);
> +    error = netdev_rxq_recv(rxq->rx, &batch);
>      if (!error) {
> +        /* At least one packet received. */
>          *recirc_depth_get() = 0;
>          pmd_thread_ctx_time_update(pmd);
>  
>          batch_cnt = batch.count;
>          dp_netdev_input(pmd, &batch, port_no);
>          dp_netdev_pmd_flush_output_packets(pmd);
> -    } else if (error != EAGAIN && error != EOPNOTSUPP) {
> -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>  
> -        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
> -                    netdev_rxq_get_name(rx), ovs_strerror(error));
> +        /* Assign processing cycles to rx queue. */
> +        uint64_t cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
> +        dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
> +
> +    } else {
> +        /* Discard cycles. */
> +        cycle_timer_stop(&pmd->perf_stats, &timer);
> +        if (error != EAGAIN && error != EOPNOTSUPP) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +
> +            VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
> +                        netdev_rxq_get_name(rxq->rx), ovs_strerror(error));
> +        }
>      }
>  
>      return batch_cnt;
> @@ -3880,30 +3843,22 @@ dpif_netdev_run(struct dpif *dpif)
>      struct dp_netdev *dp = get_dp_netdev(dpif);
>      struct dp_netdev_pmd_thread *non_pmd;
>      uint64_t new_tnl_seq;
> -    int process_packets = 0;
>  
>      ovs_mutex_lock(&dp->port_mutex);
>      non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
>      if (non_pmd) {
>          ovs_mutex_lock(&dp->non_pmd_mutex);
> -        cycles_count_start(non_pmd);
>          HMAP_FOR_EACH (port, node, &dp->ports) {
>              if (!netdev_is_pmd(port->netdev)) {
>                  int i;
>  
>                  for (i = 0; i < port->n_rxq; i++) {
> -                    process_packets =
> -                        dp_netdev_process_rxq_port(non_pmd,
> -                                                   port->rxqs[i].rx,
> -                                                   port->port_no);
> -                    cycles_count_intermediate(non_pmd, NULL,
> -                                              process_packets
> -                                              ? PMD_CYCLES_POLL_BUSY
> -                                              : PMD_CYCLES_POLL_IDLE);
> +                    dp_netdev_process_rxq_port(non_pmd,
> +                                               &port->rxqs[i],
> +                                               port->port_no);
>                  }
>              }
>          }
> -        cycles_count_end(non_pmd, PMD_CYCLES_POLL_IDLE);
>          pmd_thread_ctx_time_update(non_pmd);
>          dpif_netdev_xps_revalidate_pmd(non_pmd, false);
>          ovs_mutex_unlock(&dp->non_pmd_mutex);
> @@ -4082,18 +4037,14 @@ reload:
>          lc = UINT_MAX;
>      }
>  
> -    cycles_count_start(pmd);
> +    cycles_counter_update(s);
>      for (;;) {
>          uint64_t iter_packets = 0;
> -        pmd_perf_start_iteration(s, pmd->ctx.last_cycles);
> +        pmd_perf_start_iteration(s);
>          for (i = 0; i < poll_cnt; i++) {
>              process_packets =
> -                dp_netdev_process_rxq_port(pmd, poll_list[i].rxq->rx,
> +                dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
>                                             poll_list[i].port_no);
> -            cycles_count_intermediate(pmd, poll_list[i].rxq,
> -                                      process_packets
> -                                      ? PMD_CYCLES_POLL_BUSY
> -                                      : PMD_CYCLES_POLL_IDLE);
>              iter_packets += process_packets;
>          }
>  
> @@ -4115,13 +4066,10 @@ reload:
>              if (reload) {
>                  break;
>              }
> -            cycles_count_intermediate(pmd, NULL, PMD_CYCLES_OVERHEAD);
>          }
> -        pmd_perf_end_iteration(s, pmd->ctx.last_cycles, iter_packets);
> +        pmd_perf_end_iteration(s, iter_packets);
>      }
>  
> -    cycles_count_end(pmd, PMD_CYCLES_OVERHEAD);
> -
>      poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
>      exiting = latch_is_set(&pmd->exit_latch);
>      /* Signal here to make sure the pmd finishes

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
