On 02/17/2017 10:39 AM, Ciara Loftus wrote: > Instead of counting all polling cycles as processing cycles, only count > the cycles where packets were received from the polling. > > Signed-off-by: Georg Schmuecking <[email protected]> > Signed-off-by: Ciara Loftus <[email protected]> > Co-authored-by: Georg Schmuecking <[email protected]> > Acked-by: Kevin Traynor <[email protected]> > --- > v3: > - Updated acks & co-author tags > - Removed unnecessary PMD_CYCLES_POLLING counter type > - Explain terms 'idle' and 'processing' cycles in > vswitchd/ovs-vswitchd.8.in > v2: > - Rebase > > lib/dpif-netdev.c | 57 > ++++++++++++++++++++++++++++++++++------------ > vswitchd/ovs-vswitchd.8.in | 5 +++- > 2 files changed, 47 insertions(+), 15 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 30907b7..6bf6809 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -279,8 +279,9 @@ enum dp_stat_type { > }; > > enum pmd_cycles_counter_type { > - PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */ > - PMD_CYCLES_PROCESSING, /* Cycles spent processing packets */ > + PMD_CYCLES_IDLE, /* Cycles spent idle or unsuccessful polling > */ > + PMD_CYCLES_PROCESSING, /* Cycles spent successfully polling and > + * processing polled packets */ > PMD_N_CYCLES > }; > > @@ -755,10 +756,10 @@ pmd_info_show_stats(struct ds *reply, > } > > ds_put_format(reply, > - "\tpolling cycles:%"PRIu64" (%.02f%%)\n" > + "\tidle cycles:%"PRIu64" (%.02f%%)\n" > "\tprocessing cycles:%"PRIu64" (%.02f%%)\n", > - cycles[PMD_CYCLES_POLLING], > - cycles[PMD_CYCLES_POLLING] / (double)total_cycles * 100, > + cycles[PMD_CYCLES_IDLE], > + cycles[PMD_CYCLES_IDLE] / (double)total_cycles * 100, > cycles[PMD_CYCLES_PROCESSING], > cycles[PMD_CYCLES_PROCESSING] / (double)total_cycles * > 100); > > @@ -2953,30 +2954,43 @@ cycles_count_end(struct dp_netdev_pmd_thread *pmd, > non_atomic_ullong_add(&pmd->cycles.n[type], interval); > } > > -static void > +/* Calculate the intermediate cycle result and add to the counter 'type' */ > +static inline void > +cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd, > + enum pmd_cycles_counter_type type) > + OVS_NO_THREAD_SAFETY_ANALYSIS > +{ > + unsigned long long new_cycles = cycles_counter(); > + unsigned long long interval = new_cycles - pmd->last_cycles; > + pmd->last_cycles = new_cycles; > + > + non_atomic_ullong_add(&pmd->cycles.n[type], interval); > +} > + > +static int > dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd, > struct netdev_rxq *rx, > odp_port_t port_no) > { > struct dp_packet_batch batch; > int error; > + int batch_cnt = 0; > > dp_packet_batch_init(&batch); > - cycles_count_start(pmd); > error = netdev_rxq_recv(rx, &batch); > - cycles_count_end(pmd, PMD_CYCLES_POLLING); > if (!error) { > *recirc_depth_get() = 0; > > - cycles_count_start(pmd); > + batch_cnt = batch.count; > dp_netdev_input(pmd, &batch, port_no); > - cycles_count_end(pmd, PMD_CYCLES_PROCESSING); > } else if (error != EAGAIN && error != EOPNOTSUPP) { > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); > > VLOG_ERR_RL(&rl, "error receiving data from %s: %s", > netdev_rxq_get_name(rx), ovs_strerror(error)); > } > + > + return batch_cnt; > } > > static struct tx_port * > @@ -3438,21 +3452,27 @@ dpif_netdev_run(struct dpif *dpif) > struct dp_netdev *dp = get_dp_netdev(dpif); > struct dp_netdev_pmd_thread *non_pmd; > uint64_t new_tnl_seq; > + int process_packets = 0; > > ovs_mutex_lock(&dp->port_mutex); > non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID); > if (non_pmd) { > ovs_mutex_lock(&dp->non_pmd_mutex); > + cycles_count_start(non_pmd); > HMAP_FOR_EACH (port, node, &dp->ports) { > if (!netdev_is_pmd(port->netdev)) { > int i; > > for (i = 0; i < port->n_rxq; i++) { > - dp_netdev_process_rxq_port(non_pmd, port->rxqs[i].rx, > - port->port_no); > + process_packets += > + dp_netdev_process_rxq_port(non_pmd, > + port->rxqs[i].rx, > + port->port_no); > } > } > } > + cycles_count_end(non_pmd, process_packets ? PMD_CYCLES_PROCESSING > + : PMD_CYCLES_IDLE); > dpif_netdev_xps_revalidate_pmd(non_pmd, time_msec(), false); > ovs_mutex_unlock(&dp->non_pmd_mutex); > > @@ -3577,6 +3597,7 @@ pmd_thread_main(void *f_) > bool exiting; > int poll_cnt; > int i; > + int process_packets = 0; > > poll_list = NULL; > > @@ -3603,10 +3624,12 @@ reload: > lc = UINT_MAX; > } > > + cycles_count_start(pmd); > for (;;) { > for (i = 0; i < poll_cnt; i++) { > - dp_netdev_process_rxq_port(pmd, poll_list[i].rx, > - poll_list[i].port_no); > + process_packets += > + dp_netdev_process_rxq_port(pmd, poll_list[i].rx, > + poll_list[i].port_no); > } > > if (lc++ > 1024) { > @@ -3625,8 +3648,14 @@ reload: > break; > } > } > + cycles_count_intermediate(pmd, process_packets ? > PMD_CYCLES_PROCESSING > + : PMD_CYCLES_IDLE);
If there are multiple queues in a poll list and only one has packets, the cycles polling the empty queues for packets will be counted in the processing time - whereas you'd expect them to be in the idle time. Is there much of a performance hit if the cycle_count_intermediate() is moved into the poll_cnt loop? OTOH, how much would the idle queues skew the counters? If it's a small amount it may be better than doing a cycle_count per q. > + process_packets = 0; > } > > + cycles_count_end(pmd, process_packets ? PMD_CYCLES_PROCESSING > + : PMD_CYCLES_IDLE); > + > poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); > exiting = latch_is_set(&pmd->exit_latch); > /* Signal here to make sure the pmd finishes > diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in > index fc19f3b..2e6e684 100644 > --- a/vswitchd/ovs-vswitchd.8.in > +++ b/vswitchd/ovs-vswitchd.8.in > @@ -253,7 +253,10 @@ The sum of ``emc hits'', ``masked hits'' and ``miss'' is > the number of > packets received by the datapath. Cycles are counted using the TSC or > similar > facilities (when available on the platform). To reset these counters use > \fBdpif-netdev/pmd-stats-clear\fR. The duration of one cycle depends on the > -measuring infrastructure. > +measuring infrastructure. ``idle cycles'' refers to cycles spent polling > +devices but not receiving any packets. ``processing cycles'' refers to cycles > +spent polling devices and sucessfully receiving packets, plus the cycles > +spent processing said packets. > .IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]" > Resets to zero the per pmd thread performance numbers shown by the > \fBdpif-netdev/pmd-stats-show\fR command. It will NOT reset datapath or > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
