Hi Jan, Thanks these patches are really very useful.
I haven't finished trying them out but thought you'd prefer to get initial comments earlier. I'll continue to try to them out and revert with any further other comments later. Regards, Billy. > -----Original Message----- > From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev- > boun...@openvswitch.org] On Behalf Of Jan Scheurich > Sent: Thursday, January 4, 2018 12:08 PM > To: d...@openvswitch.org > Subject: [ovs-dev] [PATCH v5 3/3] dpif-netdev: Detection and logging of > suspicious PMD iterations > > This patch enhances dpif-netdev-perf to detect iterations with suspicious > statistics according to the following criteria: > > - iteration lasts longer than US_THR microseconds (default 250). > This can be used to capture events where a PMD is blocked or > interrupted for such a period of time that there is a risk for > dropped packets on any of its Rx queues. > > - max vhost qlen exceeds a threshold Q_THR (default 128). This can > be used to infer virtio queue overruns and dropped packets inside > a VM, which are not visible in OVS otherwise. > > Such suspicious iterations can be logged together with their iteration > statistics to be able to correlate them to packet drop or other events outside > OVS. > > A new command is introduced to enable/disable logging at run-time and to > adjust the above thresholds for suspicious iterations: > > ovs-appctl dpif-netdev/pmd-perf-log-set [on|off] > [-b before] [-a after] [-us usec] [-q qlen] > > Turn logging on or off at run-time (on|off). > > -b before: The number of iterations before the suspicious iteration to > be logged (default 5). > -a after: The number of iterations after the suspicious iteration to > be logged (default 5). > -q qlen: Suspicious vhost queue fill level threshold. Increase this > to 512 if the Qemu supports 1024 virtio queue length. > (default 128). > -us usec: change the duration threshold for a suspicious iteration > (default 250 us). > > If more than 100 iterations before or after a suspicious iteration have been > looged once, OVS falls back to the safe default values (5/5) to avoid that [[BO'M]] typo > logging itself causes continuos further logging. [[BO'M]] typo > > Signed-off-by: Jan Scheurich <jan.scheur...@ericsson.com> > --- > lib/dpif-netdev-perf.c | 142 > +++++++++++++++++++++++++++++++++++++++++++++++++ > lib/dpif-netdev-perf.h | 32 +++++++++++ > lib/dpif-netdev.c | 5 ++ > 3 files changed, 179 insertions(+) > > diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c index > a66a48c..3fd19b0 100644 > --- a/lib/dpif-netdev-perf.c > +++ b/lib/dpif-netdev-perf.c > @@ -28,6 +28,23 @@ > > VLOG_DEFINE_THIS_MODULE(pmd_perf); > > +#define ITER_US_THRESHOLD 250 /* Warning threshold for iteration > duration > + in microseconds. */ > +#define VHOST_QUEUE_FULL 128 /* Size of the virtio TX queue. */ > +#define LOG_IT_BEFORE 5 /* Number of iteration to log before > + suspicious iteration. */ > +#define LOG_IT_AFTER 5 /* Number of iteration to log after > + suspicious iteration. */ > + > +bool log_on = false; > +static uint32_t log_it_before = LOG_IT_BEFORE; static uint32_t > +log_it_after = LOG_IT_AFTER; static uint32_t log_us_thr = > +ITER_US_THRESHOLD; uint32_t log_q_thr = VHOST_QUEUE_FULL; uint64_t > +iter_cycle_threshold; > + > +static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, > +600); > + > #ifdef DPDK_NETDEV > static uint64_t > get_tsc_hz(void) > @@ -133,6 +150,8 @@ pmd_perf_stats_init(struct pmd_perf_stats *s) { > histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000); > histogram_walls_set_log(&s->max_vhost_qfill, 0, 512); > s->start_ms = time_msec(); > + s->log_begin_it = UINT64_MAX; > + s->log_end_it = UINT64_MAX; > } > > void > @@ -368,6 +387,129 @@ pmd_perf_stats_clear(struct pmd_perf_stats *s) > histogram_clear(&s->max_vhost_qfill); > history_init(&s->iterations); > history_init(&s->milliseconds); > + s->log_begin_it = UINT64_MAX; > + s->log_end_it = UINT64_MAX; > s->start_ms = time_msec(); /* Clearing finished. */ > s->milliseconds.sample[0].timestamp = s->start_ms; } > + > +void > +pmd_perf_log_suspicious_iteration(struct pmd_perf_stats *s, > + uint64_t cycles, > + char *reason) { > + VLOG_WARN_RL(&latency_rl, > + "Suspicious iteration (%s): tsc=%"PRIu64 > + " duration=%"PRIu64" us\n", > + reason, s->current.timestamp, > + (1000000L * cycles) / get_tsc_hz()); [[BO'M]] get_tsc_hz calls sleep(1) so won't this block the datapath when logging is turned on? I haven't tested trying to inject occasional long processing interval. > + if (log_it_before + log_it_after > 0) { > + if (s->log_begin_it == UINT64_MAX) { > + s->log_begin_it = > + (s->iterations.idx + HISTORY_LEN - log_it_before) > + % HISTORY_LEN; > + } > + s->log_end_it = > + (s->iterations.idx + log_it_after) % HISTORY_LEN; > + } > +} > + > +void > +pmd_perf_log_iteration_neighborhood(struct pmd_perf_stats *s) { > + struct ds log = DS_EMPTY_INITIALIZER; > + pmd_perf_format_iteration_history(&log, s, > + (s->log_end_it + HISTORY_LEN - s->log_begin_it) % HISTORY_LEN); > + VLOG_WARN_RL(&latency_rl, > + "Neighborhood of suspicious iteration:\n" > + "%s", ds_cstr(&log)); > + ds_destroy(&log); > + s->log_end_it = s->log_begin_it = UINT64_MAX; > + if (log_it_before > 100 || log_it_after > 100) { > + /* Reset to safe default values to avoid disturbance. */ > + log_it_before = LOG_IT_BEFORE; > + log_it_after = LOG_IT_AFTER; > + } > +} > + > +void > +pmd_perf_log_set_cmd(struct unixctl_conn *conn, > + int argc, const char *argv[], > + void *aux OVS_UNUSED) > +{ > + int it_before, it_after, us_thr, q_thr; > + bool on; > + bool usage = false; > + > + on = log_on; > + it_before = log_it_before; > + it_after = log_it_after; > + q_thr = log_q_thr; > + us_thr = log_us_thr; > + > + while (argc > 1) { > + if (!strcmp(argv[1], "on")) { > + on = true; > + argc--; > + argv++; > + } else if (!strcmp(argv[1], "off")) { > + on = false; > + argc--; > + argv++; > + } else if (!strcmp(argv[1], "-a")) { > + it_after = strtol(argv[2], NULL, 10); > + if (it_after < 0) { > + it_after = 0; > + } else if (it_before + it_after > HISTORY_LEN-1) { > + it_after = HISTORY_LEN-1 - it_before; > + } > + argc -= 2; > + argv += 2; > + } else if (!strcmp(argv[1], "-b")) { > + it_before = strtol(argv[2], NULL, 10); > + if (it_before < 0) { > + it_before = 0; > + } else if (it_before > HISTORY_LEN-10) { > + it_before = HISTORY_LEN-10; > + } > + argc -= 2; > + argv += 2; > + } else if (!strcmp(argv[1], "-q")) { > + q_thr = strtol(argv[2], NULL, 10); > + if (q_thr < 0) { > + q_thr = 0; > + } > + argc -= 2; > + argv += 2; > + } else if (!strcmp(argv[1], "-us")) { > + us_thr = strtol(argv[2], NULL, 10); > + if (us_thr < 0) { > + us_thr = 0; > + } > + argc -= 2; > + argv += 2; > + } else { > + usage = true; > + break; > + } > + } > + > + if (usage) { > + unixctl_command_reply_error(conn, > + "Usage: ovs-appctl dpif-netdev/pmd-perf-log-set " > + "[on|off] [-b before] [-a after] [-us usec] [-q qlen]"); > + return; > + } > + > + VLOG_INFO("pmd-perf-log-set: %s, before=%d, after=%d, us_thr=%d," > + " q_thr=%d\n", > + on ? "on" : "off", it_before, it_after, us_thr, q_thr); > + log_on = on; > + log_it_before = it_before; > + log_it_after = it_after; > + log_q_thr = q_thr; > + log_us_thr = us_thr; > + iter_cycle_threshold = (log_us_thr * get_tsc_hz()) / 1000000L; > + > + unixctl_command_reply(conn, ""); > +} > diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h index > 3611723..392ca80 100644 > --- a/lib/dpif-netdev-perf.h > +++ b/lib/dpif-netdev-perf.h > @@ -109,6 +109,8 @@ struct pmd_perf_stats { > struct histogram max_vhost_qfill; > struct history iterations; > struct history milliseconds; > + uint64_t log_begin_it; > + uint64_t log_end_it; > }; > > enum pmd_info_type; > @@ -121,6 +123,10 @@ struct pmd_perf_params { > size_t ms_hist_len; > }; > > +extern bool log_on; > +extern uint32_t log_q_thr; > +extern uint64_t iter_cycle_threshold; > + > void pmd_perf_stats_init(struct pmd_perf_stats *s); void > pmd_perf_stats_clear(struct pmd_perf_stats *s); void > pmd_perf_read_counters(struct pmd_perf_stats *s, @@ -145,9 +151,17 > @@ void pmd_perf_format_iteration_history(struct ds *str, void > pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, > int n_ms); > > +void pmd_perf_log_suspicious_iteration(struct pmd_perf_stats *s, > + uint64_t cycles, > + char *reason); void > +pmd_perf_log_iteration_neighborhood(struct pmd_perf_stats *s); > void pmd_perf_show(struct unixctl_conn *conn, int argc, > const char *argv[], > void *aux OVS_UNUSED); > +void pmd_perf_log_set_cmd(struct unixctl_conn *conn, > + int argc, const char *argv[], > + void *aux OVS_UNUSED); > + > static inline void > histogram_add_sample(struct histogram *hist, uint32_t val) { @@ -210,6 > +224,7 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t > now_tsc, { > struct iter_stats *cum_ms; > uint64_t cycles, cycles_per_pkt = 0; > + char *reason = NULL; > > if (OVS_UNLIKELY(s->current.timestamp == 0)) { > /* Stats were cleared during the ongoing iteration. */ @@ -263,6 > +278,23 > @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t now_tsc, > cum_ms->batches += s->current.batches; > cum_ms->max_vhost_qfill += s->current.max_vhost_qfill; > > + if (log_on) { > + /* Log suspicious iterations. */ > + if (cycles > iter_cycle_threshold) { > + reason = "Excessive total cycles"; > + } else if (s->current.max_vhost_qfill >= log_q_thr) { > + reason = "Vhost RX queue full"; > + } > + if (OVS_UNLIKELY(reason)) { > + pmd_perf_log_suspicious_iteration(s, cycles, reason); > + } > + > + /* Log iteration stats interval when required. */ > + if (OVS_UNLIKELY(s->log_end_it == s->iterations.idx)) { > + pmd_perf_log_iteration_neighborhood(s); > + } > + } > + > /* Store in iteration history. */ > history_store(&s->iterations, &s->current); > if (now_tsc - s->last_tsc > 10000) { diff --git a/lib/dpif-netdev.c > b/lib/dpif- > netdev.c index 8f64df3..96cc4d5 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -1201,6 +1201,11 @@ dpif_netdev_init(void) > unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]", > 0, 1, dpif_netdev_pmd_rebalance, > NULL); > + unixctl_command_register("dpif-netdev/pmd-perf-log-set", > + "[on|off] [-b before] [-a after] " > + "[-us usec] [-q qlen]", > + 0, 10, pmd_perf_log_set_cmd, > + NULL); > return 0; > } > > -- > 1.9.1 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev