Sleep for an incremental amount of time if none of the Rx queues
assigned to a PMD have at least half a batch of packets (i.e. 16 pkts)
on a polling iteration of the PMD.

Upon detecting >= 16 pkts on an Rxq, reset the sleep time to zero.
Sleep time will be increased by 1 uS on each iteration where the low
load conditions remain, up to a total of 250 uS.

The feature is off by default and can be enabled by:
ovs-vsctl set Open_vSwitch . other_config:pmd-powersave=true

Also add new stats to pmd-perf-show to get visibility of operation:

Iterations:
...
 - No-sleep thresh:   576  ( 98.1 % of busy it)
 - Max sleeps:       19424  (167 us/it avg.)
...

Signed-off-by: Kevin Traynor <[email protected]>
---
 Documentation/topics/dpdk/pmd.rst | 20 +++++++++++
 lib/dpif-netdev-perf.c            | 25 ++++++++++++--
 lib/dpif-netdev-perf.h            |  7 +++-
 lib/dpif-netdev.c                 | 56 +++++++++++++++++++++++++++++--
 vswitchd/vswitch.xml              | 17 ++++++++++
 5 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index b259cc8b3..a42e34956 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -312,4 +312,24 @@ reassignment due to PMD Auto Load Balance. For example, this could be set (in
 min) such that a reassignment is triggered at most every few hours.
 
+PMD Power Saving
+----------------
+
+PMD threads constantly poll Rx queues which are assigned to them. In order to
+reduce the CPU cycles they use, they can sleep for small periods of time
+when there is no load or very-low load from the Rx queues they poll.
+
+This can be enabled by::
+
+    $ ovs-vsctl set open_vswitch . other_config:pmd-powersave="true"
+
+With this enabled a PMD may sleep by an incrementing amount of time up to
+a total of 250 uS after polling all Rx queues assigned to it, if none of the
+Rx queues have at least half a batch of packets (i.e. 16).
+
+.. note::
+
+   Sleeping in a PMD thread may cause extra packet latency due to the sleep
+   and possible processor low-power states which may be invoked.
+
 .. _ovs-vswitchd(8): http://openvswitch.org/support/dist-docs/ovs-vswitchd.8.html
diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
index a2a7d8f0b..1bdeda265 100644
--- a/lib/dpif-netdev-perf.c
+++ b/lib/dpif-netdev-perf.c
@@ -231,4 +231,7 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
     uint64_t idle_iter = s->pkts.bin[0];
     uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0;
+    uint64_t no_sleep_hit = stats[PMD_PWR_NO_SLEEP_HIT];
+    uint64_t max_sleep_hit = stats[PMD_PWR_MAX_SLEEP_HIT];
+    uint64_t tot_sleep_time = stats[PMD_PWR_SLEEP_TIME];
 
     ds_put_format(str,
@@ -236,5 +239,7 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
             "  - Used TSC cycles: %12"PRIu64"  (%5.1f %% of total cycles)\n"
             "  - idle iterations: %12"PRIu64"  (%5.1f %% of used cycles)\n"
-            "  - busy iterations: %12"PRIu64"  (%5.1f %% of used cycles)\n",
+            "  - busy iterations: %12"PRIu64"  (%5.1f %% of used cycles)\n"
+            "  - No-sleep thresh: %12"PRIu64"  (%5.1f %% of busy it)\n"
+            "  - Max sleeps:      %12"PRIu64"  (%3"PRIu64" us/it avg.)\n",
             tot_iter, tot_cycles * us_per_cycle / tot_iter,
             tot_cycles, 100.0 * (tot_cycles / duration) / tsc_hz,
@@ -242,5 +247,7 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
             100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
             busy_iter,
-            100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
+            100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles,
+            no_sleep_hit, busy_iter ? 100.0 * no_sleep_hit / busy_iter : 0,
+            max_sleep_hit, tot_iter ? (tot_sleep_time / 1000) / tot_iter : 0);
     if (rx_packets > 0) {
         ds_put_format(str,
@@ -519,5 +526,7 @@ OVS_REQUIRES(s->stats_mutex)
 void
 pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
-                       int tx_packets, bool full_metrics)
+                       int tx_packets, bool max_sleep_hit,
+                       bool no_sleep_hit, uint64_t sleep_time,
+                       bool full_metrics)
 {
     uint64_t now_tsc = cycles_counter_update(s);
@@ -540,4 +549,14 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
     histogram_add_sample(&s->pkts, rx_packets);
 
+    if (no_sleep_hit) {
+        pmd_perf_update_counter(s, PMD_PWR_NO_SLEEP_HIT, 1);
+    }
+    if (max_sleep_hit) {
+        pmd_perf_update_counter(s, PMD_PWR_MAX_SLEEP_HIT, 1);
+    }
+    if (sleep_time) {
+        pmd_perf_update_counter(s, PMD_PWR_SLEEP_TIME, sleep_time);
+    }
+
     if (!full_metrics) {
         return;
diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
index 9673dddd8..c3d6cd435 100644
--- a/lib/dpif-netdev-perf.h
+++ b/lib/dpif-netdev-perf.h
@@ -81,4 +81,7 @@ enum pmd_stat_type {
     PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
     PMD_CYCLES_UPCALL,      /* Cycles spent processing upcalls. */
+    PMD_PWR_NO_SLEEP_HIT,   /* Iterations with Rx above no-sleep thresh. */
+    PMD_PWR_MAX_SLEEP_HIT,  /* Number of times max sleep value hit. */
+    PMD_PWR_SLEEP_TIME,     /* Total time slept to save power. */
     PMD_N_STATS
 };
@@ -409,5 +412,7 @@ pmd_perf_start_iteration(struct pmd_perf_stats *s);
 void
 pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
-                       int tx_packets, bool full_metrics);
+                       int tx_packets, bool max_sleep_hit,
+                       bool no_sleep_hit, uint64_t sleep_time,
+                       bool full_metrics);
 
 /* Formatting the output of commands. */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2c08a71c8..91a323c74 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -170,4 +170,11 @@ static struct odp_support dp_netdev_support = {
 #define PMD_RCU_QUIESCE_INTERVAL 10000LL
 
+/* Max time in nanoseconds for a pmd thread to sleep based on load. */
+#define PMD_PWR_MAX_SLEEP 250000
+/* Number of pkts Rx on an interface that will stop pmd thread sleeping. */
+#define PMD_PWR_NO_SLEEP_THRESH (NETDEV_MAX_BURST / 2)
+/* Time in nanoseconds to increment a pmd thread sleep time. */
+#define PMD_PWR_INC 1000
+
 struct dpcls {
     struct cmap_node node;      /* Within dp_netdev_pmd_thread.classifiers */
@@ -278,4 +285,6 @@ struct dp_netdev {
     /* Enable collection of PMD performance metrics. */
     atomic_bool pmd_perf_metrics;
+    /* Enable PMD load based sleeping. */
+    atomic_bool pmd_powersave;
     /* Enable the SMC cache from ovsdb config */
     atomic_bool smc_enable_db;
@@ -4915,4 +4924,17 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
     set_pmd_auto_lb(dp, autolb_state, log_autolb);
+
+    bool powersave = smap_get_bool(other_config, "pmd-powersave", false);
+    bool cur_powersave;
+    atomic_read_relaxed(&dp->pmd_powersave, &cur_powersave);
+    if (powersave != cur_powersave) {
+        atomic_store_relaxed(&dp->pmd_powersave, powersave);
+        if (powersave) {
+            VLOG_INFO("PMD powersave is enabled.");
+        } else {
+            VLOG_INFO("PMD powersave is disabled.");
+        }
+    }
+
     return 0;
 }
@@ -6873,7 +6895,9 @@ pmd_thread_main(void *f_)
     bool exiting;
     bool reload;
+    bool powersave;
     int poll_cnt;
     int i;
     int process_packets = 0;
+    uint64_t sleep_time = 0;
 
     poll_list = NULL;
@@ -6935,7 +6959,14 @@ reload:
     for (;;) {
         uint64_t rx_packets = 0, tx_packets = 0;
+        bool nosleep_hit = false;
+        bool max_sleep_hit = false;
+        uint64_t time_slept = 0;
 
         pmd_perf_start_iteration(s);
-
+        atomic_read_relaxed(&pmd->dp->pmd_powersave, &powersave);
+        if (!powersave) {
+            /* Reset sleep_time as policy may have changed. */
+            sleep_time = 0;
+        }
         atomic_read_relaxed(&pmd->dp->smc_enable_db,
                             &pmd->ctx.smc_enable_db);
@@ -6957,4 +6988,8 @@ reload:
                                                        poll_list[i].port_no);
             rx_packets += process_packets;
+            if (process_packets >= PMD_PWR_NO_SLEEP_THRESH) {
+                nosleep_hit = true;
+                sleep_time = 0;
+            }
         }
 
@@ -6964,5 +6999,19 @@ reload:
              * There was no time updates on current iteration. */
            pmd_thread_ctx_time_update(pmd);
-            tx_packets = dp_netdev_pmd_flush_output_packets(pmd, false);
+            tx_packets = dp_netdev_pmd_flush_output_packets(pmd,
+                                                   sleep_time ? true : false);
+        }
+
+        if (powersave) {
+            if (sleep_time) {
+                xnanosleep(sleep_time);
+                time_slept = sleep_time;
+            }
+            if (sleep_time < PMD_PWR_MAX_SLEEP) {
+                /* Increase potential sleep time for next iteration. */
+                sleep_time += PMD_PWR_INC;
+            } else {
+                max_sleep_hit = true;
+            }
         }
 
@@ -7004,5 +7053,6 @@ reload:
         }
 
-        pmd_perf_end_iteration(s, rx_packets, tx_packets,
+        pmd_perf_end_iteration(s, rx_packets, tx_packets, max_sleep_hit,
+                               nosleep_hit, time_slept,
                                pmd_perf_metrics_enabled(pmd));
     }
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 928821a82..d51a486d8 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -789,4 +789,21 @@
         </p>
       </column>
+      <column name="other_config" key="pmd-powersave"
+              type='{"type": "boolean"}'>
+        <p>
+          Enables PMD thread sleeping up to 250 uS per iteration based on
+          rx queue batch sizes.
+        </p>
+        <p>
+          The default value is <code>false</code>.
+        </p>
+        <p>
+          Set this value to <code>true</code> to enable this option.
+        </p>
+        <p>
+          Sleep time will be reset to 0 when any rx queue has a batch of
+          16 or more packets.
+        </p>
+      </column>
       <column name="other_config" key="userspace-tso-enable"
               type='{"type": "boolean"}'>
-- 
2.38.1
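
For readers who want the back-off logic at a glance, without the surrounding
PMD machinery, here is a minimal standalone sketch of what the
pmd_thread_main() changes do. It is illustrative only: poll_rxq() is a
hypothetical stand-in for dp_netdev_process_rxq_port(), the powersave pointer
stands in for the atomic read of dp->pmd_powersave, plain nanosleep() replaces
OVS's xnanosleep(), and the SKETCH_* constants mirror the PMD_PWR_* values
from the patch:

    /* Illustrative sketch only: poll_rxq() and the constants below are local
     * stand-ins, not the OVS implementation. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    #define SKETCH_MAX_SLEEP_NS 250000   /* Max sleep per iteration (250 uS). */
    #define SKETCH_NO_SLEEP_THRESH 16    /* Half of a 32-packet Rx batch. */
    #define SKETCH_SLEEP_INC_NS 1000     /* Back-off increment (1 uS). */

    /* Hypothetical: poll one Rx queue, return number of packets received. */
    extern int poll_rxq(int rxq);

    void
    poll_loop_sketch(const int *rxqs, int n_rxqs, const volatile bool *powersave)
    {
        uint64_t sleep_ns = 0;

        for (;;) {
            if (!*powersave) {
                sleep_ns = 0;           /* Feature disabled; never sleep. */
            }
            for (int i = 0; i < n_rxqs; i++) {
                if (poll_rxq(rxqs[i]) >= SKETCH_NO_SLEEP_THRESH) {
                    sleep_ns = 0;       /* Enough load seen; reset back-off. */
                }
            }
            if (*powersave) {
                if (sleep_ns) {
                    /* The patch uses OVS's xnanosleep() here. */
                    struct timespec ts = { 0, (long) sleep_ns };
                    nanosleep(&ts, NULL);
                }
                if (sleep_ns < SKETCH_MAX_SLEEP_NS) {
                    /* Low load persists: sleep a little longer next time. */
                    sleep_ns += SKETCH_SLEEP_INC_NS;
                }
            }
        }
    }

In the actual patch the same loop also records no-sleep hits, max-sleep hits
and total time slept, which pmd_perf_end_iteration() feeds into the new
pmd-perf-show counters.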
