This commit adds support for checking the PMD cycle stats. If the cycle counters do not change over a period of time, this can be flagged as a possible PMD stall.
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com> --- lib/dpif-netdev.c | 17 ++++++++++------- lib/dpif-netdev.h | 6 ++++++ lib/keepalive.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/keepalive.h | 4 ++++ 4 files changed, 70 insertions(+), 7 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 1d98c0b..3574b5c 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -327,12 +327,6 @@ enum dp_stat_type { DP_N_STATS }; -enum pmd_cycles_counter_type { - PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */ - PMD_CYCLES_PROCESSING, /* Cycles spent processing packets */ - PMD_N_CYCLES -}; - #define XPS_TIMEOUT_MS 500LL /* Contained by struct dp_netdev_port's 'rxqs' member. */ @@ -977,6 +971,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd) struct rxq_poll *poll; int port_link_status = 0; int port_stats = 0; + int pmd_polling = 0; + uint64_t cycles[PMD_N_CYCLES]; struct svec pmd_poll_list; svec_init(&pmd_poll_list); @@ -1011,11 +1007,18 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd) } svec_destroy(&pmd_poll_list); + /* Update the cycle counters in SHM. */ + for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) { + atomic_read_relaxed(&pmd->cycles.n[idx], &cycles[idx]); + } + + pmd_polling = ka_shm_update_pmd_cycles(pmd->core_id, cycles); + port_link_status = ka_get_polled_ports_status(pmd->core_id); port_stats = ka_get_polled_ports_stats(pmd->core_id); if (port_link_status == ACTIVE_RUN_STATE && - port_stats == ACTIVE_RUN_STATE ) { + port_stats == ACTIVE_RUN_STATE && pmd_polling == ACTIVE_RUN_STATE) { ka_set_pmd_state(pmd->core_id, KA_STATE_ALIVE); } else { ka_set_pmd_state(pmd->core_id, KA_STATE_CHECK); diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index 6db6ed2..e7c2400 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -33,6 +33,12 @@ extern "C" { * headers to be aligned on a 4-byte boundary. 
*/ enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN }; +enum pmd_cycles_counter_type { + PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */ + PMD_CYCLES_PROCESSING, /* Cycles spent processing packets */ + PMD_N_CYCLES +}; + bool dpif_is_netdev(const struct dpif *); #define NR_QUEUE 1 diff --git a/lib/keepalive.c b/lib/keepalive.c index f513921..3b00d01 100644 --- a/lib/keepalive.c +++ b/lib/keepalive.c @@ -497,6 +497,56 @@ ka_shm_update_port_statistics(const struct netdev *netdev, state; } +int +ka_shm_update_pmd_cycles(int pmd_core_id, uint64_t cycles[PMD_N_CYCLES]) +{ + struct keepalive_shm *ka_shm = get_ka_shm(); + if (!ka_shm) { + VLOG_ERR_RL(&rl, "KeepAlive: Invalid shared memory block."); + return -1; + } + + uint64_t total_cycles = 0; + for (int i = 0; i < PMD_N_CYCLES; i++) { + if (cycles[i] > 0) { + total_cycles += cycles[i]; + } + } + + if (!total_cycles) + return -1; + + int pmd_state = ka_get_pmd_state(pmd_core_id); + VLOG_DBG("%s_%d: PMD_STATE:%d", __FUNCTION__, __LINE__, pmd_state); + if (pmd_state == KA_STATE_CHECK) { + uint64_t polling_cycles_cnt = 0, proc_cycles_cnt = 0; + uint64_t prev_poll_cycles = + ka_shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_POLLING]; + uint64_t prev_proc_cycles = + ka_shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_PROCESSING]; + + VLOG_DBG_RL(&rl, "Keepalive: Going to check the PMD thresholds now."); + + polling_cycles_cnt = cycles[PMD_CYCLES_POLLING] - prev_poll_cycles; + + proc_cycles_cnt = cycles[PMD_CYCLES_PROCESSING] + - prev_proc_cycles; + + if (!polling_cycles_cnt && !proc_cycles_cnt) { + VLOG_DBG("PMD FAILURE"); + return FAILURE_STATE; + } + } else { + ka_shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_POLLING] = + cycles[PMD_CYCLES_POLLING]; + + ka_shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_PROCESSING] = + cycles[PMD_CYCLES_PROCESSING]; + } + + return ACTIVE_RUN_STATE; +} + static void ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *ka_shm_) diff 
--git a/lib/keepalive.h b/lib/keepalive.h index f9bdf12..d8e55d5 100644 --- a/lib/keepalive.h +++ b/lib/keepalive.h @@ -26,6 +26,7 @@ #define KEEPALIVE_MAXCORES 128 #endif /* DPDK_NETDEV */ +#include "dpif-netdev.h" #include "netdev.h" #define MAX_POLL_PORTS 20 @@ -61,6 +62,7 @@ struct pmd_extended_stats { char *health_status; int num_poll_ports; struct poll_port_stats port_stats[MAX_POLL_PORTS]; + uint64_t cycles[PMD_N_CYCLES]; }; struct keepalive_shm { @@ -98,6 +100,7 @@ void ka_enable_pmd_health_check(unsigned); void ka_disable_pmd_health_check(unsigned); bool ka_is_pmdhealth_check_needed(unsigned); + void ka_get_tid(unsigned core); bool is_ka_enabled(void); uint32_t get_ka_interval(void); @@ -109,5 +112,6 @@ void ka_shm_update_port_status(const char *,int,char *,int,int); enum pmdhealth_status ka_get_polled_ports_status(unsigned); void ka_shm_update_port_statistics(const struct netdev *,int,int); enum pmdhealth_status ka_get_polled_ports_stats(unsigned); +int ka_shm_update_pmd_cycles(int, uint64_t cycles[PMD_N_CYCLES]); #endif /* keepalive.h */ -- 2.4.11 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev