This commit adds support for checking the PMD cycle stats. If the cycle counters do not change over a period of time, this can be flagged as a possible PMD stall.
Signed-off-by: Bhanuprakash Bodireddy <[email protected]> --- lib/dpif-netdev.c | 18 +++++++++++------- lib/dpif-netdev.h | 6 ++++++ lib/keepalive.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/keepalive.h | 3 +++ 4 files changed, 72 insertions(+), 7 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 06ca7fb..dd9d396 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -327,12 +327,6 @@ enum dp_stat_type { DP_N_STATS }; -enum pmd_cycles_counter_type { - PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */ - PMD_CYCLES_PROCESSING, /* Cycles spent processing packets */ - PMD_N_CYCLES -}; - #define XPS_TIMEOUT_MS 500LL /* Contained by struct dp_netdev_port's 'rxqs' member. */ @@ -977,6 +971,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd) struct rxq_poll *poll; int port_link_status = 0; int port_stats = 0; + int pmd_polling = 0; + uint64_t cycles[PMD_N_CYCLES]; struct svec pmd_poll_list; svec_init(&pmd_poll_list); @@ -1011,6 +1007,13 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd) } svec_destroy(&pmd_poll_list); + /* Update the cycle counters in SHM. */ + for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) { + atomic_read_relaxed(&pmd->cycles.n[idx], &cycles[idx]); + } + + pmd_polling = ka_shm_update_pmd_cycles(pmd->core_id, cycles); + port_link_status = ka_get_polled_ports_status(pmd->core_id); port_stats = ka_get_polled_ports_stats(pmd->core_id); @@ -1024,7 +1027,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd) break; case PMD_HC_COMPLETE: if (port_link_status == ACTIVE_RUN_STATE && - port_stats == ACTIVE_RUN_STATE ) { + port_stats == ACTIVE_RUN_STATE && + pmd_polling == ACTIVE_RUN_STATE) { ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0); } break; diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index 6db6ed2..e7c2400 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -33,6 +33,12 @@ extern "C" { * headers to be aligned on a 4-byte boundary. 
*/ enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN }; +enum pmd_cycles_counter_type { + PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */ + PMD_CYCLES_PROCESSING, /* Cycles spent processing packets */ + PMD_N_CYCLES +}; + bool dpif_is_netdev(const struct dpif *); #define NR_QUEUE 1 diff --git a/lib/keepalive.c b/lib/keepalive.c index b702ebc..84813bf 100644 --- a/lib/keepalive.c +++ b/lib/keepalive.c @@ -527,6 +527,58 @@ ka_shm_update_port_statistics(const struct netdev *netdev, state; } +int +ka_shm_update_pmd_cycles(int core_id, uint64_t cycles[PMD_N_CYCLES]) +{ + int pmd_state = ACTIVE_RUN_STATE; + struct keepalive_shm *ka_shm = get_ka_shm(); + if (!ka_shm) { + VLOG_ERR_RL(&rl, "KeepAlive: Invalid shared memory block."); + return -1; + } + + uint64_t total_cycles = 0; + for (int i = 0; i < PMD_N_CYCLES; i++) { + if (cycles[i] > 0) { + total_cycles += cycles[i]; + } + } + + if (!total_cycles) + return -1; + + int pmd_hc_state = ka_get_pmd_health_check_state(core_id); + if (PMD_HC_ENABLE == pmd_hc_state) { + ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_POLLING] = + cycles[PMD_CYCLES_POLLING]; + + ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING] = + cycles[PMD_CYCLES_PROCESSING]; + } + + if (PMD_HC_PROGRESS == pmd_hc_state) { + uint64_t polling_cycles_cnt = 0, proc_cycles_cnt = 0; + uint64_t prev_poll_cycles = + ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_POLLING]; + uint64_t prev_proc_cycles = + ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING]; + + VLOG_DBG_RL(&rl, "Keepalive: Going to check the PMD thresholds now."); + + polling_cycles_cnt = cycles[PMD_CYCLES_POLLING] - prev_poll_cycles; + + proc_cycles_cnt = cycles[PMD_CYCLES_PROCESSING] + - prev_proc_cycles; + + if (!polling_cycles_cnt && !proc_cycles_cnt) { + VLOG_DBG("PMD FAILURE!"); + pmd_state = FAILURE_STATE; + } + } + + return pmd_state; +} + static void ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *ka_shm_) diff --git 
a/lib/keepalive.h b/lib/keepalive.h index 1f1f1c1..7501065 100644 --- a/lib/keepalive.h +++ b/lib/keepalive.h @@ -26,6 +26,7 @@ #define KEEPALIVE_MAXCORES 128 #endif /* DPDK_NETDEV */ +#include "dpif-netdev.h" #include "netdev.h" #define MAX_POLL_PORTS 20 @@ -61,6 +62,7 @@ struct pmd_extended_stats { char *health_status; int num_poll_ports; struct poll_port_stats port_stats[MAX_POLL_PORTS]; + uint64_t cycles[PMD_N_CYCLES]; }; struct keepalive_shm { @@ -122,5 +124,6 @@ void ka_shm_update_port_status(const char *,int,char *,int,int); enum pmdhealth_status ka_get_polled_ports_status(unsigned); void ka_shm_update_port_statistics(const struct netdev *,int,int); enum pmdhealth_status ka_get_polled_ports_stats(unsigned); +int ka_shm_update_pmd_cycles(int, uint64_t cycles[PMD_N_CYCLES]); #endif /* keepalive.h */ -- 2.4.11 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
