>> >> +/* >> + * OVS Shared Memory structure >> + * >> + * The information in the shared memory block will be read by collectd. >> + * */ >> +struct dpdk_keepalive_shm { >> + /* IPC semaphore. Posted when a core dies */ >> + sem_t core_died; >> + >> + /* >> + * Relayed status of each core. >> + * UNUSED[0], ALIVE[1], DEAD[2], GONE[3], MISSING[4], DOZING[5], >SLEEP[6] >> + **/ >> + enum rte_keepalive_state core_state[RTE_KEEPALIVE_MAXCORES]; > >What is 'DOZING'? What is 'MISSING'? Where is a definition of these states >and what they mean? What is DEAD&GONE?
State 'DOZING' means the core is going idle, 'MISSING' indicates the first heartbeat miss, 'DEAD' indicates two heartbeat misses, 'GONE' means it missed three or more heartbeats and is completely 'buried'. Note that these states are defined in the DPDK Keepalive library [rte_keepalive.h]. > >> + /* Last seen timestamp of the core */ >> + uint64_t core_last_seen_times[RTE_KEEPALIVE_MAXCORES]; >> + >> + /* Store pmd thread tid */ >> + pid_t thread_id[RTE_KEEPALIVE_MAXCORES]; >> +}; >> + >> +static struct dpdk_keepalive_shm *ka_shm; >> static int netdev_dpdk_class_init(void); static int >> netdev_dpdk_vhost_class_init(void); >> >> @@ -586,6 +613,202 @@ netdev_dpdk_mempool_configure(struct >netdev_dpdk *dev) >> return 0; >> } >> >> +void >> +dpdk_ka_get_tid(unsigned core_idx) >> +{ >> + uint32_t tid = rte_sys_gettid(); >> + >> + if (dpdk_is_ka_enabled() && ka_shm) { >> + ka_shm->thread_id[core_idx] = tid; >> + } >> +} >> + >> +/* Callback function invoked on heartbeat miss. Verify if it is >> +genuine >> + * heartbeat miss or a false positive and log the message accordingly. >> + */ >> +static void >> +dpdk_failcore_cb(void *ptr_data, const int core_id) { >> + struct dpdk_keepalive_shm *ka_shm = (struct dpdk_keepalive_shm >> +*)ptr_data; >> + >> + if (ka_shm) { >> + int pstate; >> + uint32_t tid = ka_shm->thread_id[core_id]; >> + int err = get_process_status(tid, &pstate); >> + >> + if (!err) { >> + switch (pstate) { >> + >> + case ACTIVE_STATE: >> + VLOG_INFO_RL(&rl,"False positive, pmd tid[%"PRIu32"] >> alive\n", >> + tid); > >Can we get false positives? Doesn't that diminish the usefulness? You made a good point! Thanks for asking this. On false positives, this can happen in a few scenarios and depends on the load and port distribution among PMD threads. This was also observed when the timer intervals aren't appropriately tuned in OvS and the collectd service. 
For example, if a single PMD thread is handling multiple PHY ports and vhostuser ports, it can have a long list of ports to be polled and at times can miss successive heartbeats. This may happen because the PMD thread is spending more time processing the packets and hasn't marked itself alive for a while. If the timer intervals are aggressive, the chances of it missing the heartbeats are higher. This led me to add helper functions to check if the PMD thread is active and subsequently log it appropriately. This doesn't diminish the usefulness of the feature as it all boils down to setting sensible timeout values in ovsdb and collectd services. I would guess millisecond granularity isn't needed by every user and the case I explained above is more for customers looking at <=5ms granularity. > >> + break; >> + case STOPPED_STATE: >> + case TRACED_STATE: >> + case DEFUNC_STATE: >> + case UNINTERRUPTIBLE_SLEEP_STATE: >> + VLOG_WARN_RL(&rl, >> + "PMD tid[%"PRIu32"] on core[%d] is unresponsive\n", >> + tid, core_id); >> + break; >> + default: >> + VLOG_DBG("%s: The process state: %d\n", __FUNCTION__, >pstate); >> + OVS_NOT_REACHED(); >> + } >> + } >> + } >> +} >> + >> +/* Notify the external monitoring application for change in core state. >> + * >> + * On a consecutive heartbeat miss the core is considered dead and >> +the status >> + * is relayed to monitoring framework by unlocking the semaphore. 
>> + */ >> +static void >> +dpdk_ka_relay_core_state(void *ptr_data, const int core_id, >> + const enum rte_keepalive_state core_state, uint64_t >> +last_alive) { >> + struct dpdk_keepalive_shm *ka_shm = (struct dpdk_keepalive_shm >*)ptr_data; >> + int count; >> + >> + if (!ka_shm) { >> + VLOG_ERR("KeepAlive: Invalid shared memory block\n"); >> + return; >> + } >> + >> + VLOG_DBG_RL(&rl, >> + "TS(%lu):CORE%d, old state:%d, current_state:%d\n", >> + (unsigned >> long)time(NULL),core_id,ka_shm->core_state[core_id], >> + core_state); >> + >> + switch (core_state) { >> + case RTE_KA_STATE_ALIVE: >> + case RTE_KA_STATE_MISSING: >> + ka_shm->core_state[core_id] = RTE_KA_STATE_ALIVE; >> + ka_shm->core_last_seen_times[core_id] = last_alive; >> + break; >> + case RTE_KA_STATE_DOZING: >> + case RTE_KA_STATE_SLEEP: >> + case RTE_KA_STATE_DEAD: >> + case RTE_KA_STATE_GONE: >> + ka_shm->core_state[core_id] = core_state; >> + ka_shm->core_last_seen_times[core_id] = last_alive; >> + break; >> + case RTE_KA_STATE_UNUSED: >> + ka_shm->core_state[core_id] = RTE_KA_STATE_UNUSED; >> + break; >> + } >> + >> + if (OVS_UNLIKELY(core_state == RTE_KA_STATE_DEAD)) { >> + /* To handle inactive collectd, increment the semaphore >> + * if count is '0'. */ > >This whole mechanism seems very error prone. Is it possible to hang a thread >with the subsequent sem_post? The relay function is called by the 'ovs_keepalive' thread. I didn't completely understand your concerns here. I would be happy to verify if you have any scenarios in mind that you think pose problems. Please note that this feature is meant to be used by the ceilometer service. I would like to test the scenarios keeping OpenStack in mind rather than simulating lab cases. - Bhanuprakash. _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev