On 12/08/2017 12:04 PM, Bhanuprakash Bodireddy wrote: > This commit introduces the keepalive configuration by adding > 'keepalive' module and also helper and initialization functions > that will be invoked by later commits. > > This commit adds new ovsdb column "keepalive" that shows the status > of the datapath threads. This is implemented for DPDK datapath and > only status of PMD threads is reported. > > Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com> > --- > lib/automake.mk | 2 + > lib/keepalive.c | 147 > +++++++++++++++++++++++++++++++++++++++++++++ > lib/keepalive.h | 86 ++++++++++++++++++++++++++ > vswitchd/bridge.c | 3 + > vswitchd/vswitch.ovsschema | 8 ++- > vswitchd/vswitch.xml | 49 +++++++++++++++ > 6 files changed, 293 insertions(+), 2 deletions(-) > create mode 100644 lib/keepalive.c > create mode 100644 lib/keepalive.h > > diff --git a/lib/automake.mk b/lib/automake.mk > index effe5b5..91d65be 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \ > lib/json.c \ > lib/jsonrpc.c \ > lib/jsonrpc.h \ > + lib/keepalive.c \ > + lib/keepalive.h \ > lib/lacp.c \ > lib/lacp.h \ > lib/latch.h \ > diff --git a/lib/keepalive.c b/lib/keepalive.c > new file mode 100644 > index 0000000..ca8dccb > --- /dev/null > +++ b/lib/keepalive.c > @@ -0,0 +1,147 @@ > +/* > + * Copyright (c) 2017 Intel, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#include <config.h> > + > +#include "keepalive.h" > +#include "lib/vswitch-idl.h" > +#include "openvswitch/vlog.h" > +#include "seq.h" > +#include "timeval.h" > + > +VLOG_DEFINE_THIS_MODULE(keepalive); > + > +static bool keepalive_enable = false; /* Keepalive disabled by default. > */ > +static uint32_t keepalive_timer_interval; /* keepalive timer interval. */ > +static struct keepalive_info ka_info; > + > +/* Returns true if keepalive is enabled, false otherwise. */ > +bool > +ka_is_enabled(void) > +{ > + return keepalive_enable; > +} > + > +/* Finds the thread by 'tid' in 'process_list' map and update > + * the thread state and last_seen_time stamp. This is invoked > + * periodically(based on keepalive-interval) as part of callback > + * function in the context of keepalive thread. > + */ > +static void > +ka_set_thread_state_ts(pid_t tid, enum keepalive_state state, > + uint64_t last_alive) > +{ > + struct ka_process_info *pinfo; > + > + ovs_mutex_lock(&ka_info.proclist_mutex); > + HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0), > + &ka_info.process_list) { > + if (pinfo->tid == tid) { > + pinfo->state = state; > + pinfo->last_seen_time = last_alive; > + } > + } > + ovs_mutex_unlock(&ka_info.proclist_mutex); > +} > + > +/* Retrieve and return the keepalive timer interval from OVSDB. */ > +static uint32_t > +ka_get_timer_interval(const struct smap *ovs_other_config) > +{ > + uint32_t ka_interval; > + > + /* Timer granularity in milliseconds > + * Defaults to OVS_KEEPALIVE_TIMEOUT(ms) if not set */ > + ka_interval = smap_get_int(ovs_other_config, "keepalive-interval", > + OVS_KEEPALIVE_DEFAULT_TIMEOUT); > + > + VLOG_INFO("Keepalive timer interval set to %"PRIu32" (ms)\n", > ka_interval); > + return ka_interval; > +} > + > +/* Invoke periodically to update the status and last seen timestamp > + * of the thread in to 'process_list' map. Runs in the context of > + * keepalive thread. 
> + */ > +static void > +ka_update_thread_state(pid_t tid, const enum keepalive_state state, > + uint64_t last_alive) > +{ > + switch (state) { > + case KA_STATE_ALIVE: > + case KA_STATE_MISSING: > + ka_set_thread_state_ts(tid, KA_STATE_ALIVE, last_alive); > + break; > + case KA_STATE_UNUSED: > + case KA_STATE_SLEEP: > + case KA_STATE_DEAD: > + case KA_STATE_GONE: > + ka_set_thread_state_ts(tid, state, last_alive); > + break; > + default: > + OVS_NOT_REACHED(); > + } > +} > + > +/* Register relay callback function. */ > +static void > +ka_register_relay_cb(ka_relay_cb cb, void *aux) > +{ > + ka_info.relay_cb = cb; > + ka_info.relay_cb_data = aux; > +} > + > +void > +ka_init(const struct smap *ovs_other_config) > +{ > + if (smap_get_bool(ovs_other_config, "enable-keepalive", false)) { > + static struct ovsthread_once once_enable = > OVSTHREAD_ONCE_INITIALIZER; > + > + if (ovsthread_once_start(&once_enable)) { > + keepalive_enable = true;
Nit: there is an extra space in the line above. > + VLOG_INFO("OvS Keepalive enabled."); > + > + keepalive_timer_interval = > + ka_get_timer_interval(ovs_other_config); > + > + ka_register_relay_cb(ka_update_thread_state, NULL); > + ovs_mutex_init(&ka_info.proclist_mutex); > + hmap_init(&ka_info.process_list); > + > + ka_info.init_time = time_wall_msec(); > + > + ovsthread_once_done(&once_enable); > + } > + } > +} > + > +void > +ka_destroy(void) > +{ > + if (!ka_is_enabled()) { > + return; > + } > + > + ovs_mutex_lock(&ka_info.proclist_mutex); > + struct ka_process_info *pinfo; > + HMAP_FOR_EACH_POP (pinfo, node, &ka_info.process_list) { > + free(pinfo); > + } > + ovs_mutex_unlock(&ka_info.proclist_mutex); > + > + hmap_destroy(&ka_info.process_list); > + ovs_mutex_destroy(&ka_info.proclist_mutex); > +} > diff --git a/lib/keepalive.h b/lib/keepalive.h > new file mode 100644 > index 0000000..a738daa > --- /dev/null > +++ b/lib/keepalive.h > @@ -0,0 +1,86 @@ > +/* > + * Copyright (c) 2017 Intel, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef KEEPALIVE_H > +#define KEEPALIVE_H > + > +#include <stdint.h> > +#include "openvswitch/hmap.h" > +#include "ovs-thread.h" > + > +/* Default timeout set to 1000ms */ > +#define OVS_KEEPALIVE_DEFAULT_TIMEOUT 1000 > + > +struct smap; > + > +/* > + * Keepalive states with description > + * > + * KA_STATE_UNUSED - Not registered to KA framework. > + * KA_STATE_ALIVE - Thread is alive. 
> + * KA_STATE_MISSING - Thread missed first heartbeat. > + * KA_STATE_DEAD - Thread missed two heartbeats. > + * KA_STATE_GONE - Thread missed two or more heartbeats > + * and is completely 'buried'. > + * KA_STATE_SLEEP - Thread is sleeping. > + * > + */ It would be simpler to just comment the enum entries directly. The states are not really intuitive: according to this, "gone" is worse than "dead" - I know which I'd rather be :-) > +enum keepalive_state { > + KA_STATE_UNUSED, > + KA_STATE_ALIVE, > + KA_STATE_DEAD, > + KA_STATE_GONE, > + KA_STATE_MISSING, > + KA_STATE_SLEEP, > +}; > + > +struct ka_process_info { > + /* Thread id of the process, retrieved using ovs_gettid(). */ > + pid_t tid; > + > + /* Core id the thread was last scheduled. */ > + int core_id; > + > + /* Last seen thread state. */ > + enum keepalive_state state; > + > + /* Last seen timestamp of the thread. */ > + uint64_t last_seen_time; > + struct hmap_node node; > +}; > + > +typedef void (*ka_relay_cb)(int, enum keepalive_state, uint64_t); > + > +struct keepalive_info { > + /* Mutex for 'process_list'. */ > + struct ovs_mutex proclist_mutex; > + > + /* List of process/threads monitored by KA framework. */ > + struct hmap process_list OVS_GUARDED; > + > + /* Keepalive initialization time. */ > + uint64_t init_time; > + > + /* keepalive relay handler. 
*/ > + ka_relay_cb relay_cb; > + void *relay_cb_data; > +}; > + > +bool ka_is_enabled(void); > +void ka_init(const struct smap *); > +void ka_destroy(void); > + > +#endif /* keepalive.h */ > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index 630c6fa..f70407f 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -34,6 +34,7 @@ > #include "hmapx.h" > #include "if-notifier.h" > #include "jsonrpc.h" > +#include "keepalive.h" > #include "lacp.h" > #include "mac-learning.h" > #include "mcast-snooping.h" > @@ -506,6 +507,7 @@ bridge_exit(bool delete_datapath) > bridge_destroy(br, delete_datapath); > } > ovsdb_idl_destroy(idl); > + ka_destroy(); > } > > /* Looks at the list of managers in 'ovs_cfg' and extracts their remote IP > @@ -2959,6 +2961,7 @@ bridge_run(void) > if (cfg) { > netdev_set_flow_api_enabled(&cfg->other_config); > dpdk_init(&cfg->other_config); > + ka_init(&cfg->other_config); > } > > /* Initialize the ofproto library. This only needs to run once, but > diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema > index 90e50b6..c56a64c 100644 > --- a/vswitchd/vswitch.ovsschema > +++ b/vswitchd/vswitch.ovsschema > @@ -1,6 +1,6 @@ > {"name": "Open_vSwitch", > - "version": "7.15.1", > - "cksum": "3682332033 23608", > + "version": "7.16.0", > + "cksum": "3631938350 23762", > "tables": { > "Open_vSwitch": { > "columns": { > @@ -30,6 +30,10 @@ > "type": {"key": "string", "value": "string", > "min": 0, "max": "unlimited"}, > "ephemeral": true}, > + "keepalive": { > + "type": {"key": "string", "value": "string", "min": 0, > + "max": "unlimited"}, > + "ephemeral": true}, > "ovs_version": { > "type": {"key": {"type": "string"}, > "min": 0, "max": 1}}, > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > index c145e1a..512292a 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -568,6 +568,55 @@ > </p> > </column> > </group> > + > + <group title="Keepalive"> > + <p> > + The <code>keepalive</code> column contains 
key-value pairs that > + report health of datapath threads in Open vSwitch. These are > updated > + periodically (based on the keepalive-interval). > + </p> > + > + <column name="other_config" key="enable-keepalive" > + type='{"type": "boolean"}'> > + Keepalive is disabled by default to avoid overhead in the common > + case when heartbeat monitoring is not useful. Set this value to > + <code>true</code> to enable keepalive <ref column="keepalive"/> > + column or to <code>false</code> to explicitly disable it. > + </column> > + > + <column name="other_config" key="keepalive-interval" > + type='{"type": "integer", "minInteger": 100}'> > + <p> > + Specifies the keepalive interval value in milliseconds. > + </p> > + <p> > + If not specified, this will be set to 1000 milliseconds (default > + value). Changing this value requires restarting the daemon. > + </p> > + </column> > + > + <column name="keepalive" key="PMD_ID"> > + <p> > + One such key-value pair, with <code>ID</code> replaced by the > + PMD thread, will exist for each active PMD thread. The value is > a > + comma-separated list of PMD thread status, core number and the > + last seen timestamp of PMD thread. In respective order, these > + values are: > + </p> > + > + <ol> > + <li>Status of PMD thread. Valid status include ALIVE, MISSING, > + DEAD, GONE, SLEEP.</li> > + <li>Core id the PMD thread was scheduled.</li> > + <li>Last seen timestamp(epoch) of the PMD thread.</li> > + </ol> > + > + <p> > + This is only valid for OvS-DPDK Datapath and PMD threads status > + is implemented currently. > + </p> > + </column> > + </group> > </group> > > <group title="Version Reporting"> > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev