---
 src/osaf/consensus/Makefile              |  18 +++
 src/osaf/consensus/keyvalue.cc           | 221 ++++++++++++++++++++++++++
 src/osaf/consensus/keyvalue.h            |  66 ++++++++
 src/osaf/consensus/plugins/etcd.plugin   | 253 ++++++++++++++++++++++++++++++
 src/osaf/consensus/plugins/sample.plugin | 171 ++++++++++++++++++++
 src/osaf/consensus/service.cc            | 258 +++++++++++++++++++++++++++++++
 src/osaf/consensus/service.h             |  71 +++++++++
 7 files changed, 1058 insertions(+)
 create mode 100644 src/osaf/consensus/Makefile
 create mode 100644 src/osaf/consensus/keyvalue.cc
 create mode 100644 src/osaf/consensus/keyvalue.h
 create mode 100644 src/osaf/consensus/plugins/etcd.plugin
 create mode 100644 src/osaf/consensus/plugins/sample.plugin
 create mode 100644 src/osaf/consensus/service.cc
 create mode 100644 src/osaf/consensus/service.h

diff --git a/src/osaf/consensus/Makefile b/src/osaf/consensus/Makefile
new file mode 100644
index 000000000..a2c8bc9dd
--- /dev/null
+++ b/src/osaf/consensus/Makefile
@@ -0,0 +1,18 @@
+# -*- OpenSAF -*-
+#
+# (C) Copyright 2018 The OpenSAF Foundation
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+# under the GNU Lesser General Public License Version 2.1, February 1999.
+# The complete license can be accessed from the following location:
+# http://opensource.org/licenses/lgpl-license.php
+# See the Copying file included with the OpenSAF distribution for full
+# licensing terms.
+#
+# Author(s): Ericsson AB
+#
+
+all:
+	$(MAKE) -C ../.. lib/libconsensus.la
diff --git a/src/osaf/consensus/keyvalue.cc b/src/osaf/consensus/keyvalue.cc
new file mode 100644
index 000000000..eea518585
--- /dev/null
+++ b/src/osaf/consensus/keyvalue.cc
@@ -0,0 +1,245 @@
+/* -*- OpenSAF -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#include "osaf/consensus/keyvalue.h"
+#include <sys/wait.h>
+#include <array>
+#include <cctype>
+#include <cstdio>
+#include "base/logtrace.h"
+#include "base/getenv.h"
+#include "base/conf.h"
+
+// Namespace-scope definition of the in-class constexpr member. The constant is
+// bound to a const reference by std::this_thread::sleep_for() (an odr-use),
+// which needs this definition before C++17; it is redundant but valid later.
+constexpr std::chrono::milliseconds KeyValue::kSleepInterval;
+
+namespace {
+
+// Returned by KeyValue::Execute() when the command could not be run or reaped.
+// Deliberately not 0 or 1: plugins use exit code 1 for "lock held by another
+// owner", so an internal failure must not be mistaken for it.
+constexpr int kExecuteFailed = 255;
+
+// Builds "<plugin command> <arguments>". FMS_KEYVALUE_STORE_PLUGIN_CMD is
+// read from the environment on every call.
+// NOTE(review): <arguments> is passed to the shell unquoted; callers must not
+// pass keys, values or owner names containing shell metacharacters.
+std::string PluginCommand(const std::string& arguments) {
+  const std::string kv_store_cmd =
+      base::GetEnv("FMS_KEYVALUE_STORE_PLUGIN_CMD", "");
+  return kv_store_cmd + " " + arguments;
+}
+
+// Runs <command> until it exits with 0, making at most KeyValue::kMaxRetry
+// further attempts and sleeping KeyValue::kSleepInterval before each retry.
+// Returns the exit code of the last attempt; <value> holds its output.
+int ExecuteWithRetry(const std::string& command, std::string& value) {
+  int rc = KeyValue::Execute(command, value);
+  for (uint32_t retries = 0; rc != 0 && retries < KeyValue::kMaxRetry;
+       ++retries) {
+    std::this_thread::sleep_for(KeyValue::kSleepInterval);
+    rc = KeyValue::Execute(command, value);
+  }
+  return rc;
+}
+
+}  // namespace
+
+// Runs <command> through popen() and stores what it wrote to stdout in
+// <output>, minus one trailing whitespace character (normally the newline).
+// Returns the command's exit status. A command that could not be started or
+// reaped yields kExecuteFailed, and one that was killed by a signal yields
+// 128 + <signal number>, so neither is ever reported as success.
+int KeyValue::Execute(const std::string& command, std::string& output) {
+  TRACE_ENTER();
+  FILE* pipe = popen(command.c_str(), "r");
+  if (pipe == nullptr) {
+    TRACE("popen() failed for '%s'", command.c_str());
+    return kExecuteFailed;
+  }
+
+  constexpr size_t buf_size = 128;
+  std::array<char, buf_size> buffer;
+  output.clear();
+  // Stop on the first failed read: a read error never sets the end-of-file
+  // indicator, so looping on feof() alone could spin forever.
+  while (fgets(buffer.data(), buf_size, pipe) != nullptr) {
+    output += buffer.data();
+  }
+
+  const int status = pclose(pipe);
+  int exit_code = kExecuteFailed;
+  if (status != -1) {
+    if (WIFEXITED(status)) {
+      exit_code = WEXITSTATUS(status);
+    } else if (WIFSIGNALED(status)) {
+      // WEXITSTATUS() is meaningless here and would typically read as 0.
+      exit_code = 128 + WTERMSIG(status);
+    }
+  }
+
+  // isspace() requires a value representable as unsigned char.
+  if (!output.empty() &&
+      isspace(static_cast<unsigned char>(output.back())) != 0) {
+    // remove newline at end of output
+    output.pop_back();
+  }
+  TRACE("Executed '%s', returning %d", command.c_str(), exit_code);
+  return exit_code;
+}
+
+// Retrieves the value of <key> into <value> via the plugin's "get" command.
+SaAisErrorT KeyValue::Get(const std::string& key, std::string& value) {
+  TRACE_ENTER();
+
+  const int rc = KeyValue::Execute(PluginCommand("get " + key), value);
+  TRACE("Read '%s'", value.c_str());
+
+  return rc == 0 ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
+}
+
+// Sets <key> to <value>. The plugin's output is discarded.
+SaAisErrorT KeyValue::Set(const std::string& key, const std::string& value) {
+  TRACE_ENTER();
+
+  std::string output;
+  const int rc =
+      KeyValue::Execute(PluginCommand("set " + key + " " + value), output);
+
+  return rc == 0 ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
+}
+
+// Erases <key>. The plugin's output is discarded.
+SaAisErrorT KeyValue::Erase(const std::string& key) {
+  TRACE_ENTER();
+
+  std::string output;
+  const int rc = KeyValue::Execute(PluginCommand("erase " + key), output);
+
+  return rc == 0 ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
+}
+
+// Takes the lock on behalf of <owner> for <timeout> seconds.
+// Returns SA_AIS_OK on success, SA_AIS_ERR_EXIST if the plugin reports that
+// the lock is already held (exit code 1) and SA_AIS_ERR_TRY_AGAIN otherwise.
+SaAisErrorT KeyValue::Lock(const std::string& owner,
+                           const unsigned int timeout) {
+  TRACE_ENTER();
+
+  std::string output;
+  const int rc = KeyValue::Execute(
+      PluginCommand("lock " + owner + " " + std::to_string(timeout)), output);
+
+  if (rc == 0) {
+    return SA_AIS_OK;
+  } else if (rc == 1) {
+    // already locked
+    return SA_AIS_ERR_EXIST;
+  } else {
+    return SA_AIS_ERR_TRY_AGAIN;
+  }
+}
+
+// Releases the lock held by <owner>.
+// Returns SA_AIS_OK on success, SA_AIS_ERR_INVALID_PARAM if the plugin reports
+// that somebody else owns the lock (exit code 1) and SA_AIS_ERR_TRY_AGAIN
+// otherwise.
+SaAisErrorT KeyValue::Unlock(const std::string& owner) {
+  TRACE_ENTER();
+
+  std::string output;
+  const int rc = KeyValue::Execute(PluginCommand("unlock " + owner), output);
+
+  if (rc == 0) {
+    return SA_AIS_OK;
+  } else if (rc == 1) {
+    LOG_ER("Lock is owned by another node");
+    return SA_AIS_ERR_INVALID_PARAM;
+  } else {
+    return SA_AIS_ERR_TRY_AGAIN;
+  }
+}
+
+// Stores the current lock owner in <owner>. On failure <owner> is cleared, as
+// promised in keyvalue.h, and SA_AIS_ERR_FAILED_OPERATION is returned.
+SaAisErrorT KeyValue::LockOwner(std::string& owner) {
+  TRACE_ENTER();
+
+  std::string output;
+  const int rc = KeyValue::Execute(PluginCommand("lock_owner"), output);
+
+  if (rc == 0) {
+    TRACE("Lock owner is %s", output.c_str());
+    owner = output;
+    return SA_AIS_OK;
+  }
+
+  owner.clear();
+  return SA_AIS_ERR_FAILED_OPERATION;
+}
+
+// Thread body for KeyValue::Watch(): runs the plugin's "watch" command for
+// <key> and hands the value it prints to <callback>. Asserts if the command
+// still fails after all retries.
+void WatchKeyFunction(const std::string& key,
+                      const ConsensusCallback& callback,
+                      const uint32_t user_defined) {
+  TRACE_ENTER();
+
+  std::string value;
+  if (ExecuteWithRetry(PluginCommand("watch " + key), value) == 0) {
+    TRACE("Read '%s'", value.c_str());
+    callback(key, value, user_defined);
+  } else {
+    LOG_ER("Failed to watch %s", key.c_str());
+    osafassert(false);
+  }
+}
+
+// Thread body for KeyValue::WatchLock(): same as WatchKeyFunction() but for
+// the plugin's "watch_lock" command. The callback's key argument is the
+// fixed string "WatchLockFunction".
+void WatchLockFunction(const ConsensusCallback& callback,
+                       const uint32_t user_defined) {
+  TRACE_ENTER();
+
+  std::string value;
+  if (ExecuteWithRetry(PluginCommand("watch_lock"), value) == 0) {
+    TRACE("Read '%s'", value.c_str());
+    callback("WatchLockFunction", value, user_defined);
+  } else {
+    LOG_ER("Failed to watch lock");
+    osafassert(false);
+  }
+}
+
+// Starts a detached thread that runs WatchKeyFunction(). std::thread copies
+// <key> and <callback>, so the caller's objects need not outlive this call.
+void KeyValue::Watch(const std::string& key,
+                     const ConsensusCallback callback,
+                     const uint32_t user_defined) {
+  std::thread t(WatchKeyFunction, key, callback, user_defined);
+  t.detach();
+}
+
+// Starts a detached thread that runs WatchLockFunction().
+void KeyValue::WatchLock(const ConsensusCallback callback,
+                         const uint32_t user_defined) {
+  std::thread t(WatchLockFunction, callback, user_defined);
+  t.detach();
+}
diff --git a/src/osaf/consensus/keyvalue.h b/src/osaf/consensus/keyvalue.h
new file mode 100644
index 000000000..7cbf96b6b
--- /dev/null
+++ b/src/osaf/consensus/keyvalue.h
@@ -0,0 +1,66 @@
+/* -*- OpenSAF -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#ifndef OSAF_CONSENSUS_KEYVALUE_H_
+#define OSAF_CONSENSUS_KEYVALUE_H_
+
+#include <saAis.h>
+#include <chrono>
+#include <cstdint>
+#include <functional>
+#include <string>
+#include <thread>
+
+using ConsensusCallback =
+    std::function<void(const std::string& key, const std::string& new_value,
+                       const uint32_t user_defined)>;
+
+class KeyValue {
+ public:
+  // Retrieve value of key
+  static SaAisErrorT Get(const std::string& key, std::string& value);
+
+  // Set key to value
+  static SaAisErrorT Set(const std::string& key, const std::string& value);
+
+  // Erase key
+  static SaAisErrorT Erase(const std::string& key);
+
+  // Obtain lock, default timeout is 0 seconds (indefinite). If lock
+  // is called when already locked, the timeout is extended
+  static SaAisErrorT Lock(const std::string& owner, unsigned int timeout = 0);
+
+  // Release lock
+  static SaAisErrorT Unlock(const std::string& owner);
+
+  // Retrieve lock owner; an empty string is returned if there is an error
+  static SaAisErrorT LockOwner(std::string& owner);
+
+  // starts a thread to watch key and call callback if its value changes
+  static void Watch(const std::string& key, ConsensusCallback callback,
+                    const uint32_t user_defined);
+
+  // starts a thread to watch the lock and call callback if it is modified
+  static void WatchLock(ConsensusCallback callback,
+                        const uint32_t user_defined);
+
+  // internal use: run a plugin command, capture stdout, return exit status
+  static int Execute(const std::string& command, std::string& output);
+  static constexpr std::chrono::milliseconds kSleepInterval{100};  // in ms
+  static constexpr uint32_t kMaxRetry = 100;
+};
+
+#endif  // OSAF_CONSENSUS_KEYVALUE_H_
diff --git a/src/osaf/consensus/plugins/etcd.plugin b/src/osaf/consensus/plugins/etcd.plugin
new file mode 100644
index
000000000..762c57900
--- /dev/null
+++ b/src/osaf/consensus/plugins/etcd.plugin
@@ -0,0 +1,257 @@
+#!/usr/bin/env bash
+
+readonly keyname="opensaf_consensus_lock"
+readonly directory="/opensaf/"
+readonly etcd_timeout="5s"
+
+# get
+#   retrieve <value> of <key> from key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <value> is echoed to stdout
+#   non-zero - failure
+get() {
+  local -r key=$1
+  local value
+
+  if value=$(etcdctl --timeout $etcd_timeout get "$directory$key" 2>&1)
+  then
+    echo "$value"
+    return 0
+  else
+    return 1
+  fi
+}
+
+# set
+#   set <key> to <value> in key-value store
+# params:
+#   $1 - <key>
+#   $2 - <value>
+# returns:
+#   0 - success
+#   non-zero - failure
+set() {
+  local -r key=$1
+  local -r value=$2
+
+  if etcdctl --timeout $etcd_timeout set "$directory$key" "$value" >/dev/null 2>&1
+  then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# erase
+#   erase <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success
+#   non-zero - failure
+erase() {
+  local -r key=$1
+
+  if etcdctl --timeout $etcd_timeout rm "$directory$key" >/dev/null 2>&1
+  then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# lock
+# params:
+#   $1 - <owner>, owner of the lock is set to this
+#   $2 - <timeout>, will automatically unlock after <timeout> seconds
+# returns:
+#   0 - success
+#   1 - the lock is owned by someone else
+#   2 or above - other failure
+# NOTE: if lock is already acquired by <owner>, then timeout is extended
+lock() {
+  local -r owner=$1
+  local -r timeout=$2
+  local current_owner
+
+  if etcdctl --timeout $etcd_timeout mk "$directory$keyname" "$owner" \
+    --ttl "$timeout" >/dev/null 2>&1
+  then
+    return 0
+  fi
+
+  # same timeout as every other etcdctl call, so an unreachable store is
+  # reported as "other failure" within the same time bound
+  if current_owner=$(etcdctl --timeout $etcd_timeout get "$directory$keyname")
+  then
+    # see if we already hold the lock
+    if [ "$current_owner" == "$owner" ]; then
+      # refresh TTL
+      if etcdctl --timeout $etcd_timeout set "$directory$keyname" "$owner" \
+        --swap-with-value "$owner" --ttl "$timeout" >/dev/null 2>&1
+      then
+        return 0
+      fi
+    else
+      # the lock is locked by someone else
+      return 1
+    fi
+  fi
+
+  return 2
+}
+
+# lock_owner
+#   retrieve <owner> of lock
+# params:
+#   none
+# returns:
+#   0 - success, <owner> is echoed to stdout
+#   non-zero - failure or not locked
+lock_owner() {
+  get "$keyname"
+  return $?
+}
+
+# unlock
+# params:
+#   $1 - owner
+#   $2 - <forced>
+#      - (optional parameter)
+#      - if set 'true', will unlock even if lock is not held by node
+#      - defaults to 'false'
+# returns:
+#   0 - success
+#   1 - the lock is owned by someone else
+#   2 or above - other failure
+#
+unlock() {
+  local -r owner=$1
+  local -r forced=${2:-false}
+  local current_owner
+
+  if [ "$forced" = false ]; then
+    # unlock only succeeds if owner matches
+    if etcdctl --timeout $etcd_timeout rm "$directory$keyname" \
+      --with-value "$owner" >/dev/null 2>&1
+    then
+      return 0
+    fi
+
+    # failed! check we own the lock
+    if current_owner=$(etcdctl --timeout $etcd_timeout get \
+      "$directory$keyname" 2>&1)
+    then
+      if [ "$owner" != "$current_owner" ]; then
+        return 1
+      fi
+    fi
+
+    return 2
+  fi
+
+  if etcdctl --timeout $etcd_timeout rm "$directory$keyname" >/dev/null 2>&1
+  then
+    return 0
+  else
+    return 2
+  fi
+}
+
+# watch
+#   watch <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <new_value> is echoed to stdout
+#   non-zero - failure
+watch() {
+  local -r key=$1
+  local value
+
+  if value=$(etcdctl --timeout $etcd_timeout watch "$directory$key" 2>&1)
+  then
+    # if the key is removed, then "PrevNode.Value: <value>" is returned
+    echo "$value"
+    return 0
+  else
+    return 1
+  fi
+}
+
+# argument parsing
+case "$1" in
+  get)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 get <key>"
+      exit 1
+    fi
+    get "$2"
+    exit $?
+    ;;
+  set)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 set <key> <value>"
+      exit 1
+    fi
+    set "$2" "$3"
+    exit $?
+    ;;
+  erase)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 erase <key>"
+      exit 1
+    fi
+    erase "$2"
+    exit $?
+    ;;
+  lock)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 lock <owner> <timeout>"
+      exit 1
+    fi
+    lock "$2" "$3"
+    exit $?
+    ;;
+  lock_owner)
+    if [ "$#" -ne 1 ]; then
+      echo "Usage: $0 lock_owner"
+      exit 1
+    fi
+    lock_owner
+    exit $?
+    ;;
+  unlock)
+    if [ "$#" -eq 2 ]; then
+      unlock "$2"
+      exit $?
+    elif [ "$#" -eq 3 ] && [ "$3" == "--force" ]; then
+      unlock "$2" true
+      exit $?
+    else
+      echo "Usage: $0 unlock <owner> [--force]"
+      exit 1
+    fi
+    ;;
+  watch)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 watch <key>"
+      exit 1
+    fi
+    watch "$2"
+    exit $?
+    ;;
+  watch_lock)
+    if [ "$#" -ne 1 ]; then
+      echo "Usage: $0 watch_lock"
+      exit 1
+    fi
+    watch "$keyname"
+    exit $?
+    ;;
+  *)
+    echo "Usage: $0 {get|set|erase|lock|unlock|lock_owner|watch|watch_lock}"
+    ;;
+esac
+
+exit 1
diff --git a/src/osaf/consensus/plugins/sample.plugin b/src/osaf/consensus/plugins/sample.plugin
new file mode 100644
index 000000000..74eddaf2a
--- /dev/null
+++ b/src/osaf/consensus/plugins/sample.plugin
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+
+readonly keyname="opensaf_consensus_lock"
+
+# get
+#   retrieve <value> of <key> from key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <value> is echoed to stdout
+#   non-zero - failure
+get() {
+  local -r key=$1
+  ...
+}
+
+# set
+#   set <key> to <value> in key-value store
+# params:
+#   $1 - <key>
+#   $2 - <value>
+# returns:
+#   0 - success
+#   non-zero - failure
+set() {
+  local -r key=$1
+  local -r value=$2
+  ...
+}
+
+# erase
+#   erase <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success
+#   non-zero - failure
+erase() {
+  local -r key=$1
+  ...
+}
+
+# lock
+# params:
+#   $1 - <owner>, owner of the lock is set to this
+#   $2 - <timeout>, will automatically unlock after <timeout> seconds
+# returns:
+#   0 - success
+#   1 - the lock is owned by someone else
+#   2 or above - other failure
+# NOTE: if lock is already acquired by <owner>, then timeout is extended
+lock() {
+  local -r owner=$1
+  local -r timeout=$2
+  ...
+}
+
+# lock_owner
+#   retrieve <owner> of lock
+# params:
+#   none
+# returns:
+#   0 - success, <owner> is echoed to stdout
+#   non-zero - failure or not locked
+lock_owner() {
+  ...
+}
+
+# unlock
+# params:
+#   $1 - owner
+#   $2 - <forced>
+#      - (optional parameter)
+#      - if set 'true', will unlock even if lock is not held by node
+#      - defaults to 'false'
+# returns:
+#   0 - success
+#   1 - the lock is owned by someone else
+#   2 or above - other failure
+unlock() {
+  local -r owner=$1
+  local -r forced=${2:-false}
+  ...
+}
+
+# watch
+#   watch <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <new_value> is echoed to stdout
+#   non-zero - failure
+watch() {
+  local -r key=$1
+  ...
+}
+
+# argument parsing
+case "$1" in
+  get)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 get <key>"
+      exit 1
+    fi
+    get "$2"
+    exit $?
+    ;;
+  set)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 set <key> <value>"
+      exit 1
+    fi
+    set "$2" "$3"
+    exit $?
+    ;;
+  erase)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 erase <key>"
+      exit 1
+    fi
+    erase "$2"
+    exit $?
+    ;;
+  lock)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 lock <owner> <timeout>"
+      exit 1
+    fi
+    lock "$2" "$3"
+    exit $?
+    ;;
+  lock_owner)
+    if [ "$#" -ne 1 ]; then
+      echo "Usage: $0 lock_owner"
+      exit 1
+    fi
+    lock_owner
+    exit $?
+    ;;
+  unlock)
+    if [ "$#" -eq 2 ]; then
+      unlock "$2"
+      exit $?
+    elif [ "$#" -eq 3 ] && [ "$3" == "--force" ]; then
+      unlock "$2" true
+      exit $?
+    else
+      echo "Usage: $0 unlock <owner> [--force]"
+      exit 1
+    fi
+    ;;
+  watch)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 watch <key>"
+      exit 1
+    fi
+    watch "$2"
+    exit $?
+    ;;
+  watch_lock)
+    if [ "$#" -ne 1 ]; then
+      echo "Usage: $0 watch_lock"
+      exit 1
+    fi
+    watch "$keyname"
+    exit $?
+    ;;
+  *)
+    echo "Usage: $0 {get|set|erase|lock|unlock|lock_owner|watch|watch_lock}"
+    ;;
+esac
+
+exit 1
diff --git a/src/osaf/consensus/service.cc b/src/osaf/consensus/service.cc
new file mode 100644
index 000000000..b05bb322e
--- /dev/null
+++ b/src/osaf/consensus/service.cc
@@ -0,0 +1,278 @@
+/* -*- OpenSAF -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#include "osaf/consensus/service.h"
+#include <unistd.h>
+#include <climits>
+#include <thread>
+#include "base/logtrace.h"
+#include "base/conf.h"
+#include "base/getenv.h"
+#include "base/ncssysf_def.h"
+
+// Takes the lock for this node. If another node holds it, that node's lock is
+// removed and the node is fenced (when remote fencing is enabled) before the
+// lock is attempted again. Returns SA_AIS_OK at once if consensus is disabled.
+SaAisErrorT Consensus::PromoteThisNode() {
+  TRACE_ENTER();
+  SaAisErrorT rc;
+
+  if (use_consensus_ == false) {
+    return SA_AIS_OK;
+  }
+
+  uint32_t retries = 0;
+  rc = KeyValue::Lock(base::Conf::NodeName(), kLockTimeout);
+  while (rc == SA_AIS_ERR_TRY_AGAIN && retries < kMaxRetry) {
+    TRACE("Waiting for lock");
+    ++retries;
+    std::this_thread::sleep_for(kSleepInterval);
+    rc = KeyValue::Lock(base::Conf::NodeName(), kLockTimeout);
+  }
+
+  if (rc == SA_AIS_ERR_EXIST) {
+    // get the current active controller
+    std::string current_active("");
+    retries = 0;
+    rc = KeyValue::LockOwner(current_active);
+    while (rc != SA_AIS_OK && retries < kMaxRetry) {
+      ++retries;
+      std::this_thread::sleep_for(kSleepInterval);
+      rc = KeyValue::LockOwner(current_active);
+    }
+    if (rc != SA_AIS_OK) {
+      LOG_ER("Failed to get current lock owner. Will attempt to lock anyway");
+    }
+
+    osafassert(current_active != base::Conf::NodeName());
+    LOG_NO("Current active controller is %s", current_active.c_str());
+
+    // there's a chance the lock has been released since the lock attempt
+    if (current_active.empty() == false) {
+      // remove current active controller's lock and fence it
+      retries = 0;
+      rc = KeyValue::Unlock(current_active);
+      while (rc == SA_AIS_ERR_TRY_AGAIN && retries < kMaxRetry) {
+        LOG_IN("Trying to unlock");
+        ++retries;
+        std::this_thread::sleep_for(kSleepInterval);
+        rc = KeyValue::Unlock(current_active);
+      }
+
+      if (rc == SA_AIS_OK) {
+        FenceNode(current_active);
+      } else {
+        LOG_WA("Unlock failed (%u)", rc);
+      }
+    }
+
+    // previous lock has been released, try locking again
+    retries = 0;
+    rc = KeyValue::Lock(base::Conf::NodeName(), kLockTimeout);
+    while (rc == SA_AIS_ERR_TRY_AGAIN && retries < kMaxRetry) {
+      TRACE("Waiting for lock");
+      ++retries;
+      std::this_thread::sleep_for(kSleepInterval);
+      rc = KeyValue::Lock(base::Conf::NodeName(), kLockTimeout);
+    }
+  }
+
+  if (rc == SA_AIS_OK) {
+    LOG_NO("Active controller set to %s", base::Conf::NodeName().c_str());
+  } else {
+    LOG_ER("Failed to promote this node (%u)", rc);
+  }
+
+  return rc;
+}
+
+// Releases the lock of the current lock owner. A non-empty <node> must be the
+// current owner, otherwise the process asserts. The default argument is
+// declared on this definition, so it is only usable later in this file.
+SaAisErrorT Consensus::Demote(const std::string& node = "") {
+  TRACE_ENTER();
+  if (use_consensus_ == false) {
+    return SA_AIS_OK;
+  }
+
+  SaAisErrorT rc = SA_AIS_ERR_FAILED_OPERATION;
+  uint32_t retries = 0;
+
+  // check current active node
+  std::string current_active;
+  rc = KeyValue::LockOwner(current_active);
+  while (rc != SA_AIS_OK && retries < kMaxRetry) {
+    ++retries;
+    std::this_thread::sleep_for(kSleepInterval);
+    rc = KeyValue::LockOwner(current_active);
+  }
+
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Failed to get lock owner");
+    return rc;
+  }
+
+  LOG_NO("Demoting %s as active controller", current_active.c_str());
+
+  if (node.empty() == false && node != current_active) {
+    // node is not the current active controller!
+    osafassert(false);
+  }
+
+  retries = 0;
+  rc = KeyValue::Unlock(current_active);
+  while (rc == SA_AIS_ERR_TRY_AGAIN && retries < kMaxRetry) {
+    LOG_IN("Trying to unlock");
+    ++retries;
+    std::this_thread::sleep_for(kSleepInterval);
+    rc = KeyValue::Unlock(current_active);
+  }
+
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Unlock failed (%u)", rc);
+    return rc;
+  }
+
+  LOG_IN("Released lock");
+  return rc;
+}
+
+// Releases the lock regardless of which node holds it.
+SaAisErrorT Consensus::DemoteCurrentActive() {
+  TRACE_ENTER();
+  return Demote();
+}
+
+// Releases the lock, asserting that this node is the one holding it.
+SaAisErrorT Consensus::DemoteThisNode() {
+  TRACE_ENTER();
+  return Demote(base::Conf::NodeName());
+}
+
+// True if split brain prevention is switched on and a plugin is configured.
+bool Consensus::IsEnabled() const {
+  return use_consensus_;
+}
+
+// Probes the key-value store by writing this node's name to kTestKeyname.
+// Always true when consensus is disabled.
+bool Consensus::IsWritable() const {
+  TRACE_ENTER();
+  if (use_consensus_ == false) {
+    return true;
+  }
+
+  SaAisErrorT rc;
+  rc = KeyValue::Set(kTestKeyname, base::Conf::NodeName());
+  if (rc == SA_AIS_OK) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// True if FMS_USE_REMOTE_FENCING was set to 1 at construction.
+bool Consensus::IsRemoteFencingEnabled() const {
+  return use_remote_fencing_;
+}
+
+// Returns the current lock owner, or an empty string if consensus is disabled
+// or the owner cannot be read after all retries.
+std::string Consensus::CurrentActive() const {
+  TRACE_ENTER();
+  if (use_consensus_ == false) {
+    return "";
+  }
+
+  SaAisErrorT rc = SA_AIS_ERR_FAILED_OPERATION;
+  uint32_t retries = 0;
+  std::string owner;
+
+  rc = KeyValue::LockOwner(owner);
+  while (rc != SA_AIS_OK && retries < kMaxRetry) {
+    ++retries;
+    std::this_thread::sleep_for(kSleepInterval);
+    rc = KeyValue::LockOwner(owner);
+  }
+
+  if (rc == SA_AIS_OK) {
+    return owner;
+  } else {
+    LOG_ER("Failed to get lock owner");
+    return "";
+  }
+}
+
+// Reads FMS_SPLIT_BRAIN_PREVENTION, FMS_KEYVALUE_STORE_PLUGIN_CMD and
+// FMS_USE_REMOTE_FENCING from the environment and initialises the node name.
+Consensus::Consensus() {
+  TRACE_ENTER();
+
+  uint32_t split_brain_enable = base::GetEnv("FMS_SPLIT_BRAIN_PREVENTION", 0);
+  std::string kv_store_cmd = base::GetEnv("FMS_KEYVALUE_STORE_PLUGIN_CMD", "");
+  uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING", 0);
+
+  if (split_brain_enable == 1 && kv_store_cmd.empty() == false) {
+    use_consensus_ = true;
+  } else {
+    use_consensus_ = false;
+  }
+
+  if (use_remote_fencing == 1) {
+    use_remote_fencing_ = true;
+  }
+
+  // needed for base::Conf::NodeName() later
+  base::Conf::InitNodeName();
+
+  if (use_consensus_ == true) {
+    LOG_IN("Split brain prevention is enabled");
+  } else {
+    LOG_IN("Split brain prevention is disabled");
+  }
+}
+
+Consensus::~Consensus() {
+}
+
+// Reboots <node> via opensaf_reboot() if remote fencing is enabled.
+// Returns true if the reboot was requested, false if fencing is disabled.
+bool Consensus::FenceNode(const std::string& node) {
+  if (use_remote_fencing_ == true) {
+    LOG_WA("Fencing remote node %s", node.c_str());
+    // @todo currently passing UINT_MAX as node ID, since
+    // we can't always obtain a valid node ID?
+    opensaf_reboot(UINT_MAX, node.c_str(),
+                   "Fencing remote node");
+
+    return true;
+  } else {
+    LOG_WA("Fencing is not enabled. Node %s will not be fenced", node.c_str());
+    return false;
+  }
+}
+
+// Watches the lock through KeyValue::WatchLock(); does nothing if consensus
+// is disabled.
+void Consensus::MonitorLock(ConsensusCallback callback,
+                            const uint32_t user_defined) {
+  TRACE_ENTER();
+  if (use_consensus_ == false) {
+    return;
+  }
+
+  KeyValue::WatchLock(callback, user_defined);
+}
diff --git a/src/osaf/consensus/service.h b/src/osaf/consensus/service.h
new file mode 100644
index 000000000..1cd24bed1
--- /dev/null
+++ b/src/osaf/consensus/service.h
@@ -0,0 +1,71 @@
+/* -*- OpenSAF -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#ifndef OSAF_CONSENSUS_SERVICE_H_
+#define OSAF_CONSENSUS_SERVICE_H_
+
+#include <chrono>
+#include <cstdint>
+#include <string>
+#include "saAis.h"
+#include "osaf/consensus/keyvalue.h"
+
+class Consensus {
+ public:
+  // Set active controller to this node
+  SaAisErrorT PromoteThisNode();
+
+  // Clear current active controller by releasing lock
+  SaAisErrorT DemoteCurrentActive();
+
+  // Clear this node as active controller by releasing lock
+  SaAisErrorT DemoteThisNode();
+
+  // Returns active controller as known by the consensus service
+  std::string CurrentActive() const;
+
+  // If the active controller is changed as known by the consensus service,
+  // then callback will be run from a new thread, with <user_defined> returned
+  // in the callback
+  void MonitorLock(ConsensusCallback callback, const uint32_t user_defined);
+
+  // Is consensus service enabled?
+  bool IsEnabled() const;
+
+  // Is the key-value store writable?
+  bool IsWritable() const;
+
+  // Is remote fencing enabled?
+  bool IsRemoteFencingEnabled() const;
+
+  Consensus();
+  virtual ~Consensus();
+
+  Consensus(const Consensus&) = delete;
+  Consensus& operator=(const Consensus&) = delete;
+
+ private:
+  bool use_consensus_ = false;
+  bool use_remote_fencing_ = false;
+  const std::string kTestKeyname = "opensaf_write_test";
+  const std::chrono::milliseconds kSleepInterval{100};  // in ms
+  static constexpr uint32_t kLockTimeout = 0;  // lock is persistent by default
+  static constexpr uint32_t kMaxRetry = 600;  // retries, kSleepInterval apart
+  SaAisErrorT Demote(const std::string& node);  // release the owner's lock
+  bool FenceNode(const std::string& node);  // reboot node if fencing enabled
+};
+
+#endif  // OSAF_CONSENSUS_SERVICE_H_
-- 
2.14.1

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org!
http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel
