Hi

There is one thing that can be improved.

In Anders’ proposal, 
https://sourceforge.net/p/opensaf/tickets/64/attachment/Split-brain%20prevention%20in%20OpenSAF.pdf

“When taking over the system controller role, the new active system controller 
must first acquire the
lock, and then wait for a sufficiently long time that the old active system 
controller has stepped down.”

We can change Consensus::BeginActivePromotion() so that, if it sees that the 
current active controller is some other node, it sets the controller to itself 
but sleeps for X seconds before returning from the function.
I’m not sure what X should be, though. Perhaps it could be a configurable 
parameter in fmd.conf?

Thanks
Gary

-----Original Message-----
From: gary <[email protected]>
Date: Wednesday, 10 January 2018 at 4:29 pm
To: <[email protected]>, <[email protected]>, Quyen Quoc Dao 
<[email protected]>
Cc: <[email protected]>, gary <[email protected]>
Subject: [PATCH 1/5] osaf: add consensus API [#64]

    ---
     src/osaf/consensus/Makefile              |  18 +++
     src/osaf/consensus/keyvalue.cc           | 165 ++++++++++++++++++++++
     src/osaf/consensus/keyvalue.h            |  57 ++++++++
     src/osaf/consensus/plugins/etcd.plugin   | 217 +++++++++++++++++++++++++++++
     src/osaf/consensus/plugins/sample.plugin | 162 ++++++++++++++++++++++
     src/osaf/consensus/service.cc            | 231 +++++++++++++++++++++++++++++++
     src/osaf/consensus/service.h             |  66 +++++++++
     7 files changed, 916 insertions(+)
     create mode 100644 src/osaf/consensus/Makefile
     create mode 100644 src/osaf/consensus/keyvalue.cc
     create mode 100644 src/osaf/consensus/keyvalue.h
     create mode 100644 src/osaf/consensus/plugins/etcd.plugin
     create mode 100644 src/osaf/consensus/plugins/sample.plugin
     create mode 100644 src/osaf/consensus/service.cc
     create mode 100644 src/osaf/consensus/service.h
    
    diff --git a/src/osaf/consensus/Makefile b/src/osaf/consensus/Makefile
    new file mode 100644
    index 000000000..a2c8bc9dd
    --- /dev/null
    +++ b/src/osaf/consensus/Makefile
    @@ -0,0 +1,18 @@
    +#      -*- OpenSAF  -*-
    +#
    +# (C) Copyright 2018 The OpenSAF Foundation
    +#
    +# This program is distributed in the hope that it will be useful, but
    +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    +# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
    +# under the GNU Lesser General Public License Version 2.1, February 1999.
    +# The complete license can be accessed from the following location:
    +# http://opensource.org/licenses/lgpl-license.php
    +# See the Copying file included with the OpenSAF distribution for full
    +# licensing terms.
    +#
    +# Author(s): Ericsson AB
    +#
    +
    +all:
    +   $(MAKE) -C ../.. lib/libconsensus.la
    diff --git a/src/osaf/consensus/keyvalue.cc b/src/osaf/consensus/keyvalue.cc
    new file mode 100644
    index 000000000..e5a796d33
    --- /dev/null
    +++ b/src/osaf/consensus/keyvalue.cc
    @@ -0,0 +1,165 @@
    +/*      -*- OpenSAF  -*-
    + *
    + * (C) Copyright 2018 The OpenSAF Foundation
    + *
    + * This program is distributed in the hope that it will be useful, but
    + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
    + * under the GNU Lesser General Public License Version 2.1, February 1999.
    + * The complete license can be accessed from the following location:
    + * http://opensource.org/licenses/lgpl-license.php
    + * See the Copying file included with the OpenSAF distribution for full
    + * licensing terms.
    + *
    + * Author(s): Ericsson AB
    + *
    + */
    +#include "keyvalue.h"
    +#include "base/logtrace.h"
    +#include "base/getenv.h"
    +#include "base/conf.h"
    +
    +// Runs <command> through popen() and captures what it writes to stdout.
    +//   command - command line to execute
    +//   output  - receives the captured text; if it ends in a whitespace
    +//             character (normally the newline), that one character is
    +//             removed
    +// Returns 1 if the pipe could not be opened, otherwise the value returned
    +// by pclose() for the command.
    +int KeyValue::Execute(const std::string& command, std::string& output) {
    +  TRACE_ENTER();
    +  constexpr size_t buf_size = 128;
    +  std::array<char, buf_size> buffer;
    +  FILE* pipe = popen(command.c_str(), "r");
    +  if (pipe == nullptr) {
    +    return 1;
    +  }
    +  output = "";
    +  // Loop on fgets() itself: it returns nullptr at end of file and also on a
    +  // read error, but only end of file sets the EOF indicator, so a loop that
    +  // tests feof() alone would never terminate after a read error.
    +  while (fgets(buffer.data(), buf_size, pipe) != nullptr) {
    +    output += buffer.data();
    +  }
    +  const int exit_code = pclose(pipe);
    +  if (output.empty() == false && isspace(output.back()) != 0) {
    +    // remove newline at end of output
    +    output.pop_back();
    +  }
    +  TRACE("Executed '%s', returning %d", command.c_str(), exit_code);
    +  return exit_code;
    +}
    +
    +// Reads the value stored under <key> by running the command configured in
    +// KEYVALUE_STORE_PLUGIN_CMD with the "get" sub-command. The command's
    +// output is written to <value>. Returns SA_AIS_OK when the command
    +// reports 0, otherwise SA_AIS_ERR_FAILED_OPERATION.
    +SaAisErrorT KeyValue::Get(const std::string& key, std::string& value) {
    +  TRACE_ENTER();
    +
    +  const std::string plugin = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  const int status = KeyValue::Execute(plugin + " get " + key, value);
    +  TRACE("Read '%s'", value.c_str());
    +
    +  return (status == 0) ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
    +}
    +
    +// Stores <value> under <key> by running the command configured in
    +// KEYVALUE_STORE_PLUGIN_CMD with the "set" sub-command. Returns SA_AIS_OK
    +// when the command reports 0, otherwise SA_AIS_ERR_FAILED_OPERATION.
    +SaAisErrorT KeyValue::Set(const std::string& key, const std::string& value) {
    +  TRACE_ENTER();
    +
    +  const std::string plugin = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  std::string ignored;
    +  const int status =
    +      KeyValue::Execute(plugin + " set " + key + " " + value, ignored);
    +
    +  return (status == 0) ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
    +}
    +
    +// Removes <key> by running the command configured in
    +// KEYVALUE_STORE_PLUGIN_CMD with the "erase" sub-command. Returns
    +// SA_AIS_OK when the command reports 0, otherwise
    +// SA_AIS_ERR_FAILED_OPERATION.
    +SaAisErrorT KeyValue::Erase(const std::string& key) {
    +  TRACE_ENTER();
    +
    +  const std::string plugin = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  std::string ignored;
    +  const int status = KeyValue::Execute(plugin + " erase " + key, ignored);
    +
    +  return (status == 0) ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
    +}
    +
    +// Requests the lock on behalf of <owner> by running the command
    +// configured in KEYVALUE_STORE_PLUGIN_CMD with the "lock" sub-command,
    +// passing <owner> and <timeout> as arguments. Returns SA_AIS_OK when the
    +// command reports 0, otherwise SA_AIS_ERR_FAILED_OPERATION.
    +SaAisErrorT KeyValue::Lock(const std::string& owner,
    +                         const unsigned int timeout) {
    +  TRACE_ENTER();
    +
    +  const std::string plugin = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  const std::string lock_cmd =
    +      plugin + " lock " + owner + " " + std::to_string(timeout);
    +  std::string ignored;
    +  const int status = KeyValue::Execute(lock_cmd, ignored);
    +
    +  return (status == 0) ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
    +}
    +
    +// Releases the lock by running the command configured in
    +// KEYVALUE_STORE_PLUGIN_CMD with the "unlock" sub-command. Returns
    +// SA_AIS_OK when the command reports 0, otherwise
    +// SA_AIS_ERR_FAILED_OPERATION.
    +SaAisErrorT KeyValue::Unlock() {
    +  TRACE_ENTER();
    +
    +  const std::string plugin = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  std::string ignored;
    +  const int status = Execute(plugin + " unlock", ignored);
    +
    +  return (status == 0) ? SA_AIS_OK : SA_AIS_ERR_FAILED_OPERATION;
    +}
    +
    +// Asks the plugin (sub-command "lock_owner") who holds the lock and
    +// returns true only when the command reports 0 and its output equals
    +// base::Conf::NodeName().
    +bool KeyValue::IsLockedByThisNode() {
    +  TRACE_ENTER();
    +
    +  const std::string plugin = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  std::string holder;
    +  const int status = KeyValue::Execute(plugin + " lock_owner", holder);
    +
    +  if (status != 0) {
    +    return false;
    +  }
    +
    +  TRACE("Lock owner is %s", holder.c_str());
    +  return holder.compare(base::Conf::NodeName()) == 0;
    +}
    +
    +// Thread body started by KeyValue::Watch(): runs the plugin's "watch"
    +// sub-command for <key>. When the command reports 0, its output and
    +// <user_defined> are handed to <callback>; otherwise an error is logged.
    +void threadFunction(const std::string& key,
    +  const ConsensusCallback& callback,
    +  const uint32_t user_defined) {
    +  TRACE_ENTER();
    +
    +  const std::string plugin = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  std::string new_value;
    +  const int status = KeyValue::Execute(plugin + " watch " + key, new_value);
    +  TRACE("Read '%s'", new_value.c_str());
    +
    +  if (status != 0) {
    +    LOG_ER("Failed to watch %s", key.c_str());
    +    return;
    +  }
    +
    +  callback(new_value, user_defined);
    +}
    +
    +// Starts a detached thread running threadFunction() with copies of
    +// <key>, <callback> and <user_defined>, then returns immediately.
    +void KeyValue::Watch(const std::string& key,
    +  const ConsensusCallback callback,
    +  const uint32_t user_defined)
    +{
    +  std::thread watcher(threadFunction, key, callback, user_defined);
    +  watcher.detach();
    +}
    diff --git a/src/osaf/consensus/keyvalue.h b/src/osaf/consensus/keyvalue.h
    new file mode 100644
    index 000000000..347c820d0
    --- /dev/null
    +++ b/src/osaf/consensus/keyvalue.h
    @@ -0,0 +1,57 @@
    +/*      -*- OpenSAF  -*-
    + *
    + * (C) Copyright 2018 The OpenSAF Foundation
    + *
    + * This program is distributed in the hope that it will be useful, but
    + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
    + * under the GNU Lesser General Public License Version 2.1, February 1999.
    + * The complete license can be accessed from the following location:
    + * http://opensource.org/licenses/lgpl-license.php
    + * See the Copying file included with the OpenSAF distribution for full
    + * licensing terms.
    + *
    + * Author(s): Ericsson AB
    + *
    + */
    +#ifndef CONSENSUS_KEYVALUE_H_
    +#define CONSENSUS_KEYVALUE_H_
    +
    +#include <saAis.h>
    +#include <string>
    +#include <functional>
    +#include <thread>
    +
    +// Callback type accepted by KeyValue::Watch(); it is invoked with the new
    +// value and the caller-supplied <user_defined> number.
    +using ConsensusCallback =
    +    std::function<void(const std::string& new_value,
    +                       const uint32_t user_defined)>;
    +
    +// Collection of static helpers for talking to the key-value store.
    +class KeyValue {
    + public:
    +  // Read the value of <key> into <value>
    +  static SaAisErrorT Get(const std::string& key, std::string& value);
    +
    +  // Store <value> under <key>
    +  static SaAisErrorT Set(const std::string& key, const std::string& value);
    +
    +  // Remove <key>
    +  static SaAisErrorT Erase(const std::string& key);
    +
    +  // Take the lock for <owner>; <timeout> defaults to 20 seconds
    +  static SaAisErrorT Lock(const std::string& owner,
    +    const unsigned int timeout = 20);
    +
    +  // Give the lock back
    +  static SaAisErrorT Unlock();
    +
    +  // True if this node currently holds the lock
    +  static bool IsLockedByThisNode();
    +
    +  // Spawns a thread that watches <key> and runs <callback> when the value
    +  // changes
    +  static void Watch(const std::string& key, ConsensusCallback callback,
    +    const uint32_t user_defined);
    +
    +  // internal use
    +  static int Execute(const std::string& command, std::string& output);
    +};
    +
    +#endif
    diff --git a/src/osaf/consensus/plugins/etcd.plugin b/src/osaf/consensus/plugins/etcd.plugin
    new file mode 100644
    index 000000000..0b8c77b4e
    --- /dev/null
    +++ b/src/osaf/consensus/plugins/etcd.plugin
    @@ -0,0 +1,217 @@
    +#!/usr/bin/env bash
    +
    +readonly keyname="opensaf_consensus_lock"
    +
    +# get
    +#   retrieve <value> of <key> from key-value store
    +# params:
    +#   $1 - <key>
    +# returns:
    +#   0 - success, <value> is echoed to stdout
    +#   non-zero - failure
    +get() {
    +  # 'local -r' makes the variable read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +  local value
    +
    +  # stderr is captured together with stdout, as etcdctl reports errors there
    +  if value=$(etcdctl get "$key" 2>&1); then
    +    echo "$value"
    +    return 0
    +  fi
    +  return 1
    +}
    +
    +# set
    +#   set <key> to <value> in key-value store
    +# params:
    +#   $1 - <key>
    +#   $2 - <value>
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +set() {
    +  # 'local -r' makes the variables read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +  local -r value=$2
    +
    +  # quoted so a key or value containing whitespace stays one argument;
    +  # all etcdctl output is discarded
    +  if etcdctl set "$key" "$value" > /dev/null 2>&1; then
    +    return 0
    +  fi
    +  return 1
    +}
    +
    +# erase
    +#   erase <key> in key-value store
    +# params:
    +#   $1 - <key>
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +erase() {
    +  # 'local -r' makes the variable read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +
    +  # all etcdctl output is discarded; only the exit status matters
    +  if etcdctl rm "$key" > /dev/null 2>&1; then
    +    return 0
    +  fi
    +  return 1
    +}
    +
    +# lock
    +# params:
    +#   $1 - <owner>, owner of the lock is set to this
    +#   $2 - <timeout>, will automatically unlock after <timeout> seconds
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +lock() {
    +  # 'local -r' makes the variables read-only; 'local readonly x=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r owner=$1
    +  local -r timeout=$2
    +  local current_owner
    +
    +  # 'mk' only succeeds if the key does not exist yet; the key expires
    +  # after <timeout> seconds (--ttl)
    +  if etcdctl mk "$keyname" "$owner" --ttl "$timeout" > /dev/null 2>&1; then
    +    return 0
    +  fi
    +
    +  # creating the key failed: see if we already hold the lock
    +  current_owner=$(etcdctl get "$keyname")
    +  if [ "$current_owner" == "$owner" ]; then
    +    return 0
    +  fi
    +  return 1
    +}
    +
    +# get
    +#   retrieve <owner> of lock
    +# params:
    +#   none
    +# returns:
    +#   0 - success, <owner> is echoed to stdout
    +#   non-zero - failure or not locked
    +lock_owner() {
    +  # the lock is an ordinary key, so its value is the owner; get()'s exit
    +  # status becomes this function's status
    +  get "$keyname"
    +}
    +
    +# unlock
    +# params:
    +#   $1 - <forced>
    +#      - (optional parameter)
    +#      - if set 'true', will unlock even if lock is not held by node
    +#      - defaults to 'false'
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +#
    +unlock() {
    +  # 'local -r' makes the variables read-only; 'local readonly x=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r forced=${1:-false}
    +  local -r hostname=$(hostname)
    +  local owner
    +
    +  # any value other than the literal 'false' counts as a forced unlock
    +  if [ "$forced" = false ]; then
    +    # check we own the lock
    +    # NOTE(review): lock() stores the caller-supplied <owner>, whereas this
    +    # compares against $(hostname) - confirm the two are always the same
    +    owner=$(etcdctl get "$keyname" 2>&1)
    +    if [ "$owner" != "$hostname" ]; then
    +      return 1
    +    fi
    +  fi
    +
    +  if etcdctl rm "$keyname" > /dev/null 2>&1; then
    +    return 0
    +  fi
    +  return 1
    +}
    +
    +# watch
    +#   watch <key> in key-value store
    +# params:
    +#   $1 - <key>
    +# returns:
    +#   0 - success, <new_value> is echoed to stdout
    +#   non-zero - failure
    +watch() {
    +  # 'local -r' makes the variable read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +  local value
    +
    +  if value=$(etcdctl watch "$key" 2>&1); then
    +    # if the key is removed, then "PrevNode.Value: <value>" is returned
    +    echo "$value"
    +    return 0
    +  fi
    +  return 1
    +}
    +
    +
    +# argument parsing
    +# The first argument selects the operation; each branch validates the
    +# argument count, runs the matching function and exits with its status.
    +# Arguments are passed on quoted so that values containing whitespace are
    +# not split into several words.
    +case "$1" in
    +  get)
    +    if [ "$#" -ne 2 ]; then
    +      echo "Usage: $0 get <key>"
    +      exit 1
    +    fi
    +    get "$2"
    +    exit $?
    +    ;;
    +  set)
    +    if [ "$#" -ne 3 ]; then
    +      echo "Usage: $0 set <key> <value>"
    +      exit 1
    +    fi
    +    set "$2" "$3"
    +    exit $?
    +    ;;
    +  erase)
    +    if [ "$#" -ne 2 ]; then
    +      echo "Usage: $0 erase <key>"
    +      exit 1
    +    fi
    +    erase "$2"
    +    exit $?
    +    ;;
    +  lock)
    +    if [ "$#" -ne 3 ]; then
    +      echo "Usage: $0 lock <owner> <timeout>"
    +      exit 1
    +    fi
    +    lock "$2" "$3"
    +    exit $?
    +    ;;
    +  lock_owner)
    +    if [ "$#" -ne 1 ]; then
    +      echo "Usage: $0 lock_owner"
    +      exit 1
    +    fi
    +    lock_owner
    +    exit $?
    +    ;;
    +  unlock)
    +    if [ "$#" -eq 1 ]; then
    +      unlock
    +      exit $?
    +    elif [ "$#" -eq 2 ] && [ "$2" == "--force" ]; then
    +      # unlock() documents 'true' as the value that forces the unlock
    +      unlock true
    +      exit $?
    +    else
    +      echo "Usage: $0 unlock [--force]"
    +      exit 1
    +    fi
    +    ;;
    +  watch)
    +    if [ "$#" -ne 2 ]; then
    +      echo "Usage: $0 watch <key>"
    +      exit 1
    +    fi
    +    watch "$2"
    +    exit $?
    +    ;;
    +  *)
    +    echo $"Usage: $0 {get|set|erase|lock|unlock|lock_owner|watch}"
    +    ;;
    +esac
    +
    +# only reached for an unknown operation
    +exit 1
    diff --git a/src/osaf/consensus/plugins/sample.plugin b/src/osaf/consensus/plugins/sample.plugin
    new file mode 100644
    index 000000000..424982448
    --- /dev/null
    +++ b/src/osaf/consensus/plugins/sample.plugin
    @@ -0,0 +1,162 @@
    +#!/usr/bin/env bash
    +
    +readonly keyname="opensaf_consensus_lock"
    +
    +# get
    +#   retrieve <value> of <key> from key-value store
    +# params:
    +#   $1 - <key>
    +# returns:
    +#   0 - success, <value> is echoed to stdout
    +#   non-zero - failure
    +get() {
    +  # 'local -r' makes the variable read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +
    +  # TODO: implementation here - echo the value of "$key" and return 0
    +  return 1
    +}
    +
    +# set
    +#   set <key> to <value> in key-value store
    +# params:
    +#   $1 - <key>
    +#   $2 - <value>
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +set() {
    +  # 'local -r' makes the variables read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +  local -r value=$2
    +
    +  # TODO: implementation here - store "$value" under "$key" and return 0
    +  return 1
    +}
    +
    +# erase
    +#   erase <key> in key-value store
    +# params:
    +#   $1 - <key>
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +erase() {
    +  # 'local -r' makes the variable read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +
    +  # TODO: implementation here - remove "$key" and return 0
    +  return 1
    +}
    +
    +# lock
    +# params:
    +#   $1 - <owner>, owner of the lock is set to this
    +#   $2 - <timeout>, will automatically unlock after <timeout> seconds
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +lock() {
    +  # 'local -r' makes the variables read-only; 'local readonly x=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r owner=$1
    +  local -r timeout=$2
    +
    +  # TODO: implementation here - take the lock for "$owner" with an
    +  # automatic release after "$timeout" seconds, and return 0
    +  return 1
    +}
    +
    +# get
    +#   retrieve <owner> of lock
    +# params:
    +#   none
    +# returns:
    +#   0 - success, <owner> is echoed to stdout
    +#   non-zero - failure or not locked
    +lock_owner() {
    +  # TODO: implementation here - echo the owner of the lock and return 0
    +  return 1
    +}
    +
    +# unlock
    +# params:
    +#   $1 - <forced>
    +#      - (optional parameter)
    +#      - if set 'true', will unlock even if lock is not held by node
    +#      - defaults to 'false'
    +# returns:
    +#   0 - success
    +#   non-zero - failure
    +#
    +unlock() {
    +  # 'local -r' makes the variables read-only; 'local readonly x=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r forced=${1:-false}
    +  local -r hostname=$(hostname)
    +
    +  # TODO: implementation here - release the lock (unless "$forced" is set,
    +  # only when it is held by this node) and return 0
    +  return 1
    +}
    +
    +# watch
    +#   watch <key> in key-value store
    +# params:
    +#   $1 - <key>
    +# returns:
    +#   0 - success, <new_value> is echoed to stdout
    +#   non-zero - failure
    +watch() {
    +  # 'local -r' makes the variable read-only; 'local readonly key=...' would
    +  # instead declare an extra variable named "readonly"
    +  local -r key=$1
    +
    +  # TODO: implementation here - wait for "$key" to change, echo the new
    +  # value and return 0
    +  return 1
    +}
    +
    +# argument parsing
    +# The first argument selects the operation; each branch validates the
    +# argument count, runs the matching function and exits with its status.
    +# Arguments are passed on quoted so that values containing whitespace are
    +# not split into several words.
    +case "$1" in
    +  get)
    +    if [ "$#" -ne 2 ]; then
    +      echo "Usage: $0 get <key>"
    +      exit 1
    +    fi
    +    get "$2"
    +    exit $?
    +    ;;
    +  set)
    +    if [ "$#" -ne 3 ]; then
    +      echo "Usage: $0 set <key> <value>"
    +      exit 1
    +    fi
    +    set "$2" "$3"
    +    exit $?
    +    ;;
    +  erase)
    +    if [ "$#" -ne 2 ]; then
    +      echo "Usage: $0 erase <key>"
    +      exit 1
    +    fi
    +    erase "$2"
    +    exit $?
    +    ;;
    +  lock)
    +    if [ "$#" -ne 3 ]; then
    +      echo "Usage: $0 lock <owner> <timeout>"
    +      exit 1
    +    fi
    +    lock "$2" "$3"
    +    exit $?
    +    ;;
    +  lock_owner)
    +    if [ "$#" -ne 1 ]; then
    +      echo "Usage: $0 lock_owner"
    +      exit 1
    +    fi
    +    lock_owner
    +    exit $?
    +    ;;
    +  unlock)
    +    if [ "$#" -eq 1 ]; then
    +      unlock
    +      exit $?
    +    elif [ "$#" -eq 2 ] && [ "$2" == "--force" ]; then
    +      # unlock() documents 'true' as the value that forces the unlock
    +      unlock true
    +      exit $?
    +    else
    +      echo "Usage: $0 unlock [--force]"
    +      exit 1
    +    fi
    +    ;;
    +  watch)
    +    if [ "$#" -ne 2 ]; then
    +      echo "Usage: $0 watch <key>"
    +      exit 1
    +    fi
    +    watch "$2"
    +    exit $?
    +    ;;
    +  *)
    +    echo $"Usage: $0 {get|set|erase|lock|unlock|lock_owner|watch}"
    +    ;;
    +esac
    +
    +# only reached for an unknown operation
    +exit 1
    diff --git a/src/osaf/consensus/service.cc b/src/osaf/consensus/service.cc
    new file mode 100644
    index 000000000..fd525b6d3
    --- /dev/null
    +++ b/src/osaf/consensus/service.cc
    @@ -0,0 +1,231 @@
    +/*      -*- OpenSAF  -*-
    + *
    + * (C) Copyright 2018 The OpenSAF Foundation
    + *
    + * This program is distributed in the hope that it will be useful, but
    + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
    + * under the GNU Lesser General Public License Version 2.1, February 1999.
    + * The complete license can be accessed from the following location:
    + * http://opensource.org/licenses/lgpl-license.php
    + * See the Copying file included with the OpenSAF distribution for full
    + * licensing terms.
    + *
    + * Author(s): Ericsson AB
    + *
    + */
    +#include "service.h"
    +#include "base/logtrace.h"
    +#include "base/conf.h"
    +#include "base/getenv.h"
    +#include "base/ncssysf_def.h"
    +#include <unistd.h>
    +#include <climits>
    +
    +// Takes the lock (retrying until it is granted), fences the node currently
    +// recorded as active controller if that is a different node, and then
    +// records this node as the active controller. The lock is not released
    +// here. Returns SA_AIS_OK straight away when consensus is disabled,
    +// otherwise the result of writing the new active controller.
    +SaAisErrorT Consensus::BeginActivePromotion() {
    +  TRACE_ENTER();
    +
    +  if (!use_consensus_) {
    +    return SA_AIS_OK;
    +  }
    +
    +  // keep requesting the lock, pausing sleep_internal us between attempts
    +  for (;;) {
    +    if (KeyValue::Lock(base::Conf::NodeName(), 30) == SA_AIS_OK) {
    +      break;
    +    }
    +    TRACE("Waiting for lock");
    +    usleep(sleep_internal);
    +  }
    +
    +  LOG_IN("Node %s obtained lock", base::Conf::NodeName().c_str());
    +
    +  // find out which node is recorded as active right now
    +  std::string previous;
    +  if (KeyValue::Get(keyname, previous) == SA_AIS_OK) {
    +    LOG_NO("Current active controller is %s", previous.c_str());
    +    if (previous != base::Conf::NodeName()) {
    +      FenceNode(previous);
    +    }
    +  }
    +
    +  LOG_NO("Setting active controller to %s", base::Conf::NodeName().c_str());
    +  return KeyValue::Set(keyname, base::Conf::NodeName());
    +}
    +
    +// Releases the lock if this node still holds it, retrying until the
    +// release succeeds; logs an error if the lock is no longer held by this
    +// node. Always returns SA_AIS_OK.
    +SaAisErrorT Consensus::EndActivePromotion() {
    +  TRACE_ENTER();
    +  if (!use_consensus_) {
    +    return SA_AIS_OK;
    +  }
    +
    +  if (!KeyValue::IsLockedByThisNode()) {
    +    LOG_ER("Lock unexpectedly released");
    +    return SA_AIS_OK;
    +  }
    +
    +  // retry the release, pausing sleep_internal us between attempts
    +  while (KeyValue::Unlock() != SA_AIS_OK) {
    +    LOG_IN("Trying to unlock");
    +    usleep(sleep_internal);
    +  }
    +  LOG_IN("Released lock");
    +
    +  return SA_AIS_OK;
    +}
    +
    +// Takes the lock, erases the active controller entry from the key-value
    +// store and releases the lock again. When <node> is non-empty and differs
    +// from the recorded active controller, FenceNode(node) is called before
    +// the entry is erased. Always returns SA_AIS_OK.
    +SaAisErrorT Consensus::Demote(const std::string node = "")
    +{
    +  TRACE_ENTER();
    +  if (!use_consensus_) {
    +    return SA_AIS_OK;
    +  }
    +
    +  // keep requesting the lock, pausing sleep_internal us between attempts
    +  while (KeyValue::Lock(base::Conf::NodeName(), 30) != SA_AIS_OK) {
    +    LOG_IN("Waiting for lock");
    +    usleep(sleep_internal);
    +  }
    +
    +  // find out which node is recorded as active right now
    +  std::string active;
    +  if (KeyValue::Get(keyname, active) == SA_AIS_OK) {
    +    LOG_NO("Demoting %s as active controller", active.c_str());
    +    const bool fence_requested = !node.empty() && node != active;
    +    if (fence_requested) {
    +      FenceNode(node);
    +    }
    +    // if Demote() was called as DemoteCurrentActive() from fmd,
    +    // then fmd will fence the node itself
    +
    +    if (KeyValue::Erase(keyname) != SA_AIS_OK) {
    +      LOG_ER("Failed to clear active controller in KeyValue");
    +    }
    +    LOG_NO("Node %s demoted", active.c_str());
    +  }
    +
    +  // retry the release, pausing sleep_internal us between attempts
    +  while (KeyValue::Unlock() != SA_AIS_OK) {
    +    LOG_IN("Trying to unlock");
    +    usleep(sleep_internal);
    +  }
    +  LOG_IN("Released lock");
    +
    +  return SA_AIS_OK;
    +}
    +
    +// Demotes whichever node is recorded as active: calls Demote() with its
    +// default (empty) node name.
    +SaAisErrorT Consensus::DemoteCurrentActive() {
    +  TRACE_ENTER();
    +  const SaAisErrorT result = Demote();
    +  return result;
    +}
    +
    +// Calls Demote() with this node's own name (base::Conf::NodeName()).
    +SaAisErrorT Consensus::DemoteThisNode() {
    +  TRACE_ENTER();
    +  const SaAisErrorT result = Demote(base::Conf::NodeName());
    +  return result;
    +}
    +
    +// Reports whether the constructor switched the consensus service on.
    +bool Consensus::IsEnabled() const {
    +  const bool enabled = use_consensus_;
    +  return enabled;
    +}
    +
    +// Takes the lock, reads the active controller entry from the key-value
    +// store and releases the lock again. Returns the stored name, or an empty
    +// string when consensus is disabled or the read fails.
    +std::string Consensus::CurrentActive() const {
    +  TRACE_ENTER();
    +  if (!use_consensus_) {
    +    return "";
    +  }
    +
    +  // keep requesting the lock, pausing sleep_internal us between attempts
    +  while (KeyValue::Lock(base::Conf::NodeName(), 30) != SA_AIS_OK) {
    +    LOG_IN("Waiting for lock");
    +    usleep(sleep_internal);
    +  }
    +
    +  // find out which node is recorded as active right now
    +  std::string active;
    +  const bool found = (KeyValue::Get(keyname, active) == SA_AIS_OK);
    +  if (found) {
    +    LOG_IN("Current active controller is %s", active.c_str());
    +  }
    +
    +  // retry the release, pausing sleep_internal us between attempts
    +  while (KeyValue::Unlock() != SA_AIS_OK) {
    +    LOG_IN("Trying to unlock");
    +    usleep(sleep_internal);
    +  }
    +
    +  LOG_IN("Released lock");
    +
    +  if (!found) {
    +    return "";
    +  }
    +  return active;
    +}
    +
    +// Reads the configuration from the environment:
    +//   SPLIT_BRAIN_PREVENTION == 1 together with a non-empty
    +//   KEYVALUE_STORE_PLUGIN_CMD enables the consensus service;
    +//   FMS_USE_REMOTE_FENCING == 1 enables remote fencing.
    +// Also initialises the node name and logs whether the feature is on.
    +Consensus::Consensus() {
    +  TRACE_ENTER();
    +
    +  uint32_t split_brain_enable = base::GetEnv("SPLIT_BRAIN_PREVENTION", 0);
    +  std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
    +  uint32_t use_remote_fencing = base::GetEnv("FMS_USE_REMOTE_FENCING" , 0);
    +
    +  use_consensus_ = (split_brain_enable == 1 && !kv_store_cmd.empty());
    +
    +  if (use_remote_fencing == 1) {
    +    use_remote_fencing_ = true;
    +  }
    +
    +  // needed for base::Conf::NodeName() later
    +  base::Conf::InitNodeName();
    +
    +  if (use_consensus_) {
    +    LOG_NO("Split brain prevention is enabled");
    +  } else {
    +    LOG_NO("Split brain prevention is disabled");
    +  }
    +}
    +
    +// Nothing to clean up.
    +Consensus::~Consensus() {}
    +
    +// Fences <node> through opensaf_reboot() when remote fencing is enabled
    +// and returns true; otherwise only logs a warning and returns false.
    +bool Consensus::FenceNode(const std::string& node)
    +{
    +  if (!use_remote_fencing_) {
    +    LOG_WA("Fencing is not enabled. Node %s will not be fenced", node.c_str());
    +    return false;
    +  }
    +
    +  LOG_WA("Fencing remote node %s", node.c_str());
    +  // @todo currently passing UINT_MAX as node ID, since
    +  // we can't always obtain a valid node ID?
    +  opensaf_reboot(UINT_MAX, node.c_str(),
    +    "Fencing remote node");
    +
    +  return true;
    +}
    +
    +// Starts watching the active controller entry via KeyValue::Watch(),
    +// passing on <callback> and <user_defined>. Does nothing when consensus
    +// is disabled.
    +void Consensus::MonitorActive(ConsensusCallback callback,
    +  const uint32_t user_defined)
    +{
    +  TRACE_ENTER();
    +  if (use_consensus_) {
    +    KeyValue::Watch(keyname, callback, user_defined);
    +  }
    +}
    diff --git a/src/osaf/consensus/service.h b/src/osaf/consensus/service.h
    new file mode 100644
    index 000000000..03f7f26f9
    --- /dev/null
    +++ b/src/osaf/consensus/service.h
    @@ -0,0 +1,66 @@
    +/*      -*- OpenSAF  -*-
    + *
    + * (C) Copyright 2018 The OpenSAF Foundation
    + *
    + * This program is distributed in the hope that it will be useful, but
    + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
    + * under the GNU Lesser General Public License Version 2.1, February 1999.
    + * The complete license can be accessed from the following location:
    + * http://opensource.org/licenses/lgpl-license.php
    + * See the Copying file included with the OpenSAF distribution for full
    + * licensing terms.
    + *
    + * Author(s): Ericsson AB
    + *
    + */
    +#ifndef CONSENSUS_SERVICE_H_
    +#define CONSENSUS_SERVICE_H_
    +
    +#include "keyvalue.h"
    +#include "saAis.h"
    +#include <string>
    +
    +// Front end to the consensus service used for split-brain prevention.
    +// Instances cannot be copied.
    +class Consensus {
    +public:
    +
    +  // Take the lock and record this node as active controller
    +  SaAisErrorT BeginActivePromotion();
    +
    +  // Give the lock back
    +  SaAisErrorT EndActivePromotion();
    +
    +  // Take the lock, clear the current active controller, give the lock back
    +  SaAisErrorT DemoteCurrentActive();
    +
    +  // Take the lock, clear this node as active controller, give the lock back
    +  SaAisErrorT DemoteThisNode();
    +
    +  // Active controller according to the consensus service
    +  std::string CurrentActive() const;
    +
    +  // When the active controller known to the consensus service changes,
    +  // <callback> is run from a new thread and is handed <user_defined>
    +  void MonitorActive(ConsensusCallback callback, const uint32_t user_defined);
    +
    +  // Whether the consensus service is switched on
    +  bool IsEnabled() const;
    +
    +  explicit Consensus();
    +  virtual ~Consensus();
    +
    +  Consensus(const Consensus&) = delete;
    +  Consensus& operator=(const Consensus&) = delete;
    +
    +private:
    +  bool use_consensus_ = false;
    +  bool use_remote_fencing_ = false;
    +  // key under which the active controller is recorded
    +  const std::string keyname = "opensaf_active_controller";
    +  // pause between retries
    +  static constexpr int sleep_internal = 100000; // in us
    +
    +  SaAisErrorT Demote(const std::string node);
    +  bool FenceNode(const std::string& node);
    +};
    +
    +#endif
    -- 
    2.14.1
    
    



------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to