Ack, review only — one question below.
Thanks, HansN

On 1/21/19 04:52, Gary Lee wrote:
> Allow promotion of node to active at cluster startup, even if the
> consensus service is unavailable, if the peer SC can be seen.
>
> During normal cluster operation, if the consensus service becomes
> unavailable but the peer SC can still be seen, allow the existing
> active SC to remain active.
>
> A new NCSMDS_SVC_ID_RDE_DISCOVERY service ID is exported by rded.
> This is installed as soon as rded is started, unlike
> NCSMDS_SVC_ID_RDE which is only installed when it becomes
> a candidate for election.
> ---
>   src/mds/mds_papi.h       |  1 +
>   src/rde/rded/rde_cb.h    | 12 +++++-
>   src/rde/rded/rde_main.cc | 71 +++++++++++++++++++++++++++++++----
>   src/rde/rded/rde_mds.cc  | 94 ++++++++++++++++++++++++++++++++++++++++++++--
>   src/rde/rded/role.cc     | 97 
> +++++++++++++++++++++++++++++++++++++++++++-----
>   src/rde/rded/role.h      |  4 +-
>   6 files changed, 256 insertions(+), 23 deletions(-)
>
> diff --git a/src/mds/mds_papi.h b/src/mds/mds_papi.h
> index 03d755d..7cd543c 100644
> --- a/src/mds/mds_papi.h
> +++ b/src/mds/mds_papi.h
> @@ -191,6 +191,7 @@ typedef enum ncsmds_svc_id {
>     NCSMDS_SVC_ID_PLMS = 37,
>     NCSMDS_SVC_ID_PLMS_HRB = 38,
>     NCSMDS_SVC_ID_PLMA = 39,
> +  NCSMDS_SVC_ID_RDE_DISCOVERY = 40,
>     NCSMDS_SVC_ID_NCSMAX, /* This mnemonic always last */
>   
>     /* The range below is for OpenSAF internal use */
> diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
> index d3f5a24..9a0919c 100644
> --- a/src/rde/rded/rde_cb.h
> +++ b/src/rde/rded/rde_cb.h
> @@ -34,6 +34,9 @@
>    **
>    */
>   
> +enum class State {kNotActive = 0, kNotActiveSeenPeer, kActiveElected,
> +                  kActiveElectedSeenPeer, kActiveFailover};
> +
>   struct RDE_CONTROL_BLOCK {
>     SYSF_MBX mbx;
>     NCSCONTEXT task_handle;
> @@ -43,6 +46,9 @@ struct RDE_CONTROL_BLOCK {
>     bool monitor_lock_thread_running{false};
>     bool monitor_takeover_req_thread_running{false};
>     std::set<NODE_ID> cluster_members{};
> +  // used for discovering peer controllers, regardless of their role
> +  std::set<NODE_ID> peer_controllers{};
> +  State state{State::kNotActive};
>   };
>   
>   enum RDE_MSG_TYPE {
> @@ -54,7 +60,9 @@ enum RDE_MSG_TYPE {
>     RDE_MSG_NODE_UP = 6,
>     RDE_MSG_NODE_DOWN = 7,
>     RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8,
> -  RDE_MSG_ACTIVE_PROMOTION_SUCCESS = 9
> +  RDE_MSG_ACTIVE_PROMOTION_SUCCESS = 9,
> +  RDE_MSG_CONTROLLER_UP = 10,
> +  RDE_MSG_CONTROLLER_DOWN = 11
>   };
>   
>   struct rde_peer_info {
> @@ -82,7 +90,9 @@ extern const char *rde_msg_name[];
>   
>   extern RDE_CONTROL_BLOCK *rde_get_control_block();
>   extern uint32_t rde_mds_register();
> +extern uint32_t rde_discovery_mds_register();
>   extern uint32_t rde_mds_unregister();
> +extern uint32_t rde_discovery_mds_unregister();
>   extern uint32_t rde_mds_send(rde_msg *msg, MDS_DEST to_dest);
>   extern uint32_t rde_set_role(PCS_RDA_ROLE role);
>   
> diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
> index e5813e4..2d9aa51 100644
> --- a/src/rde/rded/rde_main.cc
> +++ b/src/rde/rded/rde_main.cc
> @@ -39,6 +39,7 @@
>   #include "osaf/consensus/consensus.h"
>   #include "rde/rded/rde_cb.h"
>   #include "rde/rded/role.h"
> +#include "rde_cb.h"
>   
>   #define RDA_MAX_CLIENTS 32
>   
> @@ -56,7 +57,9 @@ const char *rde_msg_name[] = {"-",
>                                 "RDE_MSG_NODE_UP(6)",
>                                 "RDE_MSG_NODE_DOWN(7)",
>                                 "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)",
> -                              "RDE_MSG_ACTIVE_PROMOTION_SUCCESS(9)"};
> +                              "RDE_MSG_ACTIVE_PROMOTION_SUCCESS(9)",
> +                              "RDE_MSG_CONTROLLER_UP(10)",
> +                              "RDE_MSG_CONTROLLER_DOWN(11)"};
>   
>   static RDE_CONTROL_BLOCK _rde_cb;
>   static RDE_CONTROL_BLOCK *rde_cb = &_rde_cb;
> @@ -157,6 +160,23 @@ static void handle_mbx_event() {
>         rde_cb->cluster_members.erase(msg->fr_node_id);
>         TRACE("cluster_size %zu", rde_cb->cluster_members.size());
>         break;
> +    case RDE_MSG_CONTROLLER_UP:
> +      if (msg->fr_node_id != own_node_id) {
> +        rde_cb->peer_controllers.insert(msg->fr_node_id);
> +        TRACE("peer_controllers: size %zu", rde_cb->peer_controllers.size());
> +        if (rde_cb->state == State::kNotActive) {
> +          TRACE("Set state to kNotActiveSeenPeer");
> +          rde_cb->state = State::kNotActiveSeenPeer;
> +        } else if (rde_cb->state == State::kActiveElected) {
> +          TRACE("Set state to kActiveElectedSeenPeer");
> +          rde_cb->state = State::kActiveElectedSeenPeer;
> +        }
> +      }
> +      break;
> +    case RDE_MSG_CONTROLLER_DOWN:
> +      rde_cb->peer_controllers.erase(msg->fr_node_id);
> +      TRACE("peer_controllers: size %zu", rde_cb->peer_controllers.size());
> +      break;
>       case RDE_MSG_TAKEOVER_REQUEST_CALLBACK: {
>         rde_cb->monitor_takeover_req_thread_running = false;
>   
> @@ -179,13 +199,44 @@ static void handle_mbx_event() {
>                              "Another controller is taking over the active 
> role. "
>                              "Rebooting this node");
>             }
> -        } else {
> -          LOG_NO("Rejected takeover request");
> -
[HansN] Are these curly braces correctly placed?
> -          rde_cb->monitor_takeover_req_thread_running = true;
> -          consensus_service.MonitorTakeoverRequest(Role::MonitorCallback,
> -                                                   rde_cb->mbx);
> +        } else if (state == Consensus::TakeoverState::UNDEFINED) {
> +          bool fencing_required = true;
> +
> +          // differentiate when this occurs after election or
> +          // rde has been set active due to failover
> +          if (consensus_service.IsRelaxedNodePromotionEnabled() == true) {
> +              if (rde_cb->state == State::kActiveElected) {
> +                TRACE("Relaxed mode is enabled");
> +                TRACE(" No peer SC yet seen, ignore consensus service 
> failure");
> +                // if relaxed node promotion is enabled, and we have yet to 
> see
> +                // a peer SC after being promoted, tolerate consensus service
> +                // not working
> +                fencing_required = false;
> +              } else if ((rde_cb->state == State::kActiveElectedSeenPeer ||
> +                         rde_cb->state == State::kActiveFailover) &&
> +                         role->IsPeerPresent() == true) {
> +                TRACE("Relaxed mode is enabled");
> +                TRACE("Peer SC can be seen, ignore consensus service 
> failure");
> +                // we have seen the peer, and peer is still connected, 
> tolerate
> +                // consensus service not working
> +                fencing_required = false;
> +              }
> +          }
> +          if (fencing_required == true) {
> +            LOG_NO("Lost connectivity to consensus service");
> +            if (consensus_service.IsRemoteFencingEnabled() == false) {
> +                opensaf_reboot(0, nullptr,
> +                               "Lost connectivity to consensus service. "
> +                               "Rebooting this node");
> +            }
> +          }
>           }
> +
> +        LOG_NO("Rejected takeover request");
> +
> +        rde_cb->monitor_takeover_req_thread_running = true;
> +        consensus_service.MonitorTakeoverRequest(Role::MonitorCallback,
> +                                                 rde_cb->mbx);
>         } else {
>           LOG_WA("Received takeover request when not active");
>         }
> @@ -267,6 +318,11 @@ static int initialize_rde() {
>       goto init_failed;
>     }
>   
> +  if (rde_discovery_mds_register() != NCSCC_RC_SUCCESS) {
> +    LOG_ER("rde_discovery_mds_register() failed");
> +    rc = NCSCC_RC_FAILURE;
> +  }
> +
>     rc = NCSCC_RC_SUCCESS;
>   
>   init_failed:
> @@ -343,6 +399,7 @@ int main(int argc, char *argv[]) {
>       }
>   
>       if (fds[FD_TERM].revents & POLLIN) {
> +      rde_discovery_mds_unregister();
>         daemon_exit();
>       }
>   
> diff --git a/src/rde/rded/rde_mds.cc b/src/rde/rded/rde_mds.cc
> index 00922ea..bc335f0 100644
> --- a/src/rde/rded/rde_mds.cc
> +++ b/src/rde/rded/rde_mds.cc
> @@ -149,6 +149,31 @@ static uint32_t process_amfnd_mds_evt(struct 
> ncsmds_callback_info *info) {
>     return rc;
>   }
>   
> +static uint32_t process_rde_discovery_mds_evt(
> +  struct ncsmds_callback_info *info) {
> +  uint32_t rc = NCSCC_RC_SUCCESS;
> +
> +  TRACE_ENTER();
> +  osafassert(info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_RDE_DISCOVERY);
> +
> +  // process these events in the main thread to avoid
> +  // synchronisation issues
> +  switch (info->info.svc_evt.i_change) {
> +    case NCSMDS_DOWN:
> +      rc = mbx_send(RDE_MSG_CONTROLLER_DOWN, info->info.svc_evt.i_dest,
> +                    info->info.svc_evt.i_node_id);
> +      break;
> +    case NCSMDS_UP:
> +      rc = mbx_send(RDE_MSG_CONTROLLER_UP, info->info.svc_evt.i_dest,
> +                    info->info.svc_evt.i_node_id);
> +      break;
> +    default:
> +      break;
> +  }
> +
> +  return rc;
> +}
> +
>   static uint32_t mds_callback(struct ncsmds_callback_info *info) {
>     struct rde_msg *msg;
>     uint32_t rc = NCSCC_RC_SUCCESS;
> @@ -185,8 +210,10 @@ static uint32_t mds_callback(struct ncsmds_callback_info 
> *info) {
>         if (info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_AVND) {
>           rc = process_amfnd_mds_evt(info);
>           break;
> -      }
> -      if (info->info.svc_evt.i_change == NCSMDS_DOWN) {
> +      } else if (info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_RDE_DISCOVERY) 
> {
> +        rc = process_rde_discovery_mds_evt(info);
> +        break;
> +      } else if (info->info.svc_evt.i_change == NCSMDS_DOWN) {
>           TRACE("MDS DOWN dest: %" PRIx64 ", node ID: %x, svc_id: %d",
>                 info->info.svc_evt.i_dest, info->info.svc_evt.i_node_id,
>                 info->info.svc_evt.i_svc_id);
> @@ -218,7 +245,8 @@ done:
>   uint32_t rde_mds_register() {
>     NCSADA_INFO ada_info;
>     NCSMDS_INFO svc_info;
> -  MDS_SVC_ID svc_id[] = {NCSMDS_SVC_ID_RDE, NCSMDS_SVC_ID_AVND};
> +  MDS_SVC_ID svc_id[] = {NCSMDS_SVC_ID_RDE, NCSMDS_SVC_ID_AVND,
> +                         NCSMDS_SVC_ID_RDE_DISCOVERY};
>     MDS_DEST mds_adest;
>   
>     TRACE_ENTER();
> @@ -252,7 +280,7 @@ uint32_t rde_mds_register() {
>     svc_info.i_mds_hdl = mds_hdl;
>     svc_info.i_svc_id = NCSMDS_SVC_ID_RDE;
>     svc_info.i_op = MDS_RED_SUBSCRIBE;
> -  svc_info.info.svc_subscribe.i_num_svcs = 2;
> +  svc_info.info.svc_subscribe.i_num_svcs = 3;
>     svc_info.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE;
>     svc_info.info.svc_subscribe.i_svc_ids = svc_id;
>   
> @@ -266,6 +294,43 @@ uint32_t rde_mds_register() {
>     return NCSCC_RC_SUCCESS;
>   }
>   
> +uint32_t rde_discovery_mds_register() {
> +  NCSADA_INFO ada_info;
> +  NCSMDS_INFO svc_info;
> +  MDS_DEST mds_adest;
> +
> +  TRACE_ENTER();
> +
> +  ada_info.req = NCSADA_GET_HDLS;
> +  if (ncsada_api(&ada_info) != NCSCC_RC_SUCCESS) {
> +    LOG_ER("%s: NCSADA_GET_HDLS Failed", __FUNCTION__);
> +    return NCSCC_RC_FAILURE;
> +  }
> +
> +  mds_hdl = ada_info.info.adest_get_hdls.o_mds_pwe1_hdl;
> +  mds_adest = ada_info.info.adest_get_hdls.o_adest;
> +
> +  svc_info.i_mds_hdl = mds_hdl;
> +  svc_info.i_svc_id = NCSMDS_SVC_ID_RDE_DISCOVERY;
> +  svc_info.i_op = MDS_INSTALL;
> +
> +  svc_info.info.svc_install.i_yr_svc_hdl = 0;
> +  // node specific
> +  svc_info.info.svc_install.i_install_scope = NCSMDS_SCOPE_NONE;
> +  svc_info.info.svc_install.i_svc_cb = mds_callback; /* callback */
> +  svc_info.info.svc_install.i_mds_q_ownership = false;
> +  svc_info.info.svc_install.i_mds_svc_pvt_ver = RDE_MDS_PVT_SUBPART_VERSION;
> +
> +  if (ncsmds_api(&svc_info) == NCSCC_RC_FAILURE) {
> +    LOG_ER("%s: MDS Install Failed", __FUNCTION__);
> +    return NCSCC_RC_FAILURE;
> +  }
> +
> +  TRACE_LEAVE2("NodeId:%x, mds_adest:%" PRIx64, ncs_get_node_id(), 
> mds_adest);
> +
> +  return NCSCC_RC_SUCCESS;
> +}
> +
>   uint32_t rde_mds_unregister() {
>     NCSMDS_INFO mds_info;
>     TRACE_ENTER();
> @@ -287,6 +352,27 @@ uint32_t rde_mds_unregister() {
>     return rc;
>   }
>   
> +uint32_t rde_discovery_mds_unregister() {
> +  NCSMDS_INFO mds_info;
> +  TRACE_ENTER();
> +
> +  /* Un-install your service into MDS.
> +   No need to cancel the services that are subscribed */
> +  memset(&mds_info, 0, sizeof(NCSMDS_INFO));
> +
> +  mds_info.i_mds_hdl = mds_hdl;
> +  mds_info.i_svc_id = NCSMDS_SVC_ID_RDE_DISCOVERY;
> +  mds_info.i_op = MDS_UNINSTALL;
> +
> +  uint32_t rc = ncsmds_api(&mds_info);
> +  if (rc != NCSCC_RC_SUCCESS) {
> +    LOG_WA("MDS Unregister Failed");
> +  }
> +
> +  TRACE_LEAVE2("retval = %u", rc);
> +  return rc;
> +}
> +
>   uint32_t rde_mds_send(struct rde_msg *msg, MDS_DEST to_dest) {
>     NCSMDS_INFO info;
>     uint32_t rc;
> diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
> index ecab773..a967bd5 100644
> --- a/src/rde/rded/role.cc
> +++ b/src/rde/rded/role.cc
> @@ -91,22 +91,26 @@ void Role::MonitorCallback(const std::string& key, const 
> std::string& new_value,
>     osafassert(status == NCSCC_RC_SUCCESS);
>   }
>   
> -void Role::PromoteNode(const uint64_t cluster_size) {
> +void Role::PromoteNode(const uint64_t cluster_size,
> +                       const bool relaxed_mode) {
>     TRACE_ENTER();
>     SaAisErrorT rc;
>   
>     Consensus consensus_service;
> +  bool promotion_pending = false;
>   
>     rc = consensus_service.PromoteThisNode(true, cluster_size);
> -  if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
> -    LOG_ER("Unable to set active controller in consensus service");
> -    opensaf_reboot(0, nullptr,
> -                   "Unable to set active controller in consensus service");
> -  }
> -
>     if (rc == SA_AIS_ERR_EXIST) {
>       LOG_WA("Another controller is already active");
>       return;
> +  } else if (rc != SA_AIS_OK && relaxed_mode == true) {
> +    LOG_WA("Unable to set active controller in consensus service");
> +    LOG_WA("Will become active anyway");
> +    promotion_pending = true;
> +  } else if (rc != SA_AIS_OK) {
> +    LOG_ER("Unable to set active controller in consensus service");
> +    opensaf_reboot(0, nullptr,
> +                   "Unable to set active controller in consensus service");
>     }
>   
>     RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> @@ -117,9 +121,26 @@ void Role::PromoteNode(const uint64_t cluster_size) {
>     uint32_t status;
>     status = m_NCS_IPC_SEND(&cb->mbx, msg, NCS_IPC_PRIORITY_HIGH);
>     osafassert(status == NCSCC_RC_SUCCESS);
> +
> +  if (promotion_pending) {
> +    osafassert(consensus_service.IsRelaxedNodePromotionEnabled() == true);
> +    // the node has been promoted, even though the lock has not been obtained
> +    // keep trying the consensus service
> +    while (rc != SA_AIS_OK) {
> +      rc = consensus_service.PromoteThisNode(true, cluster_size);
> +      if (rc == SA_AIS_ERR_EXIST) {
> +        LOG_ER("Unable to set active controller in consensus service");
> +        opensaf_reboot(0, nullptr,
> +                       "Unable to set active controller in consensus 
> service");
> +      }
> +      std::this_thread::sleep_for(std::chrono::seconds(1));
> +    }
> +    LOG_NO("Successfully set active controller in consensus service");
> +  }
>   }
>   
>   void Role::NodePromoted() {
> +  // promoted to active from election
>     ExecutePreActiveScript();
>     LOG_NO("Switched to ACTIVE from %s", to_string(role()));
>     role_ = PCS_RDA_ACTIVE;
> @@ -127,6 +148,13 @@ void Role::NodePromoted() {
>   
>     Consensus consensus_service;
>     RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> +  if (cb->peer_controllers.empty() == false) {
> +    TRACE("Set state to kActiveElectedSeenPeer");
> +    cb->state = State::kActiveElectedSeenPeer;
> +  } else {
> +    TRACE("Set state to kActiveElected");
> +    cb->state = State::kActiveElected;
> +  }
>   
>     // register for callback if active controller is changed
>     // in consensus service
> @@ -161,8 +189,24 @@ timespec* Role::Poll(timespec* ts) {
>       } else {
>         election_end_time_ = base::kTimespecMax;
>         RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> -      std::thread(&Role::PromoteNode,
> -                 this, cb->cluster_members.size()).detach();
> +
> +      bool is_candidate = IsCandidate();
> +      Consensus consensus_service;
> +      if (consensus_service.IsEnabled() == true &&
> +        is_candidate == false &&
> +        consensus_service.IsWritable() == false) {
> +        // node promotion will fail resulting in node reboot,
> +        // reset timer and try later
> +        TRACE("reset timer and try later");
> +        ResetElectionTimer();
> +        now = base::ReadMonotonicClock();
> +        *ts = election_end_time_ - now;
> +        timeout = ts;
> +      } else {
> +        std::thread(&Role::PromoteNode,
> +                    this, cb->cluster_members.size(),
> +                    is_candidate).detach();
> +      }
>       }
>     }
>     return timeout;
> @@ -177,10 +221,42 @@ void Role::ExecutePreActiveScript() {
>   
>   void Role::AddPeer(NODE_ID node_id) {
>     auto result = known_nodes_.insert(node_id);
> -  if (result.second) ResetElectionTimer();
> +  if (result.second) {
> +    ResetElectionTimer();
> +  }
> +}
> +
> +// call from main thread only
> +bool Role::IsCandidate() {
> +  TRACE_ENTER();
> +  bool result = false;
> +  Consensus consensus_service;
> +  RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> +
> +  // if relaxed node promotion is enabled, allow this node to be promoted
> +  // active if it can see a peer SC and this node has the lowest node ID
> +  if (consensus_service.IsRelaxedNodePromotionEnabled() == true &&
> +      cb->state == State::kNotActiveSeenPeer) {
> +    LOG_NO("Relaxed node promotion enabled. This node is a candidate.");
> +    result = true;
> +  }
> +
> +  return result;
> +}
> +
> +bool Role::IsPeerPresent() {
> +  bool result = false;
> +  RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> +
> +  if (cb->peer_controllers.empty() == false) {
> +    result = true;
> +  }
> +
> +  return result;
>   }
>   
>   uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
> +  TRACE_ENTER();
>     PCS_RDA_ROLE old_role = role_;
>     if (new_role == PCS_RDA_ACTIVE &&
>         (old_role == PCS_RDA_UNDEFINED || old_role == PCS_RDA_QUIESCED)) {
> @@ -196,6 +272,7 @@ uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
>         // in consensus service
>         Consensus consensus_service;
>         RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> +      cb->state = State::kActiveFailover;
>         if (cb->monitor_lock_thread_running == false) {
>           cb->monitor_lock_thread_running = true;
>           consensus_service.MonitorLock(MonitorCallback, cb->mbx);
> diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
> index 9780deb..1920f59 100644
> --- a/src/rde/rded/role.h
> +++ b/src/rde/rded/role.h
> @@ -34,6 +34,8 @@ class Role {
>    public:
>     explicit Role(NODE_ID own_node_id);
>     void AddPeer(NODE_ID node_id);
> +  bool IsCandidate();
> +  bool IsPeerPresent();
>     void SetPeerState(PCS_RDA_ROLE node_role, NODE_ID node_id);
>     timespec* Poll(timespec* ts);
>     uint32_t SetRole(PCS_RDA_ROLE new_role);
> @@ -49,7 +51,7 @@ class Role {
>     void ExecutePreActiveScript();
>     void ResetElectionTimer();
>     uint32_t UpdateMdsRegistration(PCS_RDA_ROLE new_role, PCS_RDA_ROLE 
> old_role);
> -  void PromoteNode(const uint64_t cluster_size);
> +  void PromoteNode(const uint64_t cluster_size, const bool relaxed_mode);
>   
>     std::set<NODE_ID> known_nodes_;
>     PCS_RDA_ROLE role_;

_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to