Ack, review only, one question below.

Thanks,
HansN
On 1/21/19 04:52, Gary Lee wrote:
> Allow promotion of node to active at cluster startup, even if the
> consensus service is unavailable, if the peer SC can be seen.
>
> During normal cluster operation, if the consensus service becomes
> unavailable but the peer SC can still be seen, allow the existing
> active SC to remain active.
>
> A new NCSMDS_SVC_ID_RDE_DISCOVERY service ID is exported by rded.
> This is installed as soon as rded is started, unlike
> NCSMDS_SVC_ID_RDE which is only installed when it becomes
> a candidate for election.
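[HansN] For anyone following the state handling further down, here is a
minimal compilable sketch of the transitions the new State enum drives.
The enum values are taken from rde_cb.h in this patch; the helper
function OnPeerControllerUp and the asserts are illustrative only, not
part of the patch:

    // Sketch only: models the RDE_MSG_CONTROLLER_UP handling added to
    // handle_mbx_event(). OnPeerControllerUp is a hypothetical name.
    #include <cassert>

    enum class State { kNotActive, kNotActiveSeenPeer, kActiveElected,
                       kActiveElectedSeenPeer, kActiveFailover };

    State OnPeerControllerUp(State s) {
      if (s == State::kNotActive) return State::kNotActiveSeenPeer;
      if (s == State::kActiveElected) return State::kActiveElectedSeenPeer;
      return s;  // all other states are unchanged by this event
    }

    int main() {
      assert(OnPeerControllerUp(State::kNotActive) ==
             State::kNotActiveSeenPeer);
      assert(OnPeerControllerUp(State::kActiveFailover) ==
             State::kActiveFailover);
      return 0;
    }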
> ---
> src/mds/mds_papi.h | 1 +
> src/rde/rded/rde_cb.h | 12 +++++-
> src/rde/rded/rde_main.cc | 71 +++++++++++++++++++++++++++++++----
> src/rde/rded/rde_mds.cc | 94 ++++++++++++++++++++++++++++++++++++++++++++--
> src/rde/rded/role.cc | 97 +++++++++++++++++++++++++++++++++++++++++++-----
> src/rde/rded/role.h | 4 +-
> 6 files changed, 256 insertions(+), 23 deletions(-)
>
> diff --git a/src/mds/mds_papi.h b/src/mds/mds_papi.h
> index 03d755d..7cd543c 100644
> --- a/src/mds/mds_papi.h
> +++ b/src/mds/mds_papi.h
> @@ -191,6 +191,7 @@ typedef enum ncsmds_svc_id {
> NCSMDS_SVC_ID_PLMS = 37,
> NCSMDS_SVC_ID_PLMS_HRB = 38,
> NCSMDS_SVC_ID_PLMA = 39,
> + NCSMDS_SVC_ID_RDE_DISCOVERY = 40,
> NCSMDS_SVC_ID_NCSMAX, /* This mnemonic always last */
>
> /* The range below is for OpenSAF internal use */
> diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
> index d3f5a24..9a0919c 100644
> --- a/src/rde/rded/rde_cb.h
> +++ b/src/rde/rded/rde_cb.h
> @@ -34,6 +34,9 @@
> **
> */
>
> +enum class State {kNotActive = 0, kNotActiveSeenPeer, kActiveElected,
> + kActiveElectedSeenPeer, kActiveFailover};
> +
> struct RDE_CONTROL_BLOCK {
> SYSF_MBX mbx;
> NCSCONTEXT task_handle;
> @@ -43,6 +46,9 @@ struct RDE_CONTROL_BLOCK {
> bool monitor_lock_thread_running{false};
> bool monitor_takeover_req_thread_running{false};
> std::set<NODE_ID> cluster_members{};
> + // used for discovering peer controllers, regardless of their role
> + std::set<NODE_ID> peer_controllers{};
> + State state{State::kNotActive};
> };
>
> enum RDE_MSG_TYPE {
> @@ -54,7 +60,9 @@ enum RDE_MSG_TYPE {
> RDE_MSG_NODE_UP = 6,
> RDE_MSG_NODE_DOWN = 7,
> RDE_MSG_TAKEOVER_REQUEST_CALLBACK = 8,
> - RDE_MSG_ACTIVE_PROMOTION_SUCCESS = 9
> + RDE_MSG_ACTIVE_PROMOTION_SUCCESS = 9,
> + RDE_MSG_CONTROLLER_UP = 10,
> + RDE_MSG_CONTROLLER_DOWN = 11
> };
>
> struct rde_peer_info {
> @@ -82,7 +90,9 @@ extern const char *rde_msg_name[];
>
> extern RDE_CONTROL_BLOCK *rde_get_control_block();
> extern uint32_t rde_mds_register();
> +extern uint32_t rde_discovery_mds_register();
> extern uint32_t rde_mds_unregister();
> +extern uint32_t rde_discovery_mds_unregister();
> extern uint32_t rde_mds_send(rde_msg *msg, MDS_DEST to_dest);
> extern uint32_t rde_set_role(PCS_RDA_ROLE role);
>
> diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
> index e5813e4..2d9aa51 100644
> --- a/src/rde/rded/rde_main.cc
> +++ b/src/rde/rded/rde_main.cc
> @@ -39,6 +39,7 @@
> #include "osaf/consensus/consensus.h"
> #include "rde/rded/rde_cb.h"
> #include "rde/rded/role.h"
> +#include "rde_cb.h"
>
> #define RDA_MAX_CLIENTS 32
>
> @@ -56,7 +57,9 @@ const char *rde_msg_name[] = {"-",
> "RDE_MSG_NODE_UP(6)",
> "RDE_MSG_NODE_DOWN(7)",
> "RDE_MSG_TAKEOVER_REQUEST_CALLBACK(8)",
> - "RDE_MSG_ACTIVE_PROMOTION_SUCCESS(9)"};
> + "RDE_MSG_ACTIVE_PROMOTION_SUCCESS(9)",
> + "RDE_MSG_CONTROLLER_UP(10)",
> + "RDE_MSG_CONTROLLER_DOWN(11)"};
>
> static RDE_CONTROL_BLOCK _rde_cb;
> static RDE_CONTROL_BLOCK *rde_cb = &_rde_cb;
> @@ -157,6 +160,23 @@ static void handle_mbx_event() {
> rde_cb->cluster_members.erase(msg->fr_node_id);
> TRACE("cluster_size %zu", rde_cb->cluster_members.size());
> break;
> + case RDE_MSG_CONTROLLER_UP:
> + if (msg->fr_node_id != own_node_id) {
> + rde_cb->peer_controllers.insert(msg->fr_node_id);
> + TRACE("peer_controllers: size %zu", rde_cb->peer_controllers.size());
> + if (rde_cb->state == State::kNotActive) {
> + TRACE("Set state to kNotActiveSeenPeer");
> + rde_cb->state = State::kNotActiveSeenPeer;
> + } else if (rde_cb->state == State::kActiveElected) {
> + TRACE("Set state to kActiveElectedSeenPeer");
> + rde_cb->state = State::kActiveElectedSeenPeer;
> + }
> + }
> + break;
> + case RDE_MSG_CONTROLLER_DOWN:
> + rde_cb->peer_controllers.erase(msg->fr_node_id);
> + TRACE("peer_controllers: size %zu", rde_cb->peer_controllers.size());
> + break;
> case RDE_MSG_TAKEOVER_REQUEST_CALLBACK: {
> rde_cb->monitor_takeover_req_thread_running = false;
>
> @@ -179,13 +199,44 @@ static void handle_mbx_event() {
> "Another controller is taking over the active
> role. "
> "Rebooting this node");
> }
> - } else {
> - LOG_NO("Rejected takeover request");
> -
[HansN] Are these curly braces correctly placed?
> - rde_cb->monitor_takeover_req_thread_running = true;
> - consensus_service.MonitorTakeoverRequest(Role::MonitorCallback,
> - rde_cb->mbx);
> + } else if (state == Consensus::TakeoverState::UNDEFINED) {
> + bool fencing_required = true;
> +
> + // differentiate when this occurs after election or
> + // rde has been set active due to failover
> + if (consensus_service.IsRelaxedNodePromotionEnabled() == true) {
> + if (rde_cb->state == State::kActiveElected) {
> + TRACE("Relaxed mode is enabled");
> + TRACE(" No peer SC yet seen, ignore consensus service
> failure");
> + // if relaxed node promotion is enabled, and we have yet to
> see
> + // a peer SC after being promoted, tolerate consensus service
> + // not working
> + fencing_required = false;
> + } else if ((rde_cb->state == State::kActiveElectedSeenPeer ||
> + rde_cb->state == State::kActiveFailover) &&
> + role->IsPeerPresent() == true) {
> + TRACE("Relaxed mode is enabled");
> + TRACE("Peer SC can be seen, ignore consensus service
> failure");
> + // we have seen the peer, and peer is still connected,
> tolerate
> + // consensus service not working
> + fencing_required = false;
> + }
> + }
> + if (fencing_required == true) {
> + LOG_NO("Lost connectivity to consensus service");
> + if (consensus_service.IsRemoteFencingEnabled() == false) {
> + opensaf_reboot(0, nullptr,
> + "Lost connectivity to consensus service. "
> + "Rebooting this node");
> + }
> + }
> }
> +
> + LOG_NO("Rejected takeover request");
> +
> + rde_cb->monitor_takeover_req_thread_running = true;
> + consensus_service.MonitorTakeoverRequest(Role::MonitorCallback,
> + rde_cb->mbx);
> } else {
> LOG_WA("Received takeover request when not active");
> }
> @@ -267,6 +318,11 @@ static int initialize_rde() {
> goto init_failed;
> }
>
> + if (rde_discovery_mds_register() != NCSCC_RC_SUCCESS) {
> + LOG_ER("rde_discovery_mds_register() failed");
> + rc = NCSCC_RC_FAILURE;
> + }
> +
> rc = NCSCC_RC_SUCCESS;
>
> init_failed:
> @@ -343,6 +399,7 @@ int main(int argc, char *argv[]) {
> }
>
> if (fds[FD_TERM].revents & POLLIN) {
> + rde_discovery_mds_unregister();
> daemon_exit();
> }
>
> diff --git a/src/rde/rded/rde_mds.cc b/src/rde/rded/rde_mds.cc
> index 00922ea..bc335f0 100644
> --- a/src/rde/rded/rde_mds.cc
> +++ b/src/rde/rded/rde_mds.cc
> @@ -149,6 +149,31 @@ static uint32_t process_amfnd_mds_evt(struct ncsmds_callback_info *info) {
> return rc;
> }
>
> +static uint32_t process_rde_discovery_mds_evt(
> + struct ncsmds_callback_info *info) {
> + uint32_t rc = NCSCC_RC_SUCCESS;
> +
> + TRACE_ENTER();
> + osafassert(info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_RDE_DISCOVERY);
> +
> + // process these events in the main thread to avoid
> + // synchronisation issues
> + switch (info->info.svc_evt.i_change) {
> + case NCSMDS_DOWN:
> + rc = mbx_send(RDE_MSG_CONTROLLER_DOWN, info->info.svc_evt.i_dest,
> + info->info.svc_evt.i_node_id);
> + break;
> + case NCSMDS_UP:
> + rc = mbx_send(RDE_MSG_CONTROLLER_UP, info->info.svc_evt.i_dest,
> + info->info.svc_evt.i_node_id);
> + break;
> + default:
> + break;
> + }
> +
> + return rc;
> +}
> +
> static uint32_t mds_callback(struct ncsmds_callback_info *info) {
> struct rde_msg *msg;
> uint32_t rc = NCSCC_RC_SUCCESS;
> @@ -185,8 +210,10 @@ static uint32_t mds_callback(struct ncsmds_callback_info *info) {
> if (info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_AVND) {
> rc = process_amfnd_mds_evt(info);
> break;
> - }
> - if (info->info.svc_evt.i_change == NCSMDS_DOWN) {
> + } else if (info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_RDE_DISCOVERY) {
> + rc = process_rde_discovery_mds_evt(info);
> + break;
> + } else if (info->info.svc_evt.i_change == NCSMDS_DOWN) {
> TRACE("MDS DOWN dest: %" PRIx64 ", node ID: %x, svc_id: %d",
> info->info.svc_evt.i_dest, info->info.svc_evt.i_node_id,
> info->info.svc_evt.i_svc_id);
> @@ -218,7 +245,8 @@ done:
> uint32_t rde_mds_register() {
> NCSADA_INFO ada_info;
> NCSMDS_INFO svc_info;
> - MDS_SVC_ID svc_id[] = {NCSMDS_SVC_ID_RDE, NCSMDS_SVC_ID_AVND};
> + MDS_SVC_ID svc_id[] = {NCSMDS_SVC_ID_RDE, NCSMDS_SVC_ID_AVND,
> + NCSMDS_SVC_ID_RDE_DISCOVERY};
> MDS_DEST mds_adest;
>
> TRACE_ENTER();
> @@ -252,7 +280,7 @@ uint32_t rde_mds_register() {
> svc_info.i_mds_hdl = mds_hdl;
> svc_info.i_svc_id = NCSMDS_SVC_ID_RDE;
> svc_info.i_op = MDS_RED_SUBSCRIBE;
> - svc_info.info.svc_subscribe.i_num_svcs = 2;
> + svc_info.info.svc_subscribe.i_num_svcs = 3;
> svc_info.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE;
> svc_info.info.svc_subscribe.i_svc_ids = svc_id;
>
> @@ -266,6 +294,43 @@ uint32_t rde_mds_register() {
> return NCSCC_RC_SUCCESS;
> }
>
> +uint32_t rde_discovery_mds_register() {
> + NCSADA_INFO ada_info;
> + NCSMDS_INFO svc_info;
> + MDS_DEST mds_adest;
> +
> + TRACE_ENTER();
> +
> + ada_info.req = NCSADA_GET_HDLS;
> + if (ncsada_api(&ada_info) != NCSCC_RC_SUCCESS) {
> + LOG_ER("%s: NCSADA_GET_HDLS Failed", __FUNCTION__);
> + return NCSCC_RC_FAILURE;
> + }
> +
> + mds_hdl = ada_info.info.adest_get_hdls.o_mds_pwe1_hdl;
> + mds_adest = ada_info.info.adest_get_hdls.o_adest;
> +
> + svc_info.i_mds_hdl = mds_hdl;
> + svc_info.i_svc_id = NCSMDS_SVC_ID_RDE_DISCOVERY;
> + svc_info.i_op = MDS_INSTALL;
> +
> + svc_info.info.svc_install.i_yr_svc_hdl = 0;
> + // node specific
> + svc_info.info.svc_install.i_install_scope = NCSMDS_SCOPE_NONE;
> + svc_info.info.svc_install.i_svc_cb = mds_callback; /* callback */
> + svc_info.info.svc_install.i_mds_q_ownership = false;
> + svc_info.info.svc_install.i_mds_svc_pvt_ver = RDE_MDS_PVT_SUBPART_VERSION;
> +
> + if (ncsmds_api(&svc_info) == NCSCC_RC_FAILURE) {
> + LOG_ER("%s: MDS Install Failed", __FUNCTION__);
> + return NCSCC_RC_FAILURE;
> + }
> +
> + TRACE_LEAVE2("NodeId:%x, mds_adest:%" PRIx64, ncs_get_node_id(), mds_adest);
> +
> + return NCSCC_RC_SUCCESS;
> +}
> +
> uint32_t rde_mds_unregister() {
> NCSMDS_INFO mds_info;
> TRACE_ENTER();
> @@ -287,6 +352,27 @@ uint32_t rde_mds_unregister() {
> return rc;
> }
>
> +uint32_t rde_discovery_mds_unregister() {
> + NCSMDS_INFO mds_info;
> + TRACE_ENTER();
> +
> + /* Un-install your service into MDS.
> + No need to cancel the services that are subscribed */
> + memset(&mds_info, 0, sizeof(NCSMDS_INFO));
> +
> + mds_info.i_mds_hdl = mds_hdl;
> + mds_info.i_svc_id = NCSMDS_SVC_ID_RDE_DISCOVERY;
> + mds_info.i_op = MDS_UNINSTALL;
> +
> + uint32_t rc = ncsmds_api(&mds_info);
> + if (rc != NCSCC_RC_SUCCESS) {
> + LOG_WA("MDS Unregister Failed");
> + }
> +
> + TRACE_LEAVE2("retval = %u", rc);
> + return rc;
> +}
> +
> uint32_t rde_mds_send(struct rde_msg *msg, MDS_DEST to_dest) {
> NCSMDS_INFO info;
> uint32_t rc;
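[HansN] To make the lifecycle explicit: unlike NCSMDS_SVC_ID_RDE, the
discovery service stays installed for the whole lifetime of rded. A
compile-only sketch of the call order, based on the rde_main.cc changes
(the wrapper function is an illustrative name, not in the patch):

    #include <cstdint>

    // Declarations exported via rde_cb.h in this patch.
    extern uint32_t rde_discovery_mds_register();
    extern uint32_t rde_discovery_mds_unregister();

    // Hypothetical outline: installed during initialize_rde(), before
    // any election has run, and uninstalled on SIGTERM just before
    // daemon_exit(). NCSMDS_SVC_ID_RDE is installed separately, only
    // once this node becomes a candidate for election.
    void discovery_lifecycle_sketch() {
      rde_discovery_mds_register();
      // ... elections, role changes ...
      rde_discovery_mds_unregister();
    }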
> diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
> index ecab773..a967bd5 100644
> --- a/src/rde/rded/role.cc
> +++ b/src/rde/rded/role.cc
> @@ -91,22 +91,26 @@ void Role::MonitorCallback(const std::string& key, const std::string& new_value,
> osafassert(status == NCSCC_RC_SUCCESS);
> }
>
> -void Role::PromoteNode(const uint64_t cluster_size) {
> +void Role::PromoteNode(const uint64_t cluster_size,
> + const bool relaxed_mode) {
> TRACE_ENTER();
> SaAisErrorT rc;
>
> Consensus consensus_service;
> + bool promotion_pending = false;
>
> rc = consensus_service.PromoteThisNode(true, cluster_size);
> - if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
> - LOG_ER("Unable to set active controller in consensus service");
> - opensaf_reboot(0, nullptr,
> - "Unable to set active controller in consensus service");
> - }
> -
> if (rc == SA_AIS_ERR_EXIST) {
> LOG_WA("Another controller is already active");
> return;
> + } else if (rc != SA_AIS_OK && relaxed_mode == true) {
> + LOG_WA("Unable to set active controller in consensus service");
> + LOG_WA("Will become active anyway");
> + promotion_pending = true;
> + } else if (rc != SA_AIS_OK) {
> + LOG_ER("Unable to set active controller in consensus service");
> + opensaf_reboot(0, nullptr,
> + "Unable to set active controller in consensus service");
> }
>
> RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> @@ -117,9 +121,26 @@ void Role::PromoteNode(const uint64_t cluster_size) {
> uint32_t status;
> status = m_NCS_IPC_SEND(&cb->mbx, msg, NCS_IPC_PRIORITY_HIGH);
> osafassert(status == NCSCC_RC_SUCCESS);
> +
> + if (promotion_pending) {
> + osafassert(consensus_service.IsRelaxedNodePromotionEnabled() == true);
> + // the node has been promoted, even though the lock has not been obtained
> + // keep trying the consensus service
> + while (rc != SA_AIS_OK) {
> + rc = consensus_service.PromoteThisNode(true, cluster_size);
> + if (rc == SA_AIS_ERR_EXIST) {
> + LOG_ER("Unable to set active controller in consensus service");
> + opensaf_reboot(0, nullptr,
> + "Unable to set active controller in consensus
> service");
> + }
> + std::this_thread::sleep_for(std::chrono::seconds(1));
> + }
> + LOG_NO("Successfully set active controller in consensus service");
> + }
> }
>
> void Role::NodePromoted() {
> + // promoted to active from election
> ExecutePreActiveScript();
> LOG_NO("Switched to ACTIVE from %s", to_string(role()));
> role_ = PCS_RDA_ACTIVE;
> @@ -127,6 +148,13 @@ void Role::NodePromoted() {
>
> Consensus consensus_service;
> RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> + if (cb->peer_controllers.empty() == false) {
> + TRACE("Set state to kActiveElectedSeenPeer");
> + cb->state = State::kActiveElectedSeenPeer;
> + } else {
> + TRACE("Set state to kActiveElected");
> + cb->state = State::kActiveElected;
> + }
>
> // register for callback if active controller is changed
> // in consensus service
> @@ -161,8 +189,24 @@ timespec* Role::Poll(timespec* ts) {
> } else {
> election_end_time_ = base::kTimespecMax;
> RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> - std::thread(&Role::PromoteNode,
> - this, cb->cluster_members.size()).detach();
> +
> + bool is_candidate = IsCandidate();
> + Consensus consensus_service;
> + if (consensus_service.IsEnabled() == true &&
> + is_candidate == false &&
> + consensus_service.IsWritable() == false) {
> + // node promotion will fail resulting in node reboot,
> + // reset timer and try later
> + TRACE("reset timer and try later");
> + ResetElectionTimer();
> + now = base::ReadMonotonicClock();
> + *ts = election_end_time_ - now;
> + timeout = ts;
> + } else {
> + std::thread(&Role::PromoteNode,
> + this, cb->cluster_members.size(),
> + is_candidate).detach();
> + }
> }
> }
> return timeout;
> @@ -177,10 +221,42 @@ void Role::ExecutePreActiveScript() {
>
> void Role::AddPeer(NODE_ID node_id) {
> auto result = known_nodes_.insert(node_id);
> - if (result.second) ResetElectionTimer();
> + if (result.second) {
> + ResetElectionTimer();
> + }
> +}
> +
> +// call from main thread only
> +bool Role::IsCandidate() {
> + TRACE_ENTER();
> + bool result = false;
> + Consensus consensus_service;
> + RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> +
> + // if relaxed node promotion is enabled, allow this node to be promoted
> + // active if it can see a peer SC and this node has the lowest node ID
> + if (consensus_service.IsRelaxedNodePromotionEnabled() == true &&
> + cb->state == State::kNotActiveSeenPeer) {
> + LOG_NO("Relaxed node promotion enabled. This node is a candidate.");
> + result = true;
> + }
> +
> + return result;
> +}
> +
> +bool Role::IsPeerPresent() {
> + bool result = false;
> + RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> +
> + if (cb->peer_controllers.empty() == false) {
> + result = true;
> + }
> +
> + return result;
> }
>
> uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
> + TRACE_ENTER();
> PCS_RDA_ROLE old_role = role_;
> if (new_role == PCS_RDA_ACTIVE &&
> (old_role == PCS_RDA_UNDEFINED || old_role == PCS_RDA_QUIESCED)) {
> @@ -196,6 +272,7 @@ uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
> // in consensus service
> Consensus consensus_service;
> RDE_CONTROL_BLOCK* cb = rde_get_control_block();
> + cb->state = State::kActiveFailover;
> if (cb->monitor_lock_thread_running == false) {
> cb->monitor_lock_thread_running = true;
> consensus_service.MonitorLock(MonitorCallback, cb->mbx);
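[HansN] Summarising how cb->state ends up in an active state across the
two promotion paths in this patch (compile-only sketch; the helper
StateAfterPromotion is an illustrative name, not in the patch):

    enum class State { kNotActive, kNotActiveSeenPeer, kActiveElected,
                       kActiveElectedSeenPeer, kActiveFailover };

    // NodePromoted() (won an election): kActiveElected, or
    // kActiveElectedSeenPeer if a peer controller is already known.
    // SetRole(PCS_RDA_ACTIVE) from UNDEFINED/QUIESCED (failover):
    // kActiveFailover.
    State StateAfterPromotion(bool elected, bool peer_seen) {
      if (!elected) return State::kActiveFailover;
      return peer_seen ? State::kActiveElectedSeenPeer
                       : State::kActiveElected;
    }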
> diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
> index 9780deb..1920f59 100644
> --- a/src/rde/rded/role.h
> +++ b/src/rde/rded/role.h
> @@ -34,6 +34,8 @@ class Role {
> public:
> explicit Role(NODE_ID own_node_id);
> void AddPeer(NODE_ID node_id);
> + bool IsCandidate();
> + bool IsPeerPresent();
> void SetPeerState(PCS_RDA_ROLE node_role, NODE_ID node_id);
> timespec* Poll(timespec* ts);
> uint32_t SetRole(PCS_RDA_ROLE new_role);
> @@ -49,7 +51,7 @@ class Role {
> void ExecutePreActiveScript();
> void ResetElectionTimer();
> uint32_t UpdateMdsRegistration(PCS_RDA_ROLE new_role, PCS_RDA_ROLE old_role);
> - void PromoteNode(const uint64_t cluster_size);
> + void PromoteNode(const uint64_t cluster_size, const bool relaxed_mode);
>
> std::set<NODE_ID> known_nodes_;
> PCS_RDA_ROLE role_;