[devel] [PATCH 1/1] fmd: prevent data races between MDS and main threads [#2763]
--- src/fm/fmd/fm_cb.h| 89 ++- src/fm/fmd/fm_main.cc | 38 +++--- src/fm/fmd/fm_mds.cc | 35 +++- src/fm/fmd/fm_mds.h | 2 ++ src/fm/fmd/fm_mem.h | 8 - 5 files changed, 86 insertions(+), 86 deletions(-) diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h index ffb3d8478..cfa50d883 100644 --- a/src/fm/fmd/fm_cb.h +++ b/src/fm/fmd/fm_cb.h @@ -18,19 +18,20 @@ #ifndef FM_FMD_FM_CB_H_ #define FM_FMD_FM_CB_H_ -#include -#include #include #include +#include +#include +#include +#include +#include +#include "base/mutex.h" #include "base/ncssysf_ipc.h" #include "base/ncssysf_tmr.h" -#include "fm_amf.h" +#include "fm/fmd/fm_amf.h" #include "mds/mds_papi.h" #include "rde/agent/rda_papi.h" -#include -#include -#include extern uint32_t gl_fm_hdl; @@ -55,58 +56,60 @@ typedef struct fm_tmr { } FM_TMR; /* FM control block */ -typedef struct fm_cb { - uint32_t cb_hdl; - SYSF_MBX mbx; +struct FM_CB { + uint32_t cb_hdl{}; + SYSF_MBX mbx{}; /* FM AMF CB */ - FM_AMF_CB fm_amf_cb; - NODE_ID node_id; - SaNameT node_name; + FM_AMF_CB fm_amf_cb{}; + std::atomic node_id{}; + SaNameT node_name{}; - SaNameT peer_node_name; - NODE_ID peer_node_id; - MDS_DEST peer_adest; /* will be 0 if peer is not up */ + std::string peer_node_name{}; + std::atomic peer_node_id{}; + std::atomic peer_adest{}; /* will be 0 if peer is not up */ /* Holds own role. */ - PCS_RDA_ROLE role; + PCS_RDA_ROLE role{}; /* AMF HA state for FM */ - SaAmfHAStateT amf_state; + SaAmfHAStateT amf_state{}; /* MDS handles. */ - MDS_DEST adest; - MDS_HDL adest_hdl; - MDS_HDL adest_pwe1_hdl; + MDS_DEST adest{}; + MDS_HDL adest_hdl{}; + MDS_HDL adest_pwe1_hdl{}; /* Timers */ - FM_TMR promote_active_tmr; - FM_TMR activation_supervision_tmr; + FM_TMR promote_active_tmr{}; + FM_TMR activation_supervision_tmr{}; /* Time in terms of one hundredth of seconds (500 for 5 secs.) */ - uint32_t active_promote_tmr_val; - uint32_t activation_supervision_tmr_val; - bool fully_initialized; - bool csi_assigned; + uint32_t active_promote_tmr_val{}; + uint32_t activation_supervision_tmr_val{}; + bool fully_initialized{false}; + bool csi_assigned{false}; /* Variable to indicate OpenSAF control of TIPC transport */ - bool control_tipc; + bool control_tipc{true}; /* Booleans to mark service down events of critical Osaf Services */ - bool immd_down; - bool immnd_down; - bool amfnd_down; - bool amfd_down; - bool fm_down; - - bool peer_sc_up; - bool well_connected; - uint64_t cluster_size; - struct timespec last_well_connected; - struct timespec node_isolation_timeout; - SaClmHandleT clm_hdl; - bool use_remote_fencing; - SaNameT peer_clm_node_name; - bool peer_node_terminated; -} FM_CB; + bool immd_down{true}; + bool immnd_down{true}; + std::atomic amfnd_down{true}; + bool amfd_down{true}; + bool fm_down{false}; + + std::atomic peer_sc_up{false}; + bool well_connected{false}; + uint64_t cluster_size{}; + struct timespec last_well_connected{}; + struct timespec node_isolation_timeout{}; + SaClmHandleT clm_hdl{}; + bool use_remote_fencing{false}; + SaNameT peer_clm_node_name{}; + std::atomic peer_node_terminated{false}; + + base::Mutex mutex_{}; +}; extern const char *role_string[]; extern FM_CB *fm_cb; diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc index 693bf9438..1244c2347 100644 --- a/src/fm/fmd/fm_main.cc +++ b/src/fm/fmd/fm_main.cc @@ -30,7 +30,7 @@ This file contains the main() routine for FM. #include "base/osaf_extended_name.h" #include "base/osaf_poll.h" #include "base/osaf_time.h" -#include "fm.h" +#include "fm/fmd/fm.h" #include "nid/agent/nid_api.h" #include "osaf/configmake.h" #include "osaf/consensus/consensus.h" @@ -107,7 +107,7 @@ void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info, rc = ncs_ipc_send(_cb->mbx, (NCS_IPC_MSG *)evt, NCS_IPC_PRIORITY_HIGH); if (rc != NCSCC_RC_SUCCESS) { -syslog(LOG_ERR, "IPC send failed %d", rc); +syslog(LOG_ERR, "IPC send failed %u", rc); free(evt); } @@ -144,17 +144,10 @@ int main(int argc, char *argv[]) { } /* Allocate memory for FM_CB. */ - fm_cb = m_MMGR_ALLOC_FM_CB; - if (NULL == fm_cb) { -syslog(LOG_ERR, "CB Allocation failed."); -goto fm_init_failed; - } + fm_cb = new FM_CB(); - memset(fm_cb, 0, sizeof(FM_CB)); fm_cb->fm_amf_cb.nid_started = nid_started; fm_cb->fm_amf_cb.amf_fd = -1; - fm_cb->fully_initialized = false; - fm_cb->csi_assigned = false; /* Variable to control whether FM will trigger failover immediately * upon recieving down event of critical services or will wait @@ -169,11 +162,6 @@ int main(int argc, char *argv[]) { */ fm_cb->control_tipc = true; /* Default behaviour */ - fm_cb->immd_down = true; - fm_cb->immnd_down = true; - fm_cb->amfnd_down = true; - fm_cb->amfd_down = true; - /* Create
Re: [devel] [PATCH 1/1] fmd: prevent data races between MDS and main threads [#2763]
Hi Anders Thanks, please see [GL]. On 08/02/18 02:06, Anders Widell wrote: Ack with minor comments inline, marked AndersW>. regards, Anders Widell On 02/06/2018 03:43 AM, Gary Lee wrote: --- src/fm/fmd/fm_cb.h | 89 ++- src/fm/fmd/fm_main.cc | 32 +- src/fm/fmd/fm_mds.cc | 27 src/fm/fmd/fm_mds.h | 2 ++ 4 files changed, 79 insertions(+), 71 deletions(-) diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h index ffb3d8478..cfa50d883 100644 --- a/src/fm/fmd/fm_cb.h +++ b/src/fm/fmd/fm_cb.h @@ -18,19 +18,20 @@ #ifndef FM_FMD_FM_CB_H_ #define FM_FMD_FM_CB_H_ -#include -#include #include #include +#include +#include +#include +#include +#include +#include "base/mutex.h" #include "base/ncssysf_ipc.h" #include "base/ncssysf_tmr.h" -#include "fm_amf.h" +#include "fm/fmd/fm_amf.h" #include "mds/mds_papi.h" #include "rde/agent/rda_papi.h" -#include -#include -#include extern uint32_t gl_fm_hdl; @@ -55,58 +56,60 @@ typedef struct fm_tmr { } FM_TMR; /* FM control block */ -typedef struct fm_cb { - uint32_t cb_hdl; - SYSF_MBX mbx; +struct FM_CB { + uint32_t cb_hdl{}; + SYSF_MBX mbx{}; /* FM AMF CB */ - FM_AMF_CB fm_amf_cb; - NODE_ID node_id; - SaNameT node_name; + FM_AMF_CB fm_amf_cb{}; + std::atomic node_id{}; + SaNameT node_name{}; - SaNameT peer_node_name; - NODE_ID peer_node_id; - MDS_DEST peer_adest; /* will be 0 if peer is not up */ + std::string peer_node_name{}; + std::atomic peer_node_id{}; + std::atomic peer_adest{}; /* will be 0 if peer is not up */ /* Holds own role. */ - PCS_RDA_ROLE role; + PCS_RDA_ROLE role{}; /* AMF HA state for FM */ - SaAmfHAStateT amf_state; + SaAmfHAStateT amf_state{}; /* MDS handles. */ - MDS_DEST adest; - MDS_HDL adest_hdl; - MDS_HDL adest_pwe1_hdl; + MDS_DEST adest{}; + MDS_HDL adest_hdl{}; + MDS_HDL adest_pwe1_hdl{}; /* Timers */ - FM_TMR promote_active_tmr; - FM_TMR activation_supervision_tmr; + FM_TMR promote_active_tmr{}; + FM_TMR activation_supervision_tmr{}; /* Time in terms of one hundredth of seconds (500 for 5 secs.) */ - uint32_t active_promote_tmr_val; - uint32_t activation_supervision_tmr_val; - bool fully_initialized; - bool csi_assigned; + uint32_t active_promote_tmr_val{}; + uint32_t activation_supervision_tmr_val{}; + bool fully_initialized{false}; + bool csi_assigned{false}; /* Variable to indicate OpenSAF control of TIPC transport */ - bool control_tipc; + bool control_tipc{true}; /* Booleans to mark service down events of critical Osaf Services */ - bool immd_down; - bool immnd_down; - bool amfnd_down; - bool amfd_down; - bool fm_down; - - bool peer_sc_up; - bool well_connected; - uint64_t cluster_size; - struct timespec last_well_connected; - struct timespec node_isolation_timeout; - SaClmHandleT clm_hdl; - bool use_remote_fencing; - SaNameT peer_clm_node_name; - bool peer_node_terminated; -} FM_CB; + bool immd_down{true}; + bool immnd_down{true}; + std::atomic amfnd_down{true}; + bool amfd_down{true}; + bool fm_down{false}; + + std::atomic peer_sc_up{false}; + bool well_connected{false}; + uint64_t cluster_size{}; + struct timespec last_well_connected{}; + struct timespec node_isolation_timeout{}; + SaClmHandleT clm_hdl{}; + bool use_remote_fencing{false}; + SaNameT peer_clm_node_name{}; + std::atomic peer_node_terminated{false}; + + base::Mutex mutex_{}; +}; extern const char *role_string[]; extern FM_CB *fm_cb; diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc index 693bf9438..d8adcb583 100644 --- a/src/fm/fmd/fm_main.cc +++ b/src/fm/fmd/fm_main.cc @@ -30,7 +30,7 @@ This file contains the main() routine for FM. #include "base/osaf_extended_name.h" #include "base/osaf_poll.h" #include "base/osaf_time.h" -#include "fm.h" +#include "fm/fmd/fm.h" #include "nid/agent/nid_api.h" #include "osaf/configmake.h" #include "osaf/consensus/consensus.h" @@ -107,7 +107,7 @@ void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info, rc = ncs_ipc_send(_cb->mbx, (NCS_IPC_MSG *)evt, NCS_IPC_PRIORITY_HIGH); if (rc != NCSCC_RC_SUCCESS) { - syslog(LOG_ERR, "IPC send failed %d", rc); + syslog(LOG_ERR, "IPC send failed %u", rc); free(evt); } @@ -150,11 +150,8 @@ int main(int argc, char *argv[]) { goto fm_init_failed; } - memset(fm_cb, 0, sizeof(FM_CB)); fm_cb->fm_amf_cb.nid_started = nid_started; fm_cb->fm_amf_cb.amf_fd = -1; - fm_cb->fully_initialized = false; - fm_cb->csi_assigned = false; /* Variable to control whether FM will trigger failover immediately * upon recieving down event of critical services or will wait @@ -169,11 +166,6 @@ int main(int argc, char *argv[]) { */ fm_cb->control_tipc = true; /* Default behaviour */ AndersW> control_tipc is already initialized, so no need to do it here again? [GL] I
Re: [devel] [PATCH 1/1] fmd: prevent data races between MDS and main threads [#2763]
Ack with minor comments inline, marked AndersW>. regards, Anders Widell On 02/06/2018 03:43 AM, Gary Lee wrote: --- src/fm/fmd/fm_cb.h| 89 ++- src/fm/fmd/fm_main.cc | 32 +- src/fm/fmd/fm_mds.cc | 27 src/fm/fmd/fm_mds.h | 2 ++ 4 files changed, 79 insertions(+), 71 deletions(-) diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h index ffb3d8478..cfa50d883 100644 --- a/src/fm/fmd/fm_cb.h +++ b/src/fm/fmd/fm_cb.h @@ -18,19 +18,20 @@ #ifndef FM_FMD_FM_CB_H_ #define FM_FMD_FM_CB_H_ -#include -#include #include #include +#include +#include +#include +#include +#include +#include "base/mutex.h" #include "base/ncssysf_ipc.h" #include "base/ncssysf_tmr.h" -#include "fm_amf.h" +#include "fm/fmd/fm_amf.h" #include "mds/mds_papi.h" #include "rde/agent/rda_papi.h" -#include -#include -#include extern uint32_t gl_fm_hdl; @@ -55,58 +56,60 @@ typedef struct fm_tmr { } FM_TMR; /* FM control block */ -typedef struct fm_cb { - uint32_t cb_hdl; - SYSF_MBX mbx; +struct FM_CB { + uint32_t cb_hdl{}; + SYSF_MBX mbx{}; /* FM AMF CB */ - FM_AMF_CB fm_amf_cb; - NODE_ID node_id; - SaNameT node_name; + FM_AMF_CB fm_amf_cb{}; + std::atomic node_id{}; + SaNameT node_name{}; - SaNameT peer_node_name; - NODE_ID peer_node_id; - MDS_DEST peer_adest; /* will be 0 if peer is not up */ + std::string peer_node_name{}; + std::atomic peer_node_id{}; + std::atomic peer_adest{}; /* will be 0 if peer is not up */ /* Holds own role. */ - PCS_RDA_ROLE role; + PCS_RDA_ROLE role{}; /* AMF HA state for FM */ - SaAmfHAStateT amf_state; + SaAmfHAStateT amf_state{}; /* MDS handles. */ - MDS_DEST adest; - MDS_HDL adest_hdl; - MDS_HDL adest_pwe1_hdl; + MDS_DEST adest{}; + MDS_HDL adest_hdl{}; + MDS_HDL adest_pwe1_hdl{}; /* Timers */ - FM_TMR promote_active_tmr; - FM_TMR activation_supervision_tmr; + FM_TMR promote_active_tmr{}; + FM_TMR activation_supervision_tmr{}; /* Time in terms of one hundredth of seconds (500 for 5 secs.) */ - uint32_t active_promote_tmr_val; - uint32_t activation_supervision_tmr_val; - bool fully_initialized; - bool csi_assigned; + uint32_t active_promote_tmr_val{}; + uint32_t activation_supervision_tmr_val{}; + bool fully_initialized{false}; + bool csi_assigned{false}; /* Variable to indicate OpenSAF control of TIPC transport */ - bool control_tipc; + bool control_tipc{true}; /* Booleans to mark service down events of critical Osaf Services */ - bool immd_down; - bool immnd_down; - bool amfnd_down; - bool amfd_down; - bool fm_down; - - bool peer_sc_up; - bool well_connected; - uint64_t cluster_size; - struct timespec last_well_connected; - struct timespec node_isolation_timeout; - SaClmHandleT clm_hdl; - bool use_remote_fencing; - SaNameT peer_clm_node_name; - bool peer_node_terminated; -} FM_CB; + bool immd_down{true}; + bool immnd_down{true}; + std::atomic amfnd_down{true}; + bool amfd_down{true}; + bool fm_down{false}; + + std::atomic peer_sc_up{false}; + bool well_connected{false}; + uint64_t cluster_size{}; + struct timespec last_well_connected{}; + struct timespec node_isolation_timeout{}; + SaClmHandleT clm_hdl{}; + bool use_remote_fencing{false}; + SaNameT peer_clm_node_name{}; + std::atomic peer_node_terminated{false}; + + base::Mutex mutex_{}; +}; extern const char *role_string[]; extern FM_CB *fm_cb; diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc index 693bf9438..d8adcb583 100644 --- a/src/fm/fmd/fm_main.cc +++ b/src/fm/fmd/fm_main.cc @@ -30,7 +30,7 @@ This file contains the main() routine for FM. #include "base/osaf_extended_name.h" #include "base/osaf_poll.h" #include "base/osaf_time.h" -#include "fm.h" +#include "fm/fmd/fm.h" #include "nid/agent/nid_api.h" #include "osaf/configmake.h" #include "osaf/consensus/consensus.h" @@ -107,7 +107,7 @@ void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info, rc = ncs_ipc_send(_cb->mbx, (NCS_IPC_MSG *)evt, NCS_IPC_PRIORITY_HIGH); if (rc != NCSCC_RC_SUCCESS) { -syslog(LOG_ERR, "IPC send failed %d", rc); +syslog(LOG_ERR, "IPC send failed %u", rc); free(evt); } @@ -150,11 +150,8 @@ int main(int argc, char *argv[]) { goto fm_init_failed; } - memset(fm_cb, 0, sizeof(FM_CB)); fm_cb->fm_amf_cb.nid_started = nid_started; fm_cb->fm_amf_cb.amf_fd = -1; - fm_cb->fully_initialized = false; - fm_cb->csi_assigned = false; /* Variable to control whether FM will trigger failover immediately * upon recieving down event of critical services or will wait @@ -169,11 +166,6 @@ int main(int argc, char *argv[]) { */ fm_cb->control_tipc = true; /* Default behaviour */ AndersW> control_tipc is already initialized, so no need to do it here again? - fm_cb->immd_down = true; - fm_cb->immnd_down = true; -
[devel] [PATCH 1/1] fmd: prevent data races between MDS and main threads [#2763]
--- src/fm/fmd/fm_cb.h| 89 ++- src/fm/fmd/fm_main.cc | 32 +- src/fm/fmd/fm_mds.cc | 27 src/fm/fmd/fm_mds.h | 2 ++ 4 files changed, 79 insertions(+), 71 deletions(-) diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h index ffb3d8478..cfa50d883 100644 --- a/src/fm/fmd/fm_cb.h +++ b/src/fm/fmd/fm_cb.h @@ -18,19 +18,20 @@ #ifndef FM_FMD_FM_CB_H_ #define FM_FMD_FM_CB_H_ -#include -#include #include #include +#include +#include +#include +#include +#include +#include "base/mutex.h" #include "base/ncssysf_ipc.h" #include "base/ncssysf_tmr.h" -#include "fm_amf.h" +#include "fm/fmd/fm_amf.h" #include "mds/mds_papi.h" #include "rde/agent/rda_papi.h" -#include -#include -#include extern uint32_t gl_fm_hdl; @@ -55,58 +56,60 @@ typedef struct fm_tmr { } FM_TMR; /* FM control block */ -typedef struct fm_cb { - uint32_t cb_hdl; - SYSF_MBX mbx; +struct FM_CB { + uint32_t cb_hdl{}; + SYSF_MBX mbx{}; /* FM AMF CB */ - FM_AMF_CB fm_amf_cb; - NODE_ID node_id; - SaNameT node_name; + FM_AMF_CB fm_amf_cb{}; + std::atomic node_id{}; + SaNameT node_name{}; - SaNameT peer_node_name; - NODE_ID peer_node_id; - MDS_DEST peer_adest; /* will be 0 if peer is not up */ + std::string peer_node_name{}; + std::atomic peer_node_id{}; + std::atomic peer_adest{}; /* will be 0 if peer is not up */ /* Holds own role. */ - PCS_RDA_ROLE role; + PCS_RDA_ROLE role{}; /* AMF HA state for FM */ - SaAmfHAStateT amf_state; + SaAmfHAStateT amf_state{}; /* MDS handles. */ - MDS_DEST adest; - MDS_HDL adest_hdl; - MDS_HDL adest_pwe1_hdl; + MDS_DEST adest{}; + MDS_HDL adest_hdl{}; + MDS_HDL adest_pwe1_hdl{}; /* Timers */ - FM_TMR promote_active_tmr; - FM_TMR activation_supervision_tmr; + FM_TMR promote_active_tmr{}; + FM_TMR activation_supervision_tmr{}; /* Time in terms of one hundredth of seconds (500 for 5 secs.) */ - uint32_t active_promote_tmr_val; - uint32_t activation_supervision_tmr_val; - bool fully_initialized; - bool csi_assigned; + uint32_t active_promote_tmr_val{}; + uint32_t activation_supervision_tmr_val{}; + bool fully_initialized{false}; + bool csi_assigned{false}; /* Variable to indicate OpenSAF control of TIPC transport */ - bool control_tipc; + bool control_tipc{true}; /* Booleans to mark service down events of critical Osaf Services */ - bool immd_down; - bool immnd_down; - bool amfnd_down; - bool amfd_down; - bool fm_down; - - bool peer_sc_up; - bool well_connected; - uint64_t cluster_size; - struct timespec last_well_connected; - struct timespec node_isolation_timeout; - SaClmHandleT clm_hdl; - bool use_remote_fencing; - SaNameT peer_clm_node_name; - bool peer_node_terminated; -} FM_CB; + bool immd_down{true}; + bool immnd_down{true}; + std::atomic amfnd_down{true}; + bool amfd_down{true}; + bool fm_down{false}; + + std::atomic peer_sc_up{false}; + bool well_connected{false}; + uint64_t cluster_size{}; + struct timespec last_well_connected{}; + struct timespec node_isolation_timeout{}; + SaClmHandleT clm_hdl{}; + bool use_remote_fencing{false}; + SaNameT peer_clm_node_name{}; + std::atomic peer_node_terminated{false}; + + base::Mutex mutex_{}; +}; extern const char *role_string[]; extern FM_CB *fm_cb; diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc index 693bf9438..d8adcb583 100644 --- a/src/fm/fmd/fm_main.cc +++ b/src/fm/fmd/fm_main.cc @@ -30,7 +30,7 @@ This file contains the main() routine for FM. #include "base/osaf_extended_name.h" #include "base/osaf_poll.h" #include "base/osaf_time.h" -#include "fm.h" +#include "fm/fmd/fm.h" #include "nid/agent/nid_api.h" #include "osaf/configmake.h" #include "osaf/consensus/consensus.h" @@ -107,7 +107,7 @@ void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info, rc = ncs_ipc_send(_cb->mbx, (NCS_IPC_MSG *)evt, NCS_IPC_PRIORITY_HIGH); if (rc != NCSCC_RC_SUCCESS) { -syslog(LOG_ERR, "IPC send failed %d", rc); +syslog(LOG_ERR, "IPC send failed %u", rc); free(evt); } @@ -150,11 +150,8 @@ int main(int argc, char *argv[]) { goto fm_init_failed; } - memset(fm_cb, 0, sizeof(FM_CB)); fm_cb->fm_amf_cb.nid_started = nid_started; fm_cb->fm_amf_cb.amf_fd = -1; - fm_cb->fully_initialized = false; - fm_cb->csi_assigned = false; /* Variable to control whether FM will trigger failover immediately * upon recieving down event of critical services or will wait @@ -169,11 +166,6 @@ int main(int argc, char *argv[]) { */ fm_cb->control_tipc = true; /* Default behaviour */ - fm_cb->immd_down = true; - fm_cb->immnd_down = true; - fm_cb->amfnd_down = true; - fm_cb->amfd_down = true; - /* Create CB handle */ gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_GFM, (NCSCONTEXT)fm_cb); @@ -395,8 +387,9 @@ static uint32_t fm_get_args(FM_CB *fm_cb) {