osaf/services/saf/clmsv/clms/clms_amf.c | 9 +- osaf/services/saf/clmsv/clms/clms_cb.h | 4 + osaf/services/saf/clmsv/clms/clms_imm.c | 243 +++++++++++++++++++++++++++--- osaf/services/saf/clmsv/clms/clms_imm.h | 3 + osaf/services/saf/clmsv/clms/clms_main.c | 25 ++- 5 files changed, 252 insertions(+), 32 deletions(-)
ERR_EXIST and TRY_AGAIN are treated the same. Also, an rtu_pending flag is introduced. The flag is set when an IMM API call returns TRY_AGAIN or ERR_EXIST; the call is reattempted later, and the flag is cleared once the call succeeds. diff --git a/osaf/services/saf/clmsv/clms/clms_amf.c b/osaf/services/saf/clmsv/clms/clms_amf.c --- a/osaf/services/saf/clmsv/clms/clms_amf.c +++ b/osaf/services/saf/clmsv/clms/clms_amf.c @@ -256,8 +256,11 @@ static void clms_amf_csi_set_callback(Sa if (role_change == true) { - if(clms_cb->ha_state == SA_AMF_HA_ACTIVE) + if(clms_cb->ha_state == SA_AMF_HA_ACTIVE) { clms_imm_impl_set(clms_cb); + /* Unconditionally refresh IMM for runtime attributes + clms_switchon_all_pending_rtupdates(); */ + } if ((rc = clms_mds_change_role(clms_cb)) != NCSCC_RC_SUCCESS) { LOG_ER("clms_mds_change_role FAILED"); @@ -266,6 +269,10 @@ static void clms_amf_csi_set_callback(Sa /* Inform MBCSV of HA state change */ if (NCSCC_RC_SUCCESS != (error = clms_mbcsv_change_HA_state(clms_cb))) error = SA_AIS_ERR_FAILED_OPERATION; + + /* Clear up any rtu updates, the active will take care of it */ + if (clms_cb->ha_state == SA_AMF_HA_STANDBY) + clms_switchoff_all_pending_rtupdates(); } response: diff --git a/osaf/services/saf/clmsv/clms/clms_cb.h b/osaf/services/saf/clmsv/clms/clms_cb.h --- a/osaf/services/saf/clmsv/clms/clms_cb.h +++ b/osaf/services/saf/clmsv/clms/clms_cb.h @@ -40,6 +40,7 @@ typedef struct cluster_db_t { SaNameT name; SaUint32T num_nodes; SaTimeT init_time; + SaBoolT rtu_pending; /*struct cluster_db_t *next; */ /* Multiple cluster is not supported as of now */ } CLMS_CLUSTER_INFO; @@ -69,6 +70,8 @@ typedef struct cluster_node_t { ADMIN_OP admin_op; /*plm or clm operation */ timer_t lock_timerid; /*Timer id for admin lock operation */ SaInvocationT plm_invid; /*plmtrack callback invocation id */ + SaBoolT rtu_pending; /* Flag to mark whether an IMM RunTime attribute Update is pending and to be retried */ + SaBoolT admin_rtu_pending; /* Flag to 
mark whether an IMM RunTime attribute Update is pending and to be retried */ struct cluster_node_t *dep_node_list; /*Dependent nodes list - in case of plm operation */ struct cluster_node_t *next; } CLMS_CLUSTER_NODE; @@ -172,6 +175,7 @@ typedef struct clms_cb_t { SaSelectionObjectT plm_sel_obj; /* PLMSv selection object */ SaNtfHandleT ntf_hdl; /* Handled obtained from NTFSv */ SaBoolT reg_with_plm; /*plm present in system */ + SaBoolT rtu_pending; /* Global flag to determine a pending RTU update and the poll timeout */ CLMA_DOWN_LIST *clma_down_list_head; /* CLMA down reccords - Fix for Failover missed down events Processing */ CLMA_DOWN_LIST *clma_down_list_tail; diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c b/osaf/services/saf/clmsv/clms/clms_imm.c --- a/osaf/services/saf/clmsv/clms/clms_imm.c +++ b/osaf/services/saf/clmsv/clms/clms_imm.c @@ -22,6 +22,9 @@ extern struct ImmutilWrapperProfile immutilWrapperProfile; void clms_all_node_rattr_update(void); +void clms_retry_pending_rtupdates(void); +void clms_switchoff_all_pending_rtupdates(void); +void clms_switchon_all_pending_rtupdates(void); SaAisErrorT clms_node_ccb_comp_cb(CcbUtilOperationData_t * opdata); uint32_t clms_imm_node_unlock(CLMS_CLUSTER_NODE * nodeop); uint32_t clms_imm_node_lock(CLMS_CLUSTER_NODE * nodeop); @@ -32,6 +35,10 @@ static uint32_t clms_lock_send_no_start_ static SaVersionT immVersion = { 'A', 2, 1 }; + +const unsigned int sleep_delay_ms = 500; +const unsigned int max_waiting_time_ms = 60 * 1000; /* 60 seconds */ + /** * Initialize the track response patricia tree for the node * @param[in] node node to initialize trackresponse tree @@ -64,22 +71,49 @@ static void *imm_impl_set_node_down_proc NODE_DOWN_LIST *node_down_rec = NULL; NODE_DOWN_LIST *temp_node_down_rec = NULL; CLMS_CLUSTER_NODE *node = NULL; + int msecs_waited; TRACE_ENTER(); /* Update IMM */ - if ((rc = immutil_saImmOiImplementerSet(cb->immOiHandle, IMPLEMENTER_NAME)) != SA_AIS_OK) { - LOG_ER("saImmOiImplementerSet 
failed rc:%u, exiting", rc); + + msecs_waited = 0; + rc = saImmOiImplementerSet(cb->immOiHandle, IMPLEMENTER_NAME); + while (((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_EXIST)) && + (msecs_waited < max_waiting_time_ms)) { + usleep(sleep_delay_ms * 1000); + msecs_waited += sleep_delay_ms; + rc = saImmOiImplementerSet(cb->immOiHandle, IMPLEMENTER_NAME); + } + if (rc != SA_AIS_OK) { + /* We have tried enough, now just exit */ + LOG_ER("saImmOiImplementerSet failed, rc = %u", rc); exit(EXIT_FAILURE); } - - if ((rc = immutil_saImmOiClassImplementerSet(cb->immOiHandle, "SaClmNode")) != SA_AIS_OK) { - LOG_ER("saImmOiClassImplementerSet failed for class SaClmNode rc:%u, exiting", rc); + + msecs_waited = 0; + rc = saImmOiClassImplementerSet(cb->immOiHandle, "SaClmNode"); + while (((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_EXIST)) && + (msecs_waited < max_waiting_time_ms)) { + usleep(sleep_delay_ms * 1000); + msecs_waited += sleep_delay_ms; + rc = saImmOiClassImplementerSet(cb->immOiHandle, "SaClmNode"); + } + if (rc != SA_AIS_OK) { + LOG_ER("saImmOiClassImplementerSet failed for class SaClmNode, rc = %u", rc); exit(EXIT_FAILURE); } - - if ((rc = immutil_saImmOiClassImplementerSet(cb->immOiHandle, "SaClmCluster")) != SA_AIS_OK) { - LOG_ER("saImmOiClassImplementerSet failed for class SaClmCluster rc:%u, exiting", rc); + + msecs_waited = 0; + rc = saImmOiClassImplementerSet(cb->immOiHandle, "SaClmCluster"); + while (((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_EXIST)) && + (msecs_waited < max_waiting_time_ms)) { + usleep(sleep_delay_ms * 1000); + msecs_waited += sleep_delay_ms; + rc = saImmOiClassImplementerSet(cb->immOiHandle, "SaClmCluster"); + } + if (rc != SA_AIS_OK) { + LOG_ER("saImmOiClassImplementerSet failed for class SaClmCluster, rc = %u,", rc); exit(EXIT_FAILURE); } @@ -175,7 +209,10 @@ CLMS_CLUSTER_NODE *clms_node_new(SaNameT node->node_name.length = name->length; node->node_addr.family = 1; node->admin_state = SA_CLM_ADMIN_UNLOCKED; + 
node->rtu_pending = false; + node->admin_rtu_pending = false; + TRACE("RTU pending flag is switched off"); TRACE("nodename %s", node->node_name.value); while ((attr = attrs[i++]) != NULL) { @@ -350,6 +387,9 @@ SaAisErrorT clms_cluster_config_get(void if (clms_cb->ha_state == SA_AMF_HA_ACTIVE) { osaf_cluster->init_time = clms_get_SaTime(); } + + osaf_cluster->rtu_pending = false; + TRACE("RTU pending flag is switched off"); } rc = SA_AIS_OK; done2: @@ -405,8 +445,8 @@ SaAisErrorT clms_imm_activate(CLMS_CB *c goto done; } + cb->is_impl_set = true; clms_all_node_rattr_update(); - cb->is_impl_set = true; } rc = SA_AIS_OK; @@ -428,6 +468,13 @@ void clms_admin_state_update_rattr(CLMS_ TRACE_ENTER2("Admin state %d update for node %s", nd->admin_state, nd->node_name.value); + CLMS_CLUSTER_NODE *node = NULL; + /* If this update was attempted for a node down, as a part of try-again-later, then + * we need to look up using the name, because the node_id record would + * have been deleted as a part of node down processing + */ + node = clms_node_get_by_name(&nd->node_name); + SaImmAttrValueT attrUpdateValue[] = { &nd->admin_state }; const SaImmAttrModificationT_2 *attrMods[] = { &attr_Mod[0], @@ -440,13 +487,33 @@ void clms_admin_state_update_rattr(CLMS_ attr_Mod[0].modAttr.attrValueType = SA_IMM_ATTR_SAUINT32T; attr_Mod[0].modAttr.attrValues = attrUpdateValue; - int errorsAreFatal = immutilWrapperProfile.errorsAreFatal; - immutilWrapperProfile.errorsAreFatal = 0; - rc = immutil_saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, &nd->node_name, attrMods); - immutilWrapperProfile.errorsAreFatal = errorsAreFatal; + rc = saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, &nd->node_name, attrMods); + if (rc == SA_AIS_OK) { + node->admin_rtu_pending = false; + /* Update the global flag for try-again only if cluster update is done. + * Note that the cluster update is done only after the node update. 
+ */ + if ((clms_cb->rtu_pending == true) && (node->rtu_pending == false) + && (osaf_cluster->rtu_pending == false)){ + clms_cb->rtu_pending = false; + TRACE("RTUpdate success. Turning off flag"); + } + } else if ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_TIMEOUT)) { + LOG_IN("saImmOiRtObjectUpdate for %s failed with rc = %u. Trying again", node->node_name.value, rc); + node->admin_rtu_pending = true; + clms_cb->rtu_pending = true; + } else { + /* Right now, there is no guarantee on IMM error codes. So Reinit for everything else */ + LOG_IN("saImmOiRtObjectUpdate for %s failed with rc = %u. Reinit with IMM", node->node_name.value, rc); + node->admin_rtu_pending = true; + clms_cb->rtu_pending = true; - if (rc != SA_AIS_OK) { - LOG_ER("saImmOiRtObjectUpdate FAILED %u, '%s'", rc, nd->node_name.value); + saImmOiFinalize(clms_cb->immOiHandle); + clms_cb->immOiHandle = 0; + clms_cb->is_impl_set = false; + + /* Initiate IMM reinitializtion in the background */ + clm_imm_reinit_bg(clms_cb); } TRACE_LEAVE(); @@ -473,7 +540,18 @@ void clms_node_update_rattr(CLMS_CLUSTER NULL }; + CLMS_CLUSTER_NODE *node = NULL; + TRACE_ENTER(); + node = clms_node_get_by_name(&nd->node_name); + + if (clms_cb->is_impl_set == false) { + TRACE("Implementer not yet set: Switching on the tryagain flag"); + node->rtu_pending = true; + clms_cb->rtu_pending = true; + TRACE_LEAVE(); + return; + } attr_Mod[0].modType = SA_IMM_ATTR_VALUES_REPLACE; attr_Mod[0].modAttr.attrName = "saClmNodeIsMember"; @@ -499,20 +577,37 @@ void clms_node_update_rattr(CLMS_CLUSTER attr_Mod[3].modAttr.attrValueType = SA_IMM_ATTR_SAUINT64T; attr_Mod[3].modAttr.attrValues = attrUpdateValue3; - int errorsAreFatal = immutilWrapperProfile.errorsAreFatal; - immutilWrapperProfile.errorsAreFatal = 0; - rc = immutil_saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, &nd->node_name, attrMods); - immutilWrapperProfile.errorsAreFatal = errorsAreFatal; + rc = saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, &nd->node_name, attrMods); - 
if (rc != SA_AIS_OK) { - LOG_ER("saImmOiRtObjectUpdate FAILED %u, '%s'", rc, nd->node_name.value); + if (rc == SA_AIS_OK) { + node->rtu_pending = false; + if ((clms_cb->rtu_pending == true) && (node->rtu_pending == false) + && (osaf_cluster->rtu_pending == false)){ + clms_cb->rtu_pending = false; + TRACE("RTU success. Turning off tryagain flag"); + } + } else if ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_TIMEOUT)) { + LOG_IN("saImmOiRtObjectUpdate for %s failed with rc = %u. Trying again", node->node_name.value, rc); + node->rtu_pending = true; + clms_cb->rtu_pending = true; + } else { + LOG_IN("saImmOiRtObjectUpdate for %s failed with rc = %u. Reinit with IMM", node->node_name.value, rc); + node->rtu_pending = true; + clms_cb->rtu_pending = true; + + saImmOiFinalize(clms_cb->immOiHandle); + clms_cb->immOiHandle = 0; + clms_cb->is_impl_set = false; + + /* Initiate IMM reinitializtion in the background */ + clm_imm_reinit_bg(clms_cb); } TRACE_LEAVE(); } /** -* Update IMMSv the runtime info of all node +* Update IMMSv the runtime info of all nodes */ void clms_all_node_rattr_update(void) { @@ -528,6 +623,74 @@ void clms_all_node_rattr_update(void) } /** +* Process all pending runtime attribute updates toward IMM +*/ +void clms_retry_pending_rtupdates(void) +{ + CLMS_CLUSTER_NODE *node = NULL; + SaNameT nodename = {0}; + TRACE_ENTER(); + + if (clms_cb->is_impl_set == false) { + TRACE_LEAVE2("Implementerset yet to happen, try later"); + return; + } + for (node = clms_node_getnext_by_name(&nodename); node != NULL; node = clms_node_getnext_by_name(&nodename)) { + if (node->rtu_pending == true) + clms_node_update_rattr(node); + if (node->admin_rtu_pending == true) + clms_admin_state_update_rattr(node); + memcpy(&nodename, &node->node_name, sizeof(SaNameT)); + } + + if (osaf_cluster->rtu_pending == true) + clms_cluster_update_rattr(osaf_cluster); + TRACE_LEAVE(); +} + +/** +* As a standby, clear all pending runtime attribute updates toward IMM +* The new active 
will take care of it. +*/ +void clms_switchoff_all_pending_rtupdates(void) +{ + CLMS_CLUSTER_NODE *node = NULL; + SaNameT nodename = {0}; + TRACE_ENTER(); + + for (node = clms_node_getnext_by_name(&nodename); node != NULL; node = clms_node_getnext_by_name(&nodename)) { + TRACE("Switching on the tryagain flag"); + node->rtu_pending = false; + node->admin_rtu_pending = false; + memcpy(&nodename, &node->node_name, sizeof(SaNameT)); + } + osaf_cluster->rtu_pending = false; + clms_cb->rtu_pending = false; + TRACE_LEAVE(); +} + +/** +* Mark the runtime attribute updates of all nodes and of the cluster as pending, +* so that they are retried toward IMM. +*/ +void clms_switchon_all_pending_rtupdates(void) +{ + CLMS_CLUSTER_NODE *node = NULL; + SaNameT nodename = {0}; + TRACE_ENTER(); + + for (node = clms_node_getnext_by_name(&nodename); node != NULL; node = clms_node_getnext_by_name(&nodename)) { + TRACE("Switching on the pending RTUs"); + node->rtu_pending = true; + node->admin_rtu_pending = true; + memcpy(&nodename, &node->node_name, sizeof(SaNameT)); + } + osaf_cluster->rtu_pending = true; + clms_cb->rtu_pending = true; + TRACE_LEAVE(); +} + +/** * Update IMMSv with the runtime info of the osaf cluster * @param[in] osaf_cluster pointer to CLM Cluster */ @@ -546,6 +709,14 @@ void clms_cluster_update_rattr(CLMS_CLUS TRACE_ENTER(); + if (clms_cb->is_impl_set == false) { + TRACE("Implementer is not set. 
Switching on flag in %s", __FUNCTION__); + osaf_cluster->rtu_pending = true; + clms_cb->rtu_pending = true; + TRACE_LEAVE(); + return; + } + attr_Mod[0].modType = SA_IMM_ATTR_VALUES_REPLACE; attr_Mod[0].modAttr.attrName = "saClmClusterNumNodes"; attr_Mod[0].modAttr.attrValuesNumber = 1; @@ -558,13 +729,29 @@ void clms_cluster_update_rattr(CLMS_CLUS attr_Mod[1].modAttr.attrValueType = SA_IMM_ATTR_SATIMET; attr_Mod[1].modAttr.attrValues = attrUpdateValue1; - int errorsAreFatal = immutilWrapperProfile.errorsAreFatal; - immutilWrapperProfile.errorsAreFatal = 0; - rc = immutil_saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, &osaf_cluster->name, attrMods); - immutilWrapperProfile.errorsAreFatal = errorsAreFatal; + rc = saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, &osaf_cluster->name, attrMods); - if (rc != SA_AIS_OK) { - LOG_ER("saImmOiRtObjectUpdate FAILED %u, '%s'", rc, osaf_cluster->name.value); + if (rc == SA_AIS_OK){ + osaf_cluster->rtu_pending = false; + if (clms_cb->rtu_pending == true) {/* Cluster update is done after node update */ + clms_cb->rtu_pending = false; + TRACE("RTU success, Switching off"); + } + } else if ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_TIMEOUT)) { + LOG_IN("saImmOiRtObjectUpdate failed for cluster object with rc = %u. Trying again", rc); + osaf_cluster->rtu_pending = true; + clms_cb->rtu_pending = true; + } else { + LOG_IN("saImmOiRtObjectUpdate failed for cluster object with rc = %u. 
Reinit with IMM", rc); + osaf_cluster->rtu_pending = true; + clms_cb->rtu_pending = true; + + saImmOiFinalize(clms_cb->immOiHandle); + clms_cb->immOiHandle = 0; + clms_cb->is_impl_set = false; + + /* Initiate IMM reinitializtion in the background */ + clm_imm_reinit_bg(clms_cb); } /* TBD: We need to handle a case where there's only one node, diff --git a/osaf/services/saf/clmsv/clms/clms_imm.h b/osaf/services/saf/clmsv/clms/clms_imm.h --- a/osaf/services/saf/clmsv/clms/clms_imm.h +++ b/osaf/services/saf/clmsv/clms/clms_imm.h @@ -45,4 +45,7 @@ extern uint32_t clms_prep_and_send_track extern uint32_t clms_send_track_local(CLMS_CLUSTER_NODE * node, CLMS_CLIENT_INFO * client, SaClmChangeStepT step); extern void clms_trackresp_patricia_init(CLMS_CLUSTER_NODE * node); +extern void clms_switchoff_all_pending_rtupdates(void); +extern void clms_switchon_all_pending_rtupdates(void); +extern void clms_retry_pending_rtupdates(void); #endif diff --git a/osaf/services/saf/clmsv/clms/clms_main.c b/osaf/services/saf/clmsv/clms/clms_main.c --- a/osaf/services/saf/clmsv/clms/clms_main.c +++ b/osaf/services/saf/clmsv/clms/clms_main.c @@ -227,6 +227,7 @@ uint32_t clms_cb_init(CLMS_CB * clms_cb) clms_cb->curr_invid = 1; clms_cb->immOiHandle = 0; clms_cb->is_impl_set = false; + clms_cb->rtu_pending = false; /* Flag to control try-again of rt-updates */ /* Assign Version. Currently, hardcoded, This will change later */ clms_cb->clm_ver.releaseCode = CLM_RELEASE_CODE; @@ -378,6 +379,7 @@ int main(int argc, char *argv[]) SaAisErrorT error = SA_AIS_OK; uint32_t rc; osaf_cluster = NULL; + int timeout = -1; daemonize(argc, argv); @@ -405,6 +407,13 @@ int main(int argc, char *argv[]) while (1) { + if (clms_cb->rtu_pending == true) { + LOG_IN("There is an IMM task to be tried again. 
setting poll time out to 500"); + timeout = 500; + }else { + timeout = -1; + } + if ((clms_cb->immOiHandle != 0) && (clms_cb->is_impl_set == true)) { fds[FD_IMM].fd = clms_cb->imm_sel_obj; fds[FD_IMM].events = POLLIN; @@ -412,8 +421,7 @@ int main(int argc, char *argv[]) } else { nfds = NUM_FD - 1; } - - int ret = poll(fds, nfds, -1); + int ret = poll(fds, nfds, timeout); if (ret == -1) { if (errno == EINTR) @@ -422,6 +430,14 @@ int main(int argc, char *argv[]) LOG_ER("poll failed - %s", strerror(errno)); break; } + + if (ret == 0) { + /* Process any/all pending RTAttribute updates to IMM */ + TRACE("poll time out processing pending updates"); + clms_retry_pending_rtupdates(); + continue; + } + if (fds[FD_AMF].revents & POLLIN) { if (clms_cb->amf_hdl != 0) { if ((error = saAmfDispatch(clms_cb->amf_hdl, SA_DISPATCH_ALL)) != SA_AIS_OK) { @@ -496,7 +512,10 @@ int main(int argc, char *argv[]) } } } - } + /* Retry any pending updates */ + if (clms_cb->rtu_pending == true) + clms_retry_pending_rtupdates(); + } /* End while (1) */ done: LOG_ER("Failed, exiting..."); ------------------------------------------------------------------------------ DreamFactory - Open Source REST & JSON Services for HTML5 & Native Apps OAuth, Users, Roles, SQL, NoSQL, BLOB Storage and External API Access Free app hosting. Or install the open source package on any LAMP server. Sign up and see examples for AngularJS, jQuery, Sencha Touch and Native! http://pubads.g.doubleclick.net/gampad/clk?id=63469471&iu=/4140/ostg.clktrk _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel