Hi Thang, I assume this is the fix we have already discussed, with no changes. If that's the case, Ack.
Thanks Lennart -----Original Message----- From: thang.d.nguyen <thang.d.ngu...@dektech.com.au> Sent: den 26 augusti 2019 12:10 To: Lennart Lund <lennart.l...@ericsson.com>; Vu Minh Nguyen <vu.m.ngu...@dektech.com.au> Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen <thang.d.ngu...@dektech.com.au> Subject: [PATCH 1/1] smf: improve ccb handler rules for recovery [#3071] Retryin ccb handler when ccb aborted to sync. --- src/smf/smfd/SmfUpgradeAction.cc | 83 +++++------------------- src/smf/smfd/SmfUpgradeStep.cc | 77 ++++------------------ src/smf/smfd/imm_modify_config/immccb.cc | 32 +++++++-- 3 files changed, 57 insertions(+), 135 deletions(-) diff --git a/src/smf/smfd/SmfUpgradeAction.cc b/src/smf/smfd/SmfUpgradeAction.cc index b0919c2b8..fca9311c7 100644 --- a/src/smf/smfd/SmfUpgradeAction.cc +++ b/src/smf/smfd/SmfUpgradeAction.cc @@ -485,73 +485,26 @@ SaAisErrorT SmfImmCcbAction::execute(SaImmOiHandleT i_oiHandle, if (m_operations.size() > 0) { TRACE("Imm Ccb Action"); - SmfImmUtils immUtil; - uint32_t retry_count = 0; - while (1) { - if (i_rollbackDn != NULL) { - rollbackCcb = - new (std::nothrow) SmfRollbackCcb(immRollbackCcbDn, i_oiHandle); - if (rollbackCcb == NULL) { - LOG_ER("SmfImmCcbAction::execute failed to create SmfRollbackCcb"); - return SA_AIS_ERR_NO_MEMORY; - } + SmfImmUtils smfImmUtils; + + if (i_rollbackDn != NULL) { + rollbackCcb = + new (std::nothrow) SmfRollbackCcb(immRollbackCcbDn, i_oiHandle); + if (rollbackCcb == NULL) { + LOG_ER("SmfImmCcbAction::execute failed to create SmfRollbackCcb"); + return SA_AIS_ERR_NO_MEMORY; } - result = immUtil.doImmOperations(m_operations, rollbackCcb); - if (((result == SA_AIS_ERR_TIMEOUT) || (result == SA_AIS_ERR_NOT_EXIST)) - && (retry_count <= 6)) { - int interval = 5; // seconds - // When IMM aborts a CCB because of synch request from a payload, then - // the next call of CCBInitialize() will return TRY_AGAIN till the time - // the synch is complete. 
- // There is no direct information available to the OM that can indicate - // that the CCB or the Adminownerset failed because of an abort and also - // there is no notification that can indicate that IMM is ready now. - // That leaves SMF with the option to correlate error codes returned. - // - // Notes on treatment of SA_AIS_ERR_TIMEOUT and SA_AIS_ERR_NOT_EXIST - // error codes: - // - // 1) CCB abort when it is not the first operation(create/modify/delete) - // in the CCB - // and if there is dependency between objects in the CCB:- - // - // An abort of a CCB and if the objects(Create/Modify/delete) had - // some dependency(parent-child) among them, then an API call of - // AdminOwnerSet() or the CCBCreate/Delete/Modify() on a dependant - // object will return SA_AIS_ERR_NOT_EXIST, because the CCB aborted. - // - // 2) CCB abort when it is a first operation and/or there is no - // intra-ccb objects-dependency:- - // - // When an ongoing CCB is aborted because of a synch request originating - // from a payload, then the AdminOwnerSet() or the - // CCBCreate/Delete/Modify() will return timeout. - - ++retry_count; - LOG_NO("SmfImmCcbAction::execute failed with error: %s", - saf_error(result)); - LOG_NO("CCB was aborted!?, Retrying: %u", retry_count); - // Total retry time of 2.5 minutes for a worst case IMM loaded with say - // < 300k objects. Retry every 25 seconds. i.e. (nanosleep for 5 - // seconds) + (immutil_ccbInitialize will worstcase wait till 20 - // seconds). 
- struct timespec sleepTime = {interval, 0}; - osaf_nanosleep(&sleepTime); - if (rollbackCcb != NULL) { - delete rollbackCcb; - rollbackCcb = NULL; - } - continue; - } else if (result != SA_AIS_OK) { - LOG_ER("SmfImmCcbAction::execute failed, result=%s", - saf_error(result)); - if (rollbackCcb != NULL) { - delete rollbackCcb; - rollbackCcb = NULL; - } + } + + result = smfImmUtils.doImmOperations(m_operations, rollbackCcb); + if (result != SA_AIS_OK) { + LOG_ER("SmfImmCcbAction::execute failed, result=%s", + saf_error(result)); + if (rollbackCcb != NULL) { + delete rollbackCcb; + rollbackCcb = NULL; } - break; - }/* End while (1) */ + } } if (rollbackCcb != NULL) { diff --git a/src/smf/smfd/SmfUpgradeStep.cc b/src/smf/smfd/SmfUpgradeStep.cc index 60fd08372..a029a63e1 100644 --- a/src/smf/smfd/SmfUpgradeStep.cc +++ b/src/smf/smfd/SmfUpgradeStep.cc @@ -692,76 +692,25 @@ SaAisErrorT SmfUpgradeStep::modifyInformationModel() { if (m_modificationList.size() > 0) { TRACE("Modifying information model"); - SmfImmUtils immUtil; - uint32_t retry_count = 0; - - while (1) { - SmfRollbackCcb rollbackCcb( - modifyRollbackCcbDn, getProcedure()->getProcThread()->getImmHandle()); - - rc = immUtil.doImmOperations(m_modificationList, &rollbackCcb); - if (((rc == SA_AIS_ERR_TIMEOUT) || (rc == SA_AIS_ERR_NOT_EXIST)) && - (retry_count <= 6)) { - int interval = 5; // seconds - // When IMM aborts a CCB because of synch request from a payload, then - // the next call of CCBInitialize() will return TRY_AGAIN till the time - // the synch is complete. - // There is no direct information available to the OM that can indicate - // that the CCB or the Adminownerset failed because of an abort and also - // there is no notification that can indicate that IMM is ready now. - // That leaves SMF with the option to correlate error codes returned. 
- // - // Notes on treatment of SA_AIS_ERR_TIMEOUT and SA_AIS_ERR_NOT_EXIST - // error codes: - // - // 1) CCB abort when it is not the first operation(create/modify/delete) - // in the CCB - // and if there is dependency between objects in the CCB:- - // - // An abort of a CCB and if the objects(Create/Modify/delete) had - // some dependency(parent-child) among them, then an API call of - // AdminOwnerSet() or the CCBCreate/Delete/Modify() on a dependant - // object will return SA_AIS_ERR_NOT_EXIST, because the CCB aborted. - // - // 2) CCB abort when it is a first operation and/or there is no - // intra-ccb objects-dependency:- - // - // When an ongoing CCB is aborted because of a synch request originating - // from a payload, then the AdminOwnerSet() or the - // CCBCreate/Delete/Modify() will return timeout. - - ++retry_count; - LOG_NO("SmfUpgradeStep modify IMM failed with error: %s", - saf_error(rc)); - LOG_NO("CCB was aborted!?, Retrying: %u", retry_count); - // Total retry time of 2.5 minutes for a worst case IMM loaded with say - // < 300k objects. Retry every 25 seconds. i.e. (nanosleep for 5 - // seconds) + (immutil_ccbInitialize will worstcase wait till 20 - // seconds). - struct timespec sleepTime = {interval, 0}; - osaf_nanosleep(&sleepTime); - // Note: Alternatively Make rollbackCcb unique by adding a method for - // this to the rollbackCcb. 
- continue; - } else if (rc != SA_AIS_OK) { - LOG_NO("Giving up, SmfUpgradeStep modify IMM failed, rc=%s", - saf_error(rc)); - return rc; - } else { /* Things went fine */ + SmfImmUtils smfimmUtils; - if ((rc = rollbackCcb.execute()) != SA_AIS_OK) { - LOG_NO("SmfUpgradeStep failed to store rollback CCB, rc=%s", - saf_error(rc)); - return rc; - } - break; + SmfRollbackCcb rollbackCcb( + modifyRollbackCcbDn, + getProcedure()->getProcThread()->getImmHandle()); + + rc = smfimmUtils.doImmOperations(m_modificationList, &rollbackCcb); + if (rc == SA_AIS_OK) { + if ((rc = rollbackCcb.execute()) != SA_AIS_OK) { + LOG_NO("SmfUpgradeStep failed to store rollback CCB, rc=%s", + saf_error(rc)); } - } /* End while (1) */ + } else { + LOG_NO("SmfUpgradeStep::modifyInformationModel: Fail, %s", saf_error(rc)); + } } else { TRACE("Nothing to modify in information model"); } - return SA_AIS_OK; + return rc; } //------------------------------------------------------------------------------ diff --git a/src/smf/smfd/imm_modify_config/immccb.cc b/src/smf/smfd/imm_modify_config/immccb.cc index cc62911db..c8cdac2d3 100644 --- a/src/smf/smfd/imm_modify_config/immccb.cc +++ b/src/smf/smfd/imm_modify_config/immccb.cc @@ -220,11 +220,15 @@ int ModelModification::CreateObjectManager() { bool return_state = imm_om_handle_->InitializeHandle(); if (return_state == false) { - // No recovery is possible - LOG_NO("%s: OM-handle, RestoreHandle(), Fail", __FUNCTION__); - recovery_info = kFail; - api_name_ = "saImmOmInitialize"; - ais_error_ = imm_om_handle_->ais_error(); + SaAisErrorT ais_error = imm_om_handle_->ais_error(); + if (ais_error == SA_AIS_ERR_TRY_AGAIN) { + recovery_info = kRestartOm; + } else { + // No recovery is possible + LOG_NO("%s: OM-handle, RestoreHandle(), Fail", __FUNCTION__); + recovery_info = kFail; + api_name_ = "saImmOmInitialize"; + } } else { recovery_info = kContinue; } @@ -418,6 +422,14 @@ int ModelModification::AdminOwnerSet(const std::vector<std::string>& objects, 
return recovery_info; } +static bool has_object(const std::vector<CreateDescriptor>& create_descriptors, + const std::string& object) { + for (const auto& desc : create_descriptors) { + if (object == desc.parent_name) return true; + } + return false; +} + // Add create requests for all objects to be created // Set admin ownership for parent to all objects to be created with scope // SA_IMM_ONE. @@ -440,9 +452,17 @@ int ModelModification::AddCreates(const std::vector<CreateDescriptor>& // Become admin owner of parent if there is a parent. If no parent the // IMM object will be created as a root object std::vector<std::string> imm_objects; - imm_objects.push_back(create_descriptor.parent_name); + std::string parent = create_descriptor.parent_name; + imm_objects.push_back(parent); recovery_info = AdminOwnerSet(imm_objects, SA_IMM_ONE); if (recovery_info == kFail) { + // data for this ccb is valid but it is aborted by imm to start + // IMM sync, therefore marking this ccb is restartable. + if (ais_error_ == SA_AIS_ERR_NOT_EXIST && + has_object(create_descriptors, parent)) { + recovery_info = kRestartOm; + break; + } LOG_NO("%s: AdminOwnerSet() Fail", __FUNCTION__); break; } else if (recovery_info == kRestartOm) { -- 2.17.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel