Thanks Mahesh. Below is the updated patch. //> diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc b/osaf/services/saf/logsv/lgs/lgs_clm.cc --- a/osaf/services/saf/logsv/lgs/lgs_clm.cc +++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc @@ -16,6 +16,7 @@ */ #include "osaf/services/saf/logsv/lgs/lgs.h" #include "osaf/services/saf/logsv/lgs/lgs_clm.h" +#include "osaf/libs/core/cplusplus/base/time.h"
static bool clm_initialized; static void *clm_node_db = NULL; /* used for C++ STL map */ @@ -348,13 +349,25 @@ void *lgs_clm_init_thread(void *cb) { static SaVersionT clmVersion = { 'B', 0x04, 0x01 }; lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb); SaAisErrorT rc; + TRACE_ENTER(); + rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, &clmVersion); + while ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_TIMEOUT)) { + if (_lgs_cb->clm_hdl != 0) { + saClmFinalize(_lgs_cb->clm_hdl); + _lgs_cb->clm_hdl = 0; + } + + base::Sleep(base::kOneHundredMilliseconds); + rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, &clmVersion); + } if (rc != SA_AIS_OK) { LOG_ER("saClmInitialize failed with error: %d", rc); TRACE_LEAVE(); exit(EXIT_FAILURE); } + rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl, &lgs_cb->clmSelectionObject); if (rc != SA_AIS_OK) { LOG_ER("saClmSelectionObjectGet failed with error: %d", rc); //< Regards, Vu > -----Original Message----- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Friday, November 25, 2016 10:43 AM > To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; 'Lennart Lund' > <lennart.l...@ericsson.com> > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] log: handle TRY_AGAIN error code of > saClmInitialize() [#2192] > > Hi Vu, > > SA_AIS_ERR_TIMEOUT returned means it is unspecified whether the call > succeeded or did not, > so you can do : > > Do a saClmFinalize() without capturing the error of `lgs_cb->clm_hdl` , > before saClmInitialize_4 > in while loop. > > or > > validate ` _lgs_cb->clm_hdl` before saClmInitialize_4 in while loop. > > -AVM > > > On 11/24/2016 1:46 PM, Vu Minh Nguyen wrote: > > Hi Mahesh, > > > > Have you had time to look at this? 
> > > > Regards, Vu > > > >> -----Original Message----- > >> From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au] > >> Sent: Tuesday, November 22, 2016 11:04 AM > >> To: 'Lennart Lund' <lennart.l...@ericsson.com>; > >> 'mahesh.va...@oracle.com' <mahesh.va...@oracle.com> > >> Cc: 'opensaf-devel@lists.sourceforge.net' <opensaf- > >> de...@lists.sourceforge.net> > >> Subject: RE: [PATCH 1 of 1] log: handle TRY_AGAIN error code of > >> saClmInitialize() [#2192] > >> > >> Thanks Lennart for your comments. Below is the fix due to your > comments. > >> > >> //> ==================== > >> diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc > >> b/osaf/services/saf/logsv/lgs/lgs_clm.cc > >> --- a/osaf/services/saf/logsv/lgs/lgs_clm.cc > >> +++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc > >> @@ -16,6 +16,7 @@ > >> */ > >> #include "osaf/services/saf/logsv/lgs/lgs.h" > >> #include "osaf/services/saf/logsv/lgs/lgs_clm.h" > >> +#include "osaf/libs/core/cplusplus/base/time.h" > >> > >> static bool clm_initialized; > >> static void *clm_node_db = NULL; /* used for C++ STL map */ > >> @@ -348,13 +349,20 @@ void *lgs_clm_init_thread(void *cb) { > >> static SaVersionT clmVersion = { 'B', 0x04, 0x01 }; > >> lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb); > >> SaAisErrorT rc; > >> + > >> TRACE_ENTER(); > >> + > >> rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, > &clmVersion); > >> + while ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == > SA_AIS_ERR_TIMEOUT)) { > >> + base::Sleep(base::kOneHundredMilliseconds); > >> + rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, > > &clmVersion); > >> + } > >> if (rc != SA_AIS_OK) { > >> LOG_ER("saClmInitialize failed with error: %d", rc); > >> TRACE_LEAVE(); > >> exit(EXIT_FAILURE); > >> } > >> + > >> rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl, &lgs_cb- > >>> clmSelectionObject); > >> if (rc != SA_AIS_OK) { > >> LOG_ER("saClmSelectionObjectGet failed with error: %d", rc); > >> > >> //< ==================== > >> > >> Regards, 
Vu > >> > >>> -----Original Message----- > >>> From: Lennart Lund [mailto:lennart.l...@ericsson.com] > >>> Sent: Monday, November 21, 2016 7:34 PM > >>> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; > >>> mahesh.va...@oracle.com > >>> Cc: opensaf-devel@lists.sourceforge.net > >>> Subject: RE: [PATCH 1 of 1] log: handle TRY_AGAIN error code of > >>> saClmInitialize() [#2192] > >>> > >>> Hi Vu, > >>> > >>> Ack with comment > >>> > >>> There is a generic timeout handling in OpenSAF that can be used with > > this > >>> kind of loops in order to make them simpler. You find that handling by > >>> including "base/time.h" > >>> The following is an example from SMF where this is used. Note that also > >>> base::Sleep is used instead of usleep(): > >>> > >>> const SaTimeT kNanoMillis = 1000000; > >>> SaAisErrorT oi_rc = SA_AIS_OK; > >>> SaAisErrorT imm_rc = SA_AIS_OK; > >>> m_errno = SA_AIS_OK; > >>> bool method_rc = false; > >>> base::Timer adminOpTimer(smfd_cb->adminOpTimeout / > >>> kNanoMillis); > >>> > >>> while (adminOpTimer.is_timeout() == false) { > >>> imm_rc = saImmOmAdminOperationInvoke_2( > >>> m_ownerHandle, > >>> &nodeGroupName, 0, adminOp, params, > >>> &oi_rc, > >>> smfd_cb->adminOpTimeout); > >>> if ((imm_rc == SA_AIS_ERR_TRY_AGAIN) || > >>> (imm_rc == SA_AIS_OK && oi_rc == > >>> SA_AIS_ERR_TRY_AGAIN)) { > >>> base::Sleep(base::MillisToTimespec(2000)); > >>> continue; > >>> } else if (imm_rc != SA_AIS_OK) { > >>> LOG_NO("%s: saImmOmAdminOperationInvoke_2 " > >>> "Fail %s", __FUNCTION__, > >>> saf_error(imm_rc)); > >>> m_errno = imm_rc; > >>> break; > >>> } else if (oi_rc != SA_AIS_OK) { > >>> LOG_NO("%s: SaAmfAdminOperationId %d Fail %s", > >>> __FUNCTION__, adminOp, saf_error(oi_rc)); > >>> m_errno = oi_rc; > >>> break; > >>> } else { > >>> // Operation success > >>> method_rc = true; > >>> break; > >>> } > >>> } > >>> if (adminOpTimer.is_timeout()) { > >>> // Do some error handling because of timeout > >>> . > >>> . > >>> . 
> >>> } > >>> > >>> Thanks > >>> Lennart > >>> > >>>> -----Original Message----- > >>>> From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au] > >>>> Sent: den 17 november 2016 07:31 > >>>> To: Lennart Lund <lennart.l...@ericsson.com>; > >> mahesh.va...@oracle.com > >>>> Cc: opensaf-devel@lists.sourceforge.net > >>>> Subject: [PATCH 1 of 1] log: handle TRY_AGAIN error code of > >>> saClmInitialize() > >>>> [#2192] > >>>> > >>>> osaf/services/saf/logsv/lgs/lgs_clm.cc | 12 ++++++++++++ > >>>> 1 files changed, 12 insertions(+), 0 deletions(-) > >>>> > >>>> > >>>> LOG did not deal with TRY_AGAIN error code of `saClmInitialize()`, > >>>> LOG would exit, and cause node reboot if getting TRY_AGAIN. > >>>> > >>>> The patch adds a while loop to do retry when getting TRY_AGAIN. > >>>> > >>>> diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc > >>>> b/osaf/services/saf/logsv/lgs/lgs_clm.cc > >>>> --- a/osaf/services/saf/logsv/lgs/lgs_clm.cc > >>>> +++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc > >>>> @@ -348,13 +348,25 @@ void *lgs_clm_init_thread(void *cb) { > >>>> static SaVersionT clmVersion = { 'B', 0x04, 0x01 }; > >>>> lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb); > >>>> SaAisErrorT rc; > >>>> + uint32_t msecs_waited = 0; > >>>> + const uint32_t max_waiting_time_10s = 10 * 1000; /* 10 secs */ > >>>> + > >>>> TRACE_ENTER(); > >>>> + > >>>> rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, > >> &clmVersion); > >>>> + while (((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == > >> SA_AIS_ERR_TIMEOUT) > >>>> || > >>>> + (rc == SA_AIS_ERR_UNAVAILABLE)) && > >>>> + (msecs_waited < max_waiting_time_10s)) { > >>>> + usleep(100*1000); > >>>> + msecs_waited += 100; > >>>> + rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, > >>> &clmVersion); > >>>> + } > >>>> if (rc != SA_AIS_OK) { > >>>> LOG_ER("saClmInitialize failed with error: %d", rc); > >>>> TRACE_LEAVE(); > >>>> exit(EXIT_FAILURE); > >>>> } > >>>> + > >>>> rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl, 
&lgs_cb- > >>>>> clmSelectionObject); > >>>> if (rc != SA_AIS_OK) { > >>>> LOG_ER("saClmSelectionObjectGet failed with error: %d", rc); ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel