Thanks, Lennart, for your comments. Below is the updated fix that addresses them.

//> ====================
diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc
b/osaf/services/saf/logsv/lgs/lgs_clm.cc
--- a/osaf/services/saf/logsv/lgs/lgs_clm.cc
+++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc
@@ -16,6 +16,7 @@
  */
 #include "osaf/services/saf/logsv/lgs/lgs.h"
 #include "osaf/services/saf/logsv/lgs/lgs_clm.h"
+#include "osaf/libs/core/cplusplus/base/time.h"

 static bool clm_initialized;
 static void *clm_node_db = NULL;       /* used for C++ STL map */
@@ -348,13 +349,20 @@ void *lgs_clm_init_thread(void *cb) {
   static SaVersionT clmVersion = { 'B', 0x04, 0x01 };
   lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb);
   SaAisErrorT rc;
+
   TRACE_ENTER();
+
   rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, &clmVersion);
+  while ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_TIMEOUT)) {
+    base::Sleep(base::kOneHundredMilliseconds);
+    rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, &clmVersion);
+  }
   if (rc != SA_AIS_OK) {
     LOG_ER("saClmInitialize failed with error: %d", rc);
     TRACE_LEAVE();
     exit(EXIT_FAILURE);
   }
+
   rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl,
&lgs_cb->clmSelectionObject);
   if (rc != SA_AIS_OK) {
     LOG_ER("saClmSelectionObjectGet failed with error: %d", rc);

//< ====================

Regards, Vu

> -----Original Message-----
> From: Lennart Lund [mailto:lennart.l...@ericsson.com]
> Sent: Monday, November 21, 2016 7:34 PM
> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>;
> mahesh.va...@oracle.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: RE: [PATCH 1 of 1] log: handle TRY_AGAIN error code of
> saClmInitialize() [#2192]
> 
> Hi Vu,
> 
> Ack with comment
> 
> There is generic timeout handling in OpenSAF that can be used with this
> kind of loop in order to make it simpler. You get that handling by
> including "base/time.h".
> The following is an example from SMF where this is used. Note that
> base::Sleep is also used instead of usleep():
> 
>       const SaTimeT kNanoMillis = 1000000;
>       SaAisErrorT oi_rc = SA_AIS_OK;
>       SaAisErrorT imm_rc = SA_AIS_OK;
>       m_errno = SA_AIS_OK;
>       bool method_rc = false;
>       base::Timer adminOpTimer(smfd_cb->adminOpTimeout /
> kNanoMillis);
> 
>       while (adminOpTimer.is_timeout() == false) {
>               imm_rc = saImmOmAdminOperationInvoke_2(
>                               m_ownerHandle,
>                               &nodeGroupName, 0, adminOp, params,
>                               &oi_rc,
>                               smfd_cb->adminOpTimeout);
>               if ((imm_rc == SA_AIS_ERR_TRY_AGAIN) ||
>                   (imm_rc == SA_AIS_OK && oi_rc ==
> SA_AIS_ERR_TRY_AGAIN)) {
>                       base::Sleep(base::MillisToTimespec(2000));
>                       continue;
>               } else if (imm_rc != SA_AIS_OK) {
>                       LOG_NO("%s: saImmOmAdminOperationInvoke_2 "
>                               "Fail %s", __FUNCTION__,
> saf_error(imm_rc));
>                       m_errno = imm_rc;
>                       break;
>               } else if (oi_rc != SA_AIS_OK) {
>                       LOG_NO("%s: SaAmfAdminOperationId %d Fail %s",
>                               __FUNCTION__, adminOp, saf_error(oi_rc));
>                       m_errno = oi_rc;
>                       break;
>               } else {
>                       // Operation success
>                       method_rc = true;
>                       break;
>               }
>       }
>       if (adminOpTimer.is_timeout()) {
>               // Do some error handling because of timeout
>               .
>               .
>               .
>       }
> 
> Thanks
> Lennart
> 
> > -----Original Message-----
> > From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au]
> > Sent: den 17 november 2016 07:31
> > To: Lennart Lund <lennart.l...@ericsson.com>; mahesh.va...@oracle.com
> > Cc: opensaf-devel@lists.sourceforge.net
> > Subject: [PATCH 1 of 1] log: handle TRY_AGAIN error code of
> saClmInitialize()
> > [#2192]
> >
> >  osaf/services/saf/logsv/lgs/lgs_clm.cc |  12 ++++++++++++
> >  1 files changed, 12 insertions(+), 0 deletions(-)
> >
> >
> > LOG did not handle the TRY_AGAIN error code from `saClmInitialize()`;
> > on TRY_AGAIN, LOG would exit and cause a node reboot.
> >
> > The patch adds a while loop that retries when TRY_AGAIN is returned.
> >
> > diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc
> > b/osaf/services/saf/logsv/lgs/lgs_clm.cc
> > --- a/osaf/services/saf/logsv/lgs/lgs_clm.cc
> > +++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc
> > @@ -348,13 +348,25 @@ void *lgs_clm_init_thread(void *cb) {
> >    static SaVersionT clmVersion = { 'B', 0x04, 0x01 };
> >    lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb);
> >    SaAisErrorT rc;
> > +  uint32_t msecs_waited = 0;
> > +  const uint32_t max_waiting_time_10s = 10 * 1000; /* 10 secs */
> > +
> >    TRACE_ENTER();
> > +
> >    rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks,
&clmVersion);
> > +  while (((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_TIMEOUT)
> > ||
> > +          (rc == SA_AIS_ERR_UNAVAILABLE)) &&
> > +         (msecs_waited < max_waiting_time_10s)) {
> > +    usleep(100*1000);
> > +    msecs_waited += 100;
> > +    rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks,
> &clmVersion);
> > +  }
> >    if (rc != SA_AIS_OK) {
> >      LOG_ER("saClmInitialize failed with error: %d", rc);
> >      TRACE_LEAVE();
> >      exit(EXIT_FAILURE);
> >    }
> > +
> >    rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl, &lgs_cb-
> > >clmSelectionObject);
> >    if (rc != SA_AIS_OK) {
> >      LOG_ER("saClmSelectionObjectGet failed with error: %d", rc);


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to