Thanks Mahesh. Below is the updated patch.

//>
diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc
b/osaf/services/saf/logsv/lgs/lgs_clm.cc
--- a/osaf/services/saf/logsv/lgs/lgs_clm.cc
+++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc
@@ -16,6 +16,7 @@
  */
 #include "osaf/services/saf/logsv/lgs/lgs.h"
 #include "osaf/services/saf/logsv/lgs/lgs_clm.h"
+#include "osaf/libs/core/cplusplus/base/time.h"

 static bool clm_initialized;
 static void *clm_node_db = NULL;       /* used for C++ STL map */
@@ -348,13 +349,25 @@ void *lgs_clm_init_thread(void *cb) {
   static SaVersionT clmVersion = { 'B', 0x04, 0x01 };
   lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb);
   SaAisErrorT rc;
+
   TRACE_ENTER();
+
   rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, &clmVersion);
+  while ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc == SA_AIS_ERR_TIMEOUT)) {
+    if (_lgs_cb->clm_hdl != 0) {
+      saClmFinalize(_lgs_cb->clm_hdl);
+      _lgs_cb->clm_hdl = 0;
+    }
+
+    base::Sleep(base::kOneHundredMilliseconds);
+    rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks, &clmVersion);
+  }
   if (rc != SA_AIS_OK) {
     LOG_ER("saClmInitialize failed with error: %d", rc);
     TRACE_LEAVE();
     exit(EXIT_FAILURE);
   }
+
   rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl,
&lgs_cb->clmSelectionObject);
   if (rc != SA_AIS_OK) {
     LOG_ER("saClmSelectionObjectGet failed with error: %d", rc);
//<


Regards, Vu

> -----Original Message-----
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Friday, November 25, 2016 10:43 AM
> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; 'Lennart Lund'
> <lennart.l...@ericsson.com>
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] log: handle TRY_AGAIN error code of
> saClmInitialize() [#2192]
> 
> HI Vu,
> 
> When SA_AIS_ERR_TIMEOUT is returned, it is unspecified whether the call
> succeeded or not,
> so you can either:
> 
> Do a saClmFinalize() on `lgs_cb->clm_hdl`, without capturing its error,
> before saClmInitialize_4
> in the while loop.
> 
> or
> 
> validate `_lgs_cb->clm_hdl` before saClmInitialize_4 in the while loop.
> 
> -AVM
> 
> 
> On 11/24/2016 1:46 PM, Vu Minh Nguyen wrote:
> > Hi Mahesh,
> >
> > Have you had time to look at this?
> >
> > Regards, Vu
> >
> >> -----Original Message-----
> >> From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au]
> >> Sent: Tuesday, November 22, 2016 11:04 AM
> >> To: 'Lennart Lund' <lennart.l...@ericsson.com>;
> >> 'mahesh.va...@oracle.com' <mahesh.va...@oracle.com>
> >> Cc: 'opensaf-devel@lists.sourceforge.net' <opensaf-
> >> de...@lists.sourceforge.net>
> >> Subject: RE: [PATCH 1 of 1] log: handle TRY_AGAIN error code of
> >> saClmInitialize() [#2192]
> >>
> >> Thanks Lennart for your comments. Below is the fix addressing your
> >> comments.
> >>
> >> //> ====================
> >> diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >> b/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >> --- a/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >> +++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >> @@ -16,6 +16,7 @@
> >>    */
> >>   #include "osaf/services/saf/logsv/lgs/lgs.h"
> >>   #include "osaf/services/saf/logsv/lgs/lgs_clm.h"
> >> +#include "osaf/libs/core/cplusplus/base/time.h"
> >>
> >>   static bool clm_initialized;
> >>   static void *clm_node_db = NULL;       /* used for C++ STL map */
> >> @@ -348,13 +349,20 @@ void *lgs_clm_init_thread(void *cb) {
> >>     static SaVersionT clmVersion = { 'B', 0x04, 0x01 };
> >>     lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb);
> >>     SaAisErrorT rc;
> >> +
> >>     TRACE_ENTER();
> >> +
> >>     rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks,
> &clmVersion);
> >> +  while ((rc == SA_AIS_ERR_TRY_AGAIN) || (rc ==
> SA_AIS_ERR_TIMEOUT)) {
> >> +    base::Sleep(base::kOneHundredMilliseconds);
> >> +    rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks,
> > &clmVersion);
> >> +  }
> >>     if (rc != SA_AIS_OK) {
> >>       LOG_ER("saClmInitialize failed with error: %d", rc);
> >>       TRACE_LEAVE();
> >>       exit(EXIT_FAILURE);
> >>     }
> >> +
> >>     rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl, &lgs_cb-
> >>> clmSelectionObject);
> >>     if (rc != SA_AIS_OK) {
> >>       LOG_ER("saClmSelectionObjectGet failed with error: %d", rc);
> >>
> >> //< ====================
> >>
> >> Regards, Vu
> >>
> >>> -----Original Message-----
> >>> From: Lennart Lund [mailto:lennart.l...@ericsson.com]
> >>> Sent: Monday, November 21, 2016 7:34 PM
> >>> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>;
> >>> mahesh.va...@oracle.com
> >>> Cc: opensaf-devel@lists.sourceforge.net
> >>> Subject: RE: [PATCH 1 of 1] log: handle TRY_AGAIN error code of
> >>> saClmInitialize() [#2192]
> >>>
> >>> Hi Vu,
> >>>
> >>> Ack with comment
> >>>
> >>> There is generic timeout handling in OpenSAF that can be used with this
> >>> kind of loop in order to make it simpler. You find that handling by
> >>> including "base/time.h"
> >>> The following is an example from SMF where this is used. Note also that
> >>> base::Sleep is used instead of usleep():
> >>>
> >>>   const SaTimeT kNanoMillis = 1000000;
> >>>   SaAisErrorT oi_rc = SA_AIS_OK;
> >>>   SaAisErrorT imm_rc = SA_AIS_OK;
> >>>   m_errno = SA_AIS_OK;
> >>>   bool method_rc = false;
> >>>   base::Timer adminOpTimer(smfd_cb->adminOpTimeout /
> >>> kNanoMillis);
> >>>
> >>>   while (adminOpTimer.is_timeout() == false) {
> >>>           imm_rc = saImmOmAdminOperationInvoke_2(
> >>>                           m_ownerHandle,
> >>>                           &nodeGroupName, 0, adminOp, params,
> >>>                           &oi_rc,
> >>>                           smfd_cb->adminOpTimeout);
> >>>           if ((imm_rc == SA_AIS_ERR_TRY_AGAIN) ||
> >>>               (imm_rc == SA_AIS_OK && oi_rc ==
> >>> SA_AIS_ERR_TRY_AGAIN)) {
> >>>                   base::Sleep(base::MillisToTimespec(2000));
> >>>                   continue;
> >>>           } else if (imm_rc != SA_AIS_OK) {
> >>>                   LOG_NO("%s: saImmOmAdminOperationInvoke_2 "
> >>>                           "Fail %s", __FUNCTION__,
> >>> saf_error(imm_rc));
> >>>                   m_errno = imm_rc;
> >>>                   break;
> >>>           } else if (oi_rc != SA_AIS_OK) {
> >>>                   LOG_NO("%s: SaAmfAdminOperationId %d Fail %s",
> >>>                           __FUNCTION__, adminOp, saf_error(oi_rc));
> >>>                   m_errno = oi_rc;
> >>>                   break;
> >>>           } else {
> >>>                   // Operation success
> >>>                   method_rc = true;
> >>>                   break;
> >>>           }
> >>>   }
> >>>   if (adminOpTimer.is_timeout()) {
> >>>           // Do some error handling because of timeout
> >>>           .
> >>>           .
> >>>           .
> >>>   }
> >>>
> >>> Thanks
> >>> Lennart
> >>>
> >>>> -----Original Message-----
> >>>> From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au]
> >>>> Sent: den 17 november 2016 07:31
> >>>> To: Lennart Lund <lennart.l...@ericsson.com>;
> >> mahesh.va...@oracle.com
> >>>> Cc: opensaf-devel@lists.sourceforge.net
> >>>> Subject: [PATCH 1 of 1] log: handle TRY_AGAIN error code of
> >>> saClmInitialize()
> >>>> [#2192]
> >>>>
> >>>>   osaf/services/saf/logsv/lgs/lgs_clm.cc |  12 ++++++++++++
> >>>>   1 files changed, 12 insertions(+), 0 deletions(-)
> >>>>
> >>>>
> >>>> LOG did not handle the TRY_AGAIN error code of `saClmInitialize()`:
> >>>> on TRY_AGAIN, LOG would exit and cause a node reboot.
> >>>>
> >>>> The patch adds a while loop to retry when getting TRY_AGAIN.
> >>>>
> >>>> diff --git a/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >>>> b/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >>>> --- a/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >>>> +++ b/osaf/services/saf/logsv/lgs/lgs_clm.cc
> >>>> @@ -348,13 +348,25 @@ void *lgs_clm_init_thread(void *cb) {
> >>>>     static SaVersionT clmVersion = { 'B', 0x04, 0x01 };
> >>>>     lgs_cb_t *_lgs_cb = reinterpret_cast<lgs_cb_t *> (cb);
> >>>>     SaAisErrorT rc;
> >>>> +  uint32_t msecs_waited = 0;
> >>>> +  const uint32_t max_waiting_time_10s = 10 * 1000; /* 10 secs */
> >>>> +
> >>>>     TRACE_ENTER();
> >>>> +
> >>>>     rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks,
> >> &clmVersion);
> >>>> +  while (((rc == SA_AIS_ERR_TRY_AGAIN) || (rc ==
> >> SA_AIS_ERR_TIMEOUT)
> >>>> ||
> >>>> +          (rc == SA_AIS_ERR_UNAVAILABLE)) &&
> >>>> +         (msecs_waited < max_waiting_time_10s)) {
> >>>> +    usleep(100*1000);
> >>>> +    msecs_waited += 100;
> >>>> +    rc = saClmInitialize_4(&_lgs_cb->clm_hdl, &clm_callbacks,
> >>> &clmVersion);
> >>>> +  }
> >>>>     if (rc != SA_AIS_OK) {
> >>>>       LOG_ER("saClmInitialize failed with error: %d", rc);
> >>>>       TRACE_LEAVE();
> >>>>       exit(EXIT_FAILURE);
> >>>>     }
> >>>> +
> >>>>     rc = saClmSelectionObjectGet(_lgs_cb->clm_hdl, &lgs_cb-
> >>>>> clmSelectionObject);
> >>>>     if (rc != SA_AIS_OK) {
> >>>>       LOG_ER("saClmSelectionObjectGet failed with error: %d", rc);



------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to