Hi Canh,

I am aware of #2899 which causes the LOG unresponsive, but LOG (or any
service) unresponsiveness can be of any other reasons, not only #2899.

Thanks

Minh


On 03/08/18 17:44, Canh Van Truong wrote:
> Hi aMinh,
>
> Thanks for your comments.
> This patch just help enhance to avoid ntf loop long time in case log service 
> may up slowly and user of log (ntf here)  may be delay.
> The issue why Log still does not response is ticket 2899 and that ticket is 
> reviewing. 
>
> Thanks
> Canh
>
> -----Original Message-----
> From: Minh Hon Chau <minh.c...@dektech.com.au> 
> Sent: Friday, August 3, 2018 2:32 PM
> To: Canh Van Truong <canh.v.tru...@dektech.com.au>; vu.m.ngu...@dektech.com.au
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1/1] ntf: Update timeout for initializing log client 
> [#2878]
>
> Hi Canh,
>
> This issue seems to be partially fixed, we now prioritize to respond to
> AMF callback. But if LOG service is still not responsive, NTF will run
> without logging capability. I think we can address the second issue in
> another ticket.
>
> Ack from me for code review, one comment in line.
>
> Thanks
>
> Minh
>
>
> On 26/06/18 16:53, Canh Van Truong wrote:
>> When NTFD is started, the NtfAdmin class object is defined and log client is
>> initialized in this object. The initialiation of client may be loop forever
>> if TRY_AGAIN error always return. Somehow log service has not been started on
>> time or log service is busy, NTFD is kept in the loop. And the AMF hasn't
>> received csi callback.
>>
>> This patch update the timeout when initializing log client.
>> ---
>>  src/ntf/ntfd/NtfLogger.cc | 165 
>> +++++++++++++++++++++++-----------------------
>>  src/ntf/ntfd/NtfLogger.h  |   4 +-
>>  2 files changed, 85 insertions(+), 84 deletions(-)
>>
>> diff --git a/src/ntf/ntfd/NtfLogger.cc b/src/ntf/ntfd/NtfLogger.cc
>> index fd17c58a2..9878824c3 100644
>> --- a/src/ntf/ntfd/NtfLogger.cc
>> +++ b/src/ntf/ntfd/NtfLogger.cc
>> @@ -20,17 +20,18 @@
>>   *   INCLUDE FILES
>>   * ========================================================================
>>   */
>> -#include <sys/poll.h>
>> +#include "ntf/ntfd/NtfLogger.h"
>>  
>> -#include "base/osaf_utility.h"
>> +#include <sys/poll.h>
>>  #include <saAis.h>
>>  #include <saLog.h>
>> -#include "ntf/ntfd/NtfAdmin.h"
>> -#include "ntf/ntfd/NtfLogger.h"
>>  #include "ntfs_com.h"
>> -#include "base/logtrace.h"
>>  #include "ntf/common/ntfsv_mem.h"
>> +#include "ntf/ntfd/NtfAdmin.h"
>> +#include "base/logtrace.h"
>>  #include "base/osaf_extended_name.h"
>> +#include "base/osaf_utility.h"
>> +#include "base/osaf_time.h"
>>  
>>  /* ========================================================================
>>   *   DEFINITIONS
>> @@ -38,7 +39,6 @@
>>   */
>>  #define LOG_OPEN_TIMEOUT SA_TIME_ONE_SECOND
>>  #define LOG_NOTIFICATION_TIMEOUT SA_TIME_ONE_SECOND
>> -#define AIS_TIMEOUT 500000
>>  
>>  /* ========================================================================
>>   *   TYPE DEFINITIONS
>> @@ -65,15 +65,15 @@ void saLogWriteLogCallback(SaInvocationT invocation, 
>> SaAisErrorT error);
>>   */
>>  
>>  static SaLogCallbacksT logCallbacks = {NULL, NULL, saLogWriteLogCallback};
>> -
>>  static SaLogHandleT logHandle;
>>  static SaLogStreamHandleT alarmStreamHandle;
>> +static bool log_client_initialized = false;
>>  const SaVersionT kLogVersion = {'A', 2, 3};
>> +const uint64_t kWaitTime = 8 * 1000; // Wait for timeout is 8 seconds
>>  
>>  NtfLogger::NtfLogger() : readCounter(0) {
>>    if (SA_AIS_OK != initLog()) {
>> -    LOG_ER("initialize saflog failed exiting...");
>> -    exit(EXIT_FAILURE);
>> +    LOG_WA("Initialize saflog failed");
>>    }
>>  }
> [Minh]: I think the exit() was there if initLog() fails, so that NID/AMF
> can restart the NTF service. We can remove it unless NTF has completed
> recycling LOG handle from BAD_HANDLE/TIMEOUT/...
>>  
>> @@ -97,6 +97,8 @@ void saLogWriteLogCallback(SaInvocationT invocation, 
>> SaAisErrorT error) {
>>  
>>      TRACE_1("Error when logging (%d), queue for relogging", error);
>>  
>> +    if (error == SA_AIS_ERR_BAD_HANDLE) log_client_initialized = false;
>> +
>>      notification = NtfAdmin::theNtfAdmin->getNotificationById(
>>          (SaNtfIdentifierT)invocation);
>>  
>> @@ -151,6 +153,15 @@ void NtfLogger::queueNotifcation(NtfSmartPtr& notif) {
>>  
>>  void NtfLogger::checkQueueAndLog(NtfSmartPtr& newNotif) {
>>    TRACE_ENTER();
>> +
>> +  // Initialize client if it hasn't
>> +  if (log_client_initialized == false) {
>> +    if (SA_AIS_OK != initLog()) {
>> +      queueNotifcation(newNotif);
>> +      return;
>> +    }
>> +  }
>> +
>>    /* Check if there are not logged notifications in queue */
>>    while (!queuedNotificationList.empty()) {
>>      NtfSmartPtr notification = queuedNotificationList.front();
>> @@ -174,7 +185,7 @@ void NtfLogger::checkQueueAndLog(NtfSmartPtr& newNotif) {
>>  
>>  SaAisErrorT NtfLogger::logNotification(NtfSmartPtr& notif) {
>>    /* Write to the log if we're the local node */
>> -  SaAisErrorT errorCode = SA_AIS_OK;
>> +  SaAisErrorT rc = SA_AIS_OK;
>>    SaLogHeaderT logHeader;
>>    char addTextBuf[MAX_ADDITIONAL_TEXT_LENGTH] = {0};
>>    SaLogBufferT logBuffer;
>> @@ -219,10 +230,10 @@ SaAisErrorT NtfLogger::logNotification(NtfSmartPtr& 
>> notif) {
>>    if ((notif->sendNotInfo_->notificationType == SA_NTF_TYPE_ALARM) ||
>>        (notif->sendNotInfo_->notificationType == 
>> SA_NTF_TYPE_SECURITY_ALARM)) {
>>      TRACE_2("Logging notification to alarm stream");
>> -    errorCode =
>> +    rc =
>>          saLogWriteLogAsync(alarmStreamHandle, notif->getNotificationId(),
>>                             SA_LOG_RECORD_WRITE_ACK, &logRecord);
>> -    switch (errorCode) {
>> +    switch (rc) {
>>        case SA_AIS_OK:
>>          break;
>>  
>> @@ -230,107 +241,97 @@ SaAisErrorT NtfLogger::logNotification(NtfSmartPtr& 
>> notif) {
>>        case SA_AIS_ERR_TRY_AGAIN:
>>        case SA_AIS_ERR_TIMEOUT:
>>          TRACE("Failed to log notification (ret: %d). Try next time.",
>> -              errorCode);
>> +              rc);
>> +        break;
>> +      case SA_AIS_ERR_BAD_HANDLE:
>> +        TRACE("BAD HANDLE. Log client should be re-initialized");
>> +        log_client_initialized = false;
>>          break;
>> -
>>        default:
>> -        osaf_abort(errorCode);
>> +        LOG_ER("Writing failed: rc = %d", rc);
>> +        osaf_abort(rc);
>>      }
>>    }
>>  
>>    TRACE_LEAVE();
>> -  return errorCode;
>> +  return rc;
>>  }
>>  
>>  SaAisErrorT NtfLogger::initLog() {
>> -  SaAisErrorT result;
>> +  SaAisErrorT rc = SA_AIS_OK;
>>    SaNameT alarmStreamName;
>> -  osaf_extended_name_lend(SA_LOG_STREAM_ALARM, &alarmStreamName);
>> -  int first_try = 1;
>>    SaVersionT log_version = kLogVersion;
>> +  struct timespec timeout_time;
>>  
>>    TRACE_ENTER();
>>  
>> -  /* Initialize the Log service */
>> -  do {
>> -    result = saLogInitialize(&logHandle, &logCallbacks, &log_version);
>> -    if (SA_AIS_ERR_TRY_AGAIN == result) {
>> -      if (first_try) {
>> -        LOG_WA("saLogInitialize returns try again, retries...");
>> -        first_try = 0;
>> -      }
>> -      usleep(AIS_TIMEOUT);
>> -      log_version = kLogVersion;
>> -    }
>> -  } while (SA_AIS_ERR_TRY_AGAIN == result);
>> +  osaf_extended_name_lend(SA_LOG_STREAM_ALARM, &alarmStreamName);
>>  
>> -  if (SA_AIS_OK != result) {
>> -    LOG_ER("Log initialize result is %d", result);
>> -    goto exit_point;
>> -  }
>> -  if (!first_try) {
>> -    LOG_IN("saLogInitialize ok");
>> -    first_try = 1;
>> +  // Initialize the Log service
>> +  osaf_set_millis_timeout(kWaitTime, &timeout_time);
>> +  while (!osaf_is_timeout(&timeout_time)) {
>> +    rc = saLogInitialize(&logHandle, &logCallbacks, &log_version);
>> +    if (rc != SA_AIS_ERR_TRY_AGAIN) break;
>> +    log_version = kLogVersion;
>> +    osaf_nanosleep(&kHundredMilliseconds);
>>    }
>>  
>> -  /* Get file descriptor to use in select */
>> -  do {
>> -    result = saLogSelectionObjectGet(logHandle, 
>> &ntfs_cb->logSelectionObject);
>> -    if (SA_AIS_ERR_TRY_AGAIN == result) {
>> -      if (first_try) {
>> -        LOG_WA("saLogSelectionObjectGet returns try again, retries...");
>> -        first_try = 0;
>> -      }
>> -      usleep(AIS_TIMEOUT);
>> -    }
>> -  } while (SA_AIS_ERR_TRY_AGAIN == result);
>> +  if (rc != SA_AIS_OK) {
>> +    LOG_WA("Initialization of log client failed rc = %d", rc);
>> +    return rc;
>> +  } else {
>> +    log_client_initialized = true;
>> +  }
>>  
>> -  if (SA_AIS_OK != result) {
>> -    LOG_ER("Log SelectionObjectGet result is %d", result);
>> -    goto exit_point;
>> +  // Get file descriptor to use in select
>> +  osaf_set_millis_timeout(kWaitTime, &timeout_time);
>> +  while (!osaf_is_timeout(&timeout_time)) {
>> +    rc = saLogSelectionObjectGet(logHandle, &ntfs_cb->logSelectionObject);
>> +    if (rc != SA_AIS_ERR_TRY_AGAIN) break;
>> +    osaf_nanosleep(&kHundredMilliseconds);
>>    }
>>  
>> -  if (SA_AIS_OK != result) {
>> -    LOG_ER("Failed to open the notification log stream (%d)", result);
>> +  if (rc != SA_AIS_OK) {
>> +    LOG_WA("Log SelectionObjectGet failed rc = %d", rc);
>>      goto exit_point;
>>    }
>> -  if (!first_try) {
>> -    LOG_IN("saLogSelectionObjectGet ok");
>> -    first_try = 1;
>> -  }
>>  
>> -  /* Open the alarm stream */
>> -  do {
>> -    result = saLogStreamOpen_2(logHandle, &alarmStreamName, NULL, 0,
>> -                               LOG_OPEN_TIMEOUT, &alarmStreamHandle);
>> -    if (SA_AIS_ERR_TRY_AGAIN == result) {
>> -      if (first_try) {
>> -        LOG_WA("saLogStreamOpen_2 returns try again, retries...");
>> -        first_try = 0;
>> -      }
>> -      usleep(AIS_TIMEOUT);
>> -    }
>> -  } while (SA_AIS_ERR_TRY_AGAIN == result);
>> -
>> -  if (SA_AIS_OK != result) {
>> -    LOG_ER("Failed to open the alarm log stream (%d)", result);
>> -    goto exit_point;
>> +  // Open the alarm stream
>> +  osaf_set_millis_timeout(kWaitTime, &timeout_time);
>> +  while (!osaf_is_timeout(&timeout_time)) {
>> +    rc = saLogStreamOpen_2(logHandle, &alarmStreamName, NULL, 0,
>> +                           LOG_OPEN_TIMEOUT, &alarmStreamHandle);
>> +    if (rc != SA_AIS_ERR_TRY_AGAIN) break;
>> +    osaf_nanosleep(&kHundredMilliseconds);
>>    }
>> -  if (!first_try) {
>> -    LOG_IN("saLogStreamOpen_2 ok");
>> -    first_try = 1;
>> +
>> +  if (rc != SA_AIS_OK) {
>> +    LOG_WA("Log saLogStreamOpen_2 failed rc = %d", rc);
>>    }
>>  
>>  exit_point:
>> +  if (rc != SA_AIS_OK) {
>> +    // Finalize client
>> +    osaf_set_millis_timeout(kWaitTime/2, &timeout_time);
>> +    while (!osaf_is_timeout(&timeout_time)) {
>> +      SaAisErrorT ret = saLogFinalize(logHandle);
>> +      if (ret != SA_AIS_ERR_TRY_AGAIN) break;
>> +      osaf_nanosleep(&kHundredMilliseconds);
>> +    }
>> +    log_client_initialized = false;
>> +  }
>>    TRACE_LEAVE();
>> -  return (result);
>> +  return rc;
>>  }
>>  
>>  void logEvent() {
>> -  SaAisErrorT errorCode;
>> -  errorCode = saLogDispatch(logHandle, SA_DISPATCH_ALL);
>> -  if (SA_AIS_OK != errorCode) {
>> -    TRACE_1("Failed to dispatch log events (%d)", errorCode);
>> +  SaAisErrorT rc;
>> +  rc = saLogDispatch(logHandle, SA_DISPATCH_ALL);
>> +  if (rc != SA_AIS_OK) {
>> +    LOG_WA("Fail to dispatch log events rc = %d", rc);
>> +    if (rc == SA_AIS_ERR_BAD_HANDLE) {
>> +      log_client_initialized = false;
>> +    }
>>    }
>>    return;
>>  }
>> diff --git a/src/ntf/ntfd/NtfLogger.h b/src/ntf/ntfd/NtfLogger.h
>> index cc0ac7dba..739b97511 100644
>> --- a/src/ntf/ntfd/NtfLogger.h
>> +++ b/src/ntf/ntfd/NtfLogger.h
>> @@ -56,14 +56,14 @@ class NtfLogger {
>>    //    virtual ~NtfLogger();
>>  
>>    void log(NtfSmartPtr& notif, bool isLocal);
>> -  void checkQueueAndLog(NtfSmartPtr& notif);
>> -  SaAisErrorT logNotification(NtfSmartPtr& notif);
>>    void queueNotifcation(NtfSmartPtr& notif);
>>    void printInfo();
>>    void syncRequest(NCS_UBAID *uba);
>>  
>>   private:
>>    SaAisErrorT initLog();
>> +  void checkQueueAndLog(NtfSmartPtr& notif);
>> +  SaAisErrorT logNotification(NtfSmartPtr& notif);
>>  
>>    readerNotificationListT coll_;
>>    unsigned int readCounter;
>
>
>


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to