Hi Canh, I am aware of #2899 which causes the LOG unresponsive, but LOG (or any service) unresponsiveness can be of any other reasons, not only #2899.
Thanks Minh On 03/08/18 17:44, Canh Van Truong wrote: > Hi aMinh, > > Thanks for your comments. > This patch just help enhance to avoid ntf loop long time in case log service > may up slowly and user of log (ntf here) may be delay. > The issue why Log still does not response is ticket 2899 and that ticket is > reviewing. > > Thanks > Canh > > -----Original Message----- > From: Minh Hon Chau <minh.c...@dektech.com.au> > Sent: Friday, August 3, 2018 2:32 PM > To: Canh Van Truong <canh.v.tru...@dektech.com.au>; vu.m.ngu...@dektech.com.au > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1/1] ntf: Update timeout for initializing log client > [#2878] > > Hi Canh, > > This issue seems to be partially fixed, we now prioritize to respond to > AMF callback. But if LOG service is still not responsive, NTF will run > without logging capability. I think we can address the second issue in > another ticket. > > Ack from me for code review, one comment in line. > > Thanks > > Minh > > > On 26/06/18 16:53, Canh Van Truong wrote: >> When NTFD is started, the NtfAdmin class object is defined and log client is >> initialized in this object. The initialiation of client may be loop forever >> if TRY_AGAIN error always return. Somehow log service has not been started on >> time or log service is busy, NTFD is kept in the loop. And the AMF hasn't >> received csi callback. >> >> This patch update the timeout when initializing log client. >> --- >> src/ntf/ntfd/NtfLogger.cc | 165 >> +++++++++++++++++++++++----------------------- >> src/ntf/ntfd/NtfLogger.h | 4 +- >> 2 files changed, 85 insertions(+), 84 deletions(-) >> >> diff --git a/src/ntf/ntfd/NtfLogger.cc b/src/ntf/ntfd/NtfLogger.cc >> index fd17c58a2..9878824c3 100644 >> --- a/src/ntf/ntfd/NtfLogger.cc >> +++ b/src/ntf/ntfd/NtfLogger.cc >> @@ -20,17 +20,18 @@ >> * INCLUDE FILES >> * ======================================================================== >> */ >> -#include <sys/poll.h> >> +#include "ntf/ntfd/NtfLogger.h" >> >> -#include "base/osaf_utility.h" >> +#include <sys/poll.h> >> #include <saAis.h> >> #include <saLog.h> >> -#include "ntf/ntfd/NtfAdmin.h" >> -#include "ntf/ntfd/NtfLogger.h" >> #include "ntfs_com.h" >> -#include "base/logtrace.h" >> #include "ntf/common/ntfsv_mem.h" >> +#include "ntf/ntfd/NtfAdmin.h" >> +#include "base/logtrace.h" >> #include "base/osaf_extended_name.h" >> +#include "base/osaf_utility.h" >> +#include "base/osaf_time.h" >> >> /* ======================================================================== >> * DEFINITIONS >> @@ -38,7 +39,6 @@ >> */ >> #define LOG_OPEN_TIMEOUT SA_TIME_ONE_SECOND >> #define LOG_NOTIFICATION_TIMEOUT SA_TIME_ONE_SECOND >> -#define AIS_TIMEOUT 500000 >> >> /* ======================================================================== >> * TYPE DEFINITIONS >> @@ -65,15 +65,15 @@ void saLogWriteLogCallback(SaInvocationT invocation, >> SaAisErrorT error); >> */ >> >> static SaLogCallbacksT logCallbacks = {NULL, NULL, saLogWriteLogCallback}; >> - >> static SaLogHandleT logHandle; >> static SaLogStreamHandleT alarmStreamHandle; >> +static bool log_client_initialized = false; >> const SaVersionT kLogVersion = {'A', 2, 3}; >> +const uint64_t kWaitTime = 8 * 1000; // Wait for timeout is 8 seconds >> >> NtfLogger::NtfLogger() : readCounter(0) { >> if (SA_AIS_OK != initLog()) { >> - LOG_ER("initialize saflog failed exiting..."); >> - exit(EXIT_FAILURE); >> + LOG_WA("Initialize saflog failed"); >> } >> } > [Minh]: I think the exit() was there if initLog() fails, so that NID/AMF > can restart the NTF service. We can remove it unless NTF has completed > recycling LOG handle from BAD_HANDLE/TIMEOUT/... >> >> @@ -97,6 +97,8 @@ void saLogWriteLogCallback(SaInvocationT invocation, >> SaAisErrorT error) { >> >> TRACE_1("Error when logging (%d), queue for relogging", error); >> >> + if (error == SA_AIS_ERR_BAD_HANDLE) log_client_initialized = false; >> + >> notification = NtfAdmin::theNtfAdmin->getNotificationById( >> (SaNtfIdentifierT)invocation); >> >> @@ -151,6 +153,15 @@ void NtfLogger::queueNotifcation(NtfSmartPtr& notif) { >> >> void NtfLogger::checkQueueAndLog(NtfSmartPtr& newNotif) { >> TRACE_ENTER(); >> + >> + // Initialize client if it hasn't >> + if (log_client_initialized == false) { >> + if (SA_AIS_OK != initLog()) { >> + queueNotifcation(newNotif); >> + return; >> + } >> + } >> + >> /* Check if there are not logged notifications in queue */ >> while (!queuedNotificationList.empty()) { >> NtfSmartPtr notification = queuedNotificationList.front(); >> @@ -174,7 +185,7 @@ void NtfLogger::checkQueueAndLog(NtfSmartPtr& newNotif) { >> >> SaAisErrorT NtfLogger::logNotification(NtfSmartPtr& notif) { >> /* Write to the log if we're the local node */ >> - SaAisErrorT errorCode = SA_AIS_OK; >> + SaAisErrorT rc = SA_AIS_OK; >> SaLogHeaderT logHeader; >> char addTextBuf[MAX_ADDITIONAL_TEXT_LENGTH] = {0}; >> SaLogBufferT logBuffer; >> @@ -219,10 +230,10 @@ SaAisErrorT NtfLogger::logNotification(NtfSmartPtr& >> notif) { >> if ((notif->sendNotInfo_->notificationType == SA_NTF_TYPE_ALARM) || >> (notif->sendNotInfo_->notificationType == >> SA_NTF_TYPE_SECURITY_ALARM)) { >> TRACE_2("Logging notification to alarm stream"); >> - errorCode = >> + rc = >> saLogWriteLogAsync(alarmStreamHandle, notif->getNotificationId(), >> SA_LOG_RECORD_WRITE_ACK, &logRecord); >> - switch (errorCode) { >> + switch (rc) { >> case SA_AIS_OK: >> break; >> >> @@ -230,107 +241,97 @@ SaAisErrorT NtfLogger::logNotification(NtfSmartPtr& >> notif) { >> case SA_AIS_ERR_TRY_AGAIN: >> case SA_AIS_ERR_TIMEOUT: >> TRACE("Failed to log notification (ret: %d). Try next time.", >> - errorCode); >> + rc); >> + break; >> + case SA_AIS_ERR_BAD_HANDLE: >> + TRACE("BAD HANDLE. Log client should be re-initialized"); >> + log_client_initialized = false; >> break; >> - >> default: >> - osaf_abort(errorCode); >> + LOG_ER("Writing failed: rc = %d", rc); >> + osaf_abort(rc); >> } >> } >> >> TRACE_LEAVE(); >> - return errorCode; >> + return rc; >> } >> >> SaAisErrorT NtfLogger::initLog() { >> - SaAisErrorT result; >> + SaAisErrorT rc = SA_AIS_OK; >> SaNameT alarmStreamName; >> - osaf_extended_name_lend(SA_LOG_STREAM_ALARM, &alarmStreamName); >> - int first_try = 1; >> SaVersionT log_version = kLogVersion; >> + struct timespec timeout_time; >> >> TRACE_ENTER(); >> >> - /* Initialize the Log service */ >> - do { >> - result = saLogInitialize(&logHandle, &logCallbacks, &log_version); >> - if (SA_AIS_ERR_TRY_AGAIN == result) { >> - if (first_try) { >> - LOG_WA("saLogInitialize returns try again, retries..."); >> - first_try = 0; >> - } >> - usleep(AIS_TIMEOUT); >> - log_version = kLogVersion; >> - } >> - } while (SA_AIS_ERR_TRY_AGAIN == result); >> + osaf_extended_name_lend(SA_LOG_STREAM_ALARM, &alarmStreamName); >> >> - if (SA_AIS_OK != result) { >> - LOG_ER("Log initialize result is %d", result); >> - goto exit_point; >> - } >> - if (!first_try) { >> - LOG_IN("saLogInitialize ok"); >> - first_try = 1; >> + // Initialize the Log service >> + osaf_set_millis_timeout(kWaitTime, &timeout_time); >> + while (!osaf_is_timeout(&timeout_time)) { >> + rc = saLogInitialize(&logHandle, &logCallbacks, &log_version); >> + if (rc != SA_AIS_ERR_TRY_AGAIN) break; >> + log_version = kLogVersion; >> + osaf_nanosleep(&kHundredMilliseconds); >> } >> >> - /* Get file descriptor to use in select */ >> - do { >> - result = saLogSelectionObjectGet(logHandle, >> &ntfs_cb->logSelectionObject); >> - if (SA_AIS_ERR_TRY_AGAIN == result) { >> - if (first_try) { >> - LOG_WA("saLogSelectionObjectGet returns try again, retries..."); >> - first_try = 0; >> - } >> - usleep(AIS_TIMEOUT); >> - } >> - } while (SA_AIS_ERR_TRY_AGAIN == result); >> + if (rc != SA_AIS_OK) { >> + LOG_WA("Initialization of log client failed rc = %d", rc); >> + return rc; >> + } else { >> + log_client_initialized = true; >> + } >> >> - if (SA_AIS_OK != result) { >> - LOG_ER("Log SelectionObjectGet result is %d", result); >> - goto exit_point; >> + // Get file descriptor to use in select >> + osaf_set_millis_timeout(kWaitTime, &timeout_time); >> + while (!osaf_is_timeout(&timeout_time)) { >> + rc = saLogSelectionObjectGet(logHandle, &ntfs_cb->logSelectionObject); >> + if (rc != SA_AIS_ERR_TRY_AGAIN) break; >> + osaf_nanosleep(&kHundredMilliseconds); >> } >> >> - if (SA_AIS_OK != result) { >> - LOG_ER("Failed to open the notification log stream (%d)", result); >> + if (rc != SA_AIS_OK) { >> + LOG_WA("Log SelectionObjectGet failed rc = %d", rc); >> goto exit_point; >> } >> - if (!first_try) { >> - LOG_IN("saLogSelectionObjectGet ok"); >> - first_try = 1; >> - } >> >> - /* Open the alarm stream */ >> - do { >> - result = saLogStreamOpen_2(logHandle, &alarmStreamName, NULL, 0, >> - LOG_OPEN_TIMEOUT, &alarmStreamHandle); >> - if (SA_AIS_ERR_TRY_AGAIN == result) { >> - if (first_try) { >> - LOG_WA("saLogStreamOpen_2 returns try again, retries..."); >> - first_try = 0; >> - } >> - usleep(AIS_TIMEOUT); >> - } >> - } while (SA_AIS_ERR_TRY_AGAIN == result); >> - >> - if (SA_AIS_OK != result) { >> - LOG_ER("Failed to open the alarm log stream (%d)", result); >> - goto exit_point; >> + // Open the alarm stream >> + osaf_set_millis_timeout(kWaitTime, &timeout_time); >> + while (!osaf_is_timeout(&timeout_time)) { >> + rc = saLogStreamOpen_2(logHandle, &alarmStreamName, NULL, 0, >> + LOG_OPEN_TIMEOUT, &alarmStreamHandle); >> + if (rc != SA_AIS_ERR_TRY_AGAIN) break; >> + osaf_nanosleep(&kHundredMilliseconds); >> } >> - if (!first_try) { >> - LOG_IN("saLogStreamOpen_2 ok"); >> - first_try = 1; >> + >> + if (rc != SA_AIS_OK) { >> + LOG_WA("Log saLogStreamOpen_2 failed rc = %d", rc); >> } >> >> exit_point: >> + if (rc != SA_AIS_OK) { >> + // Finalize client >> + osaf_set_millis_timeout(kWaitTime/2, &timeout_time); >> + while (!osaf_is_timeout(&timeout_time)) { >> + SaAisErrorT ret = saLogFinalize(logHandle); >> + if (ret != SA_AIS_ERR_TRY_AGAIN) break; >> + osaf_nanosleep(&kHundredMilliseconds); >> + } >> + log_client_initialized = false; >> + } >> TRACE_LEAVE(); >> - return (result); >> + return rc; >> } >> >> void logEvent() { >> - SaAisErrorT errorCode; >> - errorCode = saLogDispatch(logHandle, SA_DISPATCH_ALL); >> - if (SA_AIS_OK != errorCode) { >> - TRACE_1("Failed to dispatch log events (%d)", errorCode); >> + SaAisErrorT rc; >> + rc = saLogDispatch(logHandle, SA_DISPATCH_ALL); >> + if (rc != SA_AIS_OK) { >> + LOG_WA("Fail to dispatch log events rc = %d", rc); >> + if (rc == SA_AIS_ERR_BAD_HANDLE) { >> + log_client_initialized = false; >> + } >> } >> return; >> } >> diff --git a/src/ntf/ntfd/NtfLogger.h b/src/ntf/ntfd/NtfLogger.h >> index cc0ac7dba..739b97511 100644 >> --- a/src/ntf/ntfd/NtfLogger.h >> +++ b/src/ntf/ntfd/NtfLogger.h >> @@ -56,14 +56,14 @@ class NtfLogger { >> // virtual ~NtfLogger(); >> >> void log(NtfSmartPtr& notif, bool isLocal); >> - void checkQueueAndLog(NtfSmartPtr& notif); >> - SaAisErrorT logNotification(NtfSmartPtr& notif); >> void queueNotifcation(NtfSmartPtr& notif); >> void printInfo(); >> void syncRequest(NCS_UBAID *uba); >> >> private: >> SaAisErrorT initLog(); >> + void checkQueueAndLog(NtfSmartPtr& notif); >> + SaAisErrorT logNotification(NtfSmartPtr& notif); >> >> readerNotificationListT coll_; >> unsigned int readCounter; > > > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel