Hi Canh,
This issue seems to be partially fixed, we now prioritize to respond to
AMF callback. But if LOG service is still not responsive, NTF will run
without logging capability. I think we can address the second issue in
another ticket.
Ack from me for code review, one comment in line.
Thanks
Minh
On 26/06/18 16:53, Canh Van Truong wrote:
> When NTFD is started, the NtfAdmin class object is defined and log client is
> initialized in this object. The initialiation of client may be loop forever
> if TRY_AGAIN error always return. Somehow log service has not been started on
> time or log service is busy, NTFD is kept in the loop. And the AMF hasn't
> received csi callback.
>
> This patch update the timeout when initializing log client.
> ---
> src/ntf/ntfd/NtfLogger.cc | 165
> +++++++++++++++++++++++-----------------------
> src/ntf/ntfd/NtfLogger.h | 4 +-
> 2 files changed, 85 insertions(+), 84 deletions(-)
>
> diff --git a/src/ntf/ntfd/NtfLogger.cc b/src/ntf/ntfd/NtfLogger.cc
> index fd17c58a2..9878824c3 100644
> --- a/src/ntf/ntfd/NtfLogger.cc
> +++ b/src/ntf/ntfd/NtfLogger.cc
> @@ -20,17 +20,18 @@
> * INCLUDE FILES
> * ========================================================================
> */
> -#include <sys/poll.h>
> +#include "ntf/ntfd/NtfLogger.h"
>
> -#include "base/osaf_utility.h"
> +#include <sys/poll.h>
> #include <saAis.h>
> #include <saLog.h>
> -#include "ntf/ntfd/NtfAdmin.h"
> -#include "ntf/ntfd/NtfLogger.h"
> #include "ntfs_com.h"
> -#include "base/logtrace.h"
> #include "ntf/common/ntfsv_mem.h"
> +#include "ntf/ntfd/NtfAdmin.h"
> +#include "base/logtrace.h"
> #include "base/osaf_extended_name.h"
> +#include "base/osaf_utility.h"
> +#include "base/osaf_time.h"
>
> /* ========================================================================
> * DEFINITIONS
> @@ -38,7 +39,6 @@
> */
> #define LOG_OPEN_TIMEOUT SA_TIME_ONE_SECOND
> #define LOG_NOTIFICATION_TIMEOUT SA_TIME_ONE_SECOND
> -#define AIS_TIMEOUT 500000
>
> /* ========================================================================
> * TYPE DEFINITIONS
> @@ -65,15 +65,15 @@ void saLogWriteLogCallback(SaInvocationT invocation,
> SaAisErrorT error);
> */
>
> static SaLogCallbacksT logCallbacks = {NULL, NULL, saLogWriteLogCallback};
> -
> static SaLogHandleT logHandle;
> static SaLogStreamHandleT alarmStreamHandle;
> +static bool log_client_initialized = false;
> const SaVersionT kLogVersion = {'A', 2, 3};
> +const uint64_t kWaitTime = 8 * 1000; // Wait for timeout is 8 seconds
>
> NtfLogger::NtfLogger() : readCounter(0) {
> if (SA_AIS_OK != initLog()) {
> - LOG_ER("initialize saflog failed exiting...");
> - exit(EXIT_FAILURE);
> + LOG_WA("Initialize saflog failed");
> }
> }
[Minh]: I think the exit() was there if initLog() fails, so that NID/AMF
can restart the NTF service. We can remove it unless NTF has completed
recycling LOG handle from BAD_HANDLE/TIMEOUT/...
>
> @@ -97,6 +97,8 @@ void saLogWriteLogCallback(SaInvocationT invocation,
> SaAisErrorT error) {
>
> TRACE_1("Error when logging (%d), queue for relogging", error);
>
> + if (error == SA_AIS_ERR_BAD_HANDLE) log_client_initialized = false;
> +
> notification = NtfAdmin::theNtfAdmin->getNotificationById(
> (SaNtfIdentifierT)invocation);
>
> @@ -151,6 +153,15 @@ void NtfLogger::queueNotifcation(NtfSmartPtr& notif) {
>
> void NtfLogger::checkQueueAndLog(NtfSmartPtr& newNotif) {
> TRACE_ENTER();
> +
> + // Initialize client if it hasn't
> + if (log_client_initialized == false) {
> + if (SA_AIS_OK != initLog()) {
> + queueNotifcation(newNotif);
> + return;
> + }
> + }
> +
> /* Check if there are not logged notifications in queue */
> while (!queuedNotificationList.empty()) {
> NtfSmartPtr notification = queuedNotificationList.front();
> @@ -174,7 +185,7 @@ void NtfLogger::checkQueueAndLog(NtfSmartPtr& newNotif) {
>
> SaAisErrorT NtfLogger::logNotification(NtfSmartPtr& notif) {
> /* Write to the log if we're the local node */
> - SaAisErrorT errorCode = SA_AIS_OK;
> + SaAisErrorT rc = SA_AIS_OK;
> SaLogHeaderT logHeader;
> char addTextBuf[MAX_ADDITIONAL_TEXT_LENGTH] = {0};
> SaLogBufferT logBuffer;
> @@ -219,10 +230,10 @@ SaAisErrorT NtfLogger::logNotification(NtfSmartPtr&
> notif) {
> if ((notif->sendNotInfo_->notificationType == SA_NTF_TYPE_ALARM) ||
> (notif->sendNotInfo_->notificationType == SA_NTF_TYPE_SECURITY_ALARM))
> {
> TRACE_2("Logging notification to alarm stream");
> - errorCode =
> + rc =
> saLogWriteLogAsync(alarmStreamHandle, notif->getNotificationId(),
> SA_LOG_RECORD_WRITE_ACK, &logRecord);
> - switch (errorCode) {
> + switch (rc) {
> case SA_AIS_OK:
> break;
>
> @@ -230,107 +241,97 @@ SaAisErrorT NtfLogger::logNotification(NtfSmartPtr&
> notif) {
> case SA_AIS_ERR_TRY_AGAIN:
> case SA_AIS_ERR_TIMEOUT:
> TRACE("Failed to log notification (ret: %d). Try next time.",
> - errorCode);
> + rc);
> + break;
> + case SA_AIS_ERR_BAD_HANDLE:
> + TRACE("BAD HANDLE. Log client should be re-initialized");
> + log_client_initialized = false;
> break;
> -
> default:
> - osaf_abort(errorCode);
> + LOG_ER("Writing failed: rc = %d", rc);
> + osaf_abort(rc);
> }
> }
>
> TRACE_LEAVE();
> - return errorCode;
> + return rc;
> }
>
> SaAisErrorT NtfLogger::initLog() {
> - SaAisErrorT result;
> + SaAisErrorT rc = SA_AIS_OK;
> SaNameT alarmStreamName;
> - osaf_extended_name_lend(SA_LOG_STREAM_ALARM, &alarmStreamName);
> - int first_try = 1;
> SaVersionT log_version = kLogVersion;
> + struct timespec timeout_time;
>
> TRACE_ENTER();
>
> - /* Initialize the Log service */
> - do {
> - result = saLogInitialize(&logHandle, &logCallbacks, &log_version);
> - if (SA_AIS_ERR_TRY_AGAIN == result) {
> - if (first_try) {
> - LOG_WA("saLogInitialize returns try again, retries...");
> - first_try = 0;
> - }
> - usleep(AIS_TIMEOUT);
> - log_version = kLogVersion;
> - }
> - } while (SA_AIS_ERR_TRY_AGAIN == result);
> + osaf_extended_name_lend(SA_LOG_STREAM_ALARM, &alarmStreamName);
>
> - if (SA_AIS_OK != result) {
> - LOG_ER("Log initialize result is %d", result);
> - goto exit_point;
> - }
> - if (!first_try) {
> - LOG_IN("saLogInitialize ok");
> - first_try = 1;
> + // Initialize the Log service
> + osaf_set_millis_timeout(kWaitTime, &timeout_time);
> + while (!osaf_is_timeout(&timeout_time)) {
> + rc = saLogInitialize(&logHandle, &logCallbacks, &log_version);
> + if (rc != SA_AIS_ERR_TRY_AGAIN) break;
> + log_version = kLogVersion;
> + osaf_nanosleep(&kHundredMilliseconds);
> }
>
> - /* Get file descriptor to use in select */
> - do {
> - result = saLogSelectionObjectGet(logHandle,
> &ntfs_cb->logSelectionObject);
> - if (SA_AIS_ERR_TRY_AGAIN == result) {
> - if (first_try) {
> - LOG_WA("saLogSelectionObjectGet returns try again, retries...");
> - first_try = 0;
> - }
> - usleep(AIS_TIMEOUT);
> - }
> - } while (SA_AIS_ERR_TRY_AGAIN == result);
> + if (rc != SA_AIS_OK) {
> + LOG_WA("Initialization of log client failed rc = %d", rc);
> + return rc;
> + } else {
> + log_client_initialized = true;
> + }
>
> - if (SA_AIS_OK != result) {
> - LOG_ER("Log SelectionObjectGet result is %d", result);
> - goto exit_point;
> + // Get file descriptor to use in select
> + osaf_set_millis_timeout(kWaitTime, &timeout_time);
> + while (!osaf_is_timeout(&timeout_time)) {
> + rc = saLogSelectionObjectGet(logHandle, &ntfs_cb->logSelectionObject);
> + if (rc != SA_AIS_ERR_TRY_AGAIN) break;
> + osaf_nanosleep(&kHundredMilliseconds);
> }
>
> - if (SA_AIS_OK != result) {
> - LOG_ER("Failed to open the notification log stream (%d)", result);
> + if (rc != SA_AIS_OK) {
> + LOG_WA("Log SelectionObjectGet failed rc = %d", rc);
> goto exit_point;
> }
> - if (!first_try) {
> - LOG_IN("saLogSelectionObjectGet ok");
> - first_try = 1;
> - }
>
> - /* Open the alarm stream */
> - do {
> - result = saLogStreamOpen_2(logHandle, &alarmStreamName, NULL, 0,
> - LOG_OPEN_TIMEOUT, &alarmStreamHandle);
> - if (SA_AIS_ERR_TRY_AGAIN == result) {
> - if (first_try) {
> - LOG_WA("saLogStreamOpen_2 returns try again, retries...");
> - first_try = 0;
> - }
> - usleep(AIS_TIMEOUT);
> - }
> - } while (SA_AIS_ERR_TRY_AGAIN == result);
> -
> - if (SA_AIS_OK != result) {
> - LOG_ER("Failed to open the alarm log stream (%d)", result);
> - goto exit_point;
> + // Open the alarm stream
> + osaf_set_millis_timeout(kWaitTime, &timeout_time);
> + while (!osaf_is_timeout(&timeout_time)) {
> + rc = saLogStreamOpen_2(logHandle, &alarmStreamName, NULL, 0,
> + LOG_OPEN_TIMEOUT, &alarmStreamHandle);
> + if (rc != SA_AIS_ERR_TRY_AGAIN) break;
> + osaf_nanosleep(&kHundredMilliseconds);
> }
> - if (!first_try) {
> - LOG_IN("saLogStreamOpen_2 ok");
> - first_try = 1;
> +
> + if (rc != SA_AIS_OK) {
> + LOG_WA("Log saLogStreamOpen_2 failed rc = %d", rc);
> }
>
> exit_point:
> + if (rc != SA_AIS_OK) {
> + // Finalize client
> + osaf_set_millis_timeout(kWaitTime/2, &timeout_time);
> + while (!osaf_is_timeout(&timeout_time)) {
> + SaAisErrorT ret = saLogFinalize(logHandle);
> + if (ret != SA_AIS_ERR_TRY_AGAIN) break;
> + osaf_nanosleep(&kHundredMilliseconds);
> + }
> + log_client_initialized = false;
> + }
> TRACE_LEAVE();
> - return (result);
> + return rc;
> }
>
> void logEvent() {
> - SaAisErrorT errorCode;
> - errorCode = saLogDispatch(logHandle, SA_DISPATCH_ALL);
> - if (SA_AIS_OK != errorCode) {
> - TRACE_1("Failed to dispatch log events (%d)", errorCode);
> + SaAisErrorT rc;
> + rc = saLogDispatch(logHandle, SA_DISPATCH_ALL);
> + if (rc != SA_AIS_OK) {
> + LOG_WA("Fail to dispatch log events rc = %d", rc);
> + if (rc == SA_AIS_ERR_BAD_HANDLE) {
> + log_client_initialized = false;
> + }
> }
> return;
> }
> diff --git a/src/ntf/ntfd/NtfLogger.h b/src/ntf/ntfd/NtfLogger.h
> index cc0ac7dba..739b97511 100644
> --- a/src/ntf/ntfd/NtfLogger.h
> +++ b/src/ntf/ntfd/NtfLogger.h
> @@ -56,14 +56,14 @@ class NtfLogger {
> // virtual ~NtfLogger();
>
> void log(NtfSmartPtr& notif, bool isLocal);
> - void checkQueueAndLog(NtfSmartPtr& notif);
> - SaAisErrorT logNotification(NtfSmartPtr& notif);
> void queueNotifcation(NtfSmartPtr& notif);
> void printInfo();
> void syncRequest(NCS_UBAID *uba);
>
> private:
> SaAisErrorT initLog();
> + void checkQueueAndLog(NtfSmartPtr& notif);
> + SaAisErrorT logNotification(NtfSmartPtr& notif);
>
> readerNotificationListT coll_;
> unsigned int readCounter;
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel