Hi Lennart,

 

This happens when shutting down log application and causing coredump for the
application. 

 

The coredump backtrace can be found in the ticket.

 

Regards, Vu

 

From: Lennart Lund <lennart.l...@ericsson.com> 
Sent: Wednesday, January 30, 2019 5:04 PM
To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Canh Van Truong
<canh.v.tru...@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net
Subject: SV: [PATCH 1/1] log: fix coredump at log agent application [#3002]

 

Hi Vu,

 

Ack. Not tested

 

If I understand correct this fix solves a problem with shutting down the log
service but what is the problem if the described race condition happen?

 

Thanks 

Lennart

  _____  

Från: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au
<mailto:vu.m.ngu...@dektech.com.au> >
Skickat: den 24 januari 2019 11:14
Till: Lennart Lund; Canh Van Truong
Kopia: opensaf-devel@lists.sourceforge.net
<mailto:opensaf-devel@lists.sourceforge.net> ; Vu Minh Nguyen
Ämne: [PATCH 1/1] log: fix coredump at log agent application [#3002] 

 

There is a race in using singleton-static class object b/w mds thread
and application thread - caller of exit() api.

This patch still uses singleton but making the instance shared_ptr
to ensure the resource will not be destroyed if it is being used.
---
 src/log/agent/lga_agent.cc |  7 ++-----
 src/log/agent/lga_agent.h  | 21 ++++++++++++++-------
 src/log/agent/lga_api.cc   | 20 ++++++++++----------
 src/log/agent/lga_mds.cc   | 32 ++++++++++++++++----------------
 src/log/agent/lga_state.cc |  2 +-
 src/log/agent/lga_util.cc  | 10 +++++-----
 6 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc
index bf9caa935..1000bb3fd 100644
--- a/src/log/agent/lga_agent.cc
+++ b/src/log/agent/lga_agent.cc
@@ -108,7 +108,7 @@ ScopeData::~ScopeData() {
     recovery2_unlock(is_locked_);
   }
 
-  LogAgent::instance().EnterCriticalSection();
+  LogAgent::instance()->EnterCriticalSection();
   LogClient* client = client_data_->client;
   bool* is_updated = client_data_->is_updated;
   RefCounter::Degree client_degree = client_data_->value;
@@ -128,15 +128,12 @@ ScopeData::~ScopeData() {
       stream->RestoreRefCounter(caller, stream_degree, *stream_is_updated);
     }
   }
-  LogAgent::instance().LeaveCriticalSection();
+  LogAgent::instance()->LeaveCriticalSection();
 }
 
 
//--------------------------------------------------------------------------
----
 // LogAgent
 
//--------------------------------------------------------------------------
----
-// Singleton represents LOG agent.
-LogAgent LogAgent::me_;
-
 LogAgent::LogAgent() {
   client_list_.clear();
   // There is high risk of calling one @LogClient method
diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h
index 0049da054..0c32ea33b 100644
--- a/src/log/agent/lga_agent.h
+++ b/src/log/agent/lga_agent.h
@@ -19,12 +19,14 @@
 #define SRC_LOG_AGENT_LGA_AGENT_H_
 
 #include <atomic>
+#include <memory>
 #include <string>
 #include <vector>
-#include "mds/mds_papi.h"
 #include <saAis.h>
-#include "base/macros.h"
 #include <saLog.h>
+
+#include "base/macros.h"
+#include "mds/mds_papi.h"
 #include "log/common/lgsv_msg.h"
 #include "log/common/lgsv_defs.h"
 #include "log/agent/lga_common.h"
@@ -80,7 +82,15 @@ class LogClient;
 //<
 class LogAgent {
  public:
-  static LogAgent& instance() { return me_; }
+  static std::shared_ptr<LogAgent>& instance() {
+    // Ensure this static singleton instance is only destroyed when
+    // no one is using it. Note that: static data can be destroyed
+    // in log application thread which calls exit() libc. So, introducing
+    // shared_ptr<> to avoid races among threads.
+    static std::shared_ptr<LogAgent> me =
+        std::shared_ptr<LogAgent>{new LogAgent()};
+    return me;
+  }
 
   //<
   // C++ APIs wrapper for corresponding C LOG Agent APIs
@@ -158,11 +168,11 @@ class LogAgent {
   // Introduce these public interface for MDS thread use.
   void EnterCriticalSection();
   void LeaveCriticalSection();
+  ~LogAgent() {}
 
  private:
   // Not allow to create @LogAgent object, except the singleton object
@me_.
   LogAgent();
-  ~LogAgent() {}
 
   // True if there is no Active SC, otherwise false
   bool no_active_log_server() const;
@@ -274,9 +284,6 @@ class LogAgent {
   // LGS LGA sync params
   NCS_SEL_OBJ lgs_sync_sel_;
 
-  // Singleton represents LOG Agent in LOG application process
-  static LogAgent me_;
-
   DELETE_COPY_AND_MOVE_OPERATORS(LogAgent);
 };
 
diff --git a/src/log/agent/lga_api.cc b/src/log/agent/lga_api.cc
index 37a1cc650..97dbf1d31 100644
--- a/src/log/agent/lga_api.cc
+++ b/src/log/agent/lga_api.cc
@@ -39,7 +39,7 @@
 SaAisErrorT saLogInitialize(SaLogHandleT* logHandle,
                             const SaLogCallbacksT* callbacks,
                             SaVersionT* version) {
-  return LogAgent::instance().saLogInitialize(logHandle, callbacks,
version);
+  return LogAgent::instance()->saLogInitialize(logHandle, callbacks,
version);
 }
 
 
/***************************************************************************
@@ -69,7 +69,7 @@ SaAisErrorT saLogInitialize(SaLogHandleT* logHandle,
 
***************************************************************************/
 SaAisErrorT saLogSelectionObjectGet(SaLogHandleT logHandle,
                                     SaSelectionObjectT* selectionObject) {
-  return LogAgent::instance().saLogSelectionObjectGet(logHandle,
+  return LogAgent::instance()->saLogSelectionObjectGet(logHandle,
                                                       selectionObject);
 }
 
@@ -96,7 +96,7 @@ SaAisErrorT saLogSelectionObjectGet(SaLogHandleT
logHandle,
 
***************************************************************************/
 SaAisErrorT saLogDispatch(SaLogHandleT logHandle,
                           SaDispatchFlagsT dispatchFlags) {
-  return LogAgent::instance().saLogDispatch(logHandle, dispatchFlags);
+  return LogAgent::instance()->saLogDispatch(logHandle, dispatchFlags);
 }
 
 
/***************************************************************************
@@ -122,7 +122,7 @@ SaAisErrorT saLogDispatch(SaLogHandleT logHandle,
  *
 
***************************************************************************/
 SaAisErrorT saLogFinalize(SaLogHandleT logHandle) {
-  return LogAgent::instance().saLogFinalize(logHandle);
+  return LogAgent::instance()->saLogFinalize(logHandle);
 }
 
 /**
@@ -142,7 +142,7 @@ SaAisErrorT saLogStreamOpen_2(
     const SaLogFileCreateAttributesT_2* logFileCreateAttributes,
     SaLogStreamOpenFlagsT logStreamOpenFlags, SaTimeT timeOut,
     SaLogStreamHandleT* logStreamHandle) {
-  return LogAgent::instance().saLogStreamOpen_2(
+  return LogAgent::instance()->saLogStreamOpen_2(
       logHandle, logStreamName, logFileCreateAttributes,
logStreamOpenFlags,
       timeOut, logStreamHandle);
 }
@@ -161,7 +161,7 @@ SaAisErrorT saLogStreamOpenAsync_2(
     SaLogHandleT logHandle, const SaNameT* logStreamName,
     const SaLogFileCreateAttributesT_2* logFileCreateAttributes,
     SaLogStreamOpenFlagsT logstreamOpenFlags, SaInvocationT invocation) {
-  return LogAgent::instance().saLogStreamOpenAsync_2(
+  return LogAgent::instance()->saLogStreamOpenAsync_2(
       logHandle, logStreamName, logFileCreateAttributes,
logstreamOpenFlags,
       invocation);
 }
@@ -176,7 +176,7 @@ SaAisErrorT saLogStreamOpenAsync_2(
  */
 SaAisErrorT saLogWriteLog(SaLogStreamHandleT logStreamHandle, SaTimeT
timeOut,
                           const SaLogRecordT* logRecord) {
-  return LogAgent::instance().saLogWriteLog(logStreamHandle, timeOut,
+  return LogAgent::instance()->saLogWriteLog(logStreamHandle, timeOut,
                                             logRecord);
 }
 
@@ -193,7 +193,7 @@ SaAisErrorT saLogWriteLogAsync(SaLogStreamHandleT
logStreamHandle,
                                SaInvocationT invocation,
                                SaLogAckFlagsT ackFlags,
                                const SaLogRecordT* logRecord) {
-  return LogAgent::instance().saLogWriteLogAsync(logStreamHandle,
invocation,
+  return LogAgent::instance()->saLogWriteLogAsync(logStreamHandle,
invocation,
                                                  ackFlags, logRecord);
 }
 /**
@@ -204,7 +204,7 @@ SaAisErrorT saLogWriteLogAsync(SaLogStreamHandleT
logStreamHandle,
  * @return SaAisErrorT
  */
 SaAisErrorT saLogStreamClose(SaLogStreamHandleT logStreamHandle) {
-  return LogAgent::instance().saLogStreamClose(logStreamHandle);
+  return LogAgent::instance()->saLogStreamClose(logStreamHandle);
 }
 
 /**
@@ -217,5 +217,5 @@ SaAisErrorT saLogStreamClose(SaLogStreamHandleT
logStreamHandle) {
  */
 SaAisErrorT saLogLimitGet(SaLogHandleT logHandle, SaLogLimitIdT limitId,
                           SaLimitValueT* limitValue) {
-  return LogAgent::instance().saLogLimitGet(logHandle, limitId,
limitValue);
+  return LogAgent::instance()->saLogLimitGet(logHandle, limitId,
limitValue);
 }
diff --git a/src/log/agent/lga_mds.cc b/src/log/agent/lga_mds.cc
index ca949577f..07db48d2c 100644
--- a/src/log/agent/lga_mds.cc
+++ b/src/log/agent/lga_mds.cc
@@ -516,9 +516,9 @@ static uint32_t lga_lgs_msg_proc(lgsv_msg_t *lgsv_msg,
   // Lookup the hdl rec by client_id
   LogClient *client = nullptr;
   uint32_t id = lgsv_msg->info.cbk_info.lgs_client_id;
-  LogAgent::instance().EnterCriticalSection();
-  if (nullptr == (client = LogAgent::instance().SearchClientById(id))) {
-    LogAgent::instance().LeaveCriticalSection();
+  LogAgent::instance()->EnterCriticalSection();
+  if (nullptr == (client = LogAgent::instance()->SearchClientById(id))) {
+    LogAgent::instance()->LeaveCriticalSection();
     TRACE("regid not found");
     lga_msg_destroy(lgsv_msg);
     return NCSCC_RC_FAILURE;
@@ -527,12 +527,12 @@ static uint32_t lga_lgs_msg_proc(lgsv_msg_t *lgsv_msg,
   // @client is being deleted in other thread. DO NOT touch this.
   bool updated = false;
   if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) {
-    LogAgent::instance().LeaveCriticalSection();
+    LogAgent::instance()->LeaveCriticalSection();
     lga_msg_destroy(lgsv_msg);
     TRACE_LEAVE();
     return rc;
   }
-  LogAgent::instance().LeaveCriticalSection();
+  LogAgent::instance()->LeaveCriticalSection();
 
   switch (lgsv_msg->type) {
     case LGSV_LGS_CBK_MSG:
@@ -554,7 +554,7 @@ static uint32_t lga_lgs_msg_proc(lgsv_msg_t *lgsv_msg,
 
           TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK clm_node_status: %d",
status);
           std::atomic<SaClmClusterChangesT> &clm_node_state =
-              LogAgent::instance().atomic_get_clm_node_state();
+              LogAgent::instance()->atomic_get_clm_node_state();
           clm_node_state =
               lgsv_msg->info.cbk_info.clm_node_status_cbk.clm_node_status;
           // A client becomes stale if Node loses CLM Membership.
@@ -644,7 +644,7 @@ static uint32_t lga_mds_svc_evt(struct
ncsmds_callback_info *mds_cb_info) {
       TRACE("%s\t NCSMDS_NO_ACTIVE", __func__);
       // This is a temporary server down e.g. during switch/fail over
       if (mds_cb_info->info.svc_evt.i_svc_id == NCSMDS_SVC_ID_LGS) {
-        LogAgent::instance().NoActiveLogServer();
+        LogAgent::instance()->NoActiveLogServer();
       }
       break;
 
@@ -658,7 +658,7 @@ static uint32_t lga_mds_svc_evt(struct
ncsmds_callback_info *mds_cb_info) {
         // Notify to LOG Agent that no LOG server exist.
         // In turn, it will inform to all its log clients
         // and all log streams belong to each log client.
-        LogAgent::instance().NoLogServer();
+        LogAgent::instance()->NoLogServer();
         // Stop the recovery thread if it is running
         lga_no_server_state_set();
       }
@@ -671,11 +671,11 @@ static uint32_t lga_mds_svc_evt(struct
ncsmds_callback_info *mds_cb_info) {
           TRACE("%s\t NCSMDS_UP", __func__);
           // Inform to LOG agent that LOG server is up from headless
           // and provide it the LOG server destination address too.
-          LogAgent::instance().HasActiveLogServer(
+          LogAgent::instance()->HasActiveLogServer(
               mds_cb_info->info.svc_evt.i_dest);
-          if (LogAgent::instance().waiting_log_server_up() == true) {
+          if (LogAgent::instance()->waiting_log_server_up() == true) {
             // Signal waiting thread
-            m_NCS_SEL_OBJ_IND(LogAgent::instance().get_lgs_sync_sel());
+            m_NCS_SEL_OBJ_IND(LogAgent::instance()->get_lgs_sync_sel());
           }
           // Start recovery
           lga_serv_recov1state_set();
@@ -1236,7 +1236,7 @@ uint32_t lga_mds_init() {
   NCSMDS_INFO mds_info;
   uint32_t rc = NCSCC_RC_SUCCESS;
   MDS_SVC_ID svc = NCSMDS_SVC_ID_LGS;
-  std::atomic<MDS_HDL> &mds_hdl =
LogAgent::instance().atomic_get_mds_hdl();
+  std::atomic<MDS_HDL> &mds_hdl =
LogAgent::instance()->atomic_get_mds_hdl();
 
   TRACE_ENTER();
   // Create the ADEST for LGA and get the pwe hdl
@@ -1307,9 +1307,9 @@ uint32_t lga_mds_msg_sync_send(lgsv_msg_t *i_msg,
lgsv_msg_t **o_msg,
                                SaTimeT timeout, uint32_t prio) {
   NCSMDS_INFO mds_info;
   uint32_t rc = NCSCC_RC_SUCCESS;
-  std::atomic<MDS_HDL> &mds_hdl =
LogAgent::instance().atomic_get_mds_hdl();
+  std::atomic<MDS_HDL> &mds_hdl =
LogAgent::instance()->atomic_get_mds_hdl();
   std::atomic<MDS_DEST> &lgs_mds_dest =
-      LogAgent::instance().atomic_get_lgs_mds_dest();
+      LogAgent::instance()->atomic_get_lgs_mds_dest();
 
   TRACE_ENTER();
 
@@ -1358,9 +1358,9 @@ uint32_t lga_mds_msg_sync_send(lgsv_msg_t *i_msg,
lgsv_msg_t **o_msg,
 
****************************************************************************
**/
 uint32_t lga_mds_msg_async_send(lgsv_msg_t *i_msg, uint32_t prio) {
   NCSMDS_INFO mds_info;
-  std::atomic<MDS_HDL> &mds_hdl =
LogAgent::instance().atomic_get_mds_hdl();
+  std::atomic<MDS_HDL> &mds_hdl =
LogAgent::instance()->atomic_get_mds_hdl();
   std::atomic<MDS_DEST> &lgs_mds_dest =
-      LogAgent::instance().atomic_get_lgs_mds_dest();
+      LogAgent::instance()->atomic_get_lgs_mds_dest();
 
   TRACE_ENTER();
   osafassert(i_msg != nullptr);
diff --git a/src/log/agent/lga_state.cc b/src/log/agent/lga_state.cc
index 05c7a85ee..970fe27d2 100644
--- a/src/log/agent/lga_state.cc
+++ b/src/log/agent/lga_state.cc
@@ -97,7 +97,7 @@ static void *recovery2_thread(void *dummy) {
 
   // Recover all log clients. During this time, any call of LOG APIs
   // return TRY_AGAIN until the recovery is done.
-  LogAgent::instance().RecoverAllLogClients();
+  LogAgent::instance()->RecoverAllLogClients();
 
   // All clients are recovered (or removed). Or recovery is aborted
   // Change to not recovering state RecoveryState::kNormal
diff --git a/src/log/agent/lga_util.cc b/src/log/agent/lga_util.cc
index f4d0f970a..0bb91aaee 100644
--- a/src/log/agent/lga_util.cc
+++ b/src/log/agent/lga_util.cc
@@ -40,17 +40,17 @@ static unsigned int lga_create() {
   unsigned int rc = NCSCC_RC_SUCCESS;
 
   // Create and init sel obj for mds sync
-  NCS_SEL_OBJ* lgs_sync_sel = LogAgent::instance().get_lgs_sync_sel();
+  NCS_SEL_OBJ* lgs_sync_sel = LogAgent::instance()->get_lgs_sync_sel();
   m_NCS_SEL_OBJ_CREATE(lgs_sync_sel);
   std::atomic<bool>& lgs_sync_wait =
-      LogAgent::instance().atomic_get_lgs_sync_wait();
+      LogAgent::instance()->atomic_get_lgs_sync_wait();
   lgs_sync_wait = true;
 
   // register with MDS
   if ((NCSCC_RC_SUCCESS != (rc = lga_mds_init()))) {
     rc = NCSCC_RC_FAILURE;
     // Delete the lga init instances
-    LogAgent::instance().RemoveAllLogClients();
+    LogAgent::instance()->RemoveAllLogClients();
     return rc;
   }
 
@@ -64,7 +64,7 @@ static unsigned int lga_create() {
 
   lgs_sync_wait = false;
   std::atomic<SaClmClusterChangesT>& clm_state =
-      LogAgent::instance().atomic_get_clm_node_state();
+      LogAgent::instance()->atomic_get_clm_node_state();
   clm_state = SA_CLM_NODE_JOINED;
 
   // No longer needed
@@ -82,7 +82,7 @@ static unsigned int lga_create() {
 unsigned int lga_startup() {
   unsigned int rc = NCSCC_RC_SUCCESS;
   ScopeLock lock(init_lock);
-  std::atomic<MDS_HDL>& mds_hdl =
LogAgent::instance().atomic_get_mds_hdl();
+  std::atomic<MDS_HDL>& mds_hdl =
LogAgent::instance()->atomic_get_mds_hdl();
 
   TRACE_ENTER();
 
-- 
2.19.2


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to