Re: [devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-01-29 Thread Tran Thuan
Hi Thang,

Some comments inline with [Thuan]

Best Regards,
ThuanTr

-Original Message-
From: thang.d.nguyen  
Sent: Tuesday, January 29, 2019 4:53 AM
To: gary@dektech.com.au; minh.c...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [devel] [PATCH 1/1] clmd: not send sync respond to client if node
down [#3004]

clmd will not send a sync response to a client if the node that the client
resides on is down. This avoids a timeout when clmd sends via MDS.
---
 src/clm/clmd/clms_cb.h|  5 
 src/clm/clmd/clms_evt.cc  | 35 ++-
 src/clm/clmd/clms_evt.h   |  1 +
 src/clm/clmd/clms_main.cc |  4 
 src/clm/clmd/clms_mds.cc  | 61
+++
 5 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index
4d7fdc7..6999761 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
   *node_down_list_head; /*NODE_DOWN record - Fix when active node goes
down
  */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Node down list - Updated by MDS thread
+  std::list mds_node_down_list;

[Thuan]: The element is simple and small, so I suggest using std::set instead
of std::list. That avoids new/delete, and a set also prevents duplicate
elements.

   bool is_impl_set;
   bool nid_started; /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
@@ -245,6 +248,8 @@ typedef struct clms_cb_t {
 
   /* Mutex protecting shared data used by the scale-out functionality */
   pthread_mutex_t scale_out_data_mutex;
+  /* Mutex protecting shared data used by the delete/add node-id */
+  pthread_mutex_t node_down_list_mutex;
   /* Number of occupied indices in the vectors pending_nodes[] and
* pending_node_ids[] */
   size_t no_of_pending_nodes;
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index
c2b83c2..08d4acd 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -17,7 +17,6 @@
  *
  */
 
-#include "osaf/configmake.h"
 #include 
 #include 
 #include 
@@ -31,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include "osaf/configmake.h"
 #include "base/logtrace.h"
 #include "base/ncsgl_defs.h"
 #include "base/osaf_utility.h"
@@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {  }
 
 /**
+ * Return true if mds node down exist
+ * @param node id
+ *
+ * @return bool
+ */
+bool clms_is_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  bool found = false;
+  std::list::iterator it;
+  osaf_mutex_lock_ordie(_cb->node_down_list_mutex);
+
+  for (it = clms_cb->mds_node_down_list.begin();
+it != clms_cb->mds_node_down_list.end(); ++it) {
+if (*(*it) == node_id) {
+  found = true;
+  break;
+}
+  }
+
+  osaf_mutex_unlock_ordie(_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+  return found;
+}
+
+/**
  * Handle a initialize message
  * @param cb
  * @param evt
@@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {
   if (client != nullptr)
 msg.info.api_resp_info.param.client_id = client->client_id;
 
+  if (clms_is_node_down(node_id) == true) {
+LOG_NO("node_id = %d already down, no need sending sync respond",
node_id);
+if (client != nullptr) clms_client_delete(client->client_id);
+return (uint32_t)ais_rc;
+  }
+
[Thuan] Why not move this block before clms_client_new()? Then there is no
need to call clms_client_delete().

   rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt,
  MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA);
   if (rc != NCSCC_RC_SUCCESS) {
diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h index
1005456..ef35cbc 100644
--- a/src/clm/clmd/clms_evt.h
+++ b/src/clm/clmd/clms_evt.h
@@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb,
CLMS_CLUSTER_NODE *op_node,
 CLMS_TRACK_INFO *trkrec);  extern uint32_t
clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest);  extern void
clms_remove_node_down_rec(SaClmNodeIdT node_id);
+extern bool clms_is_node_down(SaClmNodeIdT node_id);
 extern uint32_t clms_node_add(CLMS_CLUSTER_NODE *node, int i);  extern void
clms_clmresp_error_timeout(CLMS_CB *cb, CLMS_CLUSTER_NODE *node);  extern
bool clms_clma_entry_valid(CLMS_CB *cb, MDS_DEST mds_dest); diff --git
a/src/clm/clmd/clms_main.cc b/src/clm/clmd/clms_main.cc index
ad6e12e..e2c4f21 100644
--- a/src/clm/clmd/clms_main.cc
+++ b/src/clm/clmd/clms_main.cc
@@ -245,6 +245,10 @@ uint32_t clms_cb_init(CLMS_CB *clms_cb) {
   if (pthread_mutex_init(_cb->scale_out_data_mutex, nullptr) != 0) {
 return NCSCC_RC_FAILURE;
   }
+  if (pthread_mutex_init(_cb->node_down_list_mutex, nullptr) != 0) {
+return NCSCC_RC_FAILURE;
+  }
+
   

[devel] [PATCH 0/1] Review Request for clmd: not send sync respond to client if node down [#3004]

2019-01-29 Thread thang.d.nguyen
Summary: clmd: not send sync respond to client if node down [#3004]
Review request for Ticket(s): 3004
Peer Reviewer(s): Gary, Minh
Pull request to: Gary, Minh
Affected branch(es): develop
Development branch: ticket-3004
Base revision: d5e1fd5be4d554069f62745b645b996abdb0356b
Personal repository: git://git.code.sf.net/u/thangng/review


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-

revision 8b43a05587e37e4d9d258d12307462f7aaab6195
Author: thang.d.nguyen 
Date:   Tue, 29 Jan 2019 04:32:25 +0700

clmd: not send sync respond to client
if node down [#3004]

clmd will not send a sync response to a client if the node
that the client resides on is down. This avoids a timeout
when clmd sends via MDS.



Complete diffstat:
--
 src/clm/clmd/clms_cb.h|  5 
 src/clm/clmd/clms_evt.cc  | 35 ++-
 src/clm/clmd/clms_evt.h   |  1 +
 src/clm/clmd/clms_main.cc |  4 
 src/clm/clmd/clms_mds.cc  | 61 +++
 5 files changed, 105 insertions(+), 1 deletion(-)


Testing Commands:
-
N/A.

Testing, Expected Results:
--
N/A.

Conditions of Submission:
-
Acked from reviewer.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content in a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.gitconfig file (i.e. user.name, user.email etc)

___ Your computer has a badly configured date and time, confusing the
threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 1/1] clmd: not send sync respond to client if node down [#3004]

2019-01-29 Thread thang.d.nguyen
clmd will not send a sync response to a client if the node
that the client resides on is down. This avoids a timeout
when clmd sends via MDS.
---
 src/clm/clmd/clms_cb.h|  5 
 src/clm/clmd/clms_evt.cc  | 35 ++-
 src/clm/clmd/clms_evt.h   |  1 +
 src/clm/clmd/clms_main.cc |  4 
 src/clm/clmd/clms_mds.cc  | 61 +++
 5 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h
index 4d7fdc7..6999761 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
   *node_down_list_head; /*NODE_DOWN record - Fix when active node goes down
  */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Node down list - Updated by MDS thread
+  std::list mds_node_down_list;
   bool is_impl_set;
   bool nid_started; /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
@@ -245,6 +248,8 @@ typedef struct clms_cb_t {
 
   /* Mutex protecting shared data used by the scale-out functionality */
   pthread_mutex_t scale_out_data_mutex;
+  /* Mutex protecting shared data used by the delete/add node-id */
+  pthread_mutex_t node_down_list_mutex;
   /* Number of occupied indices in the vectors pending_nodes[] and
* pending_node_ids[] */
   size_t no_of_pending_nodes;
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc
index c2b83c2..08d4acd 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -17,7 +17,6 @@
  *
  */
 
-#include "osaf/configmake.h"
 #include 
 #include 
 #include 
@@ -31,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include "osaf/configmake.h"
 #include "base/logtrace.h"
 #include "base/ncsgl_defs.h"
 #include "base/osaf_utility.h"
@@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb, 
CLMSV_CLMS_EVT *evt) {
 }
 
 /**
+ * Return true if mds node down exist
+ * @param node id
+ *
+ * @return bool
+ */
+bool clms_is_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  bool found = false;
+  std::list::iterator it;
+  osaf_mutex_lock_ordie(_cb->node_down_list_mutex);
+
+  for (it = clms_cb->mds_node_down_list.begin();
+it != clms_cb->mds_node_down_list.end(); ++it) {
+if (*(*it) == node_id) {
+  found = true;
+  break;
+}
+  }
+
+  osaf_mutex_unlock_ordie(_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+  return found;
+}
+
+/**
  * Handle a initialize message
  * @param cb
  * @param evt
@@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb, 
CLMSV_CLMS_EVT *evt) {
   if (client != nullptr)
 msg.info.api_resp_info.param.client_id = client->client_id;
 
+  if (clms_is_node_down(node_id) == true) {
+LOG_NO("node_id = %d already down, no need sending sync respond", node_id);
+if (client != nullptr) clms_client_delete(client->client_id);
+return (uint32_t)ais_rc;
+  }
+
   rc = clms_mds_msg_send(cb, , >fr_dest, >mds_ctxt,
  MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA);
   if (rc != NCSCC_RC_SUCCESS) {
diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h
index 1005456..ef35cbc 100644
--- a/src/clm/clmd/clms_evt.h
+++ b/src/clm/clmd/clms_evt.h
@@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb, 
CLMS_CLUSTER_NODE *op_node,
 CLMS_TRACK_INFO *trkrec);
 extern uint32_t clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest);
 extern void clms_remove_node_down_rec(SaClmNodeIdT node_id);
+extern bool clms_is_node_down(SaClmNodeIdT node_id);
 extern uint32_t clms_node_add(CLMS_CLUSTER_NODE *node, int i);
 extern void clms_clmresp_error_timeout(CLMS_CB *cb, CLMS_CLUSTER_NODE *node);
 extern bool clms_clma_entry_valid(CLMS_CB *cb, MDS_DEST mds_dest);
diff --git a/src/clm/clmd/clms_main.cc b/src/clm/clmd/clms_main.cc
index ad6e12e..e2c4f21 100644
--- a/src/clm/clmd/clms_main.cc
+++ b/src/clm/clmd/clms_main.cc
@@ -245,6 +245,10 @@ uint32_t clms_cb_init(CLMS_CB *clms_cb) {
   if (pthread_mutex_init(_cb->scale_out_data_mutex, nullptr) != 0) {
 return NCSCC_RC_FAILURE;
   }
+  if (pthread_mutex_init(_cb->node_down_list_mutex, nullptr) != 0) {
+return NCSCC_RC_FAILURE;
+  }
+
   clms_cb->no_of_pending_nodes = 0;
   clms_cb->no_of_inprogress_nodes = 0;
   for (int i = 0; i != (MAX_PENDING_NODES + 1); ++i) {
diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc
index 58552cc..a9e004b 100644
--- a/src/clm/clmd/clms_mds.cc
+++ b/src/clm/clmd/clms_mds.cc
@@ -18,10 +18,13 @@
 
 #include 
 #include 
+#include 
+#include 
 #include "base/logtrace.h"
 #include "base/ncsencdec_pub.h"
 #include "clm/clmd/clms.h"
 #include "clm/common/clmsv_enc_dec.h"
+#include "base/osaf_utility.h"
 
 #define CLMS_SVC_PVT_SUBPART_VERSION 1
 #define 

Re: [devel] [PATCH 1/1] osaf: do quick local node reboot when split network [#3001]

2019-01-29 Thread Hans Nordebäck
Hi Vu, see my comment below/Hans

On 1/28/19 10:59, Vu Minh Nguyen wrote:
> Hi Hans,
>
> Thanks for your comments. See my comment inline. Thanks
>
> Regards, Vu
>
>> -Original Message-
>> From: Hans Nordebäck 
>> Sent: Monday, January 28, 2019 4:37 PM
>> To: Hans Nordebäck ; Vu Minh Nguyen
>> ; Gary Lee ;
>> Minh Hon Chau 
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: Re: [devel] [PATCH 1/1] osaf: do quick local node reboot when
> split
>> network [#3001]
>>
>> Hi Vu,
>> See one more comment below/Thanks HansN
>>
>> -Original Message-
>> From: Hans Nordebäck 
>> Sent: den 28 januari 2019 10:15
>> To: Vu Minh Nguyen ; Gary Lee
>> ; Minh Hon Chau 
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: Re: [devel] [PATCH 1/1] osaf: do quick local node reboot when
> split
>> network [#3001]
>>
>> Hi Vu, ack review only. Two comments below/Thanks HansN
>>
>> On 1/25/19 12:34, Vu Minh Nguyen wrote:
>>> ---
>>>scripts/opensaf_reboot   | 33 +++--
>>>src/amf/amfd/ndproc.cc   |  4 ++--
>>>src/base/ncssysf_def.h   |  6 ++
>>>src/base/sysf_def.c  | 10 ++
>>>src/fm/fmd/fm_main.cc|  6 +++---
>>>src/fm/fmd/fm_rda.cc |  5 ++---
>>>src/rde/rded/rde_main.cc |  6 ++
>>>7 files changed, 52 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot index
>>> 727272e1d..2f7a7daeb 100644
>>> --- a/scripts/opensaf_reboot
>>> +++ b/scripts/opensaf_reboot
>>> @@ -31,7 +31,7 @@ export
>> LD_LIBRARY_PATH=$libdir:$LD_LIBRARY_PATH
>>># Node fencing: OpenSAF cannot reboot a node when there's no CLM
>> node to
>>># PLM EE mapping in the information model. In such cases rebooting
>>> would be done -# through proprietary mechanisms, i.e. not through PLM.
>>> Node_id is (the only
>>> +# through proprietary mechanisms, i.e. not through PLM. Node_id is
>>> +(the only
>>># entity) at the disposal of such a mechanism.
>>>
>>>if [ -f "$pkgsysconfdir/fmd.conf" ]; then @@ -81,7 +81,6 @@
>>> opensaf_reboot_with_remote_fencing()
>>>#if plm exists in the system,then the reboot is performed using the
>> eename.
>>>opensaf_reboot_with_plm()
>>>{
>>> -
>>>immadm -o 7 $ee_name
>>>retval=$?
>>>if [ $retval != 0 ]; then
>>> @@ -96,12 +95,29 @@ opensaf_reboot_with_plm()
>>>logger -t "opensaf_reboot" "abrupt restart failed for $ee_name: unable
> to
>> restart remote node"
>>>exit 1
>>>fi
>>> -fi
>>> +fi
>>>fi
>>>#Note: Operation Id SA_PLM_ADMIN_RESTART=7
>>>#In the example the $ee_name would expand to (for eg:-)
>> safEE=my_linux_os,safHE=64bitmulticore,safDomain=my_domain
>>>}
>>>
>>> +# Force local node reboot as fast as possible
>>> +quick_local_node_reboot()
>>> +{
>>> +logger -t "opensaf_reboot" "Do quick local node reboot"
>> [HansN] perhaps reuse the same logic as in sysf_def.c, i.e. use the sysrq
> as
>> fallback and use a short timeout
> [Vu]
> Forcing a node reboot by writing to /proc/sysrq-trigger is not allowed in
> containers such as LXC (as the container is immutable), so I provided two
> more alternatives below in case the first attempt fails.
[HansN] preferable to only run the sysrq if the reboot fails, i.e. the 
same logic as in sysf_def.c, see the SIGALRM and supervision_time.
>>> +
>>> +$icmd /bin/echo -n 'b' 2> /dev/null > /proc/sysrq-trigger
>> [HansN] if not run as root, i.e. icmd is sudo, I think you need to use
>> cmd: /bin/echo -n 'b' | $icmd tee /proc/sysrq-trigger , please check
>> [HansN] or $icmd  /bin/sh -c "/bin/echo -n 'b' 2> /dev/null > /proc/sysrq-
>> trigger"
> [Vu] Thanks for your suggestion. Will update accordingly.
>>> +ret_code=$?
>>> +
>>> +if [ $ret_code != 0 ] && [ -x /bin/systemctl ]; then
>>> +$icmd /bin/systemctl --force --force reboot
>>> +ret_code=$?
>>> +fi
>>> +
>>> +if [ $ret_code != 0 ]; then
>>> +$icmd /sbin/reboot -f
>>> +fi
>>> +}
>>>
>>>if ! test -f "$NODE_ID_FILE"; then
>>>logger -t "opensaf_reboot" "$NODE_ID_FILE doesnt exists,reboot failed
> "
>>> @@ -112,8 +128,13 @@ temp_node_id=`cat "$NODE_ID_FILE"`
>>>temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e
>> 's:^:0x:'`
>>>self_node_id=`printf "%d" $temp_node_id`
>>>
>>> -# If clm cluster reboot requested argument one and two are set but
>>> not used, argument 3 is set to 1, "safe reboot" request -if [
>>> "$safe_reboot" = 1 ]; then
>>> +# If no argument is provided, forcing node reboot immediately without
>>> +log # flushing, process terminating, disk un-mounting.
>>> +# If clm cluster reboot requested argument one and two are set but
>>> +not used, # argument 3 is set to 1, "safe reboot" request.
>>> +if [ "$#" = 0 ]; then
>>> +quick_local_node_reboot
>>> +elif [ "$safe_reboot" = 1 ]; then
>>>opensaf_safe_reboot
>>>else
>>># A node ID of zero(0) means an order to reboot the local node @@
>>> -165,7 +186,7 @@ else
>>>logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it
> 

Re: [devel] [PATCH 1/1] log: fix coredump at log agent application [#3002]

2019-01-29 Thread Canh Van Truong
Hi aVu

Ack. Legacy test passed

Thanks
Canh

-Original Message-
From: Vu Minh Nguyen  
Sent: Thursday, January 24, 2019 5:15 PM
To: lennart.l...@ericsson.com; canh.v.tru...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen

Subject: [PATCH 1/1] log: fix coredump at log agent application [#3002]

There is a race in using the static singleton class object between the MDS
thread and the application thread that calls the exit() API.

This patch still uses a singleton but makes the instance a shared_ptr
to ensure the resource is not destroyed while it is being used.
---
 src/log/agent/lga_agent.cc |  7 ++-
 src/log/agent/lga_agent.h  | 21 ++---
 src/log/agent/lga_api.cc   | 20 ++--
 src/log/agent/lga_mds.cc   | 32 
 src/log/agent/lga_state.cc |  2 +-
 src/log/agent/lga_util.cc  | 10 +-
 6 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc
index bf9caa935..1000bb3fd 100644
--- a/src/log/agent/lga_agent.cc
+++ b/src/log/agent/lga_agent.cc
@@ -108,7 +108,7 @@ ScopeData::~ScopeData() {
 recovery2_unlock(is_locked_);
   }
 
-  LogAgent::instance().EnterCriticalSection();
+  LogAgent::instance()->EnterCriticalSection();
   LogClient* client = client_data_->client;
   bool* is_updated = client_data_->is_updated;
   RefCounter::Degree client_degree = client_data_->value;
@@ -128,15 +128,12 @@ ScopeData::~ScopeData() {
   stream->RestoreRefCounter(caller, stream_degree, *stream_is_updated);
 }
   }
-  LogAgent::instance().LeaveCriticalSection();
+  LogAgent::instance()->LeaveCriticalSection();
 }
 
 
//--

 // LogAgent
 
//--

-// Singleton represents LOG agent.
-LogAgent LogAgent::me_;
-
 LogAgent::LogAgent() {
   client_list_.clear();
   // There is high risk of calling one @LogClient method
diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h
index 0049da054..0c32ea33b 100644
--- a/src/log/agent/lga_agent.h
+++ b/src/log/agent/lga_agent.h
@@ -19,12 +19,14 @@
 #define SRC_LOG_AGENT_LGA_AGENT_H_
 
 #include 
+#include 
 #include 
 #include 
-#include "mds/mds_papi.h"
 #include 
-#include "base/macros.h"
 #include 
+
+#include "base/macros.h"
+#include "mds/mds_papi.h"
 #include "log/common/lgsv_msg.h"
 #include "log/common/lgsv_defs.h"
 #include "log/agent/lga_common.h"
@@ -80,7 +82,15 @@ class LogClient;
 //<
 class LogAgent {
  public:
-  static LogAgent& instance() { return me_; }
+  static std::shared_ptr& instance() {
+// Ensure this static singleton instance is only destroyed when
+// no one is using it. Note that: static data can be destroyed
+// in log application thread which calls exit() libc. So, introducing
+// shared_ptr<> to avoid races among threads.
+static std::shared_ptr me =
+std::shared_ptr{new LogAgent()};
+return me;
+  }
 
   //<
   // C++ APIs wrapper for corresponding C LOG Agent APIs
@@ -158,11 +168,11 @@ class LogAgent {
   // Introduce these public interface for MDS thread use.
   void EnterCriticalSection();
   void LeaveCriticalSection();
+  ~LogAgent() {}
 
  private:
   // Not allow to create @LogAgent object, except the singleton object
@me_.
   LogAgent();
-  ~LogAgent() {}
 
   // True if there is no Active SC, otherwise false
   bool no_active_log_server() const;
@@ -274,9 +284,6 @@ class LogAgent {
   // LGS LGA sync params
   NCS_SEL_OBJ lgs_sync_sel_;
 
-  // Singleton represents LOG Agent in LOG application process
-  static LogAgent me_;
-
   DELETE_COPY_AND_MOVE_OPERATORS(LogAgent);
 };
 
diff --git a/src/log/agent/lga_api.cc b/src/log/agent/lga_api.cc
index 37a1cc650..97dbf1d31 100644
--- a/src/log/agent/lga_api.cc
+++ b/src/log/agent/lga_api.cc
@@ -39,7 +39,7 @@
 SaAisErrorT saLogInitialize(SaLogHandleT* logHandle,
 const SaLogCallbacksT* callbacks,
 SaVersionT* version) {
-  return LogAgent::instance().saLogInitialize(logHandle, callbacks,
version);
+  return LogAgent::instance()->saLogInitialize(logHandle, callbacks,
version);
 }
 
 
/***
@@ -69,7 +69,7 @@ SaAisErrorT saLogInitialize(SaLogHandleT* logHandle,
 
***/
 SaAisErrorT saLogSelectionObjectGet(SaLogHandleT logHandle,
 SaSelectionObjectT* selectionObject) {
-  return LogAgent::instance().saLogSelectionObjectGet(logHandle,
+  return LogAgent::instance()->saLogSelectionObjectGet(logHandle,
   selectionObject);
 }
 
@@ -96,7 +96,7 @@ SaAisErrorT saLogSelectionObjectGet(SaLogHandleT
logHandle,
 
***/