Ack

Regards, Vu.

>-----Original Message-----
>From: Lennart Lund [mailto:[email protected]]
>Sent: Monday, May 09, 2016 5:44 PM
>To: [email protected]; Vu Minh Nguyen
>Cc: [email protected]
>Subject: [PATCH 1 of 1] log: Deadlock in log agent makes client hang
[#1805]
>
> osaf/libs/agents/saf/lga/lga_api.c   |  12 +++++
> osaf/libs/agents/saf/lga/lga_mds.c   |   9 ++-
> osaf/libs/agents/saf/lga/lga_state.c |  80
++++++++++-------------------------
> osaf/libs/agents/saf/lga/lga_state.h |   4 -
> osaf/libs/agents/saf/lga/lga_util.c  |  29 ++++++++++++-
> 5 files changed, 69 insertions(+), 65 deletions(-)
>
>
>Remove deadlock when two mutexes conflict in MDS and log agent.
>Make sure that cb_lock is not taken when calling any MDS API
>Some cleaning of code e.g. removing "dead" code and moving functions from
>lga_state.c
>that are not related to lga_state.c
>
>diff --git a/osaf/libs/agents/saf/lga/lga_api.c
>b/osaf/libs/agents/saf/lga/lga_api.c
>--- a/osaf/libs/agents/saf/lga/lga_api.c
>+++ b/osaf/libs/agents/saf/lga/lga_api.c
>@@ -44,6 +44,18 @@ lga_cb_t lga_cb = {
>       .lgs_state = LGS_START
> };
>
>+static bool is_lgs_state(lgs_state_t state)
>+{
>+      bool rc = false;
>+
>+      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>+      if (state == lga_cb.lgs_state)
>+              rc = true;
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>+
>+      return rc;
>+}
>+
> static void populate_open_params(lgsv_stream_open_req_t *open_param,
>                                const SaNameT *logStreamName,
>                                lga_client_hdl_rec_t *hdl_rec,
>diff --git a/osaf/libs/agents/saf/lga/lga_mds.c
>b/osaf/libs/agents/saf/lga/lga_mds.c
>--- a/osaf/libs/agents/saf/lga/lga_mds.c
>+++ b/osaf/libs/agents/saf/lga/lga_mds.c
>@@ -16,6 +16,7 @@
>  */
>
> #include <stdlib.h>
>+#include <saf_error.h>
> #include "lga.h"
> #include "lga_state.h"
>
>@@ -446,8 +447,9 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
>                       {
>                               lga_client_hdl_rec_t *lga_hdl_rec;
>
>-                              TRACE_2("LGSV_LGS_WRITE_LOG_CBK: inv =
>%d, error = %d",
>-                                      (int)lgsv_msg->info.cbk_info.inv,
>(int)lgsv_msg->info.cbk_info.write_cbk.error);
>+                              TRACE_2("LGSV_LGS_WRITE_LOG_CBK: inv_id =
>%d, cbk_error %s",
>+                                      (int)lgsv_msg->info.cbk_info.inv,
>+                                      saf_error((int)lgsv_msg-
>>info.cbk_info.write_cbk.error));
>
>                       /** Create the chan hdl record here before
>                          ** queing this message onto the priority queue
>@@ -930,7 +932,8 @@ static uint32_t lga_mds_dec(struct ncsmd
>                       TRACE_2("LGSV_LGS_CBK_MSG");
>                       switch (msg->info.cbk_info.type) {
>                       case LGSV_WRITE_LOG_CALLBACK_IND:
>-                              TRACE_2("decode writelog message");
>+                              TRACE_2("decode writelog message,
>lgs_client_id=%d",
>+                                      msg->info.cbk_info.lgs_client_id);
>                               total_bytes += lga_dec_write_cbk_msg(uba,
>msg);
>                               break;
>                       default:
>diff --git a/osaf/libs/agents/saf/lga/lga_state.c
>b/osaf/libs/agents/saf/lga/lga_state.c
>--- a/osaf/libs/agents/saf/lga/lga_state.c
>+++ b/osaf/libs/agents/saf/lga/lga_state.c
>@@ -28,13 +28,6 @@
>  * Common data
>  */
>
>-/* And critical sections. Clients APIs must not pass recovery 2 state
check if
>- * recovery 2 thread is started but state has not yet been changed by the
>- * thread. Also the thread must not start recovery if an API is executing
and
>- * has passed the recovery 2 check
>- */
>-static pthread_mutex_t lga_recov2_lock = PTHREAD_MUTEX_INITIALIZER;
>-
> /* Selection object for terminating recovery thread for state 2 (after
timeout)
>  * NOTE: Only for recovery2_thread
>  */
>@@ -228,7 +221,10 @@ static int initialize_one_client(lga_cli
>
>       TRACE_ENTER();
>
>-      if (p_client->initialized_flag == true) {
>+      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>+      bool initialized_flag = p_client->initialized_flag;
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>+      if (initialized_flag == true) {
>               /* The client is already initialized */
>               rc = 1;
>               goto done;
>@@ -242,11 +238,13 @@ static int initialize_one_client(lga_cli
>       /* Restore the client Id with the one returned by the LGS and
>        * set the initialized flag
>        */
>+      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>       p_client->lgs_client_id = client_id;
>       p_client->initialized_flag = true;
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>
> done:
>-      TRACE_LEAVE2("rc = %d", rc);
>+      TRACE_LEAVE2("rc = %d, client_id = %d", rc, client_id);
>       return rc;
> }
>
>@@ -267,7 +265,10 @@ static int recover_one_stream(lga_log_st
>
>       TRACE_ENTER();
>
>-      if (p_stream->recovered_flag == true) {
>+      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>+      bool recovered_flag = p_stream->recovered_flag;
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>+      if (recovered_flag == true) {
>               /* The stream is already recovered */
>               rc = 1;
>               goto done;
>@@ -282,8 +283,10 @@ static int recover_one_stream(lga_log_st
>       /* Restore the stream Id with the Id returned by the LGS and
>        * set the recovered flag
>        */
>+      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>       p_stream->lgs_log_stream_id = stream_id;
>       p_stream->recovered_flag = true;
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>
> done:
>       TRACE_LEAVE2("rc = %d", rc);
>@@ -588,9 +591,10 @@ int lga_recover_one_client(lga_client_hd
>
>       TRACE_ENTER();
>
>-      /* Synchronize b/w client/mds thread */
>       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>-      if (p_client->recovered_flag == true) {
>+      bool recovered_flag = p_client->recovered_flag;
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>+      if (recovered_flag == true) {
>               /* Client is already recovered */
>               TRACE("\t Already recovered");
>               goto done;
>@@ -620,35 +624,20 @@ int lga_recover_one_client(lga_client_hd
>               p_stream = p_stream->next;
>       }
>
>+      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>       p_client->recovered_flag = true;
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> done:
>-      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>       TRACE_LEAVE();
>       return rc;
> }
>
>-/**
>- * Free the memory allocated for one client handle with mutext protection.
>- *
>- * @param p_client_hdl
>- * @return void
>+/* Clients APIs must not pass recovery 2 state check if
>+ * recovery 2 thread is started but state has not yet been changed by the
>+ * thread. Also the thread must not start recovery if an API is executing
and
>+ * has passed the recovery 2 check
>  */
>-void lga_free_client_hdl(lga_client_hdl_rec_t **p_client_hdl)
>-{
>-      lga_client_hdl_rec_t *client_hdl = NULL;
>-
>-      /* Synchronize b/w client & mds thread */
>-      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>-
>-      client_hdl = *p_client_hdl;
>-      if (client_hdl == NULL) goto done;
>-
>-      free(client_hdl);
>-      client_hdl = NULL;
>-
>-done:
>-      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>-}
>+static pthread_mutex_t lga_recov2_lock = PTHREAD_MUTEX_INITIALIZER;
>
> void lga_recovery2_lock(void)
> {
>@@ -660,29 +649,6 @@ void lga_recovery2_unlock(void)
>       osaf_mutex_unlock_ordie(&lga_recov2_lock);
> }
>
>-//>
>-// Handling lga_cb.lgs_state and lga_state in thread safe
>-//<
>-
>-void set_lgs_state(lgs_state_t state)
>-{
>-      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>-      lga_cb.lgs_state = state;
>-      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>-}
>-
>-bool is_lgs_state(lgs_state_t state)
>-{
>-      bool rc = false;
>-
>-      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>-      if (state == lga_cb.lgs_state)
>-              rc = true;
>-      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>-
>-      return rc;
>-}
>-
> typedef struct {
>       lga_state_t state;
>       pthread_mutex_t lock;
>diff --git a/osaf/libs/agents/saf/lga/lga_state.h
>b/osaf/libs/agents/saf/lga/lga_state.h
>--- a/osaf/libs/agents/saf/lga/lga_state.h
>+++ b/osaf/libs/agents/saf/lga/lga_state.h
>@@ -30,11 +30,8 @@ void lga_serv_recov1state_set(void);
> int lga_recover_one_client(lga_client_hdl_rec_t *p_client);
> void lga_recovery2_lock(void);
> void lga_recovery2_unlock(void);
>-void lga_free_client_hdl(lga_client_hdl_rec_t **p_client_hdl);
>
>-void set_lgs_state(lgs_state_t state);
> void set_lga_state(lga_state_t state);
>-bool is_lgs_state(lgs_state_t state);
> bool is_lga_state(lga_state_t state);
>
>
>@@ -48,7 +45,6 @@ bool is_lga_state(lga_state_t state);
> static inline void recovery2_lock(bool *is_locked)
> {
>       if (is_lga_state(LGA_RECOVERY1) || is_lga_state(LGA_RECOVERY2)) {
>-              /* TBD: Is this optimization really needed? */
>               lga_recovery2_lock();
>               *is_locked = true;
>       }
>diff --git a/osaf/libs/agents/saf/lga/lga_util.c
>b/osaf/libs/agents/saf/lga/lga_util.c
>--- a/osaf/libs/agents/saf/lga/lga_util.c
>+++ b/osaf/libs/agents/saf/lga/lga_util.c
>@@ -273,6 +273,29 @@ static uint32_t lga_hdl_cbk_dispatch_blo
> }
>
> /**
>+ * Free the memory allocated for one client handle with mutex protection.
>+ *
>+ * @param p_client_hdl
>+ * @return void
>+ */
>+static void lga_free_client_hdl(lga_client_hdl_rec_t **p_client_hdl)
>+{
>+      lga_client_hdl_rec_t *client_hdl = NULL;
>+
>+      /* Synchronize b/w client & mds thread */
>+      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>+
>+      client_hdl = *p_client_hdl;
>+      if (client_hdl == NULL) goto done;
>+
>+      free(client_hdl);
>+      client_hdl = NULL;
>+
>+done:
>+      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>+}
>+
>+/**
>  * Initiate the agent when first used.
>  * Start NCS service
>  * Register with MDS
>@@ -413,7 +436,11 @@ void lga_msg_destroy(lgsv_msg_t *msg)
>
>   Return Values : LGA_CLIENT_HDL_REC * or NULL
>
>-  Notes         : None
>+  Notes         : The lga_cb in-parameter is most likely pointing to the
global
>+ *                lga_cb structure and that is not thread safe. If that is
the
>+ *                case the lga_cb data must be protected by a mutex before
>+ *                calling this function.
>+ *
>
>******************************************************************
>************/
> lga_client_hdl_rec_t *lga_find_hdl_rec_by_regid(lga_cb_t *lga_cb, uint32_t
>client_id)
> {


------------------------------------------------------------------------------
Mobile security can be enabling, not merely restricting. Employees who
bring their own devices (BYOD) to work are irked by the imposition of MDM
restrictions. Mobile Device Manager Plus allows you to control only the
apps on BYO-devices by containerizing them, leaving personal data untouched!
https://ad.doubleclick.net/ddm/clk/304595813;131938128;j
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to