Hi Lennart, Please see my comment inline.
Regards, Vu Quoting Lennart Lund <lennart.l...@ericsson.com>: > Hi Vu > > Ack with comments > > See my comments inline [Lennart]: > > Thanks > Lennart > >> -----Original Message----- >> From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au] >> Sent: den 23 mars 2016 08:16 >> To: mathi.naic...@oracle.com; Lennart Lund >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: [PATCH 1 of 1] log: miss mutex protection for common resource in >> log agent [#1705] >> >> osaf/libs/agents/saf/lga/lga_state.c | 41 >> +++++++++++++++++++++++++++++++---- >> osaf/libs/agents/saf/lga/lga_state.h | 1 + >> osaf/libs/agents/saf/lga/lga_util.c | 30 +++++++++++-------------- >> 3 files changed, 50 insertions(+), 22 deletions(-) >> >> >> There was an race condition between client thread and recovery thread >> accessing to common resource `client_list`. >> >> Add mutex protection. >> >> diff --git a/osaf/libs/agents/saf/lga/lga_state.c >> b/osaf/libs/agents/saf/lga/lga_state.c >> --- a/osaf/libs/agents/saf/lga/lga_state.c >> +++ b/osaf/libs/agents/saf/lga/lga_state.c >> @@ -355,8 +355,11 @@ static void *recovery2_thread(void *dumm >> /* We have been signaled to exit >> */ >> goto done; >> } >> + >> /* Recover clients one at a time */ >> rc = recover_one_client(p_client); >> + if (p_client == NULL) goto done; >> + > [Lennart] Why is this needed? This check is done in "while (p_client > != NULL) { " and p_client cannot become NULL until "p_client = > p_client->next; " is done as the last line in the while loop. > [Vu] It is my intention. I am afraid if the context is switched to client thread right after the recovery thread enters the while loop. If it is the case, p_client will be NULL after done recover_one_client(). >> TRACE("\t Client %d is recovered", p_client- >> >lgs_client_id); >> if (rc == -1) { >> TRACE("%s recover_one_client >> Fail Deleting cllient (id %d)", >> @@ -593,6 +596,15 @@ int recover_one_client(lga_client_hdl_re >> >> TRACE_ENTER(); >> >> + if (p_client == NULL) { >> + /** >> + * Probably the client_list has been deleted by >> client thread >> + * in context calling saLogFinalize(). So, give up >> recovering. >> + */ >> + rc = -1; >> + goto done; >> + } >> + >> /* This function may be called both in the recovery thread and >> in the >> * client thread. A possible scenario is that the recovery 2 >> thread >> * times out and calls this function in recovery mode 2 while >> there >> @@ -658,13 +670,32 @@ uint32_t delete_one_client( >> lga_client_hdl_rec_t *rm_node >> ) >> { >> - TRACE_ENTER2(); >> - uint32_t ncs_rc; >> + return lga_hdl_rec_del(list_head, rm_node); >> +} >> >> +/** >> + * Free the memory allocated for one client handle with mutext protection. >> + * >> + * @param p_client_hdl >> + * @return void >> + */ >> +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl) >> +{ >> + lga_client_hdl_rec_t *client_hdl; >> + >> + /** >> + * To prevent race condition b/w client thread and recovery >> thread >> + * accessing to common resource `client_list`. [#1705] >> + */ >> osaf_mutex_lock_ordie(&lga_recov_mutex); >> - ncs_rc = lga_hdl_rec_del(list_head, rm_node); >> + >> + client_hdl = *p_client_hdl; >> + if (client_hdl == NULL) goto done; >> + >> + free(client_hdl); >> + client_hdl = NULL; >> + >> +done: >> osaf_mutex_unlock_ordie(&lga_recov_mutex); >> >> - TRACE_LEAVE(); >> - return ncs_rc; >> } >> diff --git a/osaf/libs/agents/saf/lga/lga_state.h >> b/osaf/libs/agents/saf/lga/lga_state.h >> --- a/osaf/libs/agents/saf/lga/lga_state.h >> +++ b/osaf/libs/agents/saf/lga/lga_state.h >> @@ -32,6 +32,7 @@ uint32_t delete_one_client( >> lga_client_hdl_rec_t **list_head, >> lga_client_hdl_rec_t *rm_node >> ); >> +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl); >> >> #ifdef __cplusplus >> } >> diff --git a/osaf/libs/agents/saf/lga/lga_util.c >> b/osaf/libs/agents/saf/lga/lga_util.c >> --- a/osaf/libs/agents/saf/lga/lga_util.c >> +++ b/osaf/libs/agents/saf/lga/lga_util.c >> @@ -19,6 +19,7 @@ >> #include <syslog.h> >> #include "lga.h" >> #include "osaf_poll.h" >> +#include "lga_state.h" >> >> /* Variables used during startup/shutdown only */ >> static pthread_mutex_t lga_lock = PTHREAD_MUTEX_INITIALIZER; >> @@ -449,11 +450,9 @@ void lga_hdl_list_del(lga_client_hdl_rec >> /** clean up the channel records for this lga-client >> **/ >> lga_log_stream_hdl_rec_list_del(&client_hdl- >> >stream_list); >> - /** remove the association with hdl-mngr >> - **/ >> - free(client_hdl); >> - client_hdl = 0; >> + free_client_hdl(&client_hdl); >> } >> + >> TRACE_LEAVE(); >> } >> >> @@ -536,38 +535,35 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_ >> if (list_iter == rm_node) { >> *list_head = rm_node->next; >> >> - /** detach & release the IPC >> - **/ >> + /* detach & release the IPC */ >> m_NCS_IPC_DETACH(&rm_node->mbx, >> lga_clear_mbx, NULL); >> m_NCS_IPC_RELEASE(&rm_node->mbx, NULL); >> >> ncshm_give_hdl(rm_node->local_hdl); >> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, >> rm_node->local_hdl); >> - /** Free the channel records off this hdl >> - **/ >> + >> + /* Free the channel records off this hdl */ >> lga_log_stream_hdl_rec_list_del(&rm_node- >> >stream_list); >> + free_client_hdl(&rm_node); >> >> - /** free the hdl rec >> - **/ >> - free(rm_node); >> rc = NCSCC_RC_SUCCESS; >> goto out; >> - } else { /* find the rec */ >> + } else { >> + /* find the rec */ >> while (NULL != list_iter) { >> if (list_iter->next == rm_node) { >> list_iter->next = >> rm_node->next; >> >> - /** detach & release the IPC */ >> + /* detach & release >> the IPC */ >> >> m_NCS_IPC_DETACH(&rm_node->mbx, lga_clear_mbx, NULL); >> >> m_NCS_IPC_RELEASE(&rm_node->mbx, NULL); >> >> >> ncshm_give_hdl(rm_node->local_hdl); >> >> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, rm_node- >> >local_hdl); >> - /** Free the channel records off this lga_hdl */ >> + >> + /* Free the channel >> records off this lga_hdl */ >> >> lga_log_stream_hdl_rec_list_del(&rm_node->stream_list); >> - >> - /** free the hdl rec */ >> - free(rm_node); >> + >> free_client_hdl(&rm_node); >> >> rc = >> NCSCC_RC_SUCCESS; >> goto out; ------------------------------------------------------------------------------ Transform Data into Opportunity. Accelerate data analysis in your applications with Intel Data Analytics Acceleration Library. Click to learn more. http://pubads.g.doubleclick.net/gampad/clk?id=278785471&iu=/4140 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel