Hi Lennart, My V2 could has been replaced by Canh patch. It is #2670 which already got ack from you long time ago.
With #2870, it only does reset the LogStream pointer to nullptr in case of deleting the client which it belongs to. Regards, Vu > -----Original Message----- > From: Lennart Lund <lennart.l...@ericsson.com> > Sent: Thursday, June 7, 2018 4:15 PM > To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Canh Van Truong > <canh.v.tru...@dektech.com.au> > Cc: opensaf-devel@lists.sourceforge.net; Lennart Lund > <lennart.l...@ericsson.com> > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream > [#2870] > > Hi Vu, > > Ack, with comments. See also attached diff with comments and some > changes. > > In general the code has become rather messy. Too much logic is distributed > and handled "inline" in code which purpose is to handle other things and > these latest fixes adds to that. This makes it very hard to review and maintain > the code. Very big risk for mistakes, many places affected by a fix etc. If this > continue the code will degenerate and become impossible to maintain with > reasonable cost. > > Thanks > Lennart > > > -----Original Message----- > > From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au> > > Sent: den 6 juni 2018 10:27 > > To: Canh Van Truong <canh.v.tru...@dektech.com.au>; Lennart Lund > > <lennart.l...@ericsson.com> > > Cc: opensaf-devel@lists.sourceforge.net > > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream > > [#2870] > > > > Hi Canh, > > > > I forgot that patch. Just reviewed it. Thanks. > > > > Anyway, I will push the previous version for this ticket. > > > > Regards, Vu > > > > > -----Original Message----- > > > From: Canh Van Truong <canh.v.tru...@dektech.com.au> > > > Sent: Tuesday, June 5, 2018 6:20 PM > > > To: 'Vu Minh Nguyen' <vu.m.ngu...@dektech.com.au>; > > > lennart.l...@ericsson.com > > > Cc: opensaf-devel@lists.sourceforge.net > > > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream > > > [#2870] > > > > > > Hi a.Vu, > > > > > > Looking at patch in ticket #2670, we can use ref_counter to decide to > > remove > > > client in case recovery fail. That way is simple. > > > > > > Regards > > > Canh > > > > > > -----Original Message----- > > > From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au> > > > Sent: Tuesday, June 5, 2018 5:54 PM > > > To: lennart.l...@ericsson.com; canh.v.tru...@dektech.com.au > > > Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen > > > <vu.m.ngu...@dektech.com.au> > > > Subject: [PATCH 1/1] log: fix restore ref counter for deleted stream > > [#2870] > > > > > > In the methods LogAgent::saLogStreamClose() and > > > LogAgent::saLogWriteLogAsync(), > > > the client is deleted if failed to recover; however, the pointer to the > > log > > > stream of this client has not been reset. Therefore, when the destrustor > > of > > > ScopeData runs, the reference counter could be restored on deleted log > > > stream. > > > > > > Besides, there are possibilities of race condition b/w deleting the > > > failed-to-recovery client and using that deleted client in other thread. > > > > > > This patch introduces a new attribute to LogClient. When the client fails > > > to recover, that client is not removed at that time but is done at next > > call > > > of log handle initialize or finalize if the attribute is true. > > > --- > > > src/log/agent/lga_agent.cc | 87 > > > ++++++++++++++++++++++++++++++++++++++------- > > > src/log/agent/lga_agent.h | 5 +++ > > > src/log/agent/lga_client.cc | 2 +- > > > src/log/agent/lga_client.h | 7 ++++ > > > 4 files changed, 87 insertions(+), 14 deletions(-) > > > > > > diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc > > > index f33b5dc..831edfa 100644 > > > --- a/src/log/agent/lga_agent.cc > > > +++ b/src/log/agent/lga_agent.cc > > > @@ -255,6 +255,21 @@ void LogAgent::RemoveAllLogClients() { > > > } > > > } > > > > > > +void LogAgent::RemoveAllDeadLogClients() { > > > + TRACE_ENTER(); > > > + ScopeLock scopeLock(mutex_); > > > + auto it = client_list_.begin(); > > > + while (it != client_list_.end()) { > > > + if ((*it)->is_died() == true) { > > > + client_list_.erase(it); > > > + delete *it; > > > + it = client_list_.begin(); > > > + continue; > > > + } > > > + it++; > > > + } > > > +} > > > + > > > // Add one client @client to the list @client_list_ > > > // This method should be called in @saLogInitialize() context > > > void LogAgent::AddLogClient(LogClient* client) { > > > @@ -314,6 +329,14 @@ SaAisErrorT > > > LogAgent::saLogInitialize(SaLogHandleT* > > > logHandle, > > > > > > TRACE_ENTER(); > > > > > > + if (true) { > > > + ScopeLock critical_section(get_delete_obj_sync_mutex_); > > > + if (has_dead_clients == true) { > > > + RemoveAllDeadLogClients(); > > > + has_dead_clients = false; > > > + } > > > + } > > > + > > > // Verify parameters (log handle and that version is given) > > > if ((logHandle == nullptr) || (version == nullptr)) { > > > TRACE("version or handle FAILED"); > > > @@ -453,6 +476,11 @@ SaAisErrorT > LogAgent::saLogSelectionObjectGet( > > > return ais_rc; > > > } > > > > > > + if (client->is_died() == true) { > > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > + return ais_rc; > > > + } > > > + > > > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > > > // @client is being deleted. Don't touch @this client > > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > @@ -504,6 +532,11 @@ SaAisErrorT > > > LogAgent::saLogDispatch(SaLogHandleT > > > logHandle, > > > return ais_rc; > > > } > > > > > > + if (client->is_died() == true) { > > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > + return ais_rc; > > > + } > > > + > > > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > > > // @client is being deleted. DO NOT touch this @client > > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > @@ -584,6 +617,11 @@ SaAisErrorT > LogAgent::saLogFinalize(SaLogHandleT > > > logHandle) { > > > if (true) { > > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > > > > > + if (has_dead_clients == true) { > > > + RemoveAllDeadLogClients(); > > > + has_dead_clients = false; > > > + } > > > + > > > // Get log client from the global list > > > client = SearchClientByHandle(logHandle); > > > if (client == nullptr) { > > > @@ -852,6 +890,11 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( > > > return ais_rc; > > > } > > > > > > + if (client->is_died() == true) { > > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > + return ais_rc; > > > + } > > > + > > > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > > > // @client is being deleted. DO NOT touch this @client > > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > @@ -892,10 +935,12 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( > > > } > > > > > > // This client is failed to do recover in Recovery thread. > > > - // Remove this client from the database. > > > if (client->is_recovery_failed() == true) { > > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > > - RemoveLogClient(&client); > > > + // Mark this client died, and it will be removed from client db > > > later. > > > + client->died(); > > > + client = nullptr; > > > + has_dead_clients = true; > > > ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > return ais_rc; > > > } > > > @@ -1160,6 +1205,11 @@ SaAisErrorT > > > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, > > > return ais_rc; > > > } > > > > > > + if (client->is_died() == true) { > > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > + return ais_rc; > > > + } > > > + > > > if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { > > > // @client is being deleted. DO NOT touch this @client > > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > @@ -1263,10 +1313,13 @@ SaAisErrorT > > > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, > > > } > > > > > > // This client is failed to do recover in Recovery thread. > > > - // Remove this client from the database. > > > if (client->is_recovery_failed() == true) { > > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > > - RemoveLogClient(&client); > > > + // Mark this client died, and it will be removed from client db > > > later. > > > + client->died(); > > > + client = nullptr; > > > + stream = nullptr; > > > + has_dead_clients = true; > > > ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > return ais_rc; > > > } > > > @@ -1321,13 +1374,6 @@ SaAisErrorT > > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > > > return ais_rc; > > > } > > > > > > - if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) > > { > > > - // @stream is being used somewhere (>0), DO NOT delete this > > @stream. > > > - // or @stream is being deleted on other thread (=-1) > > > - ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > - return ais_rc; > > > - } > > > - > > > // Retrieve @LogClient obj based on @client_handle > > > client = SearchClientByHandle(stream->GetMyClientHandle()); > > > if (client == nullptr) { > > > @@ -1336,6 +1382,18 @@ SaAisErrorT > > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > > > return ais_rc; > > > } > > > > > > + if (client->is_died() == true) { > > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > + return ais_rc; > > > + } > > > + > > > + if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) > > { > > > + // @stream is being used somewhere (>0), DO NOT delete this > > @stream. > > > + // or @stream is being deleted on other thread (=-1) > > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > + return ais_rc; > > > + } > > > + > > > if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { > > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > > return ais_rc; > > > @@ -1384,10 +1442,13 @@ SaAisErrorT > > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > > > } > > > > > > // This client is failed to do recover in Recovery thread. > > > - // Remove this client from the database. > > > if (client->is_recovery_failed() == true) { > > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > > - RemoveLogClient(&client); > > > + // Mark this client died, and it will be removed from client db > > > later. > > > + client->died(); > > > + client = nullptr; > > > + stream = nullptr; > > > + has_dead_clients = true; > > > ais_rc = SA_AIS_ERR_BAD_HANDLE; > > > return ais_rc; > > > } > > > diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h > > > index 9f1c370..817a357 100644 > > > --- a/src/log/agent/lga_agent.h > > > +++ b/src/log/agent/lga_agent.h > > > @@ -159,6 +159,8 @@ class LogAgent { > > > void EnterCriticalSection(); > > > void LeaveCriticalSection(); > > > > > > + void RemoveAllDeadLogClients(); > > > + > > > private: > > > // Not allow to create @LogAgent object, except the singleton object > > > @me_. > > > LogAgent(); > > > @@ -272,6 +274,9 @@ class LogAgent { > > > // Hold list of current log clients > > > std::vector<LogClient*> client_list_; > > > > > > + // true if there is any failed-to-recover client > > > + bool has_dead_clients = false; > > > + > > > // LGS LGA sync params > > > NCS_SEL_OBJ lgs_sync_sel_; > > > > > > diff --git a/src/log/agent/lga_client.cc b/src/log/agent/lga_client.cc > > > index 386c849..3c3f893 100644 > > > --- a/src/log/agent/lga_client.cc > > > +++ b/src/log/agent/lga_client.cc > > > @@ -32,7 +32,7 @@ > > > // LogClient > > > > > > > > //-------------------------------------------------------------------------- > > > ---- > > > LogClient::LogClient(const SaLogCallbacksT* cb, uint32_t id, SaVersionT > > > ver) > > > - : client_id_{id}, ref_counter_object_{} { > > > + : client_id_{id}, ref_counter_object_{}, died_{false} { > > > TRACE_ENTER(); > > > // Reset registered callback info > > > memset(&callbacks_, 0, sizeof(callbacks_)); > > > diff --git a/src/log/agent/lga_client.h b/src/log/agent/lga_client.h > > > index 05349be..08137b7 100644 > > > --- a/src/log/agent/lga_client.h > > > +++ b/src/log/agent/lga_client.h > > > @@ -173,6 +173,10 @@ class LogClient { > > > // while the recovery thread is going at RecoveryState::kRecovery2. > > > bool is_recovery_done() const; > > > > > > + // Invoke when the client is not recovered successfully. > > > + void died() { died_ = true; } > > > + bool is_died() const { return died_; } > > > + > > > private: > > > // Search for @LogStreamInfo object from @stream_list_ based on > > > @stream_id > > > LogStreamInfo* SearchLogStreamInfoById(uint32_t); > > > @@ -257,6 +261,9 @@ class LogClient { > > > // LOG handle (derived from hdl-mngr) > > > SaLogHandleT handle_; > > > > > > + // true if this client is not successfully recovered. > > > + bool died_; > > > + > > > DELETE_COPY_AND_MOVE_OPERATORS(LogClient); > > > }; > > > > > > -- > > > 1.9.1 > > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel