Hi Lennart,

My V2 could has been replaced by Canh patch. It is #2670 which already got
ack from you long time ago.

With #2870, it only does reset the LogStream pointer to nullptr in case of
deleting the client which it belongs to.

Regards, Vu

> -----Original Message-----
> From: Lennart Lund <lennart.l...@ericsson.com>
> Sent: Thursday, June 7, 2018 4:15 PM
> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Canh Van Truong
> <canh.v.tru...@dektech.com.au>
> Cc: opensaf-devel@lists.sourceforge.net; Lennart Lund
> <lennart.l...@ericsson.com>
> Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream
> [#2870]
> 
> Hi Vu,
> 
> Ack, with comments. See also attached diff with comments and some
> changes.
> 
> In general the code has become rather messy. Too much logic is distributed
> and handled "inline" in code which purpose is to handle other things and
> these latest fixes adds to that. This makes it very hard to review and
maintain
> the code. Very big risk for mistakes, many places affected by a fix etc.
If this
> continue the code will degenerate and become impossible to maintain with
> reasonable cost.
> 
> Thanks
> Lennart
> 
> > -----Original Message-----
> > From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>
> > Sent: den 6 juni 2018 10:27
> > To: Canh Van Truong <canh.v.tru...@dektech.com.au>; Lennart Lund
> > <lennart.l...@ericsson.com>
> > Cc: opensaf-devel@lists.sourceforge.net
> > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream
> > [#2870]
> >
> > Hi Canh,
> >
> > I forgot that patch. Just reviewed it. Thanks.
> >
> > Anyway, I will push the previous version for this ticket.
> >
> > Regards, Vu
> >
> > > -----Original Message-----
> > > From: Canh Van Truong <canh.v.tru...@dektech.com.au>
> > > Sent: Tuesday, June 5, 2018 6:20 PM
> > > To: 'Vu Minh Nguyen' <vu.m.ngu...@dektech.com.au>;
> > > lennart.l...@ericsson.com
> > > Cc: opensaf-devel@lists.sourceforge.net
> > > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted
stream
> > > [#2870]
> > >
> > > Hi a.Vu,
> > >
> > > Looking at patch in ticket #2670, we can use ref_counter to decide to
> > remove
> > > client in case recovery fail. That way is simple.
> > >
> > > Regards
> > > Canh
> > >
> > > -----Original Message-----
> > > From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>
> > > Sent: Tuesday, June 5, 2018 5:54 PM
> > > To: lennart.l...@ericsson.com; canh.v.tru...@dektech.com.au
> > > Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen
> > > <vu.m.ngu...@dektech.com.au>
> > > Subject: [PATCH 1/1] log: fix restore ref counter for deleted stream
> > [#2870]
> > >
> > > In the methods LogAgent::saLogStreamClose() and
> > > LogAgent::saLogWriteLogAsync(),
> > > the client is deleted if failed to recover; however, the pointer to
the
> > log
> > > stream of this client has not been reset. Therefore, when the
destrustor
> > of
> > > ScopeData runs, the reference counter could be restored on deleted log
> > > stream.
> > >
> > > Besides, there are possibilities of race condition b/w deleting the
> > > failed-to-recovery client and using that deleted client in other
thread.
> > >
> > > This patch introduces a new attribute to LogClient. When the client
fails
> > > to recover, that client is not removed at that time but is done at
next
> > call
> > > of log handle initialize or finalize if the attribute is true.
> > > ---
> > >  src/log/agent/lga_agent.cc  | 87
> > > ++++++++++++++++++++++++++++++++++++++-------
> > >  src/log/agent/lga_agent.h   |  5 +++
> > >  src/log/agent/lga_client.cc |  2 +-
> > >  src/log/agent/lga_client.h  |  7 ++++
> > >  4 files changed, 87 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc
> > > index f33b5dc..831edfa 100644
> > > --- a/src/log/agent/lga_agent.cc
> > > +++ b/src/log/agent/lga_agent.cc
> > > @@ -255,6 +255,21 @@ void LogAgent::RemoveAllLogClients() {
> > >    }
> > >  }
> > >
> > > +void LogAgent::RemoveAllDeadLogClients() {
> > > +  TRACE_ENTER();
> > > +  ScopeLock scopeLock(mutex_);
> > > +  auto it = client_list_.begin();
> > > +  while (it != client_list_.end()) {
> > > +    if ((*it)->is_died() == true) {
> > > +      client_list_.erase(it);
> > > +      delete *it;
> > > +      it = client_list_.begin();
> > > +      continue;
> > > +    }
> > > +    it++;
> > > +  }
> > > +}
> > > +
> > >  // Add one client @client to the list @client_list_
> > >  // This method should be called in @saLogInitialize() context
> > >  void LogAgent::AddLogClient(LogClient* client) {
> > > @@ -314,6 +329,14 @@ SaAisErrorT
> > > LogAgent::saLogInitialize(SaLogHandleT*
> > > logHandle,
> > >
> > >    TRACE_ENTER();
> > >
> > > +  if (true) {
> > > +    ScopeLock critical_section(get_delete_obj_sync_mutex_);
> > > +    if (has_dead_clients == true) {
> > > +      RemoveAllDeadLogClients();
> > > +      has_dead_clients = false;
> > > +    }
> > > +  }
> > > +
> > >    // Verify parameters (log handle and that version is given)
> > >    if ((logHandle == nullptr) || (version == nullptr)) {
> > >      TRACE("version or handle FAILED");
> > > @@ -453,6 +476,11 @@ SaAisErrorT
> LogAgent::saLogSelectionObjectGet(
> > >        return ais_rc;
> > >      }
> > >
> > > +    if (client->is_died() == true) {
> > > +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > > +      return ais_rc;
> > > +    }
> > > +
> > >      if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1)
{
> > >        // @client is being deleted. Don't touch @this client
> > >        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> > > @@ -504,6 +532,11 @@ SaAisErrorT
> > > LogAgent::saLogDispatch(SaLogHandleT
> > > logHandle,
> > >        return ais_rc;
> > >      }
> > >
> > > +    if (client->is_died() == true) {
> > > +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > > +      return ais_rc;
> > > +    }
> > > +
> > >      if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1)
{
> > >        // @client is being deleted. DO NOT touch this @client
> > >        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> > > @@ -584,6 +617,11 @@ SaAisErrorT
> LogAgent::saLogFinalize(SaLogHandleT
> > > logHandle) {
> > >    if (true) {
> > >      ScopeLock critical_section(get_delete_obj_sync_mutex_);
> > >
> > > +    if (has_dead_clients == true) {
> > > +      RemoveAllDeadLogClients();
> > > +      has_dead_clients = false;
> > > +    }
> > > +
> > >      // Get log client from the global list
> > >      client = SearchClientByHandle(logHandle);
> > >      if (client == nullptr) {
> > > @@ -852,6 +890,11 @@ SaAisErrorT LogAgent::saLogStreamOpen_2(
> > >        return ais_rc;
> > >      }
> > >
> > > +    if (client->is_died() == true) {
> > > +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > > +      return ais_rc;
> > > +    }
> > > +
> > >      if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1)
{
> > >        // @client is being deleted. DO NOT touch this @client
> > >        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> > > @@ -892,10 +935,12 @@ SaAisErrorT LogAgent::saLogStreamOpen_2(
> > >      }
> > >
> > >      // This client is failed to do recover in Recovery thread.
> > > -    // Remove this client from the database.
> > >      if (client->is_recovery_failed() == true) {
> > >        ScopeLock critical_section(get_delete_obj_sync_mutex_);
> > > -      RemoveLogClient(&client);
> > > +      // Mark this client died, and it will be removed from client db
> > > later.
> > > +      client->died();
> > > +      client = nullptr;
> > > +      has_dead_clients = true;
> > >        ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > >        return ais_rc;
> > >      }
> > > @@ -1160,6 +1205,11 @@ SaAisErrorT
> > > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle,
> > >        return ais_rc;
> > >      }
> > >
> > > +    if (client->is_died() == true) {
> > > +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > > +      return ais_rc;
> > > +    }
> > > +
> > >      if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) ==
-1) {
> > >        // @client is being deleted. DO NOT touch this @client
> > >        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> > > @@ -1263,10 +1313,13 @@ SaAisErrorT
> > > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle,
> > >      }
> > >
> > >      // This client is failed to do recover in Recovery thread.
> > > -    // Remove this client from the database.
> > >      if (client->is_recovery_failed() == true) {
> > >        ScopeLock critical_section(get_delete_obj_sync_mutex_);
> > > -      RemoveLogClient(&client);
> > > +      // Mark this client died, and it will be removed from client db
> > > later.
> > > +      client->died();
> > > +      client = nullptr;
> > > +      stream = nullptr;
> > > +      has_dead_clients = true;
> > >        ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > >        return ais_rc;
> > >      }
> > > @@ -1321,13 +1374,6 @@ SaAisErrorT
> > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) {
> > >        return ais_rc;
> > >      }
> > >
> > > -    if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0)
> > {
> > > -      // @stream is being used somewhere (>0), DO NOT delete this
> > @stream.
> > > -      // or @stream is being deleted on other thread (=-1)
> > > -      ais_rc = SA_AIS_ERR_TRY_AGAIN;
> > > -      return ais_rc;
> > > -    }
> > > -
> > >      // Retrieve @LogClient obj based on @client_handle
> > >      client = SearchClientByHandle(stream->GetMyClientHandle());
> > >      if (client == nullptr) {
> > > @@ -1336,6 +1382,18 @@ SaAisErrorT
> > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) {
> > >        return ais_rc;
> > >      }
> > >
> > > +    if (client->is_died() == true) {
> > > +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > > +      return ais_rc;
> > > +    }
> > > +
> > > +    if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0)
> > {
> > > +      // @stream is being used somewhere (>0), DO NOT delete this
> > @stream.
> > > +      // or @stream is being deleted on other thread (=-1)
> > > +      ais_rc = SA_AIS_ERR_TRY_AGAIN;
> > > +      return ais_rc;
> > > +    }
> > > +
> > >      if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) ==
-1) {
> > >        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> > >        return ais_rc;
> > > @@ -1384,10 +1442,13 @@ SaAisErrorT
> > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) {
> > >      }
> > >
> > >      // This client is failed to do recover in Recovery thread.
> > > -    // Remove this client from the database.
> > >      if (client->is_recovery_failed() == true) {
> > >        ScopeLock critical_section(get_delete_obj_sync_mutex_);
> > > -      RemoveLogClient(&client);
> > > +      // Mark this client died, and it will be removed from client db
> > > later.
> > > +      client->died();
> > > +      client = nullptr;
> > > +      stream = nullptr;
> > > +      has_dead_clients = true;
> > >        ais_rc = SA_AIS_ERR_BAD_HANDLE;
> > >        return ais_rc;
> > >      }
> > > diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h
> > > index 9f1c370..817a357 100644
> > > --- a/src/log/agent/lga_agent.h
> > > +++ b/src/log/agent/lga_agent.h
> > > @@ -159,6 +159,8 @@ class LogAgent {
> > >    void EnterCriticalSection();
> > >    void LeaveCriticalSection();
> > >
> > > +  void RemoveAllDeadLogClients();
> > > +
> > >   private:
> > >    // Not allow to create @LogAgent object, except the singleton
object
> > > @me_.
> > >    LogAgent();
> > > @@ -272,6 +274,9 @@ class LogAgent {
> > >    // Hold list of current log clients
> > >    std::vector<LogClient*> client_list_;
> > >
> > > +  // true if there is any failed-to-recover client
> > > +  bool has_dead_clients = false;
> > > +
> > >    // LGS LGA sync params
> > >    NCS_SEL_OBJ lgs_sync_sel_;
> > >
> > > diff --git a/src/log/agent/lga_client.cc b/src/log/agent/lga_client.cc
> > > index 386c849..3c3f893 100644
> > > --- a/src/log/agent/lga_client.cc
> > > +++ b/src/log/agent/lga_client.cc
> > > @@ -32,7 +32,7 @@
> > >  // LogClient
> > >
> > >
> >
//--------------------------------------------------------------------------
> > > ----
> > >  LogClient::LogClient(const SaLogCallbacksT* cb, uint32_t id,
SaVersionT
> > > ver)
> > > -    : client_id_{id}, ref_counter_object_{} {
> > > +    : client_id_{id}, ref_counter_object_{}, died_{false} {
> > >    TRACE_ENTER();
> > >    // Reset registered callback info
> > >    memset(&callbacks_, 0, sizeof(callbacks_));
> > > diff --git a/src/log/agent/lga_client.h b/src/log/agent/lga_client.h
> > > index 05349be..08137b7 100644
> > > --- a/src/log/agent/lga_client.h
> > > +++ b/src/log/agent/lga_client.h
> > > @@ -173,6 +173,10 @@ class LogClient {
> > >    // while the recovery thread is going at RecoveryState::kRecovery2.
> > >    bool is_recovery_done() const;
> > >
> > > +  // Invoke when the client is not recovered successfully.
> > > +  void died() { died_ = true; }
> > > +  bool is_died() const { return died_; }
> > > +
> > >   private:
> > >    // Search for @LogStreamInfo object from @stream_list_ based on
> > > @stream_id
> > >    LogStreamInfo* SearchLogStreamInfoById(uint32_t);
> > > @@ -257,6 +261,9 @@ class LogClient {
> > >    // LOG handle (derived from hdl-mngr)
> > >    SaLogHandleT handle_;
> > >
> > > +  // true if this client is not successfully recovered.
> > > +  bool died_;
> > > +
> > >    DELETE_COPY_AND_MOVE_OPERATORS(LogClient);
> > >  };
> > >
> > > --
> > > 1.9.1
> >



------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to