This is an automated email from the ASF dual-hosted git repository.

xyuanlu pushed a commit to branch metaclient
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/metaclient by this push:
     new 0a7587d94 Add comment and description for ZkMetaClient reconnect 
handling (#2443)
0a7587d94 is described below

commit 0a7587d94499fc27b34100c54920a2c77902a2ea
Author: xyuanlu <[email protected]>
AuthorDate: Thu Apr 13 17:34:55 2023 -0700

    Add comment and description for ZkMetaClient reconnect handling (#2443)
    
    Add comment and description for ZkMetaClient reconnect handling
---
 .../helix/metaclient/impl/zk/ZkMetaClient.java     | 59 ++++++++++++++++++++--
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git 
a/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
 
b/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
index af31423d0..984766254 100644
--- 
a/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
+++ 
b/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
@@ -449,12 +449,63 @@ public class ZkMetaClient<T> implements 
MetaClientInterface<T>, AutoCloseable {
     }
   }
 
+  /**
+   * MetaClient uses Helix ZkClient (@see 
org.apache.helix.zookeeper.impl.client.ZkClient) to connect
+   * to ZK. The current implementation of ZkClient auto-reconnects infinitely. We 
use a monitor thread
+   * in ZkMetaClient to monitor reconnect status and close ZkClient if the 
client is still in the
+   * disconnected state when it reaches the reconnect timeout.
+   *
+   *
+   * case 1: Start the monitor thread when ZkMetaClient gets a disconnected event, 
to check the connect state
+   *         when the timeout is reached. If not re-connected when timed out, kill 
the monitor thread
+   *         and close ZkClient.
+   * [MetaClient thread]        
---------------------------------------------------------------
+   *                              ( When disconnected, schedule an event
+   *                              to check connect state after timeout)
+   * [Reconnect monitor thread]          --------------------------------------
+   *                                   ^                                     | 
 not reconnected when timed out
+   *                                  /                                      |
+   *                                 | disconnected event                    v
+   * [ZkClient]               -------X---------------------------------------X 
zkClient.close()
+   * [ZkClient exp back              |         X            X
+   *  -off retry connection]         |--------|--------------|--------------
+   *
+   *
+   * case 2: Start the monitor thread when ZkMetaClient gets a disconnected event, 
to check the connect state
+   *         when the timeout is reached. If re-connected before timed out, cancel 
the delayed monitor thread.
+   *
+   * [MetaClient thread]       
---------------------------------------------------------------
+   *                            (cancel scheduled task when reconnected)
+   * [Reconnect monitor]               ---------------------------------X
+   *                                  ^                                ^
+   *                                 /                                /
+   *                                | disconnected event             |  
reconnected event
+   * [ZkClient]                
-----X------------------------------------------------------
+   * [ZkClient exp back             |        X                      Y  
Reconnected before timed out
+   *  -off retry connection]        |--------| ---------------------|
+   *
+   *
+   * case 3: Start the monitor thread when ZkMetaClient gets a disconnected event, 
to check the connect state
+   *         when the timeout is reached. If reconnecting fails with an error, kill the monitor 
thread and cancel the
+   *         delayed monitor thread.
+   * [MetaClient thread]       
---------------------------------------------------------------
+   *                          (cancel scheduled task and close ZkClient on 
reconnect error)
+   * [Reconnect monitor]              ----------------------------------X
+   *                                 ^                               ^  |
+   *                                /                           err /   |
+   *                               | disconnected event            |    v 
close ZkClient
+   * [ZkClient]               -----X-------------------------------X ---X
+   * [ZkClient exp back            |        X                     ^ Reconnect 
error
+   *  -off retry connection]       |--------| --------------------|
+   *
+   */
+
   private class ReconnectStateChangeListener implements IZkStateListener {
     // Schedule a monitor to track ZkClient auto reconnect when Disconnected
     // Cancel the monitor thread when connected.
     @Override
     public void handleStateChanged(Watcher.Event.KeeperState state) throws 
Exception {
-      if (state == Watcher.Event.KeeperState.Disconnected) {
+      if (state == Watcher.Event.KeeperState.Disconnected) {                   
     // ------case 1
         // Expired. start a new event monitoring retry
         _zkClientConnectionMutex.lockInterruptibly();
         try {
@@ -472,7 +523,7 @@ public class ZkMetaClient<T> implements 
MetaClientInterface<T>, AutoCloseable {
           _zkClientConnectionMutex.unlock();
         }
       } else if (state == Watcher.Event.KeeperState.SyncConnected
-          || state == Watcher.Event.KeeperState.ConnectedReadOnly) {
+          || state == Watcher.Event.KeeperState.ConnectedReadOnly) {           
    // ------ case 2
         cleanUpAndClose(true, false);
         LOG.info("ZkClient is SyncConnected, reconnect monitor thread is 
canceled (if any)");
       }
@@ -480,14 +531,14 @@ public class ZkMetaClient<T> implements 
MetaClientInterface<T>, AutoCloseable {
 
     // Cancel the monitor thread when connected.
     @Override
-    public void handleNewSession(String sessionId) throws Exception {
+    public void handleNewSession(String sessionId) throws Exception {          
   // ------ case 2
       cleanUpAndClose(true, false);
       LOG.info("New session initiated in ZkClient, reconnect monitor thread is 
canceled (if any)");
     }
 
     // Cancel the monitor thread and close ZkClient when connect error.
     @Override
-    public void handleSessionEstablishmentError(Throwable error) throws 
Exception {
+    public void handleSessionEstablishmentError(Throwable error) throws 
Exception {    // -- case 3
       cleanUpAndClose(true, true);
       LOG.info("New session initiated in ZkClient, reconnect monitor thread is 
canceled (if any)");
     }

Reply via email to