This is an automated email from the ASF dual-hosted git repository.
xyuanlu pushed a commit to branch metaclient
in repository https://gitbox.apache.org/repos/asf/helix.git
The following commit(s) were added to refs/heads/metaclient by this push:
new 0a7587d94 Add comment and description for ZkMetaClient reconnect
handling (#2443)
0a7587d94 is described below
commit 0a7587d94499fc27b34100c54920a2c77902a2ea
Author: xyuanlu <[email protected]>
AuthorDate: Thu Apr 13 17:34:55 2023 -0700
Add comment and description for ZkMetaClient reconnect handling (#2443)
Add comment and description for ZkMetaClient reconnect handling
---
.../helix/metaclient/impl/zk/ZkMetaClient.java | 59 ++++++++++++++++++++--
1 file changed, 55 insertions(+), 4 deletions(-)
diff --git
a/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
b/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
index af31423d0..984766254 100644
---
a/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
+++
b/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
@@ -449,12 +449,63 @@ public class ZkMetaClient<T> implements
MetaClientInterface<T>, AutoCloseable {
}
}
+ /**
+ * MetaClient uses Helix ZkClient (@see
org.apache.helix.zookeeper.impl.client.ZkClient) to connect
+ * to ZK. Current implementation of ZkClient auto-reconnects infinitely. We
use monitor thread
+ * in ZkMetaClient to monitor reconnect status and close ZkClient when the
client is still in
+ * disconnected state when it reaches the reconnect timeout.
+ *
+ *
+ * case 1: Start the monitor thread when ZkMetaClient gets a disconnected event
to check the connect state
+ * when the timeout is reached. If not re-connected when timed out, kill
the monitor thread
+ * and close ZkClient.
+ * [MetaClient thread]
---------------------------------------------------------------
+ * ( When disconnected, schedule an event
+ * to check connect state after timeout)
+ * [Reconnect monitor thread] --------------------------------------
+ * ^ |
not reconnected when timed out
+ * / |
+ * | disconnected event v
+ * [ZkClient] -------X---------------------------------------X
zkClient.close()
+ * [ZkClient exp back | X X
+ * -off retry connection] |--------|--------------|--------------
+ *
+ *
+ * case 2: Start the monitor thread when ZkMetaClient gets a disconnected event
to check the connect state
+ * when the timeout is reached. If re-connected before the timeout, cancel
the delayed monitor thread.
+ *
+ * [MetaClient thread]
---------------------------------------------------------------
+ * (cancel scheduled task when reconnected)
+ * [Reconnect monitor] ---------------------------------X
+ * ^ ^
+ * / /
+ * | disconnected event |
reconnected event
+ * [ZkClient]
-----X------------------------------------------------------
+ * [ZkClient exp back | X Y
Reconnected before timed out
+ * -off retry connection] |--------| ---------------------|
+ *
+ *
+ * case 3: Start the monitor thread when ZkMetaClient gets a disconnected event
to check the connect state
+ * when the timeout is reached. If the re-connect attempt errors, kill the monitor
thread and cancel the
+ * delayed monitor thread.
+ * [MetaClient thread]
---------------------------------------------------------------
+ * (cancel scheduled task and close ZkClient on
reconnect error)
+ * [Reconnect monitor] ----------------------------------X
+ * ^ ^ |
+ * / err / |
+ * | disconnected event | v
close ZkClient
+ * [ZkClient] -----X-------------------------------X ---X
+ * [ZkClient exp back | X ^ Reconnect
error
+ * -off retry connection] |--------| --------------------|
+ *
+ */
+
private class ReconnectStateChangeListener implements IZkStateListener {
// Schedule a monitor to track ZkClient auto reconnect when Disconnected
// Cancel the monitor thread when connected.
@Override
public void handleStateChanged(Watcher.Event.KeeperState state) throws
Exception {
- if (state == Watcher.Event.KeeperState.Disconnected) {
+ if (state == Watcher.Event.KeeperState.Disconnected) {
// ------case 1
// Expired. start a new event monitoring retry
_zkClientConnectionMutex.lockInterruptibly();
try {
@@ -472,7 +523,7 @@ public class ZkMetaClient<T> implements
MetaClientInterface<T>, AutoCloseable {
_zkClientConnectionMutex.unlock();
}
} else if (state == Watcher.Event.KeeperState.SyncConnected
- || state == Watcher.Event.KeeperState.ConnectedReadOnly) {
+ || state == Watcher.Event.KeeperState.ConnectedReadOnly) {
// ------ case 2
cleanUpAndClose(true, false);
LOG.info("ZkClient is SyncConnected, reconnect monitor thread is
canceled (if any)");
}
@@ -480,14 +531,14 @@ public class ZkMetaClient<T> implements
MetaClientInterface<T>, AutoCloseable {
// Cancel the monitor thread when connected.
@Override
- public void handleNewSession(String sessionId) throws Exception {
+ public void handleNewSession(String sessionId) throws Exception {
// ------ case 2
cleanUpAndClose(true, false);
LOG.info("New session initiated in ZkClient, reconnect monitor thread is
canceled (if any)");
}
// Cancel the monitor thread and close ZkClient when connect error.
@Override
- public void handleSessionEstablishmentError(Throwable error) throws
Exception {
+ public void handleSessionEstablishmentError(Throwable error) throws
Exception { // -- case 3
cleanUpAndClose(true, true);
LOG.info("New session initiated in ZkClient, reconnect monitor thread is
canceled (if any)");
}