dasahcc commented on a change in pull request #1494:
URL: https://github.com/apache/helix/pull/1494#discussion_r513786034
##########
File path: helix-core/src/main/java/org/apache/helix/HelixAdmin.java
##########
@@ -228,6 +228,13 @@ void addResource(String clusterName, String resourceName,
int numPartitions, Str
*/
void dropInstance(String clusterName, InstanceConfig instanceConfig);
+ /**
+ * Purge offline instances from a cluster
+ * @param clusterName
+ * @param customizedPurgeMap
Review comment:
Can we specify the map meaning here? It could be confusing if we dont
clarify it in Java doc.
##########
File path:
helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
##########
@@ -2061,4 +2070,45 @@ public ZKHelixAdmin build() {
_zkAddress), false);
}
}
+
+ private Map<String, InstanceConfig> findTimeoutOfflineInstances(String
clusterName,
+ Map<String, String> customizedPurgeMap) {
+ String timeout = customizedPurgeMap.get(OFFLINE_NODE_PURGE_TIMEOUT);
Review comment:
I am also confused here. What could other entries other than this
timeout?
##########
File path:
helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
##########
@@ -105,6 +106,7 @@
public static final String CONNECTION_TIMEOUT = "helixAdmin.timeOutInSec";
private static final String MAINTENANCE_ZNODE_ID = "maintenance";
private static final int DEFAULT_SUPERCLUSTER_REPLICA = 3;
+ public static final String OFFLINE_NODE_PURGE_TIMEOUT = "purge.timeout";
Review comment:
Better to have public constants class to hold this kind of key words.
##########
File path:
helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
##########
@@ -2061,4 +2070,45 @@ public ZKHelixAdmin build() {
_zkAddress), false);
}
}
+
+ private Map<String, InstanceConfig> findTimeoutOfflineInstances(String
clusterName,
+ Map<String, String> customizedPurgeMap) {
+ String timeout = customizedPurgeMap.get(OFFLINE_NODE_PURGE_TIMEOUT);
+ Long timeoutValue;
+ // in case there is no customized timeout value, use the one defined in
cluster config
+ if (timeout == null) {
+ timeoutValue =
_configAccessor.getClusterConfig(clusterName).getOfflineNodeTimeOutForPurge();
+ } else {
+ timeoutValue = Long.valueOf(timeout);
+ }
+
+ Map<String, InstanceConfig> instanceConfigMap = new HashMap<>();
+ String path = PropertyPathBuilder.instanceConfig(clusterName);
+ BaseDataAccessor<ZNRecord> baseAccessor = new
ZkBaseDataAccessor<>(_zkClient);
+ List<ZNRecord> znRecords = baseAccessor.getChildren(path, null, 0, 0, 0);
+ for (ZNRecord record : znRecords) {
+ if (record != null) {
+ InstanceConfig instanceConfig = new InstanceConfig(record);
+ instanceConfigMap.put(instanceConfig.getInstanceName(),
instanceConfig);
+ }
+ }
+
+ path = PropertyPathBuilder.liveInstance(clusterName);
+ List<String> liveNodes = baseAccessor.getChildNames(path, 0);
+ liveNodes.forEach(liveNode -> instanceConfigMap.remove(liveNode));
Review comment:
We can create a set and use instanceConfigMap.keySet().removeAll(set).
##########
File path:
helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
##########
@@ -2061,4 +2070,45 @@ public ZKHelixAdmin build() {
_zkAddress), false);
}
}
+
+ private Map<String, InstanceConfig> findTimeoutOfflineInstances(String
clusterName,
+ Map<String, String> customizedPurgeMap) {
+ String timeout = customizedPurgeMap.get(OFFLINE_NODE_PURGE_TIMEOUT);
+ Long timeoutValue;
+ // in case there is no customized timeout value, use the one defined in
cluster config
+ if (timeout == null) {
+ timeoutValue =
_configAccessor.getClusterConfig(clusterName).getOfflineNodeTimeOutForPurge();
+ } else {
+ timeoutValue = Long.valueOf(timeout);
Review comment:
Shall we do a try catch? If the timeout is invalid string by human
error, shall we stop it or use default value?
##########
File path:
helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
##########
@@ -2061,4 +2070,45 @@ public ZKHelixAdmin build() {
_zkAddress), false);
}
}
+
+ private Map<String, InstanceConfig> findTimeoutOfflineInstances(String
clusterName,
+ Map<String, String> customizedPurgeMap) {
+ String timeout = customizedPurgeMap.get(OFFLINE_NODE_PURGE_TIMEOUT);
+ Long timeoutValue;
+ // in case there is no customized timeout value, use the one defined in
cluster config
+ if (timeout == null) {
+ timeoutValue =
_configAccessor.getClusterConfig(clusterName).getOfflineNodeTimeOutForPurge();
+ } else {
+ timeoutValue = Long.valueOf(timeout);
+ }
+
+ Map<String, InstanceConfig> instanceConfigMap = new HashMap<>();
+ String path = PropertyPathBuilder.instanceConfig(clusterName);
+ BaseDataAccessor<ZNRecord> baseAccessor = new
ZkBaseDataAccessor<>(_zkClient);
+ List<ZNRecord> znRecords = baseAccessor.getChildren(path, null, 0, 0, 0);
+ for (ZNRecord record : znRecords) {
+ if (record != null) {
+ InstanceConfig instanceConfig = new InstanceConfig(record);
+ instanceConfigMap.put(instanceConfig.getInstanceName(),
instanceConfig);
+ }
+ }
+
+ path = PropertyPathBuilder.liveInstance(clusterName);
+ List<String> liveNodes = baseAccessor.getChildNames(path, 0);
+ liveNodes.forEach(liveNode -> instanceConfigMap.remove(liveNode));
+
+ Set<String> toRemoveInstances = new HashSet<>();
+ for (String instanceName : instanceConfigMap.keySet()) {
+ String historyPath = PropertyPathBuilder.instanceHistory(clusterName,
instanceName);
+ ZNRecord znRecord = baseAccessor.get(historyPath, null, 0);
+ ParticipantHistory participantHistory = new ParticipantHistory(znRecord);
+ long lastOfflineTime = participantHistory.getLastOfflineTime();
+ if (lastOfflineTime == -1 || timeoutValue < 0
+ || System.currentTimeMillis() - lastOfflineTime < timeoutValue) {
+ toRemoveInstances.add(instanceName);
+ }
+ }
+ toRemoveInstances.forEach(instance -> instanceConfigMap.remove(instance));
Review comment:
Same here, we can use
instanceConfigMap.keySet().removeAll(toRemoveInstances); It has better
performance.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]