This is an automated email from the ASF dual-hosted git repository.
xyuanlu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git
The following commit(s) were added to refs/heads/master by this push:
new 38a4f477a Open Add NPR guard when reading instance config - Race when
reading config while adding/removing instance (#2669)
38a4f477a is described below
commit 38a4f477a3efb109e18cd0713b947cbe4139d7a1
Author: xyuanlu <[email protected]>
AuthorDate: Wed Oct 18 13:06:01 2023 -0700
Open Add NPR guard when reading instance config - Race when reading config
while adding/removing instance (#2669)
When adding/removing an instance from a cluster, it is possible that at some
point in time the instance config is gone but the INSTANCE ZNode is still there.
This would cause the config map in the Helix controller cache to have the
instance mapped to a null config.
The failure is not a blocking error and does not cause further pipeline
failures, but we would like to avoid noise in the log.
This change adds a null check before accessing the instance config.
---
.../controller/rebalancer/util/DelayedRebalanceUtil.java | 7 ++++---
.../java/org/apache/helix/manager/zk/ZKHelixAdmin.java | 16 +++++++++++-----
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java
index 84dd0f4ec..58bad164a 100644
---
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java
+++
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java
@@ -132,9 +132,10 @@ public class DelayedRebalanceUtil {
public static Set<String> filterOutEvacuatingInstances(Map<String,
InstanceConfig> instanceConfigMap,
Set<String> nodes) {
- return nodes.stream()
- .filter(instance ->
!instanceConfigMap.get(instance).getInstanceOperation().equals(
- InstanceConstants.InstanceOperation.EVACUATE.name()))
+ return nodes.stream()
+ .filter(instance -> (instanceConfigMap.get(instance) != null &&
!instanceConfigMap.get(instance)
+ .getInstanceOperation()
+ .equals(InstanceConstants.InstanceOperation.EVACUATE.name())))
.collect(Collectors.toSet());
}
diff --git
a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index 44afee5e1..ebbcf64d0 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -410,15 +410,21 @@ public class ZKHelixAdmin implements HelixAdmin {
@Override
public boolean isEvacuateFinished(String clusterName, String instanceName) {
- return !instanceHasCurrentSateOrMessage(clusterName, instanceName) &&
(getInstanceConfig(clusterName,
-
instanceName).getInstanceOperation().equals(InstanceConstants.InstanceOperation.EVACUATE.name()));
+ if (!instanceHasCurrentSateOrMessage(clusterName, instanceName)) {
+ InstanceConfig config = getInstanceConfig(clusterName, instanceName);
+ return config != null &&
config.getInstanceOperation().equals(InstanceConstants.InstanceOperation.EVACUATE.name());
+ }
+ return false;
}
@Override
public boolean isReadyForPreparingJoiningCluster(String clusterName, String
instanceName) {
- return !instanceHasCurrentSateOrMessage(clusterName, instanceName)
- &&
DelayedAutoRebalancer.INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT.contains(
- getInstanceConfig(clusterName, instanceName).getInstanceOperation());
+ if (!instanceHasCurrentSateOrMessage(clusterName, instanceName)) {
+ InstanceConfig config = getInstanceConfig(clusterName, instanceName);
+ return config != null &&
DelayedAutoRebalancer.INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT.contains(
+ config.getInstanceOperation());
+ }
+ return false;
}
/**