This is an automated email from the ASF dual-hosted git repository.
jxue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git
The following commit(s) were added to refs/heads/master by this push:
new abcb37d add new error message for customized partition check host
connection error (#1984)
abcb37d is described below
commit abcb37d822499094e3e0dab0f56393a1d0878b60
Author: xyuanlu <[email protected]>
AuthorDate: Thu Mar 31 16:19:00 2022 -0700
add new error message for customized partition check host connection error
(#1984)
Add new error message for customized partition check host connection error
---
.../java/org/apache/helix/util/InstanceValidationUtil.java | 14 ++++++++++----
.../TestMaintenanceManagementService.java | 4 ++--
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git
a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
index 5939e19..d35fdb4 100644
--- a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
+++ b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
@@ -55,6 +55,7 @@ public class InstanceValidationUtil {
ImmutableSet.of(HelixDefinedState.DROPPED.name(),
HelixDefinedState.ERROR.name());
static final String UNHEALTHY_PARTITION = "UNHEALTHY_PARTITION";
+ static final String HOST_NO_STATE_ERROR = "HOST_NO_STATE_ERROR:";
// The message that will be shown if partition is in initial state of the
state model and
// partition health check has been skipped for that instance
static final String PARTITION_INITIAL_STATE_FAIL =
"PARTITION_INITIAL_STATE_FAIL";
@@ -257,11 +258,16 @@ public class InstanceValidationUtil {
continue;
}
- // We are checking sibling partition healthy status. So if
partition health does not
- // exist or it is not healthy. We should mark this partition is
unhealthy.
+ // If we failed to get partition assignment for one sibling
instance, we add the
+ // instance name in return error for debuggability.
if (!globalPartitionHealthStatus.containsKey(siblingInstance)
- ||
!globalPartitionHealthStatus.get(siblingInstance).containsKey(partition)
- ||
!globalPartitionHealthStatus.get(siblingInstance).get(partition)) {
+ || globalPartitionHealthStatus.get(siblingInstance).isEmpty())
{
+ unhealthyPartitions.computeIfAbsent(partition, list -> new
ArrayList<>())
+ .add(HOST_NO_STATE_ERROR + siblingInstance);
+ } else if (!globalPartitionHealthStatus.get(siblingInstance)
+ .getOrDefault(partition, false)) {
+ // We are checking sibling partition healthy status. So if
partition health does not
+ // exist or it is not healthy. We should mark this partition is
unhealthy.
unhealthyPartitions.computeIfAbsent(partition, list -> new
ArrayList<>())
.add(UNHEALTHY_PARTITION);
}
diff --git
a/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java
b/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java
index a911286..c9ed4b1 100644
---
a/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java
+++
b/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java
@@ -242,12 +242,12 @@ public class TestMaintenanceManagementService {
MaintenanceManagementService.getMapFromJsonPayload(jsonContent),
Collections.singletonList("org.apache.helix.rest.server.TestOperationImpl"),
Collections.EMPTY_MAP, true);
Assert.assertFalse(instanceInfo.isSuccessful());
- Assert.assertEquals(instanceInfo.getMessages().get(0),
"CUSTOM_PARTITION_HEALTH_FAILURE:UNHEALTHY_PARTITION:PARTITION_0");
+ Assert.assertEquals(instanceInfo.getMessages().get(0),
"CUSTOM_PARTITION_HEALTH_FAILURE:HOST_NO_STATE_ERROR:INSTANCE0.LINKEDIN.COM_1236:PARTITION_0");
// Operation should finish even with check failed.
MockMaintenanceManagementService instanceServiceSkipFailure =
new MockMaintenanceManagementService(zkHelixDataAccessor,
_configAccessor, _customRestClient, true,
-
ImmutableSet.of("CUSTOM_PARTITION_HEALTH_FAILURE:UNHEALTHY_PARTITION"),
HelixRestNamespace.DEFAULT_NAMESPACE_NAME);
+
ImmutableSet.of("CUSTOM_PARTITION_HEALTH_FAILURE:HOST_NO_STATE_ERROR"),
HelixRestNamespace.DEFAULT_NAMESPACE_NAME);
MaintenanceManagementInstanceInfo instanceInfo2 =
instanceServiceSkipFailure.takeInstance(TEST_CLUSTER, TEST_INSTANCE,
Collections.singletonList("CustomInstanceStoppableCheck"),
MaintenanceManagementService.getMapFromJsonPayload(jsonContent),
Collections.singletonList("org.apache.helix.rest.server.TestOperationImpl"),