This is an automated email from the ASF dual-hosted git repository.
jiajunwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git
The following commit(s) were added to refs/heads/master by this push:
new 6c62cd9 Add more accurate error message for resetPartition (#1007)
6c62cd9 is described below
commit 6c62cd9b21b102c7effe646103f0c382570d70ed
Author: xyuanlu <[email protected]>
AuthorDate: Wed Jun 3 14:18:59 2020 -0700
Add more accurate error message for resetPartition (#1007)
This commit adds more specific messages for failure reason for
reserPartion. It also adds a unit test for resetPartition.
Co-authored-by: Xiaoyuan Lu <[email protected]>
---
.../org/apache/helix/manager/zk/ZKHelixAdmin.java | 74 +++++++++-------
.../apache/helix/manager/zk/TestZkHelixAdmin.java | 98 ++++++++++++++++++++++
2 files changed, 143 insertions(+), 29 deletions(-)
diff --git
a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index 1660570..e5a5e5c 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -560,6 +560,27 @@ public class ZKHelixAdmin implements HelixAdmin {
}
}
+ private enum ResetPartitionFailureReason {
+ INSTANCE_NOT_ALIVE("%s is not alive in cluster %s"),
+ INSTANCE_NON_EXISTENT("%s does not exist in cluster %s"),
+ RESOURCE_NON_EXISTENT("resource %s is not added to cluster %s"),
+ PARTITION_NON_EXISTENT("not all %s exist in cluster %s"),
+ PARTITION_NOT_ERROR("%s is NOT found in cluster %s"),
+ STATE_MODEL_NON_EXISTENT("%s is NOT found in cluster %s");
+
+ private String message;
+
+ ResetPartitionFailureReason(String message) {
+ this.message = message;
+ }
+
+ public String getMessage(String resourceName, List<String> partitionNames,
String instanceName,
+ String errorStateEntity, String clusterName) {
+ return String.format("Can't reset state for %s.%s on %s, because " +
message, resourceName,
+ partitionNames, instanceName, errorStateEntity, clusterName);
+ }
+ }
+
@Override
public void resetPartition(String clusterName, String instanceName, String
resourceName,
List<String> partitionNames) {
@@ -573,35 +594,30 @@ public class ZKHelixAdmin implements HelixAdmin {
// check the instance is alive
LiveInstance liveInstance =
accessor.getProperty(keyBuilder.liveInstance(instanceName));
if (liveInstance == null) {
- throw new HelixException(
- "Can't reset state for " + resourceName + "/" + partitionNames + "
on " + instanceName
- + ", because " + instanceName + " is not alive");
+ // check if the instance exists in the cluster
+ String instanceConfigPath =
PropertyPathBuilder.instanceConfig(clusterName, instanceName);
+ throw new HelixException(String.format(
+ (_zkClient.exists(instanceConfigPath) ?
ResetPartitionFailureReason.INSTANCE_NOT_ALIVE
+ : ResetPartitionFailureReason.INSTANCE_NON_EXISTENT)
+ .getMessage(resourceName, partitionNames, instanceName,
instanceName, clusterName)));
}
// check resource group exists
IdealState idealState =
accessor.getProperty(keyBuilder.idealStates(resourceName));
if (idealState == null) {
- throw new HelixException(
- "Can't reset state for " + resourceName + "/" + partitionNames + "
on " + instanceName
- + ", because " + resourceName + " is not added");
+ throw new
HelixException(String.format(ResetPartitionFailureReason.RESOURCE_NON_EXISTENT
+ .getMessage(resourceName, partitionNames, instanceName,
resourceName, clusterName)));
}
// check partition exists in resource group
Set<String> resetPartitionNames = new HashSet<String>(partitionNames);
- if (idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED) {
- Set<String> partitions = new
HashSet<String>(idealState.getRecord().getMapFields().keySet());
- if (!partitions.containsAll(resetPartitionNames)) {
- throw new HelixException(
- "Can't reset state for " + resourceName + "/" + partitionNames + "
on " + instanceName
- + ", because not all " + partitionNames + " exist");
- }
- } else {
- Set<String> partitions = new
HashSet<String>(idealState.getRecord().getListFields().keySet());
- if (!partitions.containsAll(resetPartitionNames)) {
- throw new HelixException(
- "Can't reset state for " + resourceName + "/" + partitionNames + "
on " + instanceName
- + ", because not all " + partitionNames + " exist");
- }
+ Set<String> partitions =
+ (idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED) ?
idealState.getRecord()
+ .getMapFields().keySet() :
idealState.getRecord().getListFields().keySet();
+ if (!partitions.containsAll(resetPartitionNames)) {
+ throw new
HelixException(String.format(ResetPartitionFailureReason.PARTITION_NON_EXISTENT
+ .getMessage(resourceName, partitionNames, instanceName,
partitionNames.toString(),
+ clusterName)));
}
// check partition is in ERROR state
@@ -610,9 +626,9 @@ public class ZKHelixAdmin implements HelixAdmin {
accessor.getProperty(keyBuilder.currentState(instanceName, sessionId,
resourceName));
for (String partitionName : resetPartitionNames) {
if
(!curState.getState(partitionName).equals(HelixDefinedState.ERROR.toString())) {
- throw new HelixException(
- "Can't reset state for " + resourceName + "/" + partitionNames + "
on " + instanceName
- + ", because not all " + partitionNames + " are in ERROR
state");
+ throw new
HelixException(String.format(ResetPartitionFailureReason.PARTITION_NOT_ERROR
+ .getMessage(resourceName, partitionNames, instanceName,
partitionNames.toString(),
+ clusterName)));
}
}
@@ -620,9 +636,8 @@ public class ZKHelixAdmin implements HelixAdmin {
String stateModelDef = idealState.getStateModelDefRef();
StateModelDefinition stateModel =
accessor.getProperty(keyBuilder.stateModelDef(stateModelDef));
if (stateModel == null) {
- throw new HelixException(
- "Can't reset state for " + resourceName + "/" + partitionNames + "
on " + instanceName
- + ", because " + stateModelDef + " is NOT found");
+ throw new
HelixException(String.format(ResetPartitionFailureReason.STATE_MODEL_NON_EXISTENT
+ .getMessage(resourceName, partitionNames, instanceName,
stateModelDef, clusterName)));
}
// check there is no pending messages for the partitions exist
@@ -634,9 +649,10 @@ public class ZKHelixAdmin implements HelixAdmin {
continue;
}
- throw new HelixException(
- "Can't reset state for " + resourceName + "/" + partitionNames + "
on " + instanceName
- + ", because a pending message exists: " + message);
+ throw new HelixException(String.format(
+ "Can't reset state for %s.%s on %s, because a pending message %s
exists for resource %s",
+ resourceName, partitionNames, instanceName, message.toString(),
+ message.getResourceName()));
}
String adminName = null;
diff --git
a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
index 2cb8fa3..5fee970 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
@@ -560,6 +560,104 @@ public class TestZkHelixAdmin extends ZkUnitTestBase {
2);
}
+ @Test
+ public void testResetPartition() throws Exception {
+ String className = TestHelper.getTestClassName();
+ String methodName = TestHelper.getTestMethodName();
+ String clusterName = className + "_" + methodName;
+ String instanceName = "TestInstance";
+ String testResource = "TestResource";
+ String wrongTestInstance = "WrongTestInstance";
+ String wrongTestResource = "WrongTestResource";
+ System.out.println("START " + clusterName + " at " + new
Date(System.currentTimeMillis()));
+ HelixAdmin admin = new ZKHelixAdmin(_gZkClient);
+ admin.addCluster(clusterName, true);
+ admin.addInstance(clusterName, new InstanceConfig(instanceName));
+ admin.enableInstance(clusterName, instanceName, true);
+ InstanceConfig instanceConfig = admin.getInstanceConfig(clusterName,
instanceName);
+
+ IdealState idealState = new IdealState(testResource);
+ idealState.setNumPartitions(3);
+ admin.addStateModelDef(clusterName, "MasterSlave", new MasterSlaveSMD());
+ idealState.setStateModelDefRef("MasterSlave");
+ idealState.setRebalanceMode(IdealState.RebalanceMode.FULL_AUTO);
+ admin.addResource(clusterName, testResource, idealState);
+ admin.enableResource(clusterName, testResource, true);
+
+ /*
+ * This is a unit test for sanity check in resetPartition().
+ * There is no running controller in this test. We have end-to-end tests
for resetPartition()
+ * under webapp/TestResetPartitionState and
integration/TestResetPartitionState.
+ */
+ // resetPartition is expected to throw an exception when provided with a
nonexistent instance.
+ try {
+ admin.resetPartition(clusterName, wrongTestInstance, testResource,
Arrays.asList("1", "2"));
+ Assert.fail("Should throw HelixException");
+ } catch (HelixException expected) {
+ // This exception is expected because the instance name is made up.
+ Assert.assertEquals(expected.getMessage(), String.format(
+ "Can't reset state for %s.[1, 2] on WrongTestInstance, because %s
does not exist in cluster %s",
+ testResource, wrongTestInstance, clusterName));
+ }
+
+ // resetPartition is expected to throw an exception when provided with a
non-live instance.
+ try {
+ admin.resetPartition(clusterName, instanceName, testResource,
Arrays.asList("1", "2"));
+ Assert.fail("Should throw HelixException");
+ } catch (HelixException expected) {
+ // This exception is expected because the instance is not alive.
+ Assert.assertEquals(expected.getMessage(), String
+ .format("Can't reset state for %s.[1, 2] on %s, because %s is not
alive in cluster %s",
+ testResource, instanceName, instanceName, clusterName));
+ }
+
+ HelixManager manager = initializeHelixManager(clusterName,
instanceConfig.getInstanceName());
+ manager.connect();
+
+ // resetPartition is expected to throw an exception when provided with a
nonexistent resource.
+ try {
+ admin.resetPartition(clusterName, instanceName, wrongTestResource,
Arrays.asList("1", "2"));
+ Assert.fail("Should throw HelixException");
+ } catch (HelixException expected) {
+ // This exception is expected because the resource is not added.
+ Assert.assertEquals(expected.getMessage(), String.format(
+ "Can't reset state for %s.[1, 2] on %s, because resource %s is not
added to cluster %s",
+ wrongTestResource, instanceName, wrongTestResource, clusterName));
+ }
+
+ try {
+ admin.resetPartition(clusterName, instanceName, testResource,
Arrays.asList("1", "2"));
+ Assert.fail("Should throw HelixException");
+ } catch (HelixException expected) {
+ // This exception is expected because partitions do not exist.
+ Assert.assertEquals(expected.getMessage(), String.format(
+ "Can't reset state for %s.[1, 2] on %s, because not all [1, 2] exist
in cluster %s",
+ testResource, instanceName, clusterName));
+ }
+
+ // clean up
+ manager.disconnect();
+ admin.dropCluster(clusterName);
+
+ // verify the cluster has been removed successfully
+ HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(className, new
ZkBaseDataAccessor<>(_gZkClient));
+ try {
+ Assert.assertTrue(TestHelper.verify(() ->
dataAccessor.getChildNames(dataAccessor.keyBuilder().liveInstances()).isEmpty(),
1000));
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.out.println("There're live instances not cleaned up yet");
+ assert false;
+ }
+
+ try {
+ Assert.assertTrue(TestHelper.verify(() ->
dataAccessor.getChildNames(dataAccessor.keyBuilder().clusterConfig()).isEmpty(),
1000));
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.out.println("The cluster is not cleaned up yet");
+ assert false;
+ }
+ }
+
/**
* Test addResourceWithWeight() and validateResourcesForWagedRebalance() by
trying to add a resource with incomplete ResourceConfig.
*/