This is an automated email from the ASF dual-hosted git repository.

jiajunwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c62cd9  Add more accurate error message for resetPartition  (#1007)
6c62cd9 is described below

commit 6c62cd9b21b102c7effe646103f0c382570d70ed
Author: xyuanlu <[email protected]>
AuthorDate: Wed Jun 3 14:18:59 2020 -0700

    Add more accurate error message for resetPartition  (#1007)
    
    This commit adds more specific messages for failure reason for 
reserPartion. It also adds a unit test for resetPartition.
    
    Co-authored-by: Xiaoyuan Lu <[email protected]>
---
 .../org/apache/helix/manager/zk/ZKHelixAdmin.java  | 74 +++++++++-------
 .../apache/helix/manager/zk/TestZkHelixAdmin.java  | 98 ++++++++++++++++++++++
 2 files changed, 143 insertions(+), 29 deletions(-)

diff --git 
a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java 
b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index 1660570..e5a5e5c 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -560,6 +560,27 @@ public class ZKHelixAdmin implements HelixAdmin {
     }
   }
 
+  private enum ResetPartitionFailureReason {
+    INSTANCE_NOT_ALIVE("%s is not alive in cluster %s"),
+    INSTANCE_NON_EXISTENT("%s does not exist in cluster %s"),
+    RESOURCE_NON_EXISTENT("resource %s is not added to cluster %s"),
+    PARTITION_NON_EXISTENT("not all %s exist in cluster %s"),
+    PARTITION_NOT_ERROR("%s is NOT found in cluster %s"),
+    STATE_MODEL_NON_EXISTENT("%s is NOT found in cluster %s");
+
+    private String message;
+
+    ResetPartitionFailureReason(String message) {
+      this.message = message;
+    }
+
+    public String getMessage(String resourceName, List<String> partitionNames, 
String instanceName,
+        String errorStateEntity, String clusterName) {
+      return String.format("Can't reset state for %s.%s on %s, because " + 
message, resourceName,
+          partitionNames, instanceName, errorStateEntity, clusterName);
+    }
+  }
+
   @Override
   public void resetPartition(String clusterName, String instanceName, String 
resourceName,
       List<String> partitionNames) {
@@ -573,35 +594,30 @@ public class ZKHelixAdmin implements HelixAdmin {
     // check the instance is alive
     LiveInstance liveInstance = 
accessor.getProperty(keyBuilder.liveInstance(instanceName));
     if (liveInstance == null) {
-      throw new HelixException(
-          "Can't reset state for " + resourceName + "/" + partitionNames + " 
on " + instanceName
-              + ", because " + instanceName + " is not alive");
+      // check if the instance exists in the cluster
+      String instanceConfigPath = 
PropertyPathBuilder.instanceConfig(clusterName, instanceName);
+      throw new HelixException(String.format(
+          (_zkClient.exists(instanceConfigPath) ? 
ResetPartitionFailureReason.INSTANCE_NOT_ALIVE
+              : ResetPartitionFailureReason.INSTANCE_NON_EXISTENT)
+              .getMessage(resourceName, partitionNames, instanceName, 
instanceName, clusterName)));
     }
 
     // check resource group exists
     IdealState idealState = 
accessor.getProperty(keyBuilder.idealStates(resourceName));
     if (idealState == null) {
-      throw new HelixException(
-          "Can't reset state for " + resourceName + "/" + partitionNames + " 
on " + instanceName
-              + ", because " + resourceName + " is not added");
+      throw new 
HelixException(String.format(ResetPartitionFailureReason.RESOURCE_NON_EXISTENT
+          .getMessage(resourceName, partitionNames, instanceName, 
resourceName, clusterName)));
     }
 
     // check partition exists in resource group
     Set<String> resetPartitionNames = new HashSet<String>(partitionNames);
-    if (idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED) {
-      Set<String> partitions = new 
HashSet<String>(idealState.getRecord().getMapFields().keySet());
-      if (!partitions.containsAll(resetPartitionNames)) {
-        throw new HelixException(
-            "Can't reset state for " + resourceName + "/" + partitionNames + " 
on " + instanceName
-                + ", because not all " + partitionNames + " exist");
-      }
-    } else {
-      Set<String> partitions = new 
HashSet<String>(idealState.getRecord().getListFields().keySet());
-      if (!partitions.containsAll(resetPartitionNames)) {
-        throw new HelixException(
-            "Can't reset state for " + resourceName + "/" + partitionNames + " 
on " + instanceName
-                + ", because not all " + partitionNames + " exist");
-      }
+    Set<String> partitions =
+        (idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED) ? 
idealState.getRecord()
+            .getMapFields().keySet() : 
idealState.getRecord().getListFields().keySet();
+    if (!partitions.containsAll(resetPartitionNames)) {
+      throw new 
HelixException(String.format(ResetPartitionFailureReason.PARTITION_NON_EXISTENT
+          .getMessage(resourceName, partitionNames, instanceName, 
partitionNames.toString(),
+              clusterName)));
     }
 
     // check partition is in ERROR state
@@ -610,9 +626,9 @@ public class ZKHelixAdmin implements HelixAdmin {
         accessor.getProperty(keyBuilder.currentState(instanceName, sessionId, 
resourceName));
     for (String partitionName : resetPartitionNames) {
       if 
(!curState.getState(partitionName).equals(HelixDefinedState.ERROR.toString())) {
-        throw new HelixException(
-            "Can't reset state for " + resourceName + "/" + partitionNames + " 
on " + instanceName
-                + ", because not all " + partitionNames + " are in ERROR 
state");
+        throw new 
HelixException(String.format(ResetPartitionFailureReason.PARTITION_NOT_ERROR
+            .getMessage(resourceName, partitionNames, instanceName, 
partitionNames.toString(),
+                clusterName)));
       }
     }
 
@@ -620,9 +636,8 @@ public class ZKHelixAdmin implements HelixAdmin {
     String stateModelDef = idealState.getStateModelDefRef();
     StateModelDefinition stateModel = 
accessor.getProperty(keyBuilder.stateModelDef(stateModelDef));
     if (stateModel == null) {
-      throw new HelixException(
-          "Can't reset state for " + resourceName + "/" + partitionNames + " 
on " + instanceName
-              + ", because " + stateModelDef + " is NOT found");
+      throw new 
HelixException(String.format(ResetPartitionFailureReason.STATE_MODEL_NON_EXISTENT
+          .getMessage(resourceName, partitionNames, instanceName, 
stateModelDef, clusterName)));
     }
 
     // check there is no pending messages for the partitions exist
@@ -634,9 +649,10 @@ public class ZKHelixAdmin implements HelixAdmin {
         continue;
       }
 
-      throw new HelixException(
-          "Can't reset state for " + resourceName + "/" + partitionNames + " 
on " + instanceName
-              + ", because a pending message exists: " + message);
+      throw new HelixException(String.format(
+          "Can't reset state for %s.%s on %s, because a pending message %s 
exists for resource %s",
+          resourceName, partitionNames, instanceName, message.toString(),
+          message.getResourceName()));
     }
 
     String adminName = null;
diff --git 
a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java 
b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
index 2cb8fa3..5fee970 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
@@ -560,6 +560,104 @@ public class TestZkHelixAdmin extends ZkUnitTestBase {
         2);
   }
 
+  @Test
+  public void testResetPartition() throws Exception {
+    String className = TestHelper.getTestClassName();
+    String methodName = TestHelper.getTestMethodName();
+    String clusterName = className + "_" + methodName;
+    String instanceName = "TestInstance";
+    String testResource = "TestResource";
+    String wrongTestInstance = "WrongTestInstance";
+    String wrongTestResource = "WrongTestResource";
+    System.out.println("START " + clusterName + " at " + new 
Date(System.currentTimeMillis()));
+    HelixAdmin admin = new ZKHelixAdmin(_gZkClient);
+    admin.addCluster(clusterName, true);
+    admin.addInstance(clusterName, new InstanceConfig(instanceName));
+    admin.enableInstance(clusterName, instanceName, true);
+    InstanceConfig instanceConfig = admin.getInstanceConfig(clusterName, 
instanceName);
+
+    IdealState idealState = new IdealState(testResource);
+    idealState.setNumPartitions(3);
+    admin.addStateModelDef(clusterName, "MasterSlave", new MasterSlaveSMD());
+    idealState.setStateModelDefRef("MasterSlave");
+    idealState.setRebalanceMode(IdealState.RebalanceMode.FULL_AUTO);
+    admin.addResource(clusterName, testResource, idealState);
+    admin.enableResource(clusterName, testResource, true);
+
+    /*
+     * This is a unit test for sanity check in resetPartition().
+     * There is no running controller in this test. We have end-to-end tests 
for resetPartition()
+     * under webapp/TestResetPartitionState and 
integration/TestResetPartitionState.
+     */
+    // resetPartition is expected to throw an exception when provided with a 
nonexistent instance.
+    try {
+      admin.resetPartition(clusterName, wrongTestInstance, testResource, 
Arrays.asList("1", "2"));
+      Assert.fail("Should throw HelixException");
+    } catch (HelixException expected) {
+      // This exception is expected because the instance name is made up.
+      Assert.assertEquals(expected.getMessage(), String.format(
+          "Can't reset state for %s.[1, 2] on WrongTestInstance, because %s 
does not exist in cluster %s",
+          testResource, wrongTestInstance, clusterName));
+    }
+
+    // resetPartition is expected to throw an exception when provided with a 
non-live instance.
+    try {
+      admin.resetPartition(clusterName, instanceName, testResource, 
Arrays.asList("1", "2"));
+      Assert.fail("Should throw HelixException");
+    } catch (HelixException expected) {
+      // This exception is expected because the instance is not alive.
+      Assert.assertEquals(expected.getMessage(), String
+          .format("Can't reset state for %s.[1, 2] on %s, because %s is not 
alive in cluster %s",
+              testResource, instanceName, instanceName, clusterName));
+    }
+
+    HelixManager manager = initializeHelixManager(clusterName, 
instanceConfig.getInstanceName());
+    manager.connect();
+
+    // resetPartition is expected to throw an exception when provided with a 
nonexistent resource.
+    try {
+      admin.resetPartition(clusterName, instanceName, wrongTestResource, 
Arrays.asList("1", "2"));
+      Assert.fail("Should throw HelixException");
+    } catch (HelixException expected) {
+      // This exception is expected because the resource is not added.
+      Assert.assertEquals(expected.getMessage(), String.format(
+          "Can't reset state for %s.[1, 2] on %s, because resource %s is not 
added to cluster %s",
+          wrongTestResource, instanceName, wrongTestResource, clusterName));
+    }
+
+    try {
+      admin.resetPartition(clusterName, instanceName, testResource, 
Arrays.asList("1", "2"));
+      Assert.fail("Should throw HelixException");
+    } catch (HelixException expected) {
+      // This exception is expected because partitions do not exist.
+      Assert.assertEquals(expected.getMessage(), String.format(
+          "Can't reset state for %s.[1, 2] on %s, because not all [1, 2] exist 
in cluster %s",
+          testResource, instanceName, clusterName));
+    }
+
+    // clean up
+    manager.disconnect();
+    admin.dropCluster(clusterName);
+
+    // verify the cluster has been removed successfully
+    HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(className, new 
ZkBaseDataAccessor<>(_gZkClient));
+    try {
+      Assert.assertTrue(TestHelper.verify(() -> 
dataAccessor.getChildNames(dataAccessor.keyBuilder().liveInstances()).isEmpty(),
 1000));
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.out.println("There're live instances not cleaned up yet");
+      assert false;
+    }
+
+    try {
+      Assert.assertTrue(TestHelper.verify(() -> 
dataAccessor.getChildNames(dataAccessor.keyBuilder().clusterConfig()).isEmpty(),
 1000));
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.out.println("The cluster is not cleaned up yet");
+      assert false;
+    }
+  }
+
   /**
    * Test addResourceWithWeight() and validateResourcesForWagedRebalance() by 
trying to add a resource with incomplete ResourceConfig.
    */

Reply via email to