This is an automated email from the ASF dual-hosted git repository. jxue pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/helix.git
commit 4dcf624777786fadcad5ed71471db27a775e36eb Author: Huizhi Lu <[email protected]> AuthorDate: Mon Apr 13 13:25:20 2020 -0700 Fix TestCrushAutoRebalanceNonRack failure of dropping instance --- .../org/apache/helix/manager/zk/ZKHelixAdmin.java | 10 +++--- .../helix/manager/zk/ZkBaseDataAccessor.java | 5 +-- .../TestCrushAutoRebalanceNonRack.java | 14 +++++++- .../helix/manager/zk/TestZkBaseDataAccessor.java | 20 ++++++++++++ .../apache/helix/manager/zk/TestZkHelixAdmin.java | 37 +++++++++++++++++++++- 5 files changed, 78 insertions(+), 8 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java index bc62b18..095fd08 100644 --- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java @@ -83,6 +83,7 @@ import org.apache.helix.util.RebalanceUtil; import org.apache.helix.zookeeper.api.client.HelixZkClient; import org.apache.helix.zookeeper.api.client.RealmAwareZkClient; import org.apache.helix.zookeeper.datamodel.ZNRecord; +import org.apache.helix.zookeeper.exception.ZkClientException; import org.apache.helix.zookeeper.impl.client.FederatedZkClient; import org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory; import org.apache.helix.zookeeper.util.HttpRoutingDataReader; @@ -225,7 +226,7 @@ public class ZKHelixAdmin implements HelixAdmin { try { _zkClient.deleteRecursively(instancePath); return; - } catch (HelixException e) { + } catch (ZkClientException e) { if (retryCnt < 3 && e.getCause() instanceof ZkException && e.getCause() .getCause() instanceof KeeperException.NotEmptyException) { // Racing condition with controller's persisting node history, retryable. @@ -235,9 +236,10 @@ public class ZKHelixAdmin implements HelixAdmin { instanceConfig.getInstanceName(), e.getCause().getMessage()); retryCnt++; } else { - logger.error("Failed to drop instance {} (not retryable).", - instanceConfig.getInstanceName(), e.getCause()); - throw e; + String errorMessage = "Failed to drop instance: " + instanceConfig.getInstanceName() + + ". Retry times: " + retryCnt; + logger.error(errorMessage, retryCnt, e.getCause()); + throw new HelixException(errorMessage, e); } } } diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java index a7596f4..d5ea20b 100644 --- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java @@ -40,6 +40,7 @@ import org.apache.helix.util.HelixUtil; import org.apache.helix.zookeeper.api.client.HelixZkClient; import org.apache.helix.zookeeper.api.client.RealmAwareZkClient; import org.apache.helix.zookeeper.datamodel.ZNRecord; +import org.apache.helix.zookeeper.exception.ZkClientException; import org.apache.helix.zookeeper.impl.client.FederatedZkClient; import org.apache.helix.zookeeper.impl.factory.DedicatedZkClientFactory; import org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory; @@ -703,8 +704,8 @@ public class ZkBaseDataAccessor<T> implements BaseDataAccessor<T> { e.getMessage()); try { _zkClient.deleteRecursively(path); - } catch (HelixException he) { - LOG.error("Failed to delete {} recursively with opts {}.", path, options, he); + } catch (ZkClientException zce) { + LOG.error("Failed to delete {} recursively with opts {}.", path, options, zce); return false; } } diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java index 5b7d083..f889074 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java @@ -28,11 +28,15 @@ import java.util.Map; import java.util.Set; import org.apache.helix.ConfigAccessor; +import org.apache.helix.HelixDataAccessor; +import org.apache.helix.InstanceType; +import org.apache.helix.TestHelper; import org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy; import org.apache.helix.controller.rebalancer.strategy.CrushRebalanceStrategy; import org.apache.helix.integration.common.ZkStandAloneCMTestBase; import org.apache.helix.integration.manager.ClusterControllerManager; import org.apache.helix.integration.manager.MockParticipantManager; +import org.apache.helix.manager.zk.ZKHelixDataAccessor; import org.apache.helix.model.BuiltInStateModelDefinitions; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.ExternalView; @@ -42,6 +46,7 @@ import org.apache.helix.model.InstanceConfig; import org.apache.helix.tools.ClusterVerifiers.HelixClusterVerifier; import org.apache.helix.tools.ClusterVerifiers.StrictMatchExternalViewVerifier; import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier; +import org.apache.helix.util.InstanceValidationUtil; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.AfterMethod; @@ -242,13 +247,20 @@ public class TestCrushAutoRebalanceNonRack extends ZkStandAloneCMTestBase { enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true); // shutdown participants, keep only two left + HelixDataAccessor helixDataAccessor = + new ZKHelixDataAccessor(CLUSTER_NAME, InstanceType.PARTICIPANT, _baseAccessor); for (int i = 2; i < _participants.size(); i++) { MockParticipantManager p = _participants.get(i); p.syncStop(); _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, p.getInstanceName(), false); + Assert.assertTrue(TestHelper.verify(() -> { + _gSetupTool.getClusterManagementTool() + .enableInstance(CLUSTER_NAME, p.getInstanceName(), false); + return !InstanceValidationUtil.isEnabled(helixDataAccessor, p.getInstanceName()) + && !InstanceValidationUtil.isAlive(helixDataAccessor, p.getInstanceName()); + }, TestHelper.WAIT_DURATION), "Instance should be disabled and offline"); _gSetupTool.dropInstanceFromCluster(CLUSTER_NAME, p.getInstanceName()); - } int j = 0; diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java index 3e03125..9e56b6e 100644 --- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java +++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java @@ -30,15 +30,19 @@ import org.apache.helix.AccessOption; import org.apache.helix.BaseDataAccessor; import org.apache.helix.PropertyPathBuilder; import org.apache.helix.TestHelper; +import org.apache.helix.zookeeper.api.client.RealmAwareZkClient; import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.datamodel.ZNRecordUpdater; import org.apache.helix.ZkUnitTestBase; import org.apache.helix.manager.zk.ZkBaseDataAccessor.AccessResult; import org.apache.helix.manager.zk.ZkBaseDataAccessor.RetCode; +import org.apache.helix.zookeeper.exception.ZkClientException; import org.apache.helix.zookeeper.zkclient.DataUpdater; +import org.apache.helix.zookeeper.zkclient.exception.ZkException; import org.apache.helix.zookeeper.zkclient.exception.ZkMarshallingError; import org.apache.helix.zookeeper.zkclient.serialize.ZkSerializer; import org.apache.zookeeper.data.Stat; +import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.Test; @@ -343,6 +347,22 @@ public class TestZkBaseDataAccessor extends ZkUnitTestBase { Assert.assertNotNull(getRecord); Assert.assertEquals(getRecord.getId(), "msg_0"); + // Tests that ZkClientException thrown from ZkClient should be caught + // and remove() should return false. + RealmAwareZkClient mockZkClient = Mockito.mock(RealmAwareZkClient.class); + Mockito.doThrow(new ZkException("Failed to delete " + path)).when(mockZkClient) + .delete(path); + Mockito.doThrow(new ZkClientException("Failed to recursively delete " + path)).when(mockZkClient) + .deleteRecursively(path); + ZkBaseDataAccessor<ZNRecord> accessorMock = + new ZkBaseDataAccessor<>(mockZkClient); + try { + Assert.assertFalse(accessorMock.remove(path, AccessOption.PERSISTENT), + "Should return false because ZkClientException is thrown"); + } catch (ZkClientException e) { + Assert.fail("Should not throw ZkClientException because it should be caught."); + } + success = accessor.remove(path, 0); Assert.assertTrue(success); Assert.assertFalse(_gZkClient.exists(path)); diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java index 234f2c3..639a4b1 100644 --- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java +++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java @@ -41,6 +41,7 @@ import org.apache.helix.PropertyKey; import org.apache.helix.PropertyPathBuilder; import org.apache.helix.PropertyType; import org.apache.helix.TestHelper; +import org.apache.helix.zookeeper.api.client.RealmAwareZkClient; import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.ZkUnitTestBase; import org.apache.helix.controller.rebalancer.waged.WagedRebalancer; @@ -48,7 +49,6 @@ import org.apache.helix.examples.MasterSlaveStateModelFactory; import org.apache.helix.integration.manager.MockParticipantManager; import org.apache.helix.model.ClusterConfig; import org.apache.helix.cloud.constants.CloudProvider; -import org.apache.helix.examples.MasterSlaveStateModelFactory; import org.apache.helix.model.CloudConfig; import org.apache.helix.model.ClusterConstraints; import org.apache.helix.model.ClusterConstraints.ConstraintAttribute; @@ -66,8 +66,12 @@ import org.apache.helix.model.builder.ConstraintItemBuilder; import org.apache.helix.model.builder.HelixConfigScopeBuilder; import org.apache.helix.participant.StateMachineEngine; import org.apache.helix.tools.StateModelConfigGenerator; +import org.apache.helix.zookeeper.exception.ZkClientException; +import org.apache.helix.zookeeper.zkclient.exception.ZkException; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; import org.codehaus.jackson.map.ObjectMapper; +import org.mockito.Mockito; import org.testng.Assert; import org.testng.AssertJUnit; import org.testng.annotations.BeforeClass; @@ -191,6 +195,36 @@ public class TestZkHelixAdmin extends ZkUnitTestBase { Assert.fail("HelixManager failed disconnecting"); } + // Tests that ZkClientException thrown from ZkClient should be caught + // and it should be converted HelixException to be rethrown + String instancePath = PropertyPathBuilder.instance(clusterName, config.getInstanceName()); + String instanceConfigPath = PropertyPathBuilder.instanceConfig(clusterName, instanceName); + String liveInstancePath = PropertyPathBuilder.liveInstance(clusterName, instanceName); + RealmAwareZkClient mockZkClient = Mockito.mock(RealmAwareZkClient.class); + // Mock the exists() method to let dropInstance() reach deleteRecursively(). + Mockito.when(mockZkClient.exists(instanceConfigPath)).thenReturn(true); + Mockito.when(mockZkClient.exists(instancePath)).thenReturn(true); + Mockito.when(mockZkClient.exists(liveInstancePath)).thenReturn(false); + Mockito.doThrow(new ZkClientException("ZkClientException: failed to delete " + instancePath, + new ZkException("ZkException: failed to delete " + instancePath, + new KeeperException.NotEmptyException( + "NotEmptyException: directory" + instancePath + " is not empty")))) + .when(mockZkClient).deleteRecursively(instancePath); + + HelixAdmin helixAdminMock = new ZKHelixAdmin(mockZkClient); + try { + helixAdminMock.dropInstance(clusterName, config); + Assert.fail("Should throw HelixException"); + } catch (HelixException expected) { + // This exception is expected because it is converted from ZkClientException and rethrown. + Assert.assertEquals(expected.getMessage(), + "Failed to drop instance: " + config.getInstanceName() + ". Retry times: 3"); + } catch (ZkClientException e) { + if (e.getMessage().equals("ZkClientException: failed to delete " + instancePath)) { + Assert.fail("Should not throw ZkClientException because it should be caught."); + } + } + tool.dropInstance(clusterName, config); // correctly drop the instance try { @@ -211,6 +245,7 @@ public class TestZkHelixAdmin extends ZkUnitTestBase { } catch (HelixException e) { // OK } + ZNRecord stateModelRecord = new ZNRecord("id1"); try { tool.addStateModelDef(clusterName, "id1", new StateModelDefinition(stateModelRecord));
