Repository: helix Updated Branches: refs/heads/master 43555ff0d -> 45009e2c4
Fix unstable TestControllerLeadershipChange Project: http://git-wip-us.apache.org/repos/asf/helix/repo Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/45009e2c Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/45009e2c Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/45009e2c Branch: refs/heads/master Commit: 45009e2c48289fce825075e276aebc064ef195c2 Parents: 43555ff Author: Harry Zhang <[email protected]> Authored: Thu Nov 1 17:50:09 2018 -0700 Committer: Harry Zhang <[email protected]> Committed: Fri Nov 2 10:58:07 2018 -0700 ---------------------------------------------------------------------- .../TestControllerLeadershipChange.java | 44 ++++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/helix/blob/45009e2c/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java ---------------------------------------------------------------------- diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java index 6c0236f..f497894 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java +++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java @@ -33,6 +33,8 @@ import org.apache.helix.integration.manager.MockParticipantManager; import org.apache.helix.model.IdealState; import org.apache.helix.model.LiveInstance; import org.apache.helix.monitoring.mbeans.MonitorDomainNames; +import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier; +import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier; import org.testng.Assert; import org.testng.annotations.Test; @@ -52,6 +54,10 @@ public class TestControllerLeadershipChange extends ZkTestBase { // Create cluster _gSetupTool.addCluster(clusterName, true); + // Create cluster verifier + ZkHelixClusterVerifier clusterVerifier = + new BestPossibleExternalViewVerifier.Builder(clusterName).setZkClient(_gZkClient).build(); + // Create participant _gSetupTool.addInstanceToCluster(clusterName, instanceName); MockParticipantManager participant = @@ -72,20 +78,20 @@ public class TestControllerLeadershipChange extends ZkTestBase { // Rebalance Resource _gSetupTool .rebalanceResource(clusterName, resourceName, numReplica); + // Wait for rebalance - Thread.sleep(2000); + Assert.assertTrue(clusterVerifier.verifyByPolling()); // Trigger missing top state in manager1 participant.syncStop(); - Thread.sleep(2000); + Thread.sleep(1000); // Starting manager2 HelixManager manager2 = HelixManagerFactory .getZKHelixManager(clusterName, clusterName + "-manager2", InstanceType.CONTROLLER, ZK_ADDR); manager2.connect(); - Assert.assertFalse(manager2.isLeader()); // Set leader to manager2 setLeader(manager2); @@ -93,30 +99,33 @@ public class TestControllerLeadershipChange extends ZkTestBase { Assert.assertFalse(manager1.isLeader()); Assert.assertTrue(manager2.isLeader()); - // Make resource top state to come back - participant = new MockParticipantManager(ZK_ADDR, clusterName, instanceName); - participant.syncStart(); - // Wait for rebalance - Thread.sleep(2000); + Assert.assertTrue(clusterVerifier.verify()); + + Thread.sleep(1000); setLeader(manager1); Assert.assertTrue(manager1.isLeader()); Assert.assertFalse(manager2.isLeader()); + // Make resource top state to come back by restarting participant + participant = new MockParticipantManager(ZK_ADDR, clusterName, instanceName); + participant.syncStart(); + + _gSetupTool.rebalanceResource(clusterName, resourceName, numReplica); - // Wait for manager1 to update - Thread.sleep(2000); + Assert.assertTrue(clusterVerifier.verifyByPolling()); - // Resource lost top state, and manager1 lost leadership for 4000ms, because manager1 will + // Resource lost top state, and manager1 lost leadership for 2000ms, because manager1 will // clean monitoring cache after re-gaining leadership, so max value of hand off duration should // not have such a large value Assert.assertTrue((long) beanServer .getAttribute(resourceMBeanObjectName, "PartitionTopStateHandoffDurationGauge.Max") < 500); + } - private void setLeader(HelixManager manager) { + private void setLeader(HelixManager manager) throws Exception { System.out.println("Setting controller " + manager.getInstanceName() + " as leader"); HelixDataAccessor accessor = manager.getHelixDataAccessor(); final LiveInstance leader = new LiveInstance(manager.getInstanceName()); @@ -125,12 +134,11 @@ public class TestControllerLeadershipChange extends ZkTestBase { leader.setHelixVersion(manager.getVersion()); // Delete the current controller leader node so it will trigger leader election - accessor.getBaseDataAccessor().remove(PropertyPathBuilder.controllerLeader(manager.getClusterName()), AccessOption.EPHEMERAL); - - // No matter who gets leadership, force the given manager to become leader - // Note there is theoretically a racing condition that GenericHelixController.onControllerChange() - // will not catch this new value when it's double checking leadership, but it's stable enough - accessor.getBaseDataAccessor().set(PropertyPathBuilder.controllerLeader(manager.getClusterName()), leader.getRecord(), AccessOption.EPHEMERAL); + while (!manager.isLeader()) { + accessor.getBaseDataAccessor() + .remove(PropertyPathBuilder.controllerLeader(manager.getClusterName()), AccessOption.EPHEMERAL); + Thread.sleep(50); + } } private ObjectName getResourceMonitorObjectName(String clusterName, String resourceName)
