This is an automated email from the ASF dual-hosted git repository. randgalt pushed a commit to branch CURATOR-564-retry-testing-cluster-startup-failures in repository https://gitbox.apache.org/repos/asf/curator.git
commit f5353056aef833775f97e19aad1665a2679d8c9c Author: randgalt <[email protected]> AuthorDate: Mon Mar 23 12:19:21 2020 -0500 CURATOR-564 As was done for TestingServer, catch startup issues for TestingCluster and then re-create and re-start the cluster one time. Hopefully this will make the tests more stable. --- .../curator/framework/imps/TestFrameworkEdges.java | 3 +-- .../curator/framework/imps/TestReadOnly.java | 17 +++++++++------ .../framework/imps/TestReconfiguration.java | 10 ++++++--- .../curator/framework/imps/TestWithCluster.java | 6 ++---- .../cache/TestPathChildrenCacheInCluster.java | 8 ++----- .../framework/recipes/leader/TestLeaderLatch.java | 2 +- .../recipes/leader/TestLeaderLatchCluster.java | 4 +--- .../recipes/leader/TestLeaderSelectorCluster.java | 6 ++---- .../locks/TestInterProcessSemaphoreCluster.java | 11 +++------- .../org/apache/curator/test/BaseClassForTests.java | 25 ++++++++++++++++++++++ 10 files changed, 54 insertions(+), 38 deletions(-) diff --git a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestFrameworkEdges.java b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestFrameworkEdges.java index feb22ab..5a7c415 100644 --- a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestFrameworkEdges.java +++ b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestFrameworkEdges.java @@ -110,9 +110,8 @@ public class TestFrameworkEdges extends BaseClassForTests // by the Instance Curator is connected to but the session kill needs a quorum vote (it's a // transaction) - try (TestingCluster cluster = new TestingCluster(3)) + try (TestingCluster cluster = createAndStartCluster(3)) { - cluster.start(); InstanceSpec instanceSpec0 = cluster.getServers().get(0).getInstanceSpec(); CountDownLatch serverStoppedLatch = new CountDownLatch(1); diff --git a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReadOnly.java 
b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReadOnly.java index 13ceec6..b5f90ae 100644 --- a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReadOnly.java +++ b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReadOnly.java @@ -27,6 +27,7 @@ import org.apache.curator.framework.state.ConnectionStateListener; import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.curator.retry.RetryNTimes; import org.apache.curator.retry.RetryOneTime; +import org.apache.curator.test.BaseClassForTests; import org.apache.curator.test.InstanceSpec; import org.apache.curator.test.TestingCluster; import org.apache.curator.test.Timing; @@ -40,7 +41,7 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -public class TestReadOnly +public class TestReadOnly extends BaseClassForTests { @BeforeMethod public void setup() @@ -58,12 +59,10 @@ public class TestReadOnly public void testConnectionStateNewClient() throws Exception { Timing timing = new Timing(); - TestingCluster cluster = new TestingCluster(3); CuratorFramework client = null; + TestingCluster cluster = createAndStartCluster(3); try { - cluster.start(); - client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(100)); client.start(); client.checkExists().forPath("/"); @@ -116,11 +115,9 @@ public class TestReadOnly Timing timing = new Timing(); CuratorFramework client = null; - TestingCluster cluster = new TestingCluster(2); + TestingCluster cluster = createAndStartCluster(2); try { - cluster.start(); - client = CuratorFrameworkFactory.builder().connectString(cluster.getConnectString()).canBeReadOnly(true).connectionTimeoutMs(timing.connection()).sessionTimeoutMs(timing.session()).retryPolicy(new ExponentialBackoffRetry(100, 3)).build(); client.start(); @@ -167,4 +164,10 @@ public class TestReadOnly 
CloseableUtils.closeQuietly(cluster); } } + + @Override + protected void createServer() throws Exception + { + // NOP + } } diff --git a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReconfiguration.java b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReconfiguration.java index 1ff2805..e3327e0 100644 --- a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReconfiguration.java +++ b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestReconfiguration.java @@ -75,9 +75,7 @@ public class TestReconfiguration extends CuratorTestBase System.setProperty("zookeeper.DigestAuthenticationProvider.superDigest", superUserPasswordDigest); CloseableUtils.closeQuietly(server); - server = null; - cluster = new TestingCluster(3); - cluster.start(); + cluster = createAndStartCluster(3); } @AfterMethod @@ -406,6 +404,12 @@ public class TestReconfiguration extends CuratorTestBase Assert.assertEquals("127.0.0.1:2181", configString); } + @Override + protected void createServer() throws Exception + { + // NOP + } + private CuratorFramework newClient() { return newClient(cluster.getConnectString()); diff --git a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestWithCluster.java b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestWithCluster.java index bdcb30b..1f1e213 100644 --- a/curator-framework/src/test/java/org/apache/curator/framework/imps/TestWithCluster.java +++ b/curator-framework/src/test/java/org/apache/curator/framework/imps/TestWithCluster.java @@ -42,8 +42,7 @@ public class TestWithCluster extends CuratorTestBase Timing timing = new Timing(); CuratorFramework client = null; - TestingCluster cluster = new TestingCluster(3); - cluster.start(); + TestingCluster cluster = createAndStartCluster(3); try { client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new ExponentialBackoffRetry(100, 3)); @@ 
-90,8 +89,7 @@ public class TestWithCluster extends CuratorTestBase Timing timing = new Timing(); CuratorFramework client = null; - TestingCluster cluster = new TestingCluster(3); - cluster.start(); + TestingCluster cluster = createAndStartCluster(3); try { // make sure all instances are up diff --git a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/cache/TestPathChildrenCacheInCluster.java b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/cache/TestPathChildrenCacheInCluster.java index a9728b5..a8a93a8 100644 --- a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/cache/TestPathChildrenCacheInCluster.java +++ b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/cache/TestPathChildrenCacheInCluster.java @@ -45,11 +45,9 @@ public class TestPathChildrenCacheInCluster extends BaseClassForTests PathChildrenCache cache = null; CuratorFramework client1 = null; CuratorFramework client2 = null; - TestingCluster cluster = new TestingCluster(3); + TestingCluster cluster = createAndStartCluster(3); try { - cluster.start(); - // client 1 only connects to 1 server InstanceSpec client1Instance = cluster.getInstances().iterator().next(); client1 = CuratorFrameworkFactory.newClient(client1Instance.getConnectString(), 1000, 1000, new RetryOneTime(1)); @@ -103,11 +101,9 @@ public class TestPathChildrenCacheInCluster extends BaseClassForTests CuratorFramework client = null; PathChildrenCache cache = null; - TestingCluster cluster = new TestingCluster(3); + TestingCluster cluster = createAndStartCluster(3); try { - cluster.start(); - client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1)); client.start(); client.create().creatingParentsIfNeeded().forPath("/test"); diff --git a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java 
b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java index 1fc9ff3..1dca724 100644 --- a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java +++ b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java @@ -187,7 +187,7 @@ public class TestLeaderLatch extends BaseClassForTests try ( LeaderLatch latch2 = new LeaderLatch(client, latchPath, "2") ) { latch1.start(); - latch1.await(); + Assert.assertTrue(latch1.await(timing.milliseconds(), TimeUnit.MILLISECONDS)); latch2.start(); // will get a watcher on latch1's node timing.sleepABit(); diff --git a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatchCluster.java b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatchCluster.java index 0d08199..752ed4f 100644 --- a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatchCluster.java +++ b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatchCluster.java @@ -57,11 +57,9 @@ public class TestLeaderLatchCluster extends CuratorTestBase final int sessionLength = timing.session() / 4; List<ClientAndLatch> clients = Lists.newArrayList(); - TestingCluster cluster = new TestingCluster(PARTICIPANT_QTY); + TestingCluster cluster = createAndStartCluster(PARTICIPANT_QTY); try { - cluster.start(); - List<InstanceSpec> instances = Lists.newArrayList(cluster.getInstances()); for ( int i = 0; i < PARTICIPANT_QTY; ++i ) { diff --git a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderSelectorCluster.java b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderSelectorCluster.java index 1dd50eb..6849816 100644 --- a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderSelectorCluster.java +++ 
b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderSelectorCluster.java @@ -45,8 +45,7 @@ public class TestLeaderSelectorCluster extends CuratorTestBase final Timing timing = new Timing(); CuratorFramework client = null; - TestingCluster cluster = new TestingCluster(3); - cluster.start(); + TestingCluster cluster = createAndStartCluster(3); try { client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1)); @@ -91,8 +90,7 @@ public class TestLeaderSelectorCluster extends CuratorTestBase final Timing timing = new Timing(); CuratorFramework client = null; - TestingCluster cluster = new TestingCluster(3); - cluster.start(); + TestingCluster cluster = createAndStartCluster(3); try { client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1)); diff --git a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/locks/TestInterProcessSemaphoreCluster.java b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/locks/TestInterProcessSemaphoreCluster.java index 10d4192..3470eff 100644 --- a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/locks/TestInterProcessSemaphoreCluster.java +++ b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/locks/TestInterProcessSemaphoreCluster.java @@ -59,11 +59,9 @@ public class TestInterProcessSemaphoreCluster extends BaseClassForTests ExecutorService executorService = Executors.newFixedThreadPool(CLIENT_QTY); ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<Void>(executorService); - TestingCluster cluster = new TestingCluster(3); + TestingCluster cluster = createAndStartCluster(3); try { - cluster.start(); - final AtomicReference<String> connectionString = new AtomicReference<String>(cluster.getConnectString()); final EnsembleProvider provider = new EnsembleProvider() { @@ 
-178,8 +176,7 @@ public class TestInterProcessSemaphoreCluster extends BaseClassForTests timing.forWaiting().sleepABit(); Assert.assertEquals(0, acquireCount.get()); - cluster = new TestingCluster(3); - cluster.start(); + cluster = createAndStartCluster(3); connectionString.set(cluster.getConnectString()); timing.forWaiting().sleepABit(); @@ -207,12 +204,10 @@ public class TestInterProcessSemaphoreCluster extends BaseClassForTests ExecutorService executorService = Executors.newFixedThreadPool(QTY); ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<Void>(executorService); final Timing timing = new Timing(); - TestingCluster cluster = new TestingCluster(3); List<SemaphoreClient> semaphoreClients = Lists.newArrayList(); + TestingCluster cluster = createAndStartCluster(3); try { - cluster.start(); - final AtomicInteger opCount = new AtomicInteger(0); for ( int i = 0; i < QTY; ++i ) { diff --git a/curator-test/src/main/java/org/apache/curator/test/BaseClassForTests.java b/curator-test/src/main/java/org/apache/curator/test/BaseClassForTests.java index d4bbffb..eb568e9 100644 --- a/curator-test/src/main/java/org/apache/curator/test/BaseClassForTests.java +++ b/curator-test/src/main/java/org/apache/curator/test/BaseClassForTests.java @@ -136,6 +136,31 @@ public class BaseClassForTests } } + public TestingCluster createAndStartCluster(int qty) throws Exception + { + TestingCluster cluster = new TestingCluster(qty); + try + { + cluster.start(); + } + catch ( FailedServerStartException e ) + { + log.warn("Failed to start cluster - retrying 1 more time"); + // cluster creation failed - we've sometime seen this with re-used addresses, etc. - retry one more time + try + { + cluster.close(); + } + catch ( Exception ex ) + { + // ignore + } + cluster = new TestingCluster(qty); + cluster.start(); + } + return cluster; + } + protected void createServer() throws Exception { while ( server == null )
