Author: michim Date: Sun Mar 15 07:54:59 2015 New Revision: 1666784 URL: http://svn.apache.org/r1666784 Log: ZOOKEEPER-1865 Fix retry logic in Learner.connectToLeader() (Edward Carter via michim)
Modified: zookeeper/trunk/CHANGES.txt zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java Modified: zookeeper/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/zookeeper/trunk/CHANGES.txt?rev=1666784&r1=1666783&r2=1666784&view=diff ============================================================================== --- zookeeper/trunk/CHANGES.txt (original) +++ zookeeper/trunk/CHANGES.txt Sun Mar 15 07:54:59 2015 @@ -48,9 +48,12 @@ BUGFIXES: (Michi Mutsuzaki via rakeshr) ZOOKEEPER-2137 Make testPortChange() less flaky (Hongchao Deng via michim) - + ZOOKEEPER-1893. automake: use serial-tests option (michim via camille) + ZOOKEEPER-1865 Fix retry logic in Learner.connectToLeader() (Edward Carter + via michim) + IMPROVEMENTS: ZOOKEEPER-1660 Documentation for Dynamic Reconfiguration (Reed Wanderman-Milne via shralex) Modified: zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java?rev=1666784&r1=1666783&r2=1666784&view=diff ============================================================================== --- zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java (original) +++ zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java Sun Mar 15 07:54:59 2015 @@ -214,10 +214,27 @@ public class Learner { } return addr; } - + + /** + * Overridable helper method to return the System.nanoTime(). + * This method behaves identical to System.nanoTime(). + */ + protected long nanoTime() { + return System.nanoTime(); + } + + /** + * Overridable helper method to simply call sock.connect(). This can be + * overriden in tests to fake connection success/failure for connectToLeader. + */ + protected void sockConnect(Socket sock, InetSocketAddress addr, int timeout) + throws IOException { + sock.connect(addr, timeout); + } + /** * Establish a connection with the Leader found by findLeader. Retries - * 5 times before giving up. + * until either initLimit time has elapsed or 5 tries have happened. * @param addr - the address of the Leader to connect to. * @throws IOException - if the socket connection fails on the 5th attempt * @throws ConnectException @@ -227,17 +244,39 @@ public class Learner { throws IOException, ConnectException, InterruptedException { sock = new Socket(); sock.setSoTimeout(self.tickTime * self.initLimit); + + int initLimitTime = self.tickTime * self.initLimit; + int remainingInitLimitTime = initLimitTime; + long startNanoTime = nanoTime(); + for (int tries = 0; tries < 5; tries++) { try { - sock.connect(addr, self.tickTime * self.syncLimit); + // recalculate the init limit time because retries sleep for 1000 milliseconds + remainingInitLimitTime = initLimitTime - (int)((nanoTime() - startNanoTime) / 1000000); + if (remainingInitLimitTime <= 0) { + LOG.error("initLimit exceeded on retries."); + throw new IOException("initLimit exceeded on retries."); + } + + sockConnect(sock, addr, Math.min(self.tickTime * self.syncLimit, remainingInitLimitTime)); sock.setTcpNoDelay(nodelay); break; } catch (IOException e) { - if (tries == 4) { - LOG.error("Unexpected exception",e); + remainingInitLimitTime = initLimitTime - (int)((nanoTime() - startNanoTime) / 1000000); + + if (remainingInitLimitTime <= 1000) { + LOG.error("Unexpected exception, initLimit exceeded. tries=" + tries + + ", remaining init limit=" + remainingInitLimitTime + + ", connecting to " + addr,e); + throw e; + } else if (tries >= 4) { + LOG.error("Unexpected exception, retries exceeded. tries=" + tries + + ", remaining init limit=" + remainingInitLimitTime + + ", connecting to " + addr,e); throw e; } else { - LOG.warn("Unexpected exception, tries="+tries+ + LOG.warn("Unexpected exception, tries=" + tries + + ", remaining init limit=" + remainingInitLimitTime + ", connecting to " + addr,e); sock = new Socket(); sock.setSoTimeout(self.tickTime * self.initLimit); Modified: zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java?rev=1666784&r1=1666783&r2=1666784&view=diff ============================================================================== --- zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java (original) +++ zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java Sun Mar 15 07:54:59 2015 @@ -24,6 +24,7 @@ import java.io.ByteArrayOutputStream; import java.io.EOFException; import java.io.File; import java.io.IOException; +import java.net.InetSocketAddress; import java.net.Socket; import java.util.ArrayList; @@ -77,6 +78,77 @@ public class LearnerTest extends ZKTestC } } + static class TimeoutLearner extends Learner { + int passSocketConnectOnAttempt = 10; + int socketConnectAttempt = 0; + long timeMultiplier = 0; + + public void setTimeMultiplier(long multiplier) { + timeMultiplier = multiplier; + } + + public void setPassConnectAttempt(int num) { + passSocketConnectOnAttempt = num; + } + + protected long nanoTime() { + return socketConnectAttempt * timeMultiplier; + } + + protected int getSockConnectAttempt() { + return socketConnectAttempt; + } + + @Override + protected void sockConnect(Socket sock, InetSocketAddress addr, int timeout) + throws IOException { + if (++socketConnectAttempt < passSocketConnectOnAttempt) { + throw new IOException("Test injected Socket.connect() error."); + } + } + } + + @Test(expected=IOException.class) + public void connectionRetryTimeoutTest() throws Exception { + Learner learner = new TimeoutLearner(); + learner.self = new QuorumPeer(); + learner.self.setTickTime(2000); + learner.self.setInitLimit(5); + learner.self.setSyncLimit(2); + + // this addr won't even be used since we fake the Socket.connect + InetSocketAddress addr = new InetSocketAddress(1111); + + // we expect this to throw an IOException since we're faking socket connect errors every time + learner.connectToLeader(addr); + } + @Test + public void connectionInitLimitTimeoutTest() throws Exception { + TimeoutLearner learner = new TimeoutLearner(); + learner.self = new QuorumPeer(); + learner.self.setTickTime(2000); + learner.self.setInitLimit(5); + learner.self.setSyncLimit(2); + + // this addr won't even be used since we fake the Socket.connect + InetSocketAddress addr = new InetSocketAddress(1111); + + // pretend each connect attempt takes 4000 milliseconds + learner.setTimeMultiplier((long)4000 * 1000000); + + learner.setPassConnectAttempt(5); + + // we expect this to throw an IOException since we're faking socket connect errors every time + try { + learner.connectToLeader(addr); + Assert.fail("should have thrown IOException!"); + } catch (IOException e) { + //good, wanted to see that, let's make sure we ran out of time + Assert.assertTrue(learner.nanoTime() > 2000*5*1000000); + Assert.assertEquals(3, learner.getSockConnectAttempt()); + } + } + @Test public void syncTest() throws Exception { File tmpFile = File.createTempFile("test", ".dir", testData);