Author: breed Date: Fri Mar 19 06:46:36 2010 New Revision: 925104 URL: http://svn.apache.org/viewvc?rev=925104&view=rev Log: ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to zookeeper cluster
Modified: hadoop/zookeeper/trunk/CHANGES.txt hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java Modified: hadoop/zookeeper/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=925104&r1=925103&r2=925104&view=diff ============================================================================== --- hadoop/zookeeper/trunk/CHANGES.txt (original) +++ hadoop/zookeeper/trunk/CHANGES.txt Fri Mar 19 06:46:36 2010 @@ -306,6 +306,8 @@ BUGFIXES: ZOOKEEPER-436. Bookies should auto register to ZooKeeper (erwin tam & fpj via breed) + ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to zookeeper cluster (phunt via breed) + IMPROVEMENTS: ZOOKEEPER-473. cleanup junit tests to eliminate false positives due to "socket reuse" and failure to close client (phunt via mahadev) Modified: hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml?rev=925104&r1=925103&r2=925104&view=diff ============================================================================== --- hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml (original) +++ hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml Fri Mar 19 06:46:36 2010 @@ -533,12 +533,13 @@ which has be reestablished on a different server. The normal cause of this error is a client that sends a request to a server, but the network packet gets delayed, so the client times out and connects to a new server. When the delayed packet arrives at - the first server, the old server detects that the session has moved and sends back - the SESSION_MOVED error. 
Clients normally do not see this error since they do not read + the first server, the old server detects that the session has moved, and closes the + client connection. Clients normally do not see this error since they do not read from those old connections. (Old connections are usually closed.) One situation in which this - error can be seen is when two clients try to reestablish the same connection using + condition can be seen is when two clients try to reestablish the same connection using a saved session id and password. One of the clients will reestablish the connection - and the second client will get the SessionMovedException.</para> + and the second client will be disconnected (causing the pair to attempt to re-establish + its connection/session indefinitely).</para> </section> Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java?rev=925104&r1=925103&r2=925104&view=diff ============================================================================== --- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java (original) +++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java Fri Mar 19 06:46:36 2010 @@ -27,6 +27,7 @@ import org.apache.log4j.Logger; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooDefs; import org.apache.zookeeper.KeeperException.Code; +import org.apache.zookeeper.KeeperException.SessionMovedException; import org.apache.zookeeper.ZooDefs.OpCode; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Stat; @@ -327,6 +328,17 @@ public class FinalRequestProcessor imple break; } } + } catch (SessionMovedException e) { + // session moved is a connection level error, we need to tear + // down the connection otw ZOOKEEPER-710 might happen + // ie client on slow 
follower starts to renew session, fails + // before this completes, then tries the fast follower (leader) + // and is successful, however the initial renew is then + // successfully fwd/processed by the leader and as a result + // the client and leader disagree on where the client is most + // recently attached (and therefore invalid SESSION MOVED generated) + cnxn.sendCloseSession(); + return; } catch (KeeperException e) { err = e.code(); } catch (Exception e) { Modified: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java?rev=925104&r1=925103&r2=925104&view=diff ============================================================================== --- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java (original) +++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java Fri Mar 19 06:46:36 2010 @@ -28,6 +28,7 @@ import org.apache.zookeeper.WatchedEvent import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooDefs; import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.server.quorum.Leader; @@ -180,14 +181,14 @@ public class QuorumTest extends QuorumBa @Test public void testSessionMoved() throws IOException, InterruptedException, KeeperException { String hostPorts[] = qb.hostPort.split(","); - ZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() { + DisconnectableZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() { public void process(WatchedEvent event) { }}); zk.create("/sessionMoveTest", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL); // we want to loop through the list twice for(int i = 0; i < hostPorts.length*2; 
i++) { // This should stomp the zk handle - ZooKeeper zknew = new DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], ClientBase.CONNECTION_TIMEOUT, + DisconnectableZooKeeper zknew = new DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], ClientBase.CONNECTION_TIMEOUT, new Watcher() {public void process(WatchedEvent event) { }}, zk.getSessionId(), @@ -196,14 +197,23 @@ public class QuorumTest extends QuorumBa try { zk.setData("/", new byte[1], -1); fail("Should have lost the connection"); - } catch(KeeperException.SessionMovedException e) { + } catch(KeeperException.ConnectionLossException e) { } - //zk.close(); + zk.disconnect(); // close w/o closing session zk = zknew; } zk.close(); } - + + private static class DiscoWatcher implements Watcher { + volatile boolean zkDisco = false; + public void process(WatchedEvent event) { + if (event.getState() == KeeperState.Disconnected) { + zkDisco = true; + } + } + } + @Test /** * Connect to two different servers with two different handles using the same session and @@ -211,32 +221,41 @@ public class QuorumTest extends QuorumBa */ public void testSessionMove() throws IOException, InterruptedException, KeeperException { String hps[] = qb.hostPort.split(","); - ZooKeeper zk = new DisconnectableZooKeeper(hps[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() { - public void process(WatchedEvent event) { - }}); + DiscoWatcher oldWatcher = new DiscoWatcher(); + ZooKeeper zk = new DisconnectableZooKeeper(hps[0], + ClientBase.CONNECTION_TIMEOUT, oldWatcher); zk.create("/t1", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL); // This should stomp the zk handle - ZooKeeper zknew = new DisconnectableZooKeeper(hps[1], ClientBase.CONNECTION_TIMEOUT, new Watcher() { - public void process(WatchedEvent event) { - }}, zk.getSessionId(), zk.getSessionPasswd()); + DiscoWatcher watcher = new DiscoWatcher(); + ZooKeeper zknew = new DisconnectableZooKeeper(hps[1], + ClientBase.CONNECTION_TIMEOUT, watcher, 
zk.getSessionId(), + zk.getSessionPasswd()); zknew.create("/t2", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL); try { zk.create("/t3", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL); fail("Should have lost the connection"); - } catch(KeeperException.SessionMovedException e) { + } catch(KeeperException.ConnectionLossException e) { + // wait up to 30 seconds for the disco to be delivered + for (int i = 0; i < 30; i++) { + if (oldWatcher.zkDisco) { + break; + } + Thread.sleep(1000); + } + assertTrue(oldWatcher.zkDisco); } ArrayList<ZooKeeper> toClose = new ArrayList<ZooKeeper>(); toClose.add(zknew); // Let's just make sure it can still move for(int i = 0; i < 10; i++) { - zknew = new DisconnectableZooKeeper(hps[1], ClientBase.CONNECTION_TIMEOUT, new Watcher() { - public void process(WatchedEvent event) { - }}, zk.getSessionId(), zk.getSessionPasswd()); + zknew = new DisconnectableZooKeeper(hps[1], + ClientBase.CONNECTION_TIMEOUT, new DiscoWatcher(), + zk.getSessionId(), zk.getSessionPasswd()); toClose.add(zknew); zknew.create("/t-"+i, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL); } - for(ZooKeeper z: toClose) { + for (ZooKeeper z: toClose) { z.close(); } zk.close();