Author: breed
Date: Fri Mar 19 06:46:36 2010
New Revision: 925104

URL: http://svn.apache.org/viewvc?rev=925104&view=rev
Log:
ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to 
zookeeper cluster

Modified:
    hadoop/zookeeper/trunk/CHANGES.txt
    
hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
    
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
    
hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java

Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=925104&r1=925103&r2=925104&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Fri Mar 19 06:46:36 2010
@@ -306,6 +306,8 @@ BUGFIXES: 
 
   ZOOKEEPER-436. Bookies should auto register to ZooKeeper (erwin tam & fpj 
via breed)
 
+  ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to 
zookeeper cluster (phunt via breed)
+
 IMPROVEMENTS:
   ZOOKEEPER-473. cleanup junit tests to eliminate false positives due to
   "socket reuse" and failure to close client (phunt via mahadev)

Modified: 
hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml?rev=925104&r1=925103&r2=925104&view=diff
==============================================================================
--- 
hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
 (original)
+++ 
hadoop/zookeeper/trunk/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
 Fri Mar 19 06:46:36 2010
@@ -533,12 +533,13 @@
       which has be reestablished on a different server. The normal cause of 
this error is
       a client that sends a request to a server, but the network packet gets 
delayed, so
       the client times out and connects to a new server. When the delayed 
packet arrives at
-      the first server, the old server detects that the session has moved and 
sends back
-      the SESSION_MOVED error. Clients normally do not see this error since 
they do not read
+      the first server, the old server detects that the session has moved, and 
closes the
+      client connection. Clients normally do not see this error since they do 
not read
       from those old connections. (Old connections are usually closed.) One 
situation in which this
-      error can be seen is when two clients try to reestablish the same 
connection using
+      condition can be seen is when two clients try to reestablish the same 
connection using
       a saved session id and password. One of the clients will reestablish the 
connection
-      and the second client will get the SessionMovedException.</para>
+      and the second client will be disconnected (causing the pair to attempt 
to re-establish
+      it's connection/session indefinitely).</para>
 
   </section>
 

Modified: 
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java?rev=925104&r1=925103&r2=925104&view=diff
==============================================================================
--- 
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
 (original)
+++ 
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
 Fri Mar 19 06:46:36 2010
@@ -27,6 +27,7 @@ import org.apache.log4j.Logger;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZooDefs;
 import org.apache.zookeeper.KeeperException.Code;
+import org.apache.zookeeper.KeeperException.SessionMovedException;
 import org.apache.zookeeper.ZooDefs.OpCode;
 import org.apache.zookeeper.data.ACL;
 import org.apache.zookeeper.data.Stat;
@@ -327,6 +328,17 @@ public class FinalRequestProcessor imple
                 break;
             }
             }
+        } catch (SessionMovedException e) {
+            // session moved is a connection level error, we need to tear
+            // down the connection otw ZOOKEEPER-710 might happen
+            // ie client on slow follower starts to renew session, fails
+            // before this completes, then tries the fast follower (leader)
+            // and is successful, however the initial renew is then 
+            // successfully fwd/processed by the leader and as a result
+            // the client and leader disagree on where the client is most
+            // recently attached (and therefore invalid SESSION MOVED 
generated)
+            cnxn.sendCloseSession();
+            return;
         } catch (KeeperException e) {
             err = e.code();
         } catch (Exception e) {

Modified: 
hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java?rev=925104&r1=925103&r2=925104&view=diff
==============================================================================
--- 
hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java 
(original)
+++ 
hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/QuorumTest.java 
Fri Mar 19 06:46:36 2010
@@ -28,6 +28,7 @@ import org.apache.zookeeper.WatchedEvent
 import org.apache.zookeeper.Watcher;
 import org.apache.zookeeper.ZooDefs;
 import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.Watcher.Event.KeeperState;
 import org.apache.zookeeper.ZooDefs.Ids;
 import org.apache.zookeeper.data.Stat;
 import org.apache.zookeeper.server.quorum.Leader;
@@ -180,14 +181,14 @@ public class QuorumTest extends QuorumBa
     @Test
     public void testSessionMoved() throws IOException, InterruptedException, 
KeeperException {
         String hostPorts[] = qb.hostPort.split(",");
-        ZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], 
ClientBase.CONNECTION_TIMEOUT, new Watcher() {
+        DisconnectableZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], 
ClientBase.CONNECTION_TIMEOUT, new Watcher() {
             public void process(WatchedEvent event) {
             }});
         zk.create("/sessionMoveTest", new byte[0], Ids.OPEN_ACL_UNSAFE, 
CreateMode.EPHEMERAL);
         // we want to loop through the list twice
         for(int i = 0; i < hostPorts.length*2; i++) {
             // This should stomp the zk handle
-            ZooKeeper zknew = new 
DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], 
ClientBase.CONNECTION_TIMEOUT, 
+            DisconnectableZooKeeper zknew = new 
DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], 
ClientBase.CONNECTION_TIMEOUT, 
                     new Watcher() {public void process(WatchedEvent event) {
                     }},
                     zk.getSessionId(),
@@ -196,14 +197,23 @@ public class QuorumTest extends QuorumBa
             try {
                 zk.setData("/", new byte[1], -1);
                 fail("Should have lost the connection");
-            } catch(KeeperException.SessionMovedException e) {
+            } catch(KeeperException.ConnectionLossException e) {
             }
-            //zk.close();
+            zk.disconnect(); // close w/o closing session
             zk = zknew;
         }
         zk.close();
     }
-    
+
+    private static class DiscoWatcher implements Watcher {
+        volatile boolean zkDisco = false;
+        public void process(WatchedEvent event) {
+            if (event.getState() == KeeperState.Disconnected) {
+                zkDisco = true;
+            }
+        }
+    }
+
     @Test
     /**
      * Connect to two different servers with two different handles using the 
same session and
@@ -211,32 +221,41 @@ public class QuorumTest extends QuorumBa
      */
     public void testSessionMove() throws IOException, InterruptedException, 
KeeperException {
         String hps[] = qb.hostPort.split(",");
-        ZooKeeper zk = new DisconnectableZooKeeper(hps[0], 
ClientBase.CONNECTION_TIMEOUT, new Watcher() {
-            public void process(WatchedEvent event) {
-        }});
+        DiscoWatcher oldWatcher = new DiscoWatcher();
+        ZooKeeper zk = new DisconnectableZooKeeper(hps[0],
+                ClientBase.CONNECTION_TIMEOUT, oldWatcher);
         zk.create("/t1", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, 
CreateMode.EPHEMERAL);
         // This should stomp the zk handle
-        ZooKeeper zknew = new DisconnectableZooKeeper(hps[1], 
ClientBase.CONNECTION_TIMEOUT, new Watcher() {
-            public void process(WatchedEvent event) {
-            }}, zk.getSessionId(), zk.getSessionPasswd());
+        DiscoWatcher watcher = new DiscoWatcher();
+        ZooKeeper zknew = new DisconnectableZooKeeper(hps[1],
+                ClientBase.CONNECTION_TIMEOUT, watcher, zk.getSessionId(),
+                zk.getSessionPasswd());
         zknew.create("/t2", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, 
CreateMode.EPHEMERAL);
         try {
             zk.create("/t3", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, 
CreateMode.EPHEMERAL);
             fail("Should have lost the connection");
-        } catch(KeeperException.SessionMovedException e) {
+        } catch(KeeperException.ConnectionLossException e) {
+            // wait up to 30 seconds for the disco to be delivered
+            for (int i = 0; i < 30; i++) {
+                if (oldWatcher.zkDisco) {
+                    break;
+                }
+                Thread.sleep(1000);
+            }
+            assertTrue(oldWatcher.zkDisco);
         }
 
         ArrayList<ZooKeeper> toClose = new ArrayList<ZooKeeper>();
         toClose.add(zknew);
         // Let's just make sure it can still move
         for(int i = 0; i < 10; i++) {
-            zknew = new DisconnectableZooKeeper(hps[1], 
ClientBase.CONNECTION_TIMEOUT, new Watcher() {
-                public void process(WatchedEvent event) {
-                }}, zk.getSessionId(), zk.getSessionPasswd());
+            zknew = new DisconnectableZooKeeper(hps[1],
+                    ClientBase.CONNECTION_TIMEOUT, new DiscoWatcher(),
+                    zk.getSessionId(), zk.getSessionPasswd());
             toClose.add(zknew);
             zknew.create("/t-"+i, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, 
CreateMode.EPHEMERAL);
         }
-    for(ZooKeeper z: toClose) {
+        for (ZooKeeper z: toClose) {
             z.close();
         }
         zk.close();


Reply via email to