Author: mahadev
Date: Wed Nov 25 19:06:08 2009
New Revision: 884229

URL: http://svn.apache.org/viewvc?rev=884229&view=rev
Log:
ZOOKEEPER-597. ASyncHammerTest is failing intermittently on hudson trunk (Patrick Hunt via mahadev)

Modified:
    hadoop/zookeeper/trunk/CHANGES.txt
    hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ClientBase.java

Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=884229&r1=884228&r2=884229&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Wed Nov 25 19:06:08 2009
@@ -133,6 +133,9 @@
   ZOOKEEPER-582. ZooKeeper can revert to old data when a snapshot is created
   outside of normal processing (ben reed and mahadev via mahadev)
 
+  ZOOKEEPER-597. ASyncHammerTest is failing intermittently on hudson trunk
+  (Patrick Hunt via mahadev)
+
 IMPROVEMENTS:
   ZOOKEEPER-473. cleanup junit tests to eliminate false positives due to
   "socket reuse" and failure to close client (phunt via mahadev)

Modified: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ClientBase.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ClientBase.java?rev=884229&r1=884228&r2=884229&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ClientBase.java (original)
+++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/ClientBase.java Wed Nov 25 19:06:08 2009
@@ -28,6 +28,8 @@
 import java.net.Socket;
 import java.util.Arrays;
 import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
@@ -453,6 +455,20 @@
         return d.delete();
     }
 
+    private static void logAllStackTraces() {
+        StringBuffer sb = new StringBuffer();
+        sb.append("Starting logAllStackTraces()\n");
+        Map<Thread, StackTraceElement[]> threads = Thread.getAllStackTraces();
+        for (Entry<Thread, StackTraceElement[]> e: threads.entrySet()) {
+            sb.append("Thread " + e.getKey().getName() + "\n");
+            for (StackTraceElement elem: e.getValue()) {
+                sb.append("\tat " + elem + "\n");
+            }
+        }
+        sb.append("Ending logAllStackTraces()\n");
+        LOG.error(sb.toString());
+    }
+
     /*
      * Verify that all of the servers see the same number of nodes
      * at the root
@@ -465,15 +481,24 @@
         // run through till the counts no longer change on each server
         // max 15 tries, with 2 second sleeps, so approx 30 seconds
         int[] counts = new int[parts.length];
+        int failed = 0;
         for (int j = 0; j < 100; j++) {
             int newcounts[] = new int[parts.length];
             int i = 0;
             for (String hp : parts) {
-                ZooKeeper zk = createClient(hp);
                 try {
-                    newcounts[i++] = zk.getChildren("/", false).size();
-                } finally {
-                    zk.close();
+                    ZooKeeper zk = createClient(hp);
+
+                    try {
+                        newcounts[i++] = zk.getChildren("/", false).size();
+                    } finally {
+                        zk.close();
+                    }
+                } catch (Throwable t) {
+                    failed++;
+                    // if session creation fails dump the thread stack
+                    // and try the next server
+                    logAllStackTraces();
                 }
             }
 
@@ -486,6 +511,11 @@
                 counts = newcounts;
                 Thread.sleep(10000);
             }
+
+            // don't keep this up too long, will assert false below
+            if (failed > 10) {
+                break;
+            }
         }
 
         // verify all the servers reporting same number of nodes