This is an automated email from the ASF dual-hosted git repository.

nkalmar pushed a commit to branch branch-3.6
in repository https://gitbox.apache.org/repos/asf/zookeeper.git


The following commit(s) were added to refs/heads/branch-3.6 by this push:
     new 5628639  ZOOKEEPER-3740: fix flaky PurgeTxnTest.testPurgeWhenLogRollingInProgress
5628639 is described below

commit 562863979515140cdf00b848aea2810706226745
Author: Mate Szalay-Beko <szalay.beko.m...@gmail.com>
AuthorDate: Tue Mar 3 10:53:43 2020 +0100

    ZOOKEEPER-3740: fix flaky PurgeTxnTest.testPurgeWhenLogRollingInProgress
    
    This test always passes for me when I execute only the PurgeTxnTest test
    class locally, but when I execute all the tests (with multiple parallel
    threads, using `mvn clean install`), it always fails. It also fails
    frequently on the `zookeeper-master-maven` Jenkins job.
    
    The test starts three threads, each performing 1000 ZNode creations, and
    times out if the threads have not finished within 90 seconds. Currently it
    is not easy to tell from the logs whether the timeout happens because the
    operations are still in progress or because one of the threads terminated
    due to an unexpected exception.
    
    In this patch I:
    - increased the timeout from 90 to 120 seconds
    - added extra logic to make the test fail when an exception occurs on any
    of the threads during execution
    - decreased the number of ZNode creations to 750 (from the original 1000)
    
    Applying this patch locally fixed my issues; I hope it will be enough
    to fix the test on Jenkins as well.
    
    Author: Mate Szalay-Beko <szalay.beko.m...@gmail.com>
    
    Reviewers: Enrico Olivelli <eolive...@apache.org>, Norbert Kalmar <nkal...@apache.org>
    
    Closes #1274 from symat/ZOOKEEPER-3740
    
    (cherry picked from commit 118dee4b8a4a69699eb00e12bef2907cbbece160)
    Signed-off-by: Norbert Kalmar <nkal...@apache.org>
---
 .../org/apache/zookeeper/server/PurgeTxnTest.java  | 40 +++++++++++++---------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git 
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java
index 84d9076..804e237 100644
--- 
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java
+++ 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java
@@ -33,6 +33,7 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.zip.CheckedOutputStream;
 import org.apache.jute.BinaryOutputArchive;
 import org.apache.jute.OutputArchive;
@@ -59,7 +60,7 @@ public class PurgeTxnTest extends ZKTestCase {
     private static final Logger LOG = 
LoggerFactory.getLogger(PurgeTxnTest.class);
     private static String HOSTPORT = "127.0.0.1:" + PortAssignment.unique();
     private static final int CONNECTION_TIMEOUT = 3000;
-    private static final long OP_TIMEOUT_IN_MILLIS = 90000;
+    private static final long OP_TIMEOUT_IN_MILLIS = 120000;
     private File tmpDir;
 
     @Before
@@ -561,25 +562,25 @@ public class PurgeTxnTest extends ZKTestCase {
         Thread[] ths = new Thread[thCount];
         final List<String> znodes = Collections.synchronizedList(new 
ArrayList<String>());
         final CountDownLatch finished = new CountDownLatch(thCount);
+        final AtomicReference<Exception> exception = new AtomicReference<>();
         for (int indx = 0; indx < thCount; indx++) {
             final String myprefix = prefix + "-" + indx;
-            Thread th = new Thread() {
-                public void run() {
-                    for (int i = 0; i < 1000; i++) {
-                        try {
-                            String mynode = myprefix + "-" + i;
-                            znodes.add(mynode);
-                            zk.create(mynode, new byte[0], 
Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
-                        } catch (Exception e) {
-                            LOG.error("Unexpected exception", e);
-                        }
-                        if (i == 200) {
-                            doPurge.countDown();
-                        }
+            Thread th = new Thread(() -> {
+                for (int i = 0; i < 750; i++) {
+                    try {
+                        String mynode = myprefix + "-" + i;
+                        znodes.add(mynode);
+                        zk.create(mynode, new byte[0], Ids.OPEN_ACL_UNSAFE, 
CreateMode.PERSISTENT);
+                    } catch (Exception e) {
+                        LOG.error("Unexpected exception during ZkClient ops", 
e);
+                        exception.set(e);
+                    }
+                    if (i == 200) {
+                        doPurge.countDown();
                     }
-                    finished.countDown();
                 }
-            };
+                finished.countDown();
+            });
             ths[indx] = th;
         }
 
@@ -587,7 +588,12 @@ public class PurgeTxnTest extends ZKTestCase {
             thread.start();
         }
         try {
-            assertTrue("ZkClient ops is not finished!", 
finished.await(OP_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS));
+            boolean operationsFinishedSuccessfully = 
finished.await(OP_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS);
+            if (exception.get() != null) {
+                LOG.error("unexpected exception during running ZkClient ops:", 
exception.get());
+                fail("unexpected exception during running ZkClient ops, see in 
the logs above");
+            }
+            assertTrue("ZkClient ops not finished in time!", 
operationsFinishedSuccessfully);
         } catch (InterruptedException ie) {
             LOG.error("Unexpected exception", ie);
             fail("Unexpected exception occurred!");

Reply via email to