This is an automated email from the ASF dual-hosted git repository.
nkalmar pushed a commit to branch branch-3.6
in repository https://gitbox.apache.org/repos/asf/zookeeper.git
The following commit(s) were added to refs/heads/branch-3.6 by this push:
new 5628639 ZOOKEEPER-3740: fix flaky
PurgeTxnTest.testPurgeWhenLogRollingInProgress
5628639 is described below
commit 562863979515140cdf00b848aea2810706226745
Author: Mate Szalay-Beko <[email protected]>
AuthorDate: Tue Mar 3 10:53:43 2020 +0100
ZOOKEEPER-3740: fix flaky PurgeTxnTest.testPurgeWhenLogRollingInProgress
This test always passes for me when I execute only the PurgeTxnTest test
class locally, but it always fails when I execute all the tests (with
multiple parallel threads, using `mvn clean install`). It also fails
frequently on the `zookeeper-master-maven` Jenkins job.
The test starts three threads, each performing 1000 ZNode creations, and
times out if the threads have not finished within 90 seconds. Currently it
is not easy to tell from the logs whether the timeout happens because the
operations are still in progress or because one of the threads terminated
due to an unexpected exception.
In this patch I:
- increased the timeout from 90 to 120 seconds
- added extra logic to make the test fail if an exception occurs on any of
the threads during execution
- decreased the number of ZNode creations per thread from 1000 to 750
Applying this patch locally fixed my issues; I hope it will be enough
to fix the test on Jenkins as well.
Author: Mate Szalay-Beko <[email protected]>
Reviewers: Enrico Olivelli <[email protected]>, Norbert Kalmar
<[email protected]>
Closes #1274 from symat/ZOOKEEPER-3740
(cherry picked from commit 118dee4b8a4a69699eb00e12bef2907cbbece160)
Signed-off-by: Norbert Kalmar <[email protected]>
---
.../org/apache/zookeeper/server/PurgeTxnTest.java | 40 +++++++++++++---------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java
index 84d9076..804e237 100644
---
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java
+++
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/PurgeTxnTest.java
@@ -33,6 +33,7 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.zip.CheckedOutputStream;
import org.apache.jute.BinaryOutputArchive;
import org.apache.jute.OutputArchive;
@@ -59,7 +60,7 @@ public class PurgeTxnTest extends ZKTestCase {
private static final Logger LOG =
LoggerFactory.getLogger(PurgeTxnTest.class);
private static String HOSTPORT = "127.0.0.1:" + PortAssignment.unique();
private static final int CONNECTION_TIMEOUT = 3000;
- private static final long OP_TIMEOUT_IN_MILLIS = 90000;
+ private static final long OP_TIMEOUT_IN_MILLIS = 120000;
private File tmpDir;
@Before
@@ -561,25 +562,25 @@ public class PurgeTxnTest extends ZKTestCase {
Thread[] ths = new Thread[thCount];
final List<String> znodes = Collections.synchronizedList(new
ArrayList<String>());
final CountDownLatch finished = new CountDownLatch(thCount);
+ final AtomicReference<Exception> exception = new AtomicReference<>();
for (int indx = 0; indx < thCount; indx++) {
final String myprefix = prefix + "-" + indx;
- Thread th = new Thread() {
- public void run() {
- for (int i = 0; i < 1000; i++) {
- try {
- String mynode = myprefix + "-" + i;
- znodes.add(mynode);
- zk.create(mynode, new byte[0],
Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
- } catch (Exception e) {
- LOG.error("Unexpected exception", e);
- }
- if (i == 200) {
- doPurge.countDown();
- }
+ Thread th = new Thread(() -> {
+ for (int i = 0; i < 750; i++) {
+ try {
+ String mynode = myprefix + "-" + i;
+ znodes.add(mynode);
+ zk.create(mynode, new byte[0], Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT);
+ } catch (Exception e) {
+ LOG.error("Unexpected exception during ZkClient ops",
e);
+ exception.set(e);
+ }
+ if (i == 200) {
+ doPurge.countDown();
}
- finished.countDown();
}
- };
+ finished.countDown();
+ });
ths[indx] = th;
}
@@ -587,7 +588,12 @@ public class PurgeTxnTest extends ZKTestCase {
thread.start();
}
try {
- assertTrue("ZkClient ops is not finished!",
finished.await(OP_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS));
+ boolean operationsFinishedSuccessfully =
finished.await(OP_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS);
+ if (exception.get() != null) {
+ LOG.error("unexpected exception during running ZkClient ops:",
exception.get());
+ fail("unexpected exception during running ZkClient ops, see in
the logs above");
+ }
+ assertTrue("ZkClient ops not finished in time!",
operationsFinishedSuccessfully);
} catch (InterruptedException ie) {
LOG.error("Unexpected exception", ie);
fail("Unexpected exception occurred!");