Ruben's PR still doesn't fix the hang.

Here is jstack output:

Full thread dump Java HotSpot(TM) 64-Bit Server VM (25.162-b12 mixed mode):
"RMI TCP Connection(3)-192.168.1.171" #45 daemon prio=9 os_prio=31 
tid=0x00007ff81d0c5000 nid=0xa30b runnable [0x0000700002f0e000]
   java.lang.Thread.State: RUNNABLE
 at java.net.SocketInputStream.socketRead0(Native Method)
 at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
 at java.net.SocketInputStream.read(SocketInputStream.java:171)
 at java.net.SocketInputStream.read(SocketInputStream.java:141)
 at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
 at java.io.BufferedInputStream.read(BufferedInputStream.java:265)
 - locked <0x00000007a7239a88> (a java.io.BufferedInputStream)
 at java.io.FilterInputStream.read(FilterInputStream.java:83)
 at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:550)
 at 
sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:826)
 at 
sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:683)
 at 
sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$807/715238588.run(Unknown
 Source)
 at java.security.AccessController.doPrivileged(Native Method)
 at 
sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:682)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - <0x00000007a6ccef38> (a java.util.concurrent.ThreadPoolExecutor$Worker)
"JMX server connection timeout 44" #44 daemon prio=9 os_prio=31 
tid=0x00007ff81f006800 nid=0xa20b in Object.wait() [0x0000700002e0c000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
 at java.lang.Object.wait(Native Method)
 at 
com.sun.jmx.remote.internal.ServerCommunicatorAdmin$Timeout.run(ServerCommunicatorAdmin.java:168)
 - locked <0x00000007a6dc08e8> (a [I)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"RMI Scheduler(0)" #43 daemon prio=9 os_prio=31 tid=0x00007ff81e826800 
nid=0x610b waiting on condition [0x0000700002d09000]
   java.lang.Thread.State: TIMED_WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x00000007a6c42160> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
 at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
 at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"RMI TCP Connection(idle)" #42 daemon prio=9 os_prio=31 tid=0x00007ff81e81c000 
nid=0x9e0b waiting on condition [0x0000700002c06000]
   java.lang.Thread.State: TIMED_WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x00000007a6c3cc80> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
 at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
 at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:460)
 at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:362)
 at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:941)
 at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1073)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"RMI TCP Accept-0" #41 daemon prio=9 os_prio=31 tid=0x00007ff81c2da000 
nid=0x580f runnable [0x0000700002b03000]
   java.lang.Thread.State: RUNNABLE
 at java.net.PlainSocketImpl.socketAccept(Native Method)
 at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
 at java.net.ServerSocket.implAccept(ServerSocket.java:545)
 at java.net.ServerSocket.accept(ServerSocket.java:513)
 at 
sun.management.jmxremote.LocalRMIServerSocketFactory$1.accept(LocalRMIServerSocketFactory.java:52)
 at 
sun.rmi.transport.tcp.TCPTransport$AcceptLoop.executeAcceptLoop(TCPTransport.java:400)
 at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.run(TCPTransport.java:372)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"Attach Listener" #40 daemon prio=9 os_prio=31 tid=0x00007ff81d4ac800 
nid=0xa70f waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE
   Locked ownable synchronizers:
 - None
"maven-surefire-plugin@NotThreadSafe" #36 daemon prio=5 os_prio=31 
tid=0x00007ff81bb0d800 nid=0x5907 waiting on condition [0x0000700003218000]
   java.lang.Thread.State: WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x00000007600dfef8> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
 at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
 at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"pool-1-thread-4" #16 daemon prio=5 os_prio=31 tid=0x00007ff819af7800 
nid=0x5703 waiting on condition [0x00007000028fd000]
   java.lang.Thread.State: WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x00000007607f4c98> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
 at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
 at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"pool-1-thread-3" #15 daemon prio=5 os_prio=31 tid=0x00007ff81931c800 
nid=0x5503 waiting on condition [0x00007000027fa000]
   java.lang.Thread.State: WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x000000076002d778> (a 
java.util.concurrent.Semaphore$FairSync)
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:997)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
 at java.util.concurrent.Semaphore.acquire(Semaphore.java:312)
 at 
org.apache.maven.surefire.junitcore.pc.ThreadResourcesBalancer.acquirePermit(ThreadResourcesBalancer.java:76)
 at 
org.apache.maven.surefire.junitcore.pc.Scheduler.schedule(Scheduler.java:363)
 at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
 at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
 at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
 at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
 at org.junit.runners.Suite.runChild(Suite.java:128)
 at org.junit.runners.Suite.runChild(Suite.java:27)
 at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
 at org.apache.maven.surefire.junitcore.pc.Scheduler$1.run(Scheduler.java:410)
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 at java.util.concurrent.FutureTask.run(FutureTask.java:266)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - <0x000000076000e9d8> (a java.util.concurrent.ThreadPoolExecutor$Worker)
"pool-1-thread-2" #14 daemon prio=5 os_prio=31 tid=0x00007ff819ae4800 
nid=0x3e03 waiting on condition [0x00007000026f7000]
   java.lang.Thread.State: WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x00000007607f4c98> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
 at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
 at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"pool-1-thread-1" #13 daemon prio=5 os_prio=31 tid=0x00007ff819323800 
nid=0x4103 waiting on condition [0x00007000025f4000]
   java.lang.Thread.State: WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x000000076002d778> (a 
java.util.concurrent.Semaphore$FairSync)
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:997)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
 at java.util.concurrent.Semaphore.acquire(Semaphore.java:312)
 at 
org.apache.maven.surefire.junitcore.pc.ThreadResourcesBalancer.acquirePermit(ThreadResourcesBalancer.java:76)
 at 
org.apache.maven.surefire.junitcore.pc.Scheduler.schedule(Scheduler.java:363)
 at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
 at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
 at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
 at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
 at org.junit.runners.Suite.runChild(Suite.java:128)
 at org.junit.runners.Suite.runChild(Suite.java:27)
 at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
 at org.apache.maven.surefire.junitcore.pc.Scheduler$1.run(Scheduler.java:410)
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 at java.util.concurrent.FutureTask.run(FutureTask.java:266)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - <0x000000076000f588> (a java.util.concurrent.ThreadPoolExecutor$Worker)
"process reaper" #11 daemon prio=10 os_prio=31 tid=0x00007ff819130800 
nid=0x4303 waiting on condition [0x00007000024f1000]
   java.lang.Thread.State: TIMED_WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x0000000760015960> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
 at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
 at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:460)
 at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:362)
 at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:941)
 at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1073)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"surefire-forkedjvm-ping-30s" #10 daemon prio=5 os_prio=31 
tid=0x00007ff819011000 nid=0x3d03 runnable [0x00007000024c6000]
   java.lang.Thread.State: TIMED_WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x0000000760015ba0> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
 at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
 at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"surefire-forkedjvm-command-thread" #9 daemon prio=5 os_prio=31 
tid=0x00007ff81882d000 nid=0x3b03 runnable [0x00007000023c3000]
   java.lang.Thread.State: RUNNABLE
 at java.io.FileInputStream.readBytes(Native Method)
 at java.io.FileInputStream.read(FileInputStream.java:255)
 at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
 at java.io.BufferedInputStream.read(BufferedInputStream.java:265)
 - locked <0x0000000760017e30> (a java.io.BufferedInputStream)
 at java.io.DataInputStream.readInt(DataInputStream.java:387)
 at 
org.apache.maven.surefire.booter.MasterProcessCommand.decode(MasterProcessCommand.java:115)
 at 
org.apache.maven.surefire.booter.CommandReader$CommandRunnable.run(CommandReader.java:391)
 at java.lang.Thread.run(Thread.java:748)
   Locked ownable synchronizers:
 - None
"Service Thread" #8 daemon prio=9 os_prio=31 tid=0x00007ff818800000 nid=0x4503 
runnable [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE
   Locked ownable synchronizers:
 - None
"C1 CompilerThread2" #7 daemon prio=9 os_prio=31 tid=0x00007ff818807800 
nid=0x4703 waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE
   Locked ownable synchronizers:
 - None
"C2 CompilerThread1" #6 daemon prio=9 os_prio=31 tid=0x00007ff81982b000 
nid=0x3603 waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE
   Locked ownable synchronizers:
 - None
"C2 CompilerThread0" #5 daemon prio=9 os_prio=31 tid=0x00007ff819004000 
nid=0x3503 waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE
   Locked ownable synchronizers:
 - None
"Signal Dispatcher" #4 daemon prio=9 os_prio=31 tid=0x00007ff819828800 
nid=0x3403 runnable [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE
   Locked ownable synchronizers:
 - None
"Finalizer" #3 daemon prio=8 os_prio=31 tid=0x00007ff81a03a800 nid=0x5003 in 
Object.wait() [0x0000700001cae000]
   java.lang.Thread.State: WAITING (on object monitor)
 at java.lang.Object.wait(Native Method)
 - waiting on <0x0000000760003b70> (a java.lang.ref.ReferenceQueue$Lock)
 at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143)
 - locked <0x0000000760003b70> (a java.lang.ref.ReferenceQueue$Lock)
 at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164)
 at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:212)
   Locked ownable synchronizers:
 - None
"Reference Handler" #2 daemon prio=10 os_prio=31 tid=0x00007ff81900c000 
nid=0x2e03 in Object.wait() [0x0000700001bab000]
   java.lang.Thread.State: WAITING (on object monitor)
 at java.lang.Object.wait(Native Method)
 - waiting on <0x00000007600dc768> (a java.lang.ref.Reference$Lock)
 at java.lang.Object.wait(Object.java:502)
 at java.lang.ref.Reference.tryHandlePending(Reference.java:191)
 - locked <0x00000007600dc768> (a java.lang.ref.Reference$Lock)
 at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:153)
   Locked ownable synchronizers:
 - None
"main" #1 prio=5 os_prio=31 tid=0x00007ff818801800 nid=0x1a03 waiting on 
condition [0x0000700001598000]
   java.lang.Thread.State: WAITING (parking)
 at sun.misc.Unsafe.park(Native Method)
 - parking to wait for  <0x000000076002d778> (a 
java.util.concurrent.Semaphore$FairSync)
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:997)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
 at java.util.concurrent.Semaphore.acquire(Semaphore.java:312)
 at 
org.apache.maven.surefire.junitcore.pc.ThreadResourcesBalancer.acquirePermit(ThreadResourcesBalancer.java:76)
 at 
org.apache.maven.surefire.junitcore.pc.Scheduler.schedule(Scheduler.java:363)
 at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
 at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
 at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
 at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
 at org.junit.runners.Suite.runChild(Suite.java:128)
 at org.junit.runners.Suite.runChild(Suite.java:27)
 at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
 at org.apache.maven.surefire.junitcore.pc.Scheduler$1.run(Scheduler.java:410)
 at 
org.apache.maven.surefire.junitcore.pc.InvokerStrategy.schedule(InvokerStrategy.java:54)
 at 
org.apache.maven.surefire.junitcore.pc.Scheduler.schedule(Scheduler.java:367)
 at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
 at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
 at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
 at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
 at org.junit.runners.Suite.runChild(Suite.java:128)
 at org.junit.runners.Suite.runChild(Suite.java:27)
 at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
 at org.apache.maven.surefire.junitcore.pc.Scheduler$1.run(Scheduler.java:410)
 at 
org.apache.maven.surefire.junitcore.pc.InvokerStrategy.schedule(InvokerStrategy.java:54)
 at 
org.apache.maven.surefire.junitcore.pc.Scheduler.schedule(Scheduler.java:367)
 at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
 at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
 at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
 at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
 at 
org.apache.maven.surefire.junitcore.pc.ParallelComputerBuilder$PC$1.run(ParallelComputerBuilder.java:590)
 at org.apache.maven.surefire.junitcore.JUnitCore.run(JUnitCore.java:55)
 at 
org.apache.maven.surefire.junitcore.JUnitCoreWrapper.createRequestAndRun(JUnitCoreWrapper.java:137)
 at 
org.apache.maven.surefire.junitcore.JUnitCoreWrapper.executeEager(JUnitCoreWrapper.java:107)
 at 
org.apache.maven.surefire.junitcore.JUnitCoreWrapper.execute(JUnitCoreWrapper.java:83)
 at 
org.apache.maven.surefire.junitcore.JUnitCoreWrapper.execute(JUnitCoreWrapper.java:75)
 at 
org.apache.maven.surefire.junitcore.JUnitCoreProvider.invoke(JUnitCoreProvider.java:158)
 at 
org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:383)
 at 
org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:344)
 at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:125)
 at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:417)
   Locked ownable synchronizers:
 - None
"VM Thread" os_prio=31 tid=0x00007ff819809000 nid=0x2d03 runnable
"GC task thread#0 (ParallelGC)" os_prio=31 tid=0x00007ff81a003800 nid=0x2207 
runnable
"GC task thread#1 (ParallelGC)" os_prio=31 tid=0x00007ff81900a800 nid=0x2a03 
runnable
"GC task thread#2 (ParallelGC)" os_prio=31 tid=0x00007ff81900b000 nid=0x5403 
runnable
"GC task thread#3 (ParallelGC)" os_prio=31 tid=0x00007ff81900b800 nid=0x5203 
runnable
"VM Periodic Task Thread" os_prio=31 tid=0x00007ff81a058800 nid=0x3903 waiting 
on condition
JNI global references: 1500

Thanks ~
Haisheng Yuan
------------------------------------------------------------------
发件人:Ruben Q L<[email protected]>
日 期:2019年05月31日 16:56:07
收件人:Stamatis Zampetakis<[email protected]>
抄 送:<[email protected]>
主 题:Re: Re: CyclicMetadataException in 
testPushDownJoinConditionsWithExpandedIsNotDistinctUsingCase

My guess here is that: EnumerableRepeatUnionHierarchyTest is
a Parameterized test, if the different tests are executed in parallel,
since they all use the same plan with minor modifications, but especially
since they all share the same transient table name (the temporary table
that is used to accumulate results in the iterative process), they might
interfere with each other's results and make some tests run forever.
I can prepare a patch modifying EnumerableRepeatUnionHierarchyTest to
ensure that the transient table is also parameterized, and see if that
solves the problem.



Le ven. 31 mai 2019 à 09:26, Stamatis Zampetakis <[email protected]> a
écrit :

> @Haisheng: since you can reproduce it every time locally, can you share the
> stack trace at the moment it is stuck?
>
> I think it will help a lot in finding the problem.
>
> On Fri, May 31, 2019, 8:22 AM Ruben Q L <[email protected]> wrote:
>
>> Ok, I understand. I'll see if I can reproduce it on my side.
>> Thanks
>>
>>
>> Le ven. 31 mai 2019 à 08:19, Haisheng Yuan <[email protected]> a
>> écrit :
>>
>>> Just running that single test does not reproduce it.
>>> It also passes on my machine if I run it separately. mvn clean install
>>> reproduces it every time.
>>>
>>>
>>>
>>>
>>>
>>> Thanks~
>>> Haisheng Yuan
>>> ------------------------------------------------------------------
>>> 发件人:Ruben Q L<[email protected]>
>>> 日 期:2019年05月31日 14:03:44
>>> 收件人:<[email protected]>
>>> 抄 送:Stamatis Zampetakis<[email protected]>
>>> 主 题:Re: Re: CyclicMetadataException in
>>> testPushDownJoinConditionsWithExpandedIsNotDistinctUsingCase
>>>
>>> Thanks for the info.
>>> I'm running EnumerableRepeatUnionHierarchyTest, and in my local
>>> environment
>>> (macOS 10.14.5, JDK 8) it ends successfully.
>>> I'll continue digging into it...
>>>
>>>
>>> Le ven. 31 mai 2019 à 04:20, Haisheng Yuan <[email protected]> a
>>> écrit :
>>>
>>> > The test suite gets stuck forever on my laptop. macOS 10.14.4, JDK 8.
>>> > Ignoring test case EnumerableRepeatUnionHierarchyTest makes the whole
>>> test
>>> > pass again.
>>> >
>>> > Thanks ~
>>> > Haisheng Yuan
>>> > ------------------------------------------------------------------
>>> > 发件人:Stamatis Zampetakis<[email protected]>
>>> > 日 期:2019年05月30日 22:49:22
>>> > 收件人:<[email protected]>
>>> > 主 题:Re: CyclicMetadataException in
>>> > testPushDownJoinConditionsWithExpandedIsNotDistinctUsingCase
>>> >
>>> > Thanks for noticing Laurent!
>>> >
>>> > The last builds were successful so I guess we are dealing with another
>>> > intermittent failure.
>>> >
>>> > If the problem persists, I will create a JIRA and follow up there 😉
>>> >
>>> > On Thu, May 30, 2019, 6:43 AM Laurent Goujon <[email protected]>
>>> wrote:
>>> >
>>> > > Looks like Jenkins started to timeout with the following change:
>>> > >
>>> > >
>>> >
>>> https://github.com/apache/calcite/commit/73e6d05fa65f16485caca80571d1fe4fda5c7468
>>> > > .
>>> > >
>>> > > Ruben, Stamatis: any idea?
>>> > >
>>> > > On Wed, May 29, 2019 at 8:22 PM Laurent Goujon <[email protected]>
>>> > wrote:
>>> > >
>>> > > > It looks like Jenkins build with JDK8 now takes more than 40min
>>> and is
>>> > > > being aborted, with the core module taking something like 38min and
>>> > being
>>> > > > stopped during tests. It seems to be independent of my test change
>>> > since
>>> > > > RelOptUtilTest shows to be running successfully in 1second.
>>> > > > That said, I don't see a test still running in the logs, so I
>>> > guess
>>> > > > it would be something keeping the jvm from stopping even if all
>>> tests
>>> > are
>>> > > > completed?
>>> > > >
>>> > > > On Wed, May 29, 2019 at 4:15 PM Stamatis Zampetakis <
>>> [email protected]
>>> > >
>>> > > > wrote:
>>> > > >
>>> > > >> Makes sense, I was expecting something like that.
>>> > > >>
>>> > > >> +1 for the PR. Even if we are not 100% sure it will resolve the
>>> problem,
>>> > > >> getting rid of the statics seems like a good idea and beneficial
>>> in
>>> > any
>>> > > >> case.
>>> > > >>
>>> > > >> On Thu, May 30, 2019 at 12:02 AM Laurent Goujon <
>>> [email protected]>
>>> > > >> wrote:
>>> > > >>
>>> > > >> > Unfortunately, not locally, so I ended in crude debugging and
>>> code
>>> > > >> analysis
>>> > > >> > to understand where the static reloptcluster created for the
>>> static
>>> > > >> > relbuilder was used in a context which would cause issue in a
>>> > > >> multithreaded
>>> > > >> > context. Once I saw several rel nodes getting the parent cluster
>>> > (the
>>> > > >> > static table scan) to access the metadata query (which cannot be
>>> > > reused
>>> > > >> > between threads), that's where I assumed this would be the
>>> problem.
>>> > > >> >
>>> > > >> > On Wed, May 29, 2019 at 2:58 PM Stamatis Zampetakis <
>>> > > [email protected]>
>>> > > >> > wrote:
>>> > > >> >
>>> > > >> > > Thanks a lot Laurent, much appreciated!
>>> > > >> > >
>>> > > >> > > BTW, were you able to reproduce the problem?
>>> > > >> > >
>>> > > >> > > On Wed, May 29, 2019 at 11:29 PM Laurent Goujon <
>>> > [email protected]
>>> > > >
>>> > > >> > > wrote:
>>> > > >> > >
>>> > > >> > > > Here's the pull request:
>>> > > >> https://github.com/apache/calcite/pull/1240
>>> > > >> > > >
>>> > > >> > > > On Wed, May 29, 2019 at 1:58 PM Laurent Goujon <
>>> > > [email protected]>
>>> > > >> > > wrote:
>>> > > >> > > >
>>> > > >> > > > > Looks like most {{RelNode#create()}} access the
>>> > > >> > > RelOptCluster/RelBuilder
>>> > > >> > > > > instance from their child, and some then, perform metadata
>>> > > >> operation,
>>> > > >> > > > which
>>> > > >> > > > > would cause CyclicMetadataException for example. I'll
>>> create a
>>> > > >> fixup
>>> > > >> > > > patch.
>>> > > >> > > > >
>>> > > >> > > > > On Wed, May 29, 2019 at 9:00 AM Laurent Goujon <
>>> > > >> [email protected]>
>>> > > >> > > > wrote:
>>> > > >> > > > >
>>> > > >> > > > >> I actually did the change from using a static relbuilder
>>> to a
>>> > > >> mix of
>>> > > >> > > > >> static (to create the scans) and per test because of
>>> > > concurrency
>>> > > >> > > issues
>>> > > >> > > > :(
>>> > > >> > > > >> Maybe the test should only be using a per test-case
>>> > > relbuilder
>>> > > >> > > > instead,
>>> > > >> > > > >> but it would mean that there's some code relying on the
>>> > static
>>> > > >> > > > relbuilder
>>> > > >> > > > >> (by accessing it from the scan rel nodes) in a non-static
>>> > > >> context.
>>> > > >> > > I'll
>>> > > >> > > > try
>>> > > >> > > > >> to debug it too (but so far was unlucky reproducing the
>>> > jenkins
>>> > > >> > > > problem).
>>> > > >> > > > >>
>>> > > >> > > > >> On Wed, May 29, 2019 at 5:02 AM Francis Chuang <
>>> > > >> > > > [email protected]>
>>> > > >> > > > >> wrote:
>>> > > >> > > > >>
>>> > > >> > > > >>> In Go, there's a built in race detector that can be used
>>> > when
>>> > > >> > running
>>> > > >> > > > >>> tests using `go test`.
>>> > > >> > > > >>>
>>> > > >> > > > >>> There's RacerD [1] from Facebook that can detect races
>>> in
>>> > > Java.
>>> > > >> > > Perhaps
>>> > > >> > > > >>> this is something that can be looked into to find the
>>> race.
>>> > > >> > > > >>>
>>> > > >> > > > >>> [1] https://fbinfer.com/docs/racerd.html
>>> > > >> > > > >>>
>>> > > >> > > > >>> On 29/05/2019 9:52 pm, Stamatis Zampetakis wrote:
>>> > > >> > > > >>> > Good insights, Ruben, Danny!
>>> > > >> > > > >>> >
>>> > > >> > > > >>> > Assuming that is indeed a concurrency problem, it
>>> will be
>>> > > >> > difficult
>>> > > >> > > > to
>>> > > >> > > > >>> > identify since many parts in Calcite are not thread
>>> safe.
>>> > > Not
>>> > > >> > being
>>> > > >> > > > >>> able to
>>> > > >> > > > >>> > reproduce the problem makes things even worse.
>>> > > >> > > > >>> >
>>> > > >> > > > >>> > Given that intermittent test failures occur often on
>>> > > Jenkins,
>>> > > >> how
>>> > > >> > > > about
>>> > > >> > > > >>> > creating a new branch dedicated to debugging?
>>> > > >> > > > >>> > It can be associated with a new Jenkins job (that
>>> > obviously
>>> > > >> > > doesn't
>>> > > >> > > > >>> send
>>> > > >> > > > >>> > mails to everybody when there are failures) that can
>>> be
>>> > > >> launched
>>> > > >> > > > >>> on-demand
>>> > > >> > > > >>> > by the person who is looking into the problem.
>>> > > >> > > > >>> >
>>> > > >> > > > >>> > On the other hand, we do not really make an effort to
>>> have
>>> > > >> tests
>>> > > >> > > that
>>> > > >> > > > >>> can
>>> > > >> > > > >>> > be executed concurrently so another alternative would
>>> be
>>> > to
>>> > > >> run
>>> > > >> > > tests
>>> > > >> > > > >>> only
>>> > > >> > > > >>> > sequentially.
>>> > > >> > > > >>> > I am not a big fan of this approach since it will
>>> rather
>>> > hide
>>> > > >> > > problems
>>> > > >> > > > >>> than
>>> > > >> > > > >>> > solve them.
>>> > > >> > > > >>> >
>>> > > >> > > > >>> >
>>> > > >> > > > >>> >
>>> > > >> > > > >>> >
>>> > > >> > > > >>> >
>>> > > >> > > > >>> >
>>> > > >> > > > >>> > On Wed, May 29, 2019 at 1:02 PM Yuzhao Chen <
>>> > > >> > [email protected]>
>>> > > >> > > > >>> wrote:
>>> > > >> > > > >>> >
>>> > > >> > > > >>> >> Thanks Ruben for your good analysis.
>>> > > >> > > > >>> >>
>>> > > >> > > > >>> >> What confuses me is: isn’t the static
>>> REL_BUILDER
>>> > more
>>> > > >> > prone
>>> > > >> > > to
>>> > > >> > > > >>> have
>>> > > >> > > > >>> >> concurrency problems ? And the pushed scans(EMP_SCAN
>>> and
>>> > > >> > > DEPT_SCAN)
>>> > > >> > > > >>> are all
>>> > > >> > > > >>> >> nodes(immutable), how could this be a problem ?
>>> > > >> > > > >>> >>
>>> > > >> > > > >>> >> Best,
>>> > > >> > > > >>> >> Danny Chan
>>> > > >> > > > >>> >> 在 2019年5月29日 +0800 PM5:37,Ruben Q L <
>>> [email protected]
>>> > > >,写道:
>>> > > >> > > > >>> >>> I'm checking the commit [1] and I see something
>>> strange
>>> > in
>>> > > >> > > > >>> >> RelOptUtilTest.
>>> > > >> > > > >>> >>> Maybe I'm wrong and it is nothing, but just in case
>>> it
>>> > may
>>> > > >> > help:
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>> With the latest modification, it seems that we have
>>> two
>>> > > >> > > > >>> RelBuilder(s) in
>>> > > >> > > > >>> >>> place:
>>> > > >> > > > >>> >>> - A static one that is created ad-hoc on a static
>>> block
>>> > to
>>> > > >> > > generate
>>> > > >> > > > >>> the
>>> > > >> > > > >>> >>> EMP_SCAN and DEPT_SCAN RelNodes [2]
>>> > > >> > > > >>> >>> - An instance one to be used in the tests, that is
>>> > > >> initialized
>>> > > >> > on
>>> > > >> > > > >>> >>> the @Before public void setUp() method [3]
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>> Before this commit, the EMP_SCAN / DEPT_SCAN were
>>> only
>>> > > used
>>> > > >> to
>>> > > >> > > read
>>> > > >> > > > >>> their
>>> > > >> > > > >>> >>> rowTypes to test some join auxiliary methods. But
>>> the
>>> > new
>>> > > >> > > > >>> >>> tests testPushDownJoinConditions* actually build a
>>> plan
>>> > > and
>>> > > >> > push
>>> > > >> > > > >>> these
>>> > > >> > > > >>> >>> scans into the RelBuilder to be tested [4] (which
>>> is a
>>> > > >> > different
>>> > > >> > > > one
>>> > > >> > > > >>> than
>>> > > >> > > > >>> >>> the static RelBuilder that created the scans).
>>> > > >> > > > >>> >>> Maybe this is no problem generally, but it can
>>> > potentially
>>> > > >> be
>>> > > >> > > under
>>> > > >> > > > >>> >> certain
>>> > > >> > > > >>> >>> circumstances?, which would explain the randomness
>>> of
>>> > the
>>> > > >> > issue.
>>> > > >> > > > >>> >>> Could this explain the exception?
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>> [1]
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > >
>>> >
>>> https://github.com/apache/calcite/commit/82e7d4e760cb203d31956c55e38e0fdd56119d58
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>> [2]
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > >
>>> >
>>> https://github.com/apache/calcite/blob/ac40d6951bc8c475ca6804be6d878107cc2ebb13/core/src/test/java/org/apache/calcite/plan/RelOptUtilTest.java#L71
>>> > > >> > > > >>> >>> [3]
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > >
>>> >
>>> https://github.com/apache/calcite/blob/ac40d6951bc8c475ca6804be6d878107cc2ebb13/core/src/test/java/org/apache/calcite/plan/RelOptUtilTest.java#L92
>>> > > >> > > > >>> >>> [4]
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > >
>>> >
>>> https://github.com/apache/calcite/blob/ac40d6951bc8c475ca6804be6d878107cc2ebb13/core/src/test/java/org/apache/calcite/plan/RelOptUtilTest.java#L292
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>> Le mer. 29 mai 2019 à 02:20, Julian Hyde <
>>> > > [email protected]>
>>> > > >> a
>>> > > >> > > > écrit
>>> > > >> > > > >>> :
>>> > > >> > > > >>> >>>
>>> > > >> > > > >>> >>>> It’s a tough call. It is probable that the problem
>>> > > existed
>>> > > >> > > already
>>> > > >> > > > >>> and
>>> > > >> > > > >>> >> the
>>> > > >> > > > >>> >>>> change merely surfaced it.
>>> > > >> > > > >>> >>>>
>>> > > >> > > > >>> >>>>> On May 28, 2019, at 5:17 PM, Stamatis Zampetakis <
>>> > > >> > > > >>> [email protected]>
>>> > > >> > > > >>> >>>> wrote:
>>> > > >> > > > >>> >>>>>
>>> > > >> > > > >>> >>>>> It is not the only test that is failing after
>>> commit
>>> > [1]
>>> > > >> but
>>> > > >> > > all
>>> > > >> > > > >>> the
>>> > > >> > > > >>> >> new
>>> > > >> > > > >>> >>>>> tests that were added.
>>> > > >> > > > >>> >>>>>
>>> > > >> > > > >>> >>>>> I've seen the problem on Jenkins on all JDKs but I
>>> > > cannot
>>> > > >> > > > reproduce
>>> > > >> > > > >>> >> it
>>> > > >> > > > >>> >>>>> locally.
>>> > > >> > > > >>> >>>>> I guess we are dealing with a race condition most
>>> > likely
>>> > > >> due
>>> > > >> > to
>>> > > >> > > > the
>>> > > >> > > > >>> >>>>> concurrent execution of tests with surefire.
>>> > > >> > > > >>> >>>>>
>>> > > >> > > > >>> >>>>> Should we revert the commit till we find a
>>> solution?
>>> > > >> > > > >>> >>>>>
>>> > > >> > > > >>> >>>>> [1]
>>> > > >> > > > >>> >>>>>
>>> > > >> > > > >>> >>>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > >
>>> >
>>> https://github.com/apache/calcite/commit/82e7d4e760cb203d31956c55e38e0fdd56119d58
>>> > > >> > > > >>> >>>>>
>>> > > >> > > > >>> >>>>> On Tue, May 28, 2019 at 7:57 PM Julian Hyde <
>>> > > >> > [email protected]>
>>> > > >> > > > >>> >> wrote:
>>> > > >> > > > >>> >>>>>
>>> > > >> > > > >>> >>>>>> I have seen this intermittent failure 3 times in
>>> the
>>> > > last
>>> > > >> > > week:
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>>> [INFO] Running
>>> org.apache.calcite.plan.RelOptUtilTest
>>> > > >> > > > >>> >>>>>> [ERROR] Tests run: 11, Failures: 0, Errors: 1,
>>> > Skipped:
>>> > > >> 0,
>>> > > >> > > Time
>>> > > >> > > > >>> >> elapsed:
>>> > > >> > > > >>> >>>>>> 0.411 s <<< FAILURE! - in
>>> > > >> > > org.apache.calcite.plan.RelOptUtilTest
>>> > > >> > > > >>> >>>>>> [ERROR]
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > >
>>> >
>>> testPushDownJoinConditionsWithExpandedIsNotDistinctUsingCase(org.apache.calcite.plan.RelOptUtilTest)
>>> > > >> > > > >>> >>>>>> Time elapsed: 0.349 s <<< ERROR!
>>> > > >> > > > >>> >>>>>>
>>> > org.apache.calcite.rel.metadata.CyclicMetadataException
>>> > > >> > > > >>> >>>>>> at
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > >
>>> >
>>> org.apache.calcite.plan.RelOptUtilTest.testPushDownJoinConditionsWithExpandedIsNotDistinctUsingCase(RelOptUtilTest.java:445)
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>>> I have seen it on Oracle JDK 12 and OpenJDK 10.
>>> The
>>> > > test
>>> > > >> was
>>> > > >> > > > only
>>> > > >> > > > >>> >> added
>>> > > >> > > > >>> >>>> on
>>> > > >> > > > >>> >>>>>> May 22 so I assume that it will continue to fail
>>> > > >> > > intermittently
>>> > > >> > > > >>> >> until
>>> > > >> > > > >>> >>>> we do
>>> > > >> > > > >>> >>>>>> something.
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>>> Anyone have any ideas?
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>>> Laurent, as you added the test, can you please
>>> look
>>> > into
>>> > > >> it?
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>>> Julian
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>>>
>>> > > >> > > > >>> >>>>
>>> > > >> > > > >>> >>>>
>>> > > >> > > > >>> >>
>>> > > >> > > > >>> >
>>> > > >> > > > >>>
>>> > > >> > > > >>
>>> > > >> > > >
>>> > > >> > >
>>> > > >> >
>>> > > >>
>>> > > >
>>> > >
>>> >
>>> >
>>>
>>>

Reply via email to