This is an automated email from the ASF dual-hosted git repository.
roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new 85320527 [#388][FOLLOWUP] fix: Fix the flaky test GrpcServerTest
(#1023)
85320527 is described below
commit 85320527ad4fbb15ef033a10f4d2ca5da617be91
Author: Alisha-Anaum <[email protected]>
AuthorDate: Wed Jul 19 02:03:46 2023 -0500
[#388][FOLLOWUP] fix: Fix the flaky test GrpcServerTest (#1023)
### What changes were proposed in this pull request?
I observed a previous fix injected some delay to fix the flaky test, but I
believe that fix might be unstable in a CI environment or when run on different
machines, given the dependency on some constant wait time. I suggest a new way
to fix the test by adding some synchronization for the test execution only. I
at first identify the source code location whose slow execution leads to the
flaky test failure, where if
common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java#157 [...]
### Why are the changes needed?
This test is flakily fails. I run this test many times and it makes
assertion fails. The failure message is as follows.
Failure:
[INFO] Running org.apache.uniffle.common.rpc.GrpcServerTest
[ERROR] Tests run: 1, Failures: 1, Errors: 0, Skipped: 0, Time elapsed:
0.185 s <<< FAILURE! - in org.apache.uniffle.common.rpc.GrpcServerTest
[ERROR] testGrpcExecutorPool Time elapsed: 0.142 s <<< FAILURE!
org.opentest4j.AssertionFailedError: expected: <2.0> but was: <0.0>
at org.junit.jupiter.api.AssertionUtils.fail(AssertionUtils.java:55)
at org.junit.jupiter.api.AssertionUtils.failNotEqual(AssertionUtils.java:62)
at org.junit.jupiter.api.AssertEquals.assertEquals(AssertEquals.java:70)
at org.junit.jupiter.api.AssertEquals.assertEquals(AssertEquals.java:65)
at org.junit.jupiter.api.Assertions.assertEquals(Assertions.java:885)
at
org.apache.uniffle.common.rpc.GrpcServerTest.testGrpcExecutorPool(GrpcServerTest.java:76)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.junit.platform.commons.util.ReflectionUtils.invokeMethod(ReflectionUtils.java:725)
at
org.junit.jupiter.engine.execution.MethodInvocation.proceed(MethodInvocation.java:60)
[ERROR] Failures:
[ERROR] GrpcServerTest.testGrpcExecutorPool:76 expected: <2.0> but was:
<0.0>
[ERROR] Tests run: 1, Failures: 1, Errors: 0, Skipped: 0
### How was this patch tested?
I run this test more than 1000 times and the test always passes.
Co-authored-by: other <[email protected]>
---
.../main/java/org/apache/uniffle/common/rpc/GrpcServer.java | 12 ++++++++++++
.../java/org/apache/uniffle/common/rpc/GrpcServerTest.java | 5 +++++
2 files changed, 17 insertions(+)
diff --git a/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java
b/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java
index 2d45417b..680c829b 100644
--- a/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java
+++ b/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java
@@ -49,6 +49,7 @@ public class GrpcServer implements ServerInterface {
private static final Logger LOG = LoggerFactory.getLogger(GrpcServer.class);
+ private static volatile boolean poolExecutorHasExecuted;
private Server server;
private final int port;
private int listenPort;
@@ -78,6 +79,16 @@ public class GrpcServer implements ServerInterface {
grpcMetrics);
}
+ // This method is only used for the sake of synchronizing one test
+ static boolean isPoolExecutorHasExecuted() {
+ return poolExecutorHasExecuted;
+ }
+
+ // This method is only used for the sake of synchronizing one test
+ static void reset() {
+ poolExecutorHasExecuted = false;
+ }
+
private Server buildGrpcServer(int serverPort) {
boolean isMetricsEnabled =
rssConf.getBoolean(RssBaseConf.RPC_METRICS_ENABLED);
long maxInboundMessageSize =
rssConf.getLong(RssBaseConf.RPC_MESSAGE_MAX_SIZE);
@@ -157,6 +168,7 @@ public class GrpcServer implements ServerInterface {
grpcMetrics.incGauge(GRPCMetrics.GRPC_SERVER_EXECUTOR_ACTIVE_THREADS_KEY);
grpcMetrics.setGauge(
GRPCMetrics.GRPC_SERVER_EXECUTOR_BLOCKING_QUEUE_SIZE_KEY,
getQueue().size());
+ poolExecutorHasExecuted = true;
super.beforeExecute(t, r);
}
diff --git
a/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java
b/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java
index 713a2014..2142bfcf 100644
--- a/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java
+++ b/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java
@@ -42,6 +42,8 @@ public class GrpcServerTest {
@Test
public void testGrpcExecutorPool() throws Exception {
+ // Explicitly setting the synchronizing variable as false at the beginning
of test run
+ GrpcServer.reset();
GRPCMetrics grpcMetrics = GRPCMetrics.getEmptyGRPCMetrics();
grpcMetrics.register(new CollectorRegistry(true));
GrpcServer.GrpcThreadPoolExecutor executor =
@@ -69,6 +71,9 @@ public class GrpcServerTest {
});
}
+ while (!GrpcServer.isPoolExecutorHasExecuted()) {
+ Thread.yield();
+ }
Thread.sleep(120);
double activeThreads =
grpcMetrics.getGaugeMap().get(GRPC_SERVER_EXECUTOR_ACTIVE_THREADS_KEY).get();