This is an automated email from the ASF dual-hosted git repository.

roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new 85320527 [#388][FOLLOWUP] fix: Fix the flaky test GrpcServerTest 
(#1023)
85320527 is described below

commit 85320527ad4fbb15ef033a10f4d2ca5da617be91
Author: Alisha-Anaum <[email protected]>
AuthorDate: Wed Jul 19 02:03:46 2023 -0500

    [#388][FOLLOWUP] fix: Fix the flaky test GrpcServerTest (#1023)
    
    ### What changes were proposed in this pull request?
    I observed a previous fix injected some delay to fix the flaky test, but I 
believe that fix might be unstable in a CI environment or when run on different 
machines, given the dependency on some constant wait time. I suggest a new way 
to fix the test by adding some synchronization for the test execution only. I 
at first identify the source code location whose slow execution leads to the 
flaky test failure, where if 
common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java#157  [...]
    
    ### Why are the changes needed?
    This test is flakily fails. I run this test many times and it makes 
assertion fails. The failure message is as follows.
    Failure:
    [INFO] Running org.apache.uniffle.common.rpc.GrpcServerTest
    [ERROR] Tests run: 1, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 
0.185 s <<< FAILURE! - in org.apache.uniffle.common.rpc.GrpcServerTest
    [ERROR] testGrpcExecutorPool Time elapsed: 0.142 s <<< FAILURE!
    org.opentest4j.AssertionFailedError: expected: <2.0> but was: <0.0>
    at org.junit.jupiter.api.AssertionUtils.fail(AssertionUtils.java:55)
    at org.junit.jupiter.api.AssertionUtils.failNotEqual(AssertionUtils.java:62)
    at org.junit.jupiter.api.AssertEquals.assertEquals(AssertEquals.java:70)
    at org.junit.jupiter.api.AssertEquals.assertEquals(AssertEquals.java:65)
    at org.junit.jupiter.api.Assertions.assertEquals(Assertions.java:885)
    at 
org.apache.uniffle.common.rpc.GrpcServerTest.testGrpcExecutorPool(GrpcServerTest.java:76)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at 
org.junit.platform.commons.util.ReflectionUtils.invokeMethod(ReflectionUtils.java:725)
    at 
org.junit.jupiter.engine.execution.MethodInvocation.proceed(MethodInvocation.java:60)
    
    [ERROR] Failures:
    [ERROR] GrpcServerTest.testGrpcExecutorPool:76 expected: <2.0> but was: 
<0.0>
    [ERROR] Tests run: 1, Failures: 1, Errors: 0, Skipped: 0
    
    ### How was this patch tested?
    I run this test more than 1000 times and the test always passes.
    
    Co-authored-by: other <[email protected]>
---
 .../main/java/org/apache/uniffle/common/rpc/GrpcServer.java  | 12 ++++++++++++
 .../java/org/apache/uniffle/common/rpc/GrpcServerTest.java   |  5 +++++
 2 files changed, 17 insertions(+)

diff --git a/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java 
b/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java
index 2d45417b..680c829b 100644
--- a/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java
+++ b/common/src/main/java/org/apache/uniffle/common/rpc/GrpcServer.java
@@ -49,6 +49,7 @@ public class GrpcServer implements ServerInterface {
 
   private static final Logger LOG = LoggerFactory.getLogger(GrpcServer.class);
 
+  private static volatile boolean poolExecutorHasExecuted;
   private Server server;
   private final int port;
   private int listenPort;
@@ -78,6 +79,16 @@ public class GrpcServer implements ServerInterface {
             grpcMetrics);
   }
 
+  // This method is only used for the sake of synchronizing one test
+  static boolean isPoolExecutorHasExecuted() {
+    return poolExecutorHasExecuted;
+  }
+
+  // This method is only used for the sake of synchronizing one test
+  static void reset() {
+    poolExecutorHasExecuted = false;
+  }
+
   private Server buildGrpcServer(int serverPort) {
     boolean isMetricsEnabled = 
rssConf.getBoolean(RssBaseConf.RPC_METRICS_ENABLED);
     long maxInboundMessageSize = 
rssConf.getLong(RssBaseConf.RPC_MESSAGE_MAX_SIZE);
@@ -157,6 +168,7 @@ public class GrpcServer implements ServerInterface {
       
grpcMetrics.incGauge(GRPCMetrics.GRPC_SERVER_EXECUTOR_ACTIVE_THREADS_KEY);
       grpcMetrics.setGauge(
           GRPCMetrics.GRPC_SERVER_EXECUTOR_BLOCKING_QUEUE_SIZE_KEY, 
getQueue().size());
+      poolExecutorHasExecuted = true;
       super.beforeExecute(t, r);
     }
 
diff --git 
a/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java 
b/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java
index 713a2014..2142bfcf 100644
--- a/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java
+++ b/common/src/test/java/org/apache/uniffle/common/rpc/GrpcServerTest.java
@@ -42,6 +42,8 @@ public class GrpcServerTest {
 
   @Test
   public void testGrpcExecutorPool() throws Exception {
+    // Explicitly setting the synchronizing variable as false at the beginning 
of test run
+    GrpcServer.reset();
     GRPCMetrics grpcMetrics = GRPCMetrics.getEmptyGRPCMetrics();
     grpcMetrics.register(new CollectorRegistry(true));
     GrpcServer.GrpcThreadPoolExecutor executor =
@@ -69,6 +71,9 @@ public class GrpcServerTest {
           });
     }
 
+    while (!GrpcServer.isPoolExecutorHasExecuted()) {
+      Thread.yield();
+    }
     Thread.sleep(120);
     double activeThreads =
         
grpcMetrics.getGaugeMap().get(GRPC_SERVER_EXECUTOR_ACTIVE_THREADS_KEY).get();

Reply via email to