This is an automated email from the ASF dual-hosted git repository.

xiaozhenliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 316675294e fix(amber): add temporary fixes for e2e tests in Amber 
(#3962)
316675294e is described below

commit 316675294ebcd2556f0098c4061d6f86f1cbfbdb
Author: Xiaozhen Liu <[email protected]>
AuthorDate: Wed Oct 22 11:08:33 2025 -0700

    fix(amber): add temporary fixes for e2e tests in Amber (#3962)
    
    <!--
    Thanks for sending a pull request (PR)! Here are some tips for you:
    1. If this is your first time, please read our contributor guidelines:
    [Contributing to
    Texera](https://github.com/apache/texera/blob/main/CONTRIBUTING.md)
      2. Ensure you have added or run the appropriate tests for your PR
      3. If the PR is work in progress, mark it a draft on GitHub.
      4. Please write your PR title to summarize what this PR proposes, we
        are following Conventional Commits style for PR titles as well.
      5. Be sure to keep the PR description updated to reflect all changes.
    -->
    
    ### What changes were proposed in this PR?
    <!--
    Please clarify what changes you are proposing. The purpose of this
    section
    is to outline the changes. Here are some tips for you:
      1. If you propose a new API, clarify the use case for a new API.
      2. If you fix a bug, you can clarify why it is a bug.
      3. If it is a refactoring, clarify what has been changed.
      4. It would be helpful to include a before-and-after comparison using
         screenshots or GIFs.
      5. Please consider writing useful notes for better and faster reviews.
    -->
    
    This PR adds temporary fixes for the occasional CI failures related to
    e2e test cases in Amber by doing the following:
    - Add a one-minute timeout when awaiting workflow completion in
    `PauseSpec`, so a stuck run fails instead of hanging indefinitely.
    - Add a retry for test cases in `PauseSpec` and `DataProcessingSpec` in
    the case of failure. We use ScalaTest's
    [Retries](https://www.scalatest.org/scaladoc/3.0.9/org/scalatest/Retries.html)
    trait, which retries a failed test once. If a test case succeeds after a
    retry, its outcome will be `Canceled` (this will still pass CIs).
    
    Please refer to #3880 for details about the current problems with these
    two e2e tests.
    
    ### Any related issues, documentation, discussions?
    <!--
    Please use this section to link other resources if not mentioned
    already.
    1. If this PR fixes an issue, please include `Fixes #1234`, `Resolves
    #1234`
    or `Closes #1234`. If it is only related, simply mention the issue
    number.
      2. If there is design documentation, please add the link.
      3. If there is a discussion in the mailing list, please add the link.
    -->
    
    Resolves #3948.
    
    
    ### How was this PR tested?
    
    Manually tested locally by setting the timeouts to be very short. The
    retry works. When a test case fails at first but succeeds on retry, it
    will show "Test Canceled: Test canceled because flickered: initially
    failed, but succeeded on retry".
    
    ### Was this PR authored or co-authored using generative AI tooling?
    No.
---
 .../apache/amber/engine/e2e/DataProcessingSpec.scala    | 13 +++++++++++--
 .../scala/org/apache/amber/engine/e2e/PauseSpec.scala   | 17 +++++++++++++----
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git 
a/amber/src/test/scala/org/apache/amber/engine/e2e/DataProcessingSpec.scala 
b/amber/src/test/scala/org/apache/amber/engine/e2e/DataProcessingSpec.scala
index cea348764a..8b62114b41 100644
--- a/amber/src/test/scala/org/apache/amber/engine/e2e/DataProcessingSpec.scala
+++ b/amber/src/test/scala/org/apache/amber/engine/e2e/DataProcessingSpec.scala
@@ -45,7 +45,7 @@ import org.apache.amber.operator.aggregate.AggregationFunction
 import 
org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource.getResultUriByLogicalPortId
 import org.apache.texera.workflow.LogicalLink
 import org.scalatest.flatspec.AnyFlatSpecLike
-import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Outcome, Retries}
 
 import scala.concurrent.duration.DurationInt
 
@@ -54,7 +54,16 @@ class DataProcessingSpec
     with ImplicitSender
     with AnyFlatSpecLike
     with BeforeAndAfterAll
-    with BeforeAndAfterEach {
+    with BeforeAndAfterEach
+    with Retries {
+
+  /**
+    * This block retries each test once if it fails.
+    * In the CI environment, there is a chance that executeWorkflow does not 
receive "COMPLETED" status.
+    * Until we find the root cause of this issue, we use a retry mechanism 
here to stabilize CI runs.
+    */
+  override def withFixture(test: NoArgTest): Outcome =
+    withRetry { super.withFixture(test) }
 
   implicit val timeout: Timeout = Timeout(5.seconds)
 
diff --git a/amber/src/test/scala/org/apache/amber/engine/e2e/PauseSpec.scala 
b/amber/src/test/scala/org/apache/amber/engine/e2e/PauseSpec.scala
index c905ef7e68..46236391be 100644
--- a/amber/src/test/scala/org/apache/amber/engine/e2e/PauseSpec.scala
+++ b/amber/src/test/scala/org/apache/amber/engine/e2e/PauseSpec.scala
@@ -22,7 +22,7 @@ package org.apache.amber.engine.e2e
 import akka.actor.{ActorSystem, Props}
 import akka.testkit.{ImplicitSender, TestKit}
 import akka.util.Timeout
-import com.twitter.util.{Await, Promise}
+import com.twitter.util.{Await, Duration, Promise}
 import com.typesafe.scalalogging.Logger
 import org.apache.amber.clustering.SingleNodeListener
 import org.apache.amber.core.workflow.{PortIdentity, WorkflowContext}
@@ -39,7 +39,7 @@ import org.apache.amber.engine.e2e.TestUtils.{
 import org.apache.amber.operator.{LogicalOp, TestOperators}
 import org.apache.texera.workflow.LogicalLink
 import org.scalatest.flatspec.AnyFlatSpecLike
-import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Outcome, Retries}
 
 import scala.concurrent.duration._
 
@@ -48,7 +48,16 @@ class PauseSpec
     with ImplicitSender
     with AnyFlatSpecLike
     with BeforeAndAfterAll
-    with BeforeAndAfterEach {
+    with BeforeAndAfterEach
+    with Retries {
+
+  /**
+    * This block retries each test once if it fails.
+    * In the CI environment, there is a chance that shouldPause does not 
receive "COMPLETED" status.
+    * Until we find the root cause of this issue, we use a retry mechanism 
here to stabilize CI runs.
+    */
+  override def withFixture(test: NoArgTest): Outcome =
+    withRetry { super.withFixture(test) }
 
   implicit val timeout: Timeout = Timeout(5.seconds)
 
@@ -103,7 +112,7 @@ class PauseSpec
     Await.result(client.controllerInterface.pauseWorkflow(EmptyRequest(), ()))
     Thread.sleep(4000)
     Await.result(client.controllerInterface.resumeWorkflow(EmptyRequest(), ()))
-    Await.result(completion)
+    Await.result(completion, Duration.fromMinutes(1))
   }
 
   "Engine" should "be able to pause csv workflow" in {

Reply via email to