This is an automated email from the ASF dual-hosted git repository.

xintongsong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/flink-agents.git


The following commit(s) were added to refs/heads/main by this push:
     new e2096725 [test] Add per-test retry for flaky live-LLM e2e/cross-language tests
e2096725 is described below

commit e2096725abab6c22b37006caafe9fdee6154658b
Author: Weiqing Yang <[email protected]>
AuthorDate: Sat May 30 22:12:40 2026 -0700

    [test] Add per-test retry for flaky live-LLM e2e/cross-language tests
    
    The live-LLM e2e and cross-language tests run a small Ollama model
    (qwen3:1.7b) and intermittently fail on non-deterministic tool-call
    results or Ollama read timeouts, turning CI red on unrelated changes.
    
    Retry these suites automatically, scoped to the e2e/cross-language
    invocations only so unit and style runs stay deterministic:
    - Python: pytest-rerunfailures with --reruns 2 --reruns-delay 5 on the
      e2e pytest calls in tools/ut.sh and tools/e2e.sh.
    - Java: -Dsurefire.rerunFailingTestsCount=2 on the e2e mvn calls in
      tools/ut.sh and test_resource_cross_language.sh.
    
    A test that passes on retry yields a green build but is reported as a
    flake, so the signal is preserved rather than masked.
    
    Part of #716.
---
 e2e-test/test-scripts/test_resource_cross_language.sh | 2 +-
 python/pyproject.toml                                 | 1 +
 tools/e2e.sh                                          | 2 +-
 tools/ut.sh                                           | 6 ++++--
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/e2e-test/test-scripts/test_resource_cross_language.sh b/e2e-test/test-scripts/test_resource_cross_language.sh
index 8fb7fc2b..ad5c7af0 100755
--- a/e2e-test/test-scripts/test_resource_cross_language.sh
+++ b/e2e-test/test-scripts/test_resource_cross_language.sh
@@ -30,7 +30,7 @@ echo "Root directory: $root_dir"
 cd "$root_dir/e2e-test/flink-agents-end-to-end-tests-resource-cross-language"
 
 echo "Running all tests in resource-cross-language module..."
-mvn -T16 --batch-mode --no-transfer-progress test
+mvn -T16 --batch-mode --no-transfer-progress test -Dsurefire.rerunFailingTestsCount=2
 
 ret=$?
 if [ "$ret" != "0" ]; then
diff --git a/python/pyproject.toml b/python/pyproject.toml
index f5d9814c..cbd0d5b6 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -90,6 +90,7 @@ build = [
 # Test dependencies
 test = [
     "pytest==9.0.3",
+    "pytest-rerunfailures==16.3",
 ]
 
 # Lint dependencies
diff --git a/tools/e2e.sh b/tools/e2e.sh
index 886705fa..bc04762e 100755
--- a/tools/e2e.sh
+++ b/tools/e2e.sh
@@ -81,7 +81,7 @@ function run_resource_cross_language_test_in_python {
       return 1
     fi
 
-    cd "$python_dir" && uv run --no-sync pytest flink_agents -s -k "e2e_tests_resource_cross_language"
+    cd "$python_dir" && uv run --no-sync pytest flink_agents -s -k "e2e_tests_resource_cross_language" --reruns 2 --reruns-delay 5
 }
 
 function run_resource_name_consistency_check {
diff --git a/tools/ut.sh b/tools/ut.sh
index 44b941ba..8b9de711 100755
--- a/tools/ut.sh
+++ b/tools/ut.sh
@@ -155,7 +155,7 @@ java_tests() {
         local all_passed=true
         for version in "${flink_versions[@]}"; do
             echo "Running E2E tests for Flink ${version}..."
-            mvn --batch-mode --no-transfer-progress test -pl 'e2e-test/flink-agents-end-to-end-tests-integration' -Pflink-${version} ${SPOTLESS_FLAG}
+            mvn --batch-mode --no-transfer-progress test -pl 'e2e-test/flink-agents-end-to-end-tests-integration' -Pflink-${version} -Dsurefire.rerunFailingTestsCount=2 ${SPOTLESS_FLAG}
 
             if [ $? -ne 0 ]; then
                 echo "E2E tests failed for Flink ${version}" >&2
@@ -232,6 +232,8 @@ python_tests() {
                 uv run --no-sync pytest flink_agents \
                 -s \
                 -k "e2e_tests_integration" \
+                --reruns 2 \
+                --reruns-delay 5 \
                 -o log_cli=true \
                 -o log_cli_level=${LOG_LEVEL:-CRITICAL}
             else
@@ -259,7 +261,7 @@ python_tests() {
                 echo "Running tests with pytest..."
             fi
             if $run_e2e; then
-                pytest flink_agents -k "e2e_tests_integration" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
+                pytest flink_agents -k "e2e_tests_integration" --reruns 2 --reruns-delay 5 -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
             else
                 pytest flink_agents -k "not e2e_tests" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
             fi

Reply via email to