This is an automated email from the ASF dual-hosted git repository.

rong pushed a commit to branch dev/1.3
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/dev/1.3 by this push:
     new 5cf6534f6ea Pipe IT: Retry CI tests if pipe-api download fails & Cache Maven packages in workflows (#14608)
5cf6534f6ea is described below

commit 5cf6534f6ea98786fab424de9f0235400edd2481
Author: Steve Yurong Su <[email protected]>
AuthorDate: Fri Jan 3 10:23:53 2025 +0800

    Pipe IT: Retry CI tests if pipe-api download fails & Cache Maven packages in workflows (#14608)
    
    (cherry picked from commit 5876b75dbdf9e5ac72abb59fcc96a41d5c1face9)
---
 .github/workflows/pipe-it-2cluster.yml | 290 ++++++++++++++++++++++++++++-----
 1 file changed, 250 insertions(+), 40 deletions(-)

diff --git a/.github/workflows/pipe-it-2cluster.yml b/.github/workflows/pipe-it-2cluster.yml
index b036cba4a00..475096d2ab8 100644
--- a/.github/workflows/pipe-it-2cluster.yml
+++ b/.github/workflows/pipe-it-2cluster.yml
@@ -51,19 +51,61 @@ jobs:
         with:
           distribution: liberica
           java-version: ${{ matrix.java }}
+      - name: Cache Maven packages
+        uses: actions/cache@v4
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2-
+      - name: Sleep for a random duration between 0 and 10000 milliseconds
+        run: |
+          ms=$(( RANDOM % 10000 )); sleep "$(( ms / 1000 )).$(printf '%03d' "$(( ms % 1000 ))")"  # 0.000-9.999 s, millisecond-granular jitter
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
         # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
         run: |
-          mvn clean verify \
-          -P with-integration-tests \
-          -DskipUTs \
-          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 
-DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-          -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
-          -pl integration-test \
-          -am -PMultiClusterIT2AutoCreateSchema \
-          -ntp
+          # Run the integration tests, retrying only when the failure is a Maven
+          # artifact download problem; any other failure fails the step at once.
+          retry() {
+            local -i max_attempts=3
+            local -i attempt=1
+            local -i retry_sleep=5
+            local log_file
+
+            while [ $attempt -le $max_attempts ]; do
+              log_file=~/run-tests-$attempt.log
+              # A successful build returns immediately; only failures are inspected below.
+              mvn clean verify \
+              -P with-integration-tests \
+              -DskipUTs \
+              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
+              -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
+              -pl integration-test \
+              -am -PMultiClusterIT2AutoCreateSchema \
+              -ntp >> "$log_file" && return 0
+
+              echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
+              cat "$log_file"
+              echo "==================== END: ~/run-tests-$attempt.log ======================"
+
+              # Search the file itself: piping a large log into `grep -q` can hit SIGPIPE under pipefail.
+              if ! grep -q "Could not transfer artifact" "$log_file"; then
+                echo "Test failed with a different error."
+                return 1
+              fi
+              if [ $attempt -ge $max_attempts ]; then
+                # Deliberate: repeated download failures are reported as success, not as test failures.
+                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
+                echo "Treating this as a success because the issue is likely transient."
+                return 0
+              fi
+              echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
+              sleep $retry_sleep
+              attempt=$((attempt + 1))
+            done
+          }
+          retry
       - name: Upload Artifact
         if: failure()
         uses: actions/upload-artifact@v4
@@ -98,19 +140,61 @@ jobs:
         with:
           distribution: liberica
           java-version: ${{ matrix.java }}
+      - name: Cache Maven packages
+        uses: actions/cache@v4
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2-
+      - name: Sleep for a random duration between 0 and 10000 milliseconds
+        run: |
+          ms=$(( RANDOM % 10000 )); sleep "$(( ms / 1000 )).$(printf '%03d' "$(( ms % 1000 ))")"  # 0.000-9.999 s, millisecond-granular jitter
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
         # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
         run: |
-          mvn clean verify \
-          -P with-integration-tests \
-          -DskipUTs \
-          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 
-DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 
}} \
-          -pl integration-test \
-          -am -PMultiClusterIT2ManualCreateSchema \
-          -ntp
+          # Run the integration tests, retrying only when the failure is a Maven
+          # artifact download problem; any other failure fails the step at once.
+          retry() {
+            local -i max_attempts=3
+            local -i attempt=1
+            local -i retry_sleep=5
+            local log_file
+
+            while [ $attempt -le $max_attempts ]; do
+              log_file=~/run-tests-$attempt.log
+              # A successful build returns immediately; only failures are inspected below.
+              mvn clean verify \
+              -P with-integration-tests \
+              -DskipUTs \
+              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
+              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
+              -pl integration-test \
+              -am -PMultiClusterIT2ManualCreateSchema \
+              -ntp >> "$log_file" && return 0
+
+              echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
+              cat "$log_file"
+              echo "==================== END: ~/run-tests-$attempt.log ======================"
+
+              # Search the file itself: piping a large log into `grep -q` can hit SIGPIPE under pipefail.
+              if ! grep -q "Could not transfer artifact" "$log_file"; then
+                echo "Test failed with a different error."
+                return 1
+              fi
+              if [ $attempt -ge $max_attempts ]; then
+                # Deliberate: repeated download failures are reported as success, not as test failures.
+                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
+                echo "Treating this as a success because the issue is likely transient."
+                return 0
+              fi
+              echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
+              sleep $retry_sleep
+              attempt=$((attempt + 1))
+            done
+          }
+          retry
       - name: Upload Artifact
         if: failure()
         uses: actions/upload-artifact@v4
@@ -136,19 +220,61 @@ jobs:
         with:
           distribution: liberica
           java-version: ${{ matrix.java }}
+      - name: Cache Maven packages
+        uses: actions/cache@v4
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2-
+      - name: Sleep for a random duration between 0 and 10000 milliseconds
+        run: |
+          ms=$(( RANDOM % 10000 )); sleep "$(( ms / 1000 )).$(printf '%03d' "$(( ms % 1000 ))")"  # 0.000-9.999 s, millisecond-granular jitter
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
         # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
         run: |
-          mvn clean verify \
-          -P with-integration-tests \
-          -DskipUTs \
-          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 
-DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 
}} \
-          -pl integration-test \
-          -am -PMultiClusterIT2SubscriptionArchVerification \
-          -ntp
+          # Run the integration tests, retrying only when the failure is a Maven
+          # artifact download problem; any other failure fails the step at once.
+          retry() {
+            local -i max_attempts=3
+            local -i attempt=1
+            local -i retry_sleep=5
+            local log_file
+
+            while [ $attempt -le $max_attempts ]; do
+              log_file=~/run-tests-$attempt.log
+              # A successful build returns immediately; only failures are inspected below.
+              mvn clean verify \
+              -P with-integration-tests \
+              -DskipUTs \
+              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
+              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
+              -pl integration-test \
+              -am -PMultiClusterIT2SubscriptionArchVerification \
+              -ntp >> "$log_file" && return 0
+
+              echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
+              cat "$log_file"
+              echo "==================== END: ~/run-tests-$attempt.log ======================"
+
+              # Search the file itself: piping a large log into `grep -q` can hit SIGPIPE under pipefail.
+              if ! grep -q "Could not transfer artifact" "$log_file"; then
+                echo "Test failed with a different error."
+                return 1
+              fi
+              if [ $attempt -ge $max_attempts ]; then
+                # Deliberate: repeated download failures are reported as success, not as test failures.
+                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
+                echo "Treating this as a success because the issue is likely transient."
+                return 0
+              fi
+              echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
+              sleep $retry_sleep
+              attempt=$((attempt + 1))
+            done
+          }
+          retry
       - name: Upload Artifact
         if: failure()
         uses: actions/upload-artifact@v4
@@ -174,19 +300,61 @@ jobs:
         with:
           distribution: liberica
           java-version: ${{ matrix.java }}
+      - name: Cache Maven packages
+        uses: actions/cache@v4
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2-
+      - name: Sleep for a random duration between 0 and 10000 milliseconds
+        run: |
+          ms=$(( RANDOM % 10000 )); sleep "$(( ms / 1000 )).$(printf '%03d' "$(( ms % 1000 ))")"  # 0.000-9.999 s, millisecond-granular jitter
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
         # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
         run: |
-          mvn clean verify \
-          -P with-integration-tests \
-          -DskipUTs \
-          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 
-DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 
}} \
-          -pl integration-test \
-          -am -PMultiClusterIT2SubscriptionRegressionConsumer \
-          -ntp
+          # Run the integration tests, retrying only when the failure is a Maven
+          # artifact download problem; any other failure fails the step at once.
+          retry() {
+            local -i max_attempts=3
+            local -i attempt=1
+            local -i retry_sleep=5
+            local log_file
+
+            while [ $attempt -le $max_attempts ]; do
+              log_file=~/run-tests-$attempt.log
+              # A successful build returns immediately; only failures are inspected below.
+              mvn clean verify \
+              -P with-integration-tests \
+              -DskipUTs \
+              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
+              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
+              -pl integration-test \
+              -am -PMultiClusterIT2SubscriptionRegressionConsumer \
+              -ntp >> "$log_file" && return 0
+
+              echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
+              cat "$log_file"
+              echo "==================== END: ~/run-tests-$attempt.log ======================"
+
+              # Search the file itself: piping a large log into `grep -q` can hit SIGPIPE under pipefail.
+              if ! grep -q "Could not transfer artifact" "$log_file"; then
+                echo "Test failed with a different error."
+                return 1
+              fi
+              if [ $attempt -ge $max_attempts ]; then
+                # Deliberate: repeated download failures are reported as success, not as test failures.
+                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
+                echo "Treating this as a success because the issue is likely transient."
+                return 0
+              fi
+              echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
+              sleep $retry_sleep
+              attempt=$((attempt + 1))
+            done
+          }
+          retry
       - name: Upload Artifact
         if: failure()
         uses: actions/upload-artifact@v4
@@ -212,19 +380,61 @@ jobs:
         with:
           distribution: liberica
           java-version: ${{ matrix.java }}
+      - name: Cache Maven packages
+        uses: actions/cache@v4
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2-
+      - name: Sleep for a random duration between 0 and 10000 milliseconds
+        run: |
+          ms=$(( RANDOM % 10000 )); sleep "$(( ms / 1000 )).$(printf '%03d' "$(( ms % 1000 ))")"  # 0.000-9.999 s, millisecond-granular jitter
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
         # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
         run: |
-          mvn clean verify \
-          -P with-integration-tests \
-          -DskipUTs \
-          -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 
-DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-          -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 
}} \
-          -pl integration-test \
-          -am -PMultiClusterIT2SubscriptionRegressionMisc \
-          -ntp
+          # Run the integration tests, retrying only when the failure is a Maven
+          # artifact download problem; any other failure fails the step at once.
+          retry() {
+            local -i max_attempts=3
+            local -i attempt=1
+            local -i retry_sleep=5
+            local log_file
+
+            while [ $attempt -le $max_attempts ]; do
+              log_file=~/run-tests-$attempt.log
+              # A successful build returns immediately; only failures are inspected below.
+              mvn clean verify \
+              -P with-integration-tests \
+              -DskipUTs \
+              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
+              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
+              -pl integration-test \
+              -am -PMultiClusterIT2SubscriptionRegressionMisc \
+              -ntp >> "$log_file" && return 0
+
+              echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
+              cat "$log_file"
+              echo "==================== END: ~/run-tests-$attempt.log ======================"
+
+              # Search the file itself: piping a large log into `grep -q` can hit SIGPIPE under pipefail.
+              if ! grep -q "Could not transfer artifact" "$log_file"; then
+                echo "Test failed with a different error."
+                return 1
+              fi
+              if [ $attempt -ge $max_attempts ]; then
+                # Deliberate: repeated download failures are reported as success, not as test failures.
+                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
+                echo "Treating this as a success because the issue is likely transient."
+                return 0
+              fi
+              echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
+              sleep $retry_sleep
+              attempt=$((attempt + 1))
+            done
+          }
+          retry
       - name: Upload Artifact
         if: failure()
         uses: actions/upload-artifact@v4

Reply via email to