This is an automated email from the ASF dual-hosted git repository.

JackieTien97 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 9330c7fc0b4 Shard 5 dual-cluster jobs to speed up Multi-Cluster IT (#17695)
9330c7fc0b4 is described below

commit 9330c7fc0b4382c25edfcd31ff8977e9d72bf68b
Author: Jackie Tien <[email protected]>
AuthorDate: Sun May 17 15:07:44 2026 +0800

    Shard 5 dual-cluster jobs to speed up Multi-Cluster IT (#17695)
---
 .github/workflows/pipe-it.yml | 115 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 110 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pipe-it.yml b/.github/workflows/pipe-it.yml
index 0968e7739a0..fb2f732560d 100644
--- a/.github/workflows/pipe-it.yml
+++ b/.github/workflows/pipe-it.yml
@@ -119,6 +119,8 @@ jobs:
           name: cluster-log-single-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
           path: integration-test/target/cluster-logs
           retention-days: 30
+  # 12 IT classes split across 3 parallel shards to cut the historical ~42 min
+  # wall clock to ~14 min. See cluster-it-1c1d.yml for the shard pattern.
   dual-tree-auto-basic:
     strategy:
       fail-fast: false
@@ -128,6 +130,7 @@ jobs:
         # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
         cluster: [HighPerformanceMode]
         os: [ubuntu-latest]
+        shard: [0, 1, 2]
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v5
@@ -147,6 +150,21 @@ jobs:
       - name: Sleep for a random duration between 0 and 10000 milliseconds
         run: |
           sleep  $(( $(( RANDOM % 10000 + 1 )) / 1000))
+      - name: Build IT shard list
+        shell: bash
+        # See cluster-it-1c1d.yml for the shard-list pattern. Write under
+        # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
+        run: |
+          set -euo pipefail
+          SHARD=${{ matrix.shard }}
+          TOTAL=3
+          grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTreeAutoBasic\b' integration-test/src/test/java \
+            | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
+            | sort \
+            | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
+            > "$RUNNER_TEMP/it-shard.txt"
+          echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
+          head -5 "$RUNNER_TEMP/it-shard.txt"
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -164,6 +182,9 @@ jobs:
               -DskipUTs \
               -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
               -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
+              -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
+              -DfailIfNoTests=false \
+              -Dfailsafe.failIfNoSpecifiedTests=false \
               -pl integration-test \
               -am -PMultiClusterIT2DualTreeAutoBasic \
               -ntp >> ~/run-tests-$attempt.log && return 0
@@ -201,9 +222,11 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v6
         with:
-          name: cluster-log-dual-tree-auto-basic-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
+          name: cluster-log-dual-tree-auto-basic-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
           path: integration-test/target/cluster-logs
           retention-days: 30
+  # 9 IT classes split across 3 parallel shards to cut the historical ~51 min
+  # wall clock to ~17 min. See cluster-it-1c1d.yml for the shard pattern.
   dual-tree-auto-enhanced:
     strategy:
       fail-fast: false
@@ -214,6 +237,7 @@ jobs:
         cluster1: [HighPerformanceMode]
         cluster2: [HighPerformanceMode]
         os: [ubuntu-latest]
+        shard: [0, 1, 2]
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v5
@@ -233,6 +257,21 @@ jobs:
       - name: Sleep for a random duration between 0 and 10000 milliseconds
         run: |
           sleep  $(( $(( RANDOM % 10000 + 1 )) / 1000))
+      - name: Build IT shard list
+        shell: bash
+        # See cluster-it-1c1d.yml for the shard-list pattern. Write under
+        # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
+        run: |
+          set -euo pipefail
+          SHARD=${{ matrix.shard }}
+          TOTAL=3
+          grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTreeAutoEnhanced\b' integration-test/src/test/java \
+            | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
+            | sort \
+            | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
+            > "$RUNNER_TEMP/it-shard.txt"
+          echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
+          head -5 "$RUNNER_TEMP/it-shard.txt"
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -250,6 +289,9 @@ jobs:
               -DskipUTs \
               -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
               -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
+              -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
+              -DfailIfNoTests=false \
+              -Dfailsafe.failIfNoSpecifiedTests=false \
               -pl integration-test \
               -am -PMultiClusterIT2DualTreeAutoEnhanced \
               -ntp >> ~/run-tests-$attempt.log && return 0
@@ -287,9 +329,11 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v6
         with:
-          name: cluster-log-dual-tree-auto-enhanced-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
+          name: cluster-log-dual-tree-auto-enhanced-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
           path: integration-test/target/cluster-logs
           retention-days: 30
+  # 11 IT classes split across 3 parallel shards to cut the historical ~27 min
+  # wall clock to ~9 min. See cluster-it-1c1d.yml for the shard pattern.
   dual-tree-manual:
     strategy:
       fail-fast: false
@@ -300,6 +344,7 @@ jobs:
         cluster1: [HighPerformanceMode]
         cluster2: [HighPerformanceMode]
         os: [ubuntu-latest]
+        shard: [0, 1, 2]
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v5
@@ -319,6 +364,21 @@ jobs:
       - name: Sleep for a random duration between 0 and 10000 milliseconds
         run: |
           sleep  $(( $(( RANDOM % 10000 + 1 )) / 1000))
+      - name: Build IT shard list
+        shell: bash
+        # See cluster-it-1c1d.yml for the shard-list pattern. Write under
+        # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
+        run: |
+          set -euo pipefail
+          SHARD=${{ matrix.shard }}
+          TOTAL=3
+          grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTreeManual\b' integration-test/src/test/java \
+            | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
+            | sort \
+            | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
+            > "$RUNNER_TEMP/it-shard.txt"
+          echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
+          head -5 "$RUNNER_TEMP/it-shard.txt"
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -336,6 +396,9 @@ jobs:
               -DskipUTs \
               -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
               -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
+              -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
+              -DfailIfNoTests=false \
+              -Dfailsafe.failIfNoSpecifiedTests=false \
               -pl integration-test \
               -am -PMultiClusterIT2DualTreeManual \
               -ntp >> ~/run-tests-$attempt.log && return 0
@@ -373,7 +436,7 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v6
         with:
-          name: cluster-log-dual-tree-manual-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
+          name: cluster-log-dual-tree-manual-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
           path: integration-test/target/cluster-logs
           retention-days: 30
   subscription-tree-arch-verification:
@@ -720,6 +783,8 @@ jobs:
           name: cluster-log-subscription-tree-regression-misc-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
           path: integration-test/target/cluster-logs
           retention-days: 30
+  # 13 IT classes split across 3 parallel shards to cut the historical ~63 min
+  # wall clock to ~22 min. See cluster-it-1c1d.yml for the shard pattern.
   dual-table-manual-basic:
     strategy:
       fail-fast: false
@@ -729,6 +794,7 @@ jobs:
         # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
         cluster: [HighPerformanceMode]
         os: [ubuntu-latest]
+        shard: [0, 1, 2]
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v5
@@ -748,6 +814,21 @@ jobs:
       - name: Sleep for a random duration between 0 and 10000 milliseconds
         run: |
           sleep  $(( $(( RANDOM % 10000 + 1 )) / 1000))
+      - name: Build IT shard list
+        shell: bash
+        # See cluster-it-1c1d.yml for the shard-list pattern. Write under
+        # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
+        run: |
+          set -euo pipefail
+          SHARD=${{ matrix.shard }}
+          TOTAL=3
+          grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTableManualBasic\b' integration-test/src/test/java \
+            | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
+            | sort \
+            | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
+            > "$RUNNER_TEMP/it-shard.txt"
+          echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
+          head -5 "$RUNNER_TEMP/it-shard.txt"
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -765,6 +846,9 @@ jobs:
               -DskipUTs \
               -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
               -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
+              -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
+              -DfailIfNoTests=false \
+              -Dfailsafe.failIfNoSpecifiedTests=false \
               -pl integration-test \
               -am -PMultiClusterIT2DualTableManualBasic \
               -ntp >> ~/run-tests-$attempt.log && return 0
@@ -802,9 +886,11 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v6
         with:
-          name: cluster-log-dual-table-manual-basic-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
+          name: cluster-log-dual-table-manual-basic-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
           path: integration-test/target/cluster-logs
           retention-days: 30
+  # 11 IT classes split across 3 parallel shards to cut the historical ~62 min
+  # wall clock to ~22 min. See cluster-it-1c1d.yml for the shard pattern.
   dual-table-manual-enhanced:
     strategy:
       fail-fast: false
@@ -814,6 +900,7 @@ jobs:
         # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
         cluster: [HighPerformanceMode]
         os: [ubuntu-latest]
+        shard: [0, 1, 2]
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v5
@@ -833,6 +920,21 @@ jobs:
       - name: Sleep for a random duration between 0 and 10000 milliseconds
         run: |
           sleep  $(( $(( RANDOM % 10000 + 1 )) / 1000))
+      - name: Build IT shard list
+        shell: bash
+        # See cluster-it-1c1d.yml for the shard-list pattern. Write under
+        # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
+        run: |
+          set -euo pipefail
+          SHARD=${{ matrix.shard }}
+          TOTAL=3
+          grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTableManualEnhanced\b' integration-test/src/test/java \
+            | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
+            | sort \
+            | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
+            > "$RUNNER_TEMP/it-shard.txt"
+          echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
+          head -5 "$RUNNER_TEMP/it-shard.txt"
       - name: IT Test
         shell: bash
         # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -850,6 +952,9 @@ jobs:
               -DskipUTs \
               -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
               -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
+              -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
+              -DfailIfNoTests=false \
+              -Dfailsafe.failIfNoSpecifiedTests=false \
               -pl integration-test \
               -am -PMultiClusterIT2DualTableManualEnhanced \
               -ntp >> ~/run-tests-$attempt.log && return 0
@@ -887,7 +992,7 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v6
         with:
-          name: cluster-log-dual-table-manual-enhanced-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
+          name: cluster-log-dual-table-manual-enhanced-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
           path: integration-test/target/cluster-logs
           retention-days: 30
   triple:

Reply via email to