This is an automated email from the ASF dual-hosted git repository.

corgy pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new f022e409ca [Improve][CI] Optimize CI (#9900)
f022e409ca is described below

commit f022e409ca1f19ec2bd43ed9a3b2a2154e7e0d47
Author: zhangdonghao <[email protected]>
AuthorDate: Tue Oct 14 14:18:11 2025 +0800

    [Improve][CI] Optimize CI (#9900)
---
 .github/workflows/backend.yml        | 209 +++++++++++++++++++----------------
 tools/github/ci_duration_analysis.py | 160 +++++++++++++++++++++++++++
 tools/github/free_disk_space.sh      | 124 +++++++++++++++++----
 3 files changed, 372 insertions(+), 121 deletions(-)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 7121917cfa..5da6224955 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -61,7 +61,7 @@ jobs:
 
   helm-chart-check:
     name: Check Helm Chart Syntax
-    needs: [ license-header, code-style]
+    needs: [ license-header, code-style ]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -275,10 +275,18 @@ jobs:
           echo $build_modules
           echo "modules=$build_modules" >> $GITHUB_OUTPUT
 
+  delayed-scheduling:
+    name: Delayed Scheduling
+    needs: [ changes, sanity-check ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Sleep for 20 seconds
+        run: sleep 20s
+
   dependency-license:
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     name: Dependency licenses
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     runs-on: ubuntu-latest
     timeout-minutes: 60
     steps:
@@ -303,7 +311,7 @@ jobs:
 
   document:
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.docs == 
'true'
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     name: Build website
     runs-on: ubuntu-latest
     timeout-minutes: 60
@@ -332,10 +340,10 @@ jobs:
 
   seatunnel-ui:
     if: needs.changes.outputs.api == 'true'
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     name: Build SeaTunnel UI
     runs-on: ubuntu-latest
-    timeout-minutes: 60
+    timeout-minutes: 20
     steps:
       - name: Checkout PR
         uses: actions/checkout@v3
@@ -357,7 +365,7 @@ jobs:
           npm run build
 
   unit-test:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || (needs.changes.outputs.api == 
'false' && needs.changes.outputs.ut-modules != '')
     runs-on: ${{ matrix.os }}
     strategy:
@@ -625,14 +633,14 @@ jobs:
           MAVEN_OPTS: -Xmx2048m
 
   engine-v2-it:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || needs.changes.outputs.engine-e2e == 'true'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 80
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -650,7 +658,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   engine-k8s-it:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || 
contains(needs.changes.outputs.it-modules, 'seatunnel-engine-k8s-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
@@ -694,7 +702,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 100
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -740,7 +748,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   all-connectors-it-1:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -750,7 +758,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -766,12 +774,13 @@ jobs:
           ./mvnw help:evaluate -Dexpression=project.modules -q -DforceStdout 
-pl :seatunnel-connector-v2-e2e >> /tmp/sub_module.txt
           sub_modules=`python 
tools/update_modules_check/update_modules_check.py sub /tmp/sub_module.txt`
           run_it_modules=`python 
tools/update_modules_check/update_modules_check.py sub_it_module "$sub_modules" 
7 0`
+          echo $run_it_modules
           ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
$run_it_modules -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
   all-connectors-it-2:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -781,7 +790,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 80
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -797,6 +806,7 @@ jobs:
           ./mvnw help:evaluate -Dexpression=project.modules -q -DforceStdout 
-pl :seatunnel-connector-v2-e2e >> /tmp/sub_module.txt
           sub_modules=`python 
tools/update_modules_check/update_modules_check.py sub /tmp/sub_module.txt`
           run_it_modules=`python 
tools/update_modules_check/update_modules_check.py sub_it_module "$sub_modules" 
7 1`
+          echo $run_it_modules
           ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
$run_it_modules -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
@@ -812,7 +822,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 120
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -828,12 +838,13 @@ jobs:
           ./mvnw help:evaluate -Dexpression=project.modules -q -DforceStdout 
-pl :seatunnel-connector-v2-e2e >> /tmp/sub_module.txt
           sub_modules=`python 
tools/update_modules_check/update_modules_check.py sub /tmp/sub_module.txt`
           run_it_modules=`python 
tools/update_modules_check/update_modules_check.py sub_it_module "$sub_modules" 
7 2`
+          echo $run_it_modules
           ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
$run_it_modules -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
   all-connectors-it-4:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -843,7 +854,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 80
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -859,6 +870,7 @@ jobs:
           ./mvnw help:evaluate -Dexpression=project.modules -q -DforceStdout 
-pl :seatunnel-connector-v2-e2e >> /tmp/sub_module.txt
           sub_modules=`python 
tools/update_modules_check/update_modules_check.py sub /tmp/sub_module.txt`
           run_it_modules=`python 
tools/update_modules_check/update_modules_check.py sub_it_module "$sub_modules" 
7 3`
+          echo $run_it_modules
           ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
$run_it_modules -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
@@ -874,7 +886,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 160
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -890,6 +902,7 @@ jobs:
           ./mvnw help:evaluate -Dexpression=project.modules -q -DforceStdout 
-pl :seatunnel-connector-v2-e2e >> /tmp/sub_module.txt
           sub_modules=`python 
tools/update_modules_check/update_modules_check.py sub /tmp/sub_module.txt`
           run_it_modules=`python 
tools/update_modules_check/update_modules_check.py sub_it_module "$sub_modules" 
7 4`
+          echo $run_it_modules
           ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
$run_it_modules -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
@@ -905,7 +918,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 140
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -921,12 +934,13 @@ jobs:
           ./mvnw help:evaluate -Dexpression=project.modules -q -DforceStdout 
-pl :seatunnel-connector-v2-e2e >> /tmp/sub_module.txt
           sub_modules=`python 
tools/update_modules_check/update_modules_check.py sub /tmp/sub_module.txt`
           run_it_modules=`python 
tools/update_modules_check/update_modules_check.py sub_it_module "$sub_modules" 
7 5`
+          echo $run_it_modules
           ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
$run_it_modules -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
   all-connectors-it-7:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -936,7 +950,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 100
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -952,6 +966,7 @@ jobs:
           ./mvnw help:evaluate -Dexpression=project.modules -q -DforceStdout 
-pl :seatunnel-connector-v2-e2e >> /tmp/sub_module.txt
           sub_modules=`python 
tools/update_modules_check/update_modules_check.py sub /tmp/sub_module.txt`
           run_it_modules=`python 
tools/update_modules_check/update_modules_check.py sub_it_module "$sub_modules" 
7 6`
+          echo $run_it_modules
           ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
$run_it_modules -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
@@ -967,7 +982,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 100
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -985,7 +1000,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   jdbc-connectors-it-part-2:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -995,7 +1010,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 40
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1013,7 +1028,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   jdbc-connectors-it-part-3:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -1023,7 +1038,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 40
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1041,7 +1056,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   jdbc-connectors-it-part-4:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -1051,7 +1066,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 20
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1069,7 +1084,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   jdbc-connectors-it-part-5:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -1079,7 +1094,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 40
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1097,7 +1112,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   jdbc-connectors-it-part-6:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -1107,7 +1122,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 40
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1153,7 +1168,7 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   jdbc-connectors-it-ddl:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true'
     runs-on: ${{ matrix.os }}
     env:
@@ -1163,7 +1178,7 @@ jobs:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 40
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1180,15 +1195,15 @@ jobs:
         env:
           MAVEN_OPTS: -Xmx4096m
 
-  kudu-connector-it:
+  connector-redis-it:
     needs: [ changes, sanity-check ]
-    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-kudu-e2e')
+    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-redis-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 30
+    timeout-minutes: 160
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1199,21 +1214,21 @@ jobs:
           cache: 'maven'
       - name: free disk space
         run: tools/github/free_disk_space.sh
-      - name: run kudu connector integration test
+      - name: run redis connector integration test
         run: |
-          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-kudu-e2e -am -Pci
+          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-redis-e2e -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
-  amazonSqs-connector-it:
+  kafka-connector-it:
     needs: [ changes, sanity-check ]
-    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-amazonsqs-e2e')
+    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-kafka-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 140
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1224,21 +1239,21 @@ jobs:
           cache: 'maven'
       - name: free disk space
         run: tools/github/free_disk_space.sh
-      - name: run amazonsqs connector integration test
+      - name: run kafka connector integration test
         run: |
-          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-amazonsqs-e2e -am -Pci
+          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-kafka-e2e -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
-  kafka-connector-it:
+  connector-file-local-it:
     needs: [ changes, sanity-check ]
-    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-kafka-e2e')
+    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-file-local-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 140
+    timeout-minutes: 100
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1249,21 +1264,46 @@ jobs:
           cache: 'maven'
       - name: free disk space
         run: tools/github/free_disk_space.sh
-      - name: run kafka connector integration test
+      - name: run file local connector integration test
         run: |
-          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-kafka-e2e -am -Pci
+          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-file-local-e2e -am -Pci
+        env:
+          MAVEN_OPTS: -Xmx4096m
+
+  connector-file-sftp-it:
+    needs: [ changes, sanity-check, delayed-scheduling ]
+    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-file-sftp-e2e')
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        java: [ '8', '11' ]
+        os: [ 'ubuntu-latest' ]
+    timeout-minutes: 80
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+          cache: 'maven'
+      - name: free disk space
+        run: tools/github/free_disk_space.sh
+      - name: run file sftp connector integration test
+        run: |
+          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-file-sftp-e2e -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
   rocketmq-connector-it:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-rocketmq-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 80
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1282,14 +1322,14 @@ jobs:
 
 
   doris-connector-it:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-doris-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1307,14 +1347,14 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   paimon-connector-it:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-paimon-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1332,14 +1372,14 @@ jobs:
           MAVEN_OPTS: -Xmx4096m
 
   oracle-cdc-connector-it:
-    needs: [ changes, sanity-check ]
+    needs: [ changes, sanity-check, delayed-scheduling ]
     if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-cdc-oracle-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 150
+    timeout-minutes: 40
     steps:
       - name: Checkout repository
         uses: actions/checkout@v2
@@ -1361,40 +1401,15 @@ jobs:
             echo 'running oracle cdc connector integration test...' && \
             ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-cdc-oracle-e2e -am -Pci
 
-  connector-file-local-it:
-    needs: [ changes, sanity-check ]
-    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-file-local-e2e')
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        java: [ '8', '11' ]
-        os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up JDK ${{ matrix.java }}
-        uses: actions/setup-java@v3
-        with:
-          java-version: ${{ matrix.java }}
-          distribution: 'temurin'
-          cache: 'maven'
-      - name: free disk space
-        run: tools/github/free_disk_space.sh
-      - name: run file local connector integration test
-        run: |
-          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-file-local-e2e -am -Pci
-        env:
-          MAVEN_OPTS: -Xmx4096m
-
-  connector-file-sftp-it:
-    needs: [ changes, sanity-check ]
-    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-file-sftp-e2e')
+  kudu-connector-it:
+    needs: [ changes, sanity-check, delayed-scheduling ]
+    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-kudu-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 40
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1405,21 +1420,21 @@ jobs:
           cache: 'maven'
       - name: free disk space
         run: tools/github/free_disk_space.sh
-      - name: run file sftp connector integration test
+      - name: run kudu connector integration test
         run: |
-          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-file-sftp-e2e -am -Pci
+          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-kudu-e2e -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
-  connector-redis-it:
-    needs: [ changes, sanity-check ]
-    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-redis-e2e')
+  amazonSqs-connector-it:
+    needs: [ changes, sanity-check, delayed-scheduling ]
+    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 'connector-amazonsqs-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 180
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
@@ -1430,21 +1445,21 @@ jobs:
           cache: 'maven'
       - name: free disk space
         run: tools/github/free_disk_space.sh
-      - name: run redis connector integration test
+      - name: run amazonsqs connector integration test
         run: |
-          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-redis-e2e -am -Pci
+          ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false 
-D"license.skipAddThirdParty"=true -D"skip.ui"=true --no-snapshot-updates -pl 
:connector-amazonsqs-e2e -am -Pci
         env:
           MAVEN_OPTS: -Xmx4096m
 
   connector-sensorsdata-it:
-    needs: [ changes, sanity-check ]
-    if: needs.changes.outputs.api == 'true' || 
contains(needs.changes.outputs.it-modules, 'connector-sensorsdata-e2e')
+    needs: [ changes, sanity-check, delayed-scheduling ]
+    if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 
'true' || contains(needs.changes.outputs.it-modules, 
'connector-sensorsdata-e2e')
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         java: [ '8', '11' ]
         os: [ 'ubuntu-latest' ]
-    timeout-minutes: 120
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK ${{ matrix.java }}
diff --git a/tools/github/ci_duration_analysis.py 
b/tools/github/ci_duration_analysis.py
new file mode 100644
index 0000000000..e89eb4e341
--- /dev/null
+++ b/tools/github/ci_duration_analysis.py
@@ -0,0 +1,160 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+# !/usr/bin/python
+import json
+import math
+import sys
+from datetime import datetime
+import os
+import requests
+from tabulate import tabulate
+
+
+def fetch_jobs_from_api(api_url, token=None):
+    try:
+        headers = {}
+        if token:
+            headers["Authorization"] = f"Bearer {token}"
+        response = requests.get(api_url, headers=headers)
+        response.raise_for_status()
+        return parse_jobs_from_data(response.json())
+    except requests.exceptions.RequestException as e:
+        raise ConnectionError(f"API request failed: {str(e)}")
+
+
+def _parse_single_job(job):
+    job_info = {
+        'job_id': job.get('id'),
+        'name': job.get('name'),
+        'status': job.get('status'),
+        'conclusion': job.get('conclusion'),
+    }
+
+    # Calculate the total time consumed
+    try:
+        start_time = datetime.fromisoformat(job['started_at'].replace('Z', 
'+00:00'))
+        end_time = datetime.fromisoformat(job['completed_at'].replace('Z', 
'+00:00'))
+        duration_seconds = (end_time - start_time).total_seconds()
+        job_info['started_at'] = start_time.strftime("%Y-%m-%d %H:%M:%S")
+        job_info['total_duration_seconds'] = round(duration_seconds, 2)
+        job_info['total_duration_minutes'] = round(duration_seconds / 60, 2)
+    except (KeyError, ValueError) as e:
+        job_info['total_duration_seconds'] = None
+        job_info['total_duration_minutes'] = None
+        print(f"Warning: Failed to parse the time of task {job.get('id')} - 
{str(e)}")
+
+    # Parsing step information
+    job_info['steps'] = []
+    for step in job.get('steps', []):
+        try:
+            step_start = 
datetime.fromisoformat(step['started_at'].replace('Z', '+00:00'))
+            step_end = 
datetime.fromisoformat(step['completed_at'].replace('Z', '+00:00'))
+            step_duration = (step_end - step_start).total_seconds()
+
+            job_info['steps'].append({
+                'name': step.get('name'),
+                'number': step.get('number'),
+                'status': step.get('status'),
+                'conclusion': step.get('conclusion'),
+                'duration_seconds': round(step_duration, 2)
+            })
+        except (KeyError, ValueError) as e:
+            print(f"Warning: Failed to parse time for step {step.get('name')} 
- {str(e)}")
+
+    return job_info
+
+
+def _parse_jobs_batch(data):
+    if 'jobs' not in data:
+        raise KeyError("The data does not contain the 'jobs' field.")
+    jobs = data['jobs']
+    jobs.sort(
+        key=lambda x: datetime.fromisoformat(x["started_at"].replace("Z", 
"+00:00"))
+    )
+    return [_parse_single_job(job) for job in jobs]
+
+
+def parse_jobs_from_file(file_path):
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not exist: {file_path}")
+
+    with open(file_path, 'r', encoding='utf-8') as f:
+        try:
+            data = json.load(f)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"JSON parsing error: {str(e)}")
+
+    return _parse_jobs_batch(data)
+
+
+def parse_jobs_from_data(data):
+    return _parse_jobs_batch(data)
+
+
+def print_job_summary(parsed_jobs):
+    success_jobs = [job for job in parsed_jobs if job['conclusion'] == 
'success']
+    print(f"A total of {len(parsed_jobs)} tasks were parsed, among which 
{len(success_jobs)} tasks were successful (success)\n")
+
+    job_table_data = []
+    headers = ["Sequence Number", "Job Name", "Job ID", "CONCLUSION", 
"STARTED_AT",
+               "Total Duration (Seconds)", "Total Duration (Minutes)", 
"Expected"]
+
+    for i, job in enumerate(success_jobs, 1):
+        remainder = job['total_duration_minutes'] % 20
+        cost = (math.ceil(job['total_duration_minutes'] / 20) + 1) * 20 if 
remainder >= 10 else \
+            (math.ceil(job['total_duration_minutes'] / 20)) * 20
+        job_table_data.append([
+            i, job['name'], job['job_id'], job['conclusion'],job['started_at'],
+            job['total_duration_seconds'], job['total_duration_minutes'], cost
+        ])
+
+    print("Overview of the Task:")
+    print(tabulate(job_table_data, headers=headers, tablefmt="pipe"))
+    print()
+
+    for i, job in enumerate(success_jobs, 1):
+        print(f"\nTask {i}: The 3 steps with the longest duration for 
{job['name']}:")
+        if job['steps']:
+            sorted_steps = sorted(job['steps'], key=lambda x: 
x['duration_seconds'], reverse=True)
+            top_steps = sorted_steps[:3]
+            step_table_data = [
+                [s['name'], s['number'], s['status'], s['conclusion'], 
s['duration_seconds']]
+                for s in top_steps
+            ]
+            print(tabulate(step_table_data, headers=["Step name", "No", 
"Status", "Conclusion", "Total Duration (seconds)"], tablefmt="pipe"))
+        else:
+            print("There is no step information for this task.")
+        print("\n" + "-" * 80)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) not in (3, 4):
+        print("Usage: python ci_duration_analysis.py <owner> <run_id> [token]")
+        print("Usage: <owner> is a required parameter, example: hawk9821")
+        print("Usage: <run_id> is a required parameter, example: 18013073919")
+        print("Usage: [token] is a non-mandatory parameter. Token acquisition 
method: GitHub -> Setting -> Developer Settings -> Personal access tokens -> 
Tokens (classic)")
+        sys.exit(1)
+    owner = sys.argv[1]
+    run_id = sys.argv[2]
+    api_url = 
f'https://api.github.com/repos/{owner}/seatunnel/actions/runs/{run_id}/jobs?page=1&per_page=100'
+    token = sys.argv[3] if len(sys.argv) == 4 else None
+
+    try:
+        jobs_data = fetch_jobs_from_api(api_url, token)
+        print_job_summary(jobs_data)
+
+    except Exception as e:
+        print(f"Processing failed: {str(e)}")
\ No newline at end of file
diff --git a/tools/github/free_disk_space.sh b/tools/github/free_disk_space.sh
index f9eee621f0..18d6f168da 100755
--- a/tools/github/free_disk_space.sh
+++ b/tools/github/free_disk_space.sh
@@ -16,33 +16,109 @@
 # limitations under the License.
 #
 
+
+# Print a short report for one cleanup operation: the elapsed wall-clock time
+# and the change in available space on / since the operation started.
+#   $1 - operation label shown in the report
+#   $2 - start time in epoch seconds (as produced by `date +%s`)
+#   $3 - available space on / before the operation, as printed by
+#        get_available_space (treated as KiB by the GB conversion below)
+log_time_and_space() {
+    local operation=$1
+    local start_time=$2
+    local start_space=$3
+    local rule="------------------------------------------------------------------------------"
+    # Declare first, assign afterwards, so a failing command substitution is
+    # not masked by the exit status of `local`.
+    local end_time duration end_space freed_space freed_gb
+    end_time=$(date +%s)
+    duration=$((end_time - start_time))
+    end_space=$(get_available_space)
+    freed_space=$((end_space - start_space))
+    # awk's printf keeps the leading zero for values below 1 (bc prints ".50")
+    # and avoids depending on bc being installed.
+    freed_gb=$(awk -v kb="$freed_space" 'BEGIN { printf "%.2f", kb / 1024 / 1024 }')
+    echo "$rule"
+    echo "Operation: $operation"
+    echo "Time taken: $duration seconds"
+    echo "Freed disk space: $freed_gb GB"
+    echo "$rule"
+    echo
+}
+
+# Print the fourth column of the last line of `df -P /`, i.e. the space
+# currently available on the root filesystem.
+get_available_space() {
+    df -P / | awk '{ avail = $4 } END { print avail }'
+}
+
 echo 
"=============================================================================="
 echo "Freeing up disk space on CI system"
 echo 
"=============================================================================="
+df -h
 
+# List 100 largest packages (informational only; this step is not timed, so
+# no start_time/start_space snapshot is taken here)
 echo "Listing 100 largest packages"
-dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100
-df -h
-echo "Removing large packages"
-sudo apt-get remove -y '^dotnet-.*'
-sudo apt-get remove -y '^llvm-.*'
-sudo apt-get remove -y 'php.*'
-sudo apt-get remove -y '^mongodb-.*'
-sudo apt-get remove -y '^mysql-.*'
-sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable 
firefox powershell mono-devel libgl1-mesa-dri
-sudo apt-get autoremove -y
-sudo apt-get clean
-echo "Disk space before cleanup:"
-df -h
-echo "Removing large directories"
-sudo rm -rf /usr/share/dotnet/
-sudo rm -rf /usr/local/graalvm/
-sudo rm -rf /usr/local/.ghcup/
-sudo rm -rf /usr/local/share/powershell
-sudo rm -rf /usr/local/share/chromium
-sudo rm -rf /usr/local/share/boost
-sudo rm -rf /usr/local/lib/android
-sudo rm -rf /usr/local/lib/node_modules
-sudo rm -rf /opt/hostedtoolcache/CodeQL
-sudo rm -rf /opt/ghc
+dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -nr | head -n 100
+
+# Purge whole package families one at a time, in the order chosen by the
+# original author (the quicker removals come first), and report the elapsed
+# time and reclaimed space after each one.
+# Each entry is "<label>|<apt-get package pattern>".
+package_families=(
+    "MongoDB|^mongodb-.*"
+    ".NET|^dotnet-.*"
+    "LLVM|^llvm-.*"
+    "MySQL|^mysql-.*"
+)
+for family in "${package_families[@]}"; do
+    family_label=${family%%|*}
+    family_pattern=${family#*|}
+    start_time=$(date +%s)
+    start_space=$(get_available_space)
+    sudo apt-get remove -y --purge "$family_pattern" > /dev/null 2>&1
+    log_time_and_space "Remove $family_label related packages" $start_time $start_space
+done
+
+# Clean up large packages: purge each listed package that is actually
+# installed and report the time taken and space reclaimed.
+packages_to_check=(
+    ruby3.2-doc
+    powershell
+    azure-cli
+    google-cloud-sdk
+    hhvm
+    google-chrome-stable
+    firefox
+    mono-devel
+    libgl1-mesa-dri
+)
+for package in "${packages_to_check[@]}"; do
+  start_time=$(date +%s)
+  start_space=$(get_available_space)
+  # Query dpkg for this exact package name. Grepping the whole `dpkg -l`
+  # listing also matched substrings of other package names and descriptions,
+  # as well as packages that were already removed but not purged.
+  if dpkg-query -W -f='${Status}' "$package" 2>/dev/null | grep -q "ok installed"; then
+      sudo apt-get -o APT::Install-Suggests="false" remove -y --purge "$package" > /dev/null 2>&1
+      log_time_and_space "Remove $package packages" "$start_time" "$start_space"
+  fi
+done
+
+# Drop packages that are no longer needed and empty the apt package cache;
+# both commands run silently and are reported as a single operation.
+start_time=$(date +%s)
+start_space=$(get_available_space)
+{
+    sudo apt-get autoremove -y
+    sudo apt-get clean
+} > /dev/null 2>&1
+log_time_and_space "Remove apt cache" $start_time $start_space
+
+# Clean up Android directories
+#start_time=$(date +%s)
+#start_space=$(get_available_space)
+#sudo nohup rm -rf /usr/local/lib/android > /dev/null 2>&1 &
+#log_time_and_space "Remove android directories" $start_time $start_space
+
+# Clean up large directories
+directories=(
+    "/usr/local/.ghcup/"
+    "/usr/share/dotnet/"
+    "/usr/local/graalvm/"
+    "/usr/local/share/powershell"
+    "/usr/local/share/chromium"
+    "/usr/local/share/boost"
+    "/usr/local/lib/node_modules"
+    "/opt/hostedtoolcache/CodeQL"
+    "/opt/ghc"
+)
+start_time=$(date +%s)
+start_space=$(get_available_space)
+# A single root shell starts one background job per directory (deleting it
+# only if it exists) and waits for all of them before the result is logged.
+sudo bash -c '
+    for target in "$@"; do
+        if [ -d "$target" ]; then
+            rm -rf "$target"
+        fi &
+    done
+    wait
+' _ "${directories[@]}"
+log_time_and_space "Remove other large directories" $start_time $start_space
+
+# Closing banner; the final disk usage is printed right after it.
+closing_rule="=============================================================================="
+printf '%s\n' "$closing_rule" "Disk cleanup completed" "$closing_rule"
 df -h
\ No newline at end of file


Reply via email to