This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f4b338d6f Run TPC-H SF10 during PR benchmarks (#9822)
7f4b338d6f is described below

commit 7f4b338d6f7e4434529b87ca9eb273c7fb8819ec
Author: Marko Grujic <[email protected]>
AuthorDate: Wed Mar 27 22:52:58 2024 +0100

    Run TPC-H SF10 during PR benchmarks (#9822)
    
    * Run TPC-H SF10 during PR benchmarks
    
    * Add memory benchmarks to the workflow
    
    Also distinguish the output file by the SF used.
---
 .github/workflows/pr_benchmarks.yml | 11 +++++++++--
 benchmarks/bench.sh                 |  4 ++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr_benchmarks.yml b/.github/workflows/pr_benchmarks.yml
index b7b85c9fcf..29d001783b 100644
--- a/.github/workflows/pr_benchmarks.yml
+++ b/.github/workflows/pr_benchmarks.yml
@@ -28,9 +28,10 @@ jobs:
           cd benchmarks
           mkdir data
           
-          # Setup the TPC-H data set with a scale factor of 10
+          # Setup the TPC-H data sets for scale factors 1 and 10
           ./bench.sh data tpch
-          
+          ./bench.sh data tpch10
+
       - name: Generate unique result names
         run: |
           echo "HEAD_LONG_SHA=$(git log -1 --format='%H')" >> "$GITHUB_ENV" 
@@ -44,6 +45,9 @@ jobs:
           cd benchmarks
 
           ./bench.sh run tpch
+          ./bench.sh run tpch_mem
+          ./bench.sh run tpch10
+          ./bench.sh run tpch_mem10
           
           # For some reason this step doesn't seem to propagate the env var down into the script
           if [ -d "results/HEAD" ]; then
@@ -64,6 +68,9 @@ jobs:
           cd benchmarks
 
           ./bench.sh run tpch
+          ./bench.sh run tpch_mem
+          ./bench.sh run tpch10
+          ./bench.sh run tpch_mem10
           
           echo ${{ github.event.issue.number }} > pr
           
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
index 039f4790ac..a724008927 100755
--- a/benchmarks/bench.sh
+++ b/benchmarks/bench.sh
@@ -314,7 +314,7 @@ run_tpch() {
     fi
     TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"
 
-    RESULTS_FILE="${RESULTS_DIR}/tpch.json"
+    RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
     echo "Running tpch benchmark..."
     $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --format parquet -o ${RESULTS_FILE}
@@ -329,7 +329,7 @@ run_tpch_mem() {
     fi
     TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"
 
-    RESULTS_FILE="${RESULTS_DIR}/tpch_mem.json"
+    RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
     echo "Running tpch_mem benchmark..."
     # -m means in memory

Reply via email to