This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0f7b1c58f8 minor: Allow to run TPCH bench for a specific query (#15467)
0f7b1c58f8 is described below

commit 0f7b1c58f833227263030f6069ce722abc8e8675
Author: Oleks V <[email protected]>
AuthorDate: Fri Mar 28 07:29:42 2025 -0700

    minor: Allow to run TPCH bench for a specific query (#15467)
    
    * minor: Allow to run TPCH bench for a specific query
---
 benchmarks/README.md | 18 +++++++++++++++++-
 benchmarks/bench.sh  |  9 ++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 39b4584bd2..8acaa298bd 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -46,6 +46,7 @@ script. Usage instructions can be found with:
 
 ```shell
 # show usage
+cd ./benchmarks/
 ./bench.sh
 ```
 
@@ -64,9 +65,24 @@ Create / download a specific dataset (TPCH)
 ```shell
 ./bench.sh data tpch
 ```
-
 Data is placed in the `data` subdirectory.
 
+## Running benchmarks
+
+Run benchmark for TPC-H dataset
+```shell
+./bench.sh run tpch
+```
+or for TPC-H dataset scale 10
+```shell
+./bench.sh run tpch10
+```
+
+To run for specific query, for example Q21
+```shell
+./bench.sh run tpch10 21
+```
+
 ## Select join algorithm
 The benchmark runs with `prefer_hash_join == true` by default, which enforces HASH join algorithm.
 To run TPCH benchmarks with join other than HASH:
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
index 43d3d78c7e..5be825eb0d 100755
--- a/benchmarks/bench.sh
+++ b/benchmarks/bench.sh
@@ -43,7 +43,7 @@ usage() {
 Orchestrates running benchmarks against DataFusion checkouts
 
 Usage:
-$0 data [benchmark]
+$0 data [benchmark] [query]
 $0 run [benchmark]
 $0 compare <branch1> <branch2>
 $0 venv
@@ -410,7 +410,9 @@ run_tpch() {
     RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
     echo "Running tpch benchmark..."
-    $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}"
+    # Optional query filter to run specific query
+    QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
+    $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}" $QUERY
 }
 
 # Runs the tpch in memory
@@ -425,8 +427,9 @@ run_tpch_mem() {
     RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
     echo "Running tpch_mem benchmark..."
+    QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
     # -m means in memory
-    $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}"
+    $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" $QUERY
 }
 
 # Runs the cancellation benchmark


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to