This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7f4b338d6f Run TPC-H SF10 during PR benchmarks (#9822)
7f4b338d6f is described below
commit 7f4b338d6f7e4434529b87ca9eb273c7fb8819ec
Author: Marko Grujic <[email protected]>
AuthorDate: Wed Mar 27 22:52:58 2024 +0100
Run TPC-H SF10 during PR benchmarks (#9822)
* Run TPC-H SF10 during PR benchmarks
* Add memory benchmarks to the workflow
Also distinguish the output file by the SF used.
---
.github/workflows/pr_benchmarks.yml | 11 +++++++++--
benchmarks/bench.sh | 4 ++--
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/pr_benchmarks.yml b/.github/workflows/pr_benchmarks.yml
index b7b85c9fcf..29d001783b 100644
--- a/.github/workflows/pr_benchmarks.yml
+++ b/.github/workflows/pr_benchmarks.yml
@@ -28,9 +28,10 @@ jobs:
cd benchmarks
mkdir data
- # Setup the TPC-H data set with a scale factor of 10
+ # Setup the TPC-H data sets for scale factors 1 and 10
./bench.sh data tpch
-
+ ./bench.sh data tpch10
+
- name: Generate unique result names
run: |
echo "HEAD_LONG_SHA=$(git log -1 --format='%H')" >> "$GITHUB_ENV"
@@ -44,6 +45,9 @@ jobs:
cd benchmarks
./bench.sh run tpch
+ ./bench.sh run tpch_mem
+ ./bench.sh run tpch10
+ ./bench.sh run tpch_mem10
# For some reason this step doesn't seem to propagate the env var down into the script
if [ -d "results/HEAD" ]; then
@@ -64,6 +68,9 @@ jobs:
cd benchmarks
./bench.sh run tpch
+ ./bench.sh run tpch_mem
+ ./bench.sh run tpch10
+ ./bench.sh run tpch_mem10
echo ${{ github.event.issue.number }} > pr
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
index 039f4790ac..a724008927 100755
--- a/benchmarks/bench.sh
+++ b/benchmarks/bench.sh
@@ -314,7 +314,7 @@ run_tpch() {
fi
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"
- RESULTS_FILE="${RESULTS_DIR}/tpch.json"
+ RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running tpch benchmark..."
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --format parquet -o ${RESULTS_FILE}
@@ -329,7 +329,7 @@ run_tpch_mem() {
fi
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"
- RESULTS_FILE="${RESULTS_DIR}/tpch_mem.json"
+ RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running tpch_mem benchmark..."
# -m means in memory