This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 724a8e8d5b spelling etc (#9035)
724a8e8d5b is described below
commit 724a8e8d5bbe32008868a52b362eff8806ccec76
Author: Kirk Mitchener <[email protected]>
AuthorDate: Mon Jan 29 15:47:36 2024 -0500
spelling etc (#9035)
---
benchmarks/bench.sh | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
index ccaf26eb79..deab8e9c4e 100755
--- a/benchmarks/bench.sh
+++ b/benchmarks/bench.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -53,14 +53,14 @@ Examples:
./bench.sh data
# Run the 'tpch' benchmark on the datafusion checkout in /source/arrow-datafusion
-DATAFASION_DIR=/source/arrow-datafusion ./bench.sh run tpch
+DATAFUSION_DIR=/source/arrow-datafusion ./bench.sh run tpch
**********
* Commands
**********
-data: Generates data needed for benchmarking
+data: Generates or downloads data needed for benchmarking
run: Runs the named benchmark
-compare: Comares results from benchmark runs
+compare: Compares results from benchmark runs
**********
* Benchmarks
@@ -81,7 +81,7 @@ clickbench_extended: ClickBench "inspired" queries against a single parquet (
**********
DATA_DIR directory to store datasets
CARGO_COMMAND command that runs the benchmark binary
-DATAFASION_DIR directory to use (default $DATAFUSION_DIR)
+DATAFUSION_DIR directory to use (default $DATAFUSION_DIR)
"
exit 1
}
@@ -239,6 +239,9 @@ main() {
BRANCH2=$2
compare_benchmarks
;;
+ "")
+ usage
+ ;;
*)
echo "Error: unknown command: $COMMAND"
usage
@@ -401,9 +404,9 @@ run_clickbench_1() {
$CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path
"${DATA_DIR}/hits.parquet" --queries-path
"${SCRIPT_DIR}/queries/clickbench/queries.sql" -o ${RESULTS_FILE}
}
- # Runs the clickbench benchmark with a single large parquet file
+ # Runs the clickbench benchmark with the partitioned parquet files
run_clickbench_partitioned() {
- RESULTS_FILE="${RESULTS_DIR}/clickbench_1.json"
+ RESULTS_FILE="${RESULTS_DIR}/clickbench_partitioned.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running clickbench (partitioned, 100 files) benchmark..."
$CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path
"${DATA_DIR}/hits_partitioned" --queries-path
"${SCRIPT_DIR}/queries/clickbench/queries.sql" -o ${RESULTS_FILE}