This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 0289bfe6a improve: supports user-defined scale_factor for dbgen (#2741)
0289bfe6a is described below
commit 0289bfe6a98bdae371eee29d3f257b173ddb4437
Author: xudong.w <[email protected]>
AuthorDate: Sat Jun 18 21:11:23 2022 +0800
improve: supports user-defined scale_factor for dbgen (#2741)
---
benchmarks/README.md | 3 ++-
benchmarks/entrypoint.sh | 2 +-
benchmarks/run.sh | 26 --------------------------
benchmarks/tpch-gen.sh | 5 +++--
benchmarks/tpchgen.dockerfile | 2 +-
5 files changed, 7 insertions(+), 31 deletions(-)
diff --git a/benchmarks/README.md b/benchmarks/README.md
index c7e7f1264..ba68b507d 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -33,7 +33,8 @@ TPC-H data can be generated using the `tpch-gen.sh` script, which creates a Dock
generator.
```bash
-./tpch-gen.sh
+# scale_factor: scale of the database population. scale 1.0 represents ~1 GB of data
+./tpch-gen.sh <scale_factor>
```
Data will be generated into the `data` subdirectory and will not be checked in because this directory has been added
diff --git a/benchmarks/entrypoint.sh b/benchmarks/entrypoint.sh
index 71c04324a..312376fed 100755
--- a/benchmarks/entrypoint.sh
+++ b/benchmarks/entrypoint.sh
@@ -18,5 +18,5 @@
set -e
cd /tpch-dbgen
-./dbgen -vf -s 1
+./dbgen -vf -s $1
mv *.tbl /data
\ No newline at end of file
diff --git a/benchmarks/run.sh b/benchmarks/run.sh
deleted file mode 100755
index 9cbbf0e16..000000000
--- a/benchmarks/run.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-set -e
-
-# This bash script is meant to be run inside the docker-compose environment. Check the README for instructions
-
-cd /
-for query in 1 3 5 6 7 10 12 13
-do
- /tpch benchmark ballista --host ballista-scheduler --port 50050 --query $query --path /data --format tbl --iterations 1 --debug
-done
diff --git a/benchmarks/tpch-gen.sh b/benchmarks/tpch-gen.sh
index f10367746..0684fd97a 100755
--- a/benchmarks/tpch-gen.sh
+++ b/benchmarks/tpch-gen.sh
@@ -21,7 +21,8 @@
pushd ..
. ./dev/build-set-env.sh
popd
-docker build -t datafusion-tpchgen:$DATAFUSION_VERSION -f tpchgen.dockerfile .
+
+docker build -f tpchgen.dockerfile -t datafusion-tpchgen:$DATAFUSION_VERSION .
# Generate data into the ./data directory if it does not already exist
FILE=./data/supplier.tbl
@@ -29,6 +30,6 @@ if test -f "$FILE"; then
echo "$FILE exists."
else
mkdir data 2>/dev/null
- docker run -v `pwd`/data:/data -it --rm datafusion-tpchgen:$DATAFUSION_VERSION
+ docker run datafusion-tpchgen:$DATAFUSION_VERSION $1 -v `pwd`/data:/data -it --rm datafusion-tpchgen:$DATAFUSION_VERSION
ls -l data
fi
\ No newline at end of file
diff --git a/benchmarks/tpchgen.dockerfile b/benchmarks/tpchgen.dockerfile
index 69434708b..30acdead7 100644
--- a/benchmarks/tpchgen.dockerfile
+++ b/benchmarks/tpchgen.dockerfile
@@ -29,4 +29,4 @@ ADD entrypoint.sh /tpch-dbgen/
VOLUME /data
-ENTRYPOINT [ "bash", "./entrypoint.sh" ]
+ENTRYPOINT [ "bash", "./entrypoint.sh" ]