This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 0289bfe6a improve: supports user-defined scale_factor for dbgen (#2741)
0289bfe6a is described below

commit 0289bfe6a98bdae371eee29d3f257b173ddb4437
Author: xudong.w <[email protected]>
AuthorDate: Sat Jun 18 21:11:23 2022 +0800

    improve: supports user-defined scale_factor for dbgen (#2741)
---
 benchmarks/README.md          |  3 ++-
 benchmarks/entrypoint.sh      |  2 +-
 benchmarks/run.sh             | 26 --------------------------
 benchmarks/tpch-gen.sh        |  5 +++--
 benchmarks/tpchgen.dockerfile |  2 +-
 5 files changed, 7 insertions(+), 31 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index c7e7f1264..ba68b507d 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -33,7 +33,8 @@ TPC-H data can be generated using the `tpch-gen.sh` script, which creates a Dock
 generator.
 
 ```bash
-./tpch-gen.sh
+# scale_factor: scale of the database population. scale 1.0 represents ~1 GB of data
+./tpch-gen.sh <scale_factor>
 ```
 
 Data will be generated into the `data` subdirectory and will not be checked in because this directory has been added
diff --git a/benchmarks/entrypoint.sh b/benchmarks/entrypoint.sh
index 71c04324a..312376fed 100755
--- a/benchmarks/entrypoint.sh
+++ b/benchmarks/entrypoint.sh
@@ -18,5 +18,5 @@
 
 set -e
 cd /tpch-dbgen
-./dbgen -vf -s 1
+./dbgen -vf -s $1
 mv *.tbl /data
\ No newline at end of file
diff --git a/benchmarks/run.sh b/benchmarks/run.sh
deleted file mode 100755
index 9cbbf0e16..000000000
--- a/benchmarks/run.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-set -e
-
-# This bash script is meant to be run inside the docker-compose environment. Check the README for instructions
-
-cd /
-for query in 1 3 5 6 7 10 12 13
-do
-  /tpch benchmark ballista --host ballista-scheduler --port 50050 --query $query --path /data --format tbl --iterations 1 --debug
-done
diff --git a/benchmarks/tpch-gen.sh b/benchmarks/tpch-gen.sh
index f10367746..0684fd97a 100755
--- a/benchmarks/tpch-gen.sh
+++ b/benchmarks/tpch-gen.sh
@@ -21,7 +21,8 @@
 pushd ..
 . ./dev/build-set-env.sh
 popd
-docker build -t datafusion-tpchgen:$DATAFUSION_VERSION -f tpchgen.dockerfile .
+
+docker build  -f tpchgen.dockerfile -t datafusion-tpchgen:$DATAFUSION_VERSION .
 
 # Generate data into the ./data directory if it does not already exist
 FILE=./data/supplier.tbl
@@ -29,6 +30,6 @@ if test -f "$FILE"; then
     echo "$FILE exists."
 else
   mkdir data 2>/dev/null
-  docker run -v `pwd`/data:/data -it --rm datafusion-tpchgen:$DATAFUSION_VERSION
+  docker run datafusion-tpchgen:$DATAFUSION_VERSION $1 -v `pwd`/data:/data -it --rm datafusion-tpchgen:$DATAFUSION_VERSION
   ls -l data
 fi
\ No newline at end of file
diff --git a/benchmarks/tpchgen.dockerfile b/benchmarks/tpchgen.dockerfile
index 69434708b..30acdead7 100644
--- a/benchmarks/tpchgen.dockerfile
+++ b/benchmarks/tpchgen.dockerfile
@@ -29,4 +29,4 @@ ADD entrypoint.sh /tpch-dbgen/
 
 VOLUME /data
 
-ENTRYPOINT [ "bash", "./entrypoint.sh" ]
+ENTRYPOINT [ "bash", "./entrypoint.sh" ] 

Reply via email to