This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9cceddaeb3 [tools](ssb and tpch)optimize tools (#11975)
9cceddaeb3 is described below
commit 9cceddaeb3544f03372dfea3f63e886cf05c9377
Author: Dongyang Li <[email protected]>
AuthorDate: Wed Aug 24 14:29:38 2022 +0800
[tools](ssb and tpch)optimize tools (#11975)
---
tools/ssb-tools/README.md | 24 ++--
tools/ssb-tools/{ => bin}/build-ssb-dbgen.sh | 21 ++-
tools/ssb-tools/{ => bin}/create-ssb-tables.sh | 19 ++-
tools/ssb-tools/{ => bin}/gen-ssb-data.sh | 20 +--
.../load-ssb-data.sh} | 107 +++++++++++++--
tools/ssb-tools/{ => bin}/run-ssb-flat-queries.sh | 22 +--
tools/ssb-tools/{ => bin}/run-ssb-queries.sh | 17 ++-
tools/ssb-tools/{ => conf}/doris-cluster.conf | 0
tools/ssb-tools/create-ssb-flat-table.sh | 101 --------------
tools/ssb-tools/ddl/create-ssb-flat-table.sql | 2 +-
tools/ssb-tools/load-ssb-dimension-data.sh | 86 ------------
tools/ssb-tools/load-ssb-fact-data.sh | 147 ---------------------
tools/tpch-tools/README.md | 12 +-
tools/tpch-tools/{ => bin}/build-tpch-dbgen.sh | 13 +-
tools/tpch-tools/{ => bin}/create-tpch-tables.sh | 9 +-
tools/tpch-tools/{ => bin}/gen-tpch-data.sh | 32 ++---
tools/tpch-tools/{ => bin}/load-tpch-data.sh | 95 +++++++------
tools/tpch-tools/{ => bin}/run-tpch-queries.sh | 24 ++--
tools/tpch-tools/{ => conf}/doris-cluster.conf | 2 +-
tools/tpch-tools/{ => ddl}/create-tpch-tables.sql | 0
20 files changed, 266 insertions(+), 487 deletions(-)
diff --git a/tools/ssb-tools/README.md b/tools/ssb-tools/README.md
index 5cb2a8e39e..6d8e85e97d 100644
--- a/tools/ssb-tools/README.md
+++ b/tools/ssb-tools/README.md
@@ -20,27 +20,21 @@ under the License.
# Usage
These scripts are used to make ssb and ssb flat test.
- The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT ...',
- which means ssb test steps 1 to 4 should have been done before loading ssb flat data.
+ The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT ...'.
## ssb test, follow the steps below:
### 1. build ssb dbgen tool.
- ./build-ssb-dbgen.sh
+ ./bin/build-ssb-dbgen.sh
### 2. generate ssb data. use -h for more information.
- ./gen-ssb-data.sh -s 1
-### 3. create ssb tables. modify `doris-cluster.conf` to specify doris info,
then run script below.
- ./create-ssb-tables.sh
+ ./bin/gen-ssb-data.sh -s 1
+### 3. create ssb tables. modify `conf/doris-cluster.conf` to specify Doris
cluster info, then run script below.
+ ./bin/create-ssb-tables.sh
### 4. load ssb data. use -h for help.
- ./load-ssb-dimension-data.sh
- ./load-ssb-fact-data.sh
+ ./bin/load-ssb-data.sh
### 5. run ssb queries.
- ./run-ssb-queries.sh
+ ./bin/run-ssb-queries.sh
## ssb flat test, follow the steps below:
### 1. prepare ssb data, which means ssb test steps 1 to 4 have been done.
-### 2. create ssb flat table in the same database of ssb tables.
- ./create-ssb-flat-table.sh
-### 3. load ssb flat data.
- ./load-ssb-flat-data.sh
-### 4. run ssb flat queries.
- ./run-ssb-flat-queries.sh
+### 2. run ssb flat queries.
+ ./bin/run-ssb-flat-queries.sh
diff --git a/tools/ssb-tools/build-ssb-dbgen.sh
b/tools/ssb-tools/bin/build-ssb-dbgen.sh
similarity index 80%
rename from tools/ssb-tools/build-ssb-dbgen.sh
rename to tools/ssb-tools/bin/build-ssb-dbgen.sh
index 59af467441..56fd2056e4 100755
--- a/tools/ssb-tools/build-ssb-dbgen.sh
+++ b/tools/ssb-tools/bin/build-ssb-dbgen.sh
@@ -19,14 +19,17 @@
##############################################################
# This script is used to build ssb-dbgen
# ssb-dbgen's source code is from https://github.com/electrum/ssb-dbgen.git
-# Usage:
+# Usage:
# sh build-ssb-dbgen.sh
##############################################################
set -eo pipefail
-ROOT=`dirname "$0"`
-ROOT=`cd "$ROOT"; pwd`
+ROOT=$(dirname "$0")
+ROOT=$(
+ cd "$ROOT"
+ pwd
+)
CURDIR=${ROOT}
SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
@@ -35,17 +38,23 @@ SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
if [[ -d $SSB_DBGEN_DIR ]]; then
echo "Dir $CURDIR/ssb-dbgen/ already exists. No need to download."
echo "If you want to download ssb-dbgen again, please delete this dir
first."
+ exit 1
else
- curl
https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz
| tar xz -C $CURDIR/
+ cd "$CURDIR"
+ wget
https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz
&& tar -xzvf ssb-dbgen-linux.tar.gz -C $CURDIR/
fi
# compile ssb-dbgen
-cd $SSB_DBGEN_DIR/ && make
+cd "$SSB_DBGEN_DIR/" && make
cd -
# check
if [[ -f $CURDIR/ssb-dbgen/dbgen ]]; then
- echo "Build succeed! Run $CURDIR/ssb-dbgen/dbgen -h"
+ echo -e "
+################
+Build succeed!
+################
+Run $CURDIR/ssb-dbgen/dbgen -h"
exit 0
else
echo "Build failed!"
diff --git a/tools/ssb-tools/create-ssb-tables.sh
b/tools/ssb-tools/bin/create-ssb-tables.sh
similarity index 78%
rename from tools/ssb-tools/create-ssb-tables.sh
rename to tools/ssb-tools/bin/create-ssb-tables.sh
index c7b2dd2af4..41177843e2 100755
--- a/tools/ssb-tools/create-ssb-tables.sh
+++ b/tools/ssb-tools/bin/create-ssb-tables.sh
@@ -29,19 +29,20 @@ ROOT=$(
)
CURDIR=${ROOT}
-DDL="${CURDIR}/ddl/create-ssb-tables.sql"
+SSB_DDL="${CURDIR}/../ddl/create-ssb-tables.sql"
+SSB_FLAT_DDL="${CURDIR}/../ddl/create-ssb-flat-table.sql"
usage() {
echo "
This script is used to create SSB tables,
-will use mysql client to connect Doris server which is specified in
doris-cluster.conf file.
+will use mysql client to connect Doris server which is specified in
conf/doris-cluster.conf file.
Usage: $0
"
exit 1
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'h' \
-- "$@")
@@ -86,7 +87,8 @@ check_prerequest() {
check_prerequest "mysql --version" "mysql"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
@@ -95,7 +97,10 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
-mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE
DATABASE IF NOT EXISTS $DB"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT
EXISTS $DB"
-echo "Run DDL from ${DDL}"
-mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <${DDL}
+echo "Run DDL from $SSB_DDL"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_DDL"
+
+echo "Run DDL from $SSB_FLAT_DDL"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_FLAT_DDL"
diff --git a/tools/ssb-tools/gen-ssb-data.sh
b/tools/ssb-tools/bin/gen-ssb-data.sh
similarity index 87%
rename from tools/ssb-tools/gen-ssb-data.sh
rename to tools/ssb-tools/bin/gen-ssb-data.sh
index 929a9e1eea..afc9e8bf68 100755
--- a/tools/ssb-tools/gen-ssb-data.sh
+++ b/tools/ssb-tools/bin/gen-ssb-data.sh
@@ -48,7 +48,7 @@ Usage: $0 <options>
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'hs:c:' \
-- "$@")
@@ -107,24 +107,24 @@ if [[ -d $SSB_DATA_DIR/ ]]; then
exit 1
fi
-mkdir $SSB_DATA_DIR/
+mkdir "$SSB_DATA_DIR/"
# gen data
-cd $SSB_DBGEN_DIR
+cd "$SSB_DBGEN_DIR"
echo "Begin to generate data for table: customer"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T c
echo "Begin to generate data for table: part"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T p
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T p
echo "Begin to generate data for table: supplier"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T s
echo "Begin to generate data for table: date"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T d
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T d
echo "Begin to generate data for table: lineorder"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T l -C $PARALLEL
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T l -C "$PARALLEL"
cd -
# move data to $SSB_DATA_DIR
-mv $SSB_DBGEN_DIR/*.tbl* $SSB_DATA_DIR/
+mv "$SSB_DBGEN_DIR"/*.tbl* "$SSB_DATA_DIR/"
# check data
-du -sh $SSB_DATA_DIR/*.tbl*
+du -sh "$SSB_DATA_DIR"/*.tbl*
diff --git a/tools/ssb-tools/load-ssb-flat-data.sh
b/tools/ssb-tools/bin/load-ssb-data.sh
similarity index 57%
rename from tools/ssb-tools/load-ssb-flat-data.sh
rename to tools/ssb-tools/bin/load-ssb-data.sh
index 813400adbe..f6be6bbe72 100755
--- a/tools/ssb-tools/load-ssb-flat-data.sh
+++ b/tools/ssb-tools/bin/load-ssb-data.sh
@@ -30,30 +30,46 @@ ROOT=$(
)
CURDIR=${ROOT}
+SSB_DATA_DIR="$CURDIR/ssb-data/"
usage() {
echo "
-The ssb flat data actually comes from ssb tables, and will load by 'INSERT
INTO ... SELECT ...'
Usage: $0 <options>
+ Optional options:
+ -c parallelism to load data of lineorder table, default is 5.
+
+ Eg.
+ $0 load data using default value.
+ $0 -c 10 load lineorder table data using parallelism 10.
"
exit 1
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
- -o 'h' \
+ -o 'hc:' \
-- "$@")
eval set -- "$OPTS"
+PARALLEL=5
HELP=0
+
+if [ $# == 0 ]; then
+ usage
+fi
+
while true; do
case "$1" in
-h)
HELP=1
shift
;;
+ -c)
+ PARALLEL=$2
+ shift 2
+ ;;
--)
shift
break
@@ -70,6 +86,14 @@ if [[ ${HELP} -eq 1 ]]; then
exit
fi
+echo "Parallelism: $PARALLEL"
+
+# check if ssb-data exists
+if [[ ! -d $SSB_DATA_DIR/ ]]; then
+ echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
+ exit 1
+fi
+
check_prerequest() {
local CMD=$1
local NAME=$2
@@ -80,9 +104,9 @@ check_prerequest() {
}
run_sql() {
- sql="$@"
- echo $sql
- mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e
"$@"
+ sql="$*"
+ echo "$sql"
+ mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$@"
}
load_lineitem_flat() {
@@ -165,7 +189,9 @@ ON (p.p_partkey = l.lo_partkey);
check_prerequest "curl --version" "curl"
# load lineorder
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_HTTP_PORT: $FE_HTTP_PORT"
@@ -173,25 +199,78 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
-echo 'Loading data for table: lineorder_flat'
+date
+echo "==========Start to load data into ssb tables=========="
+echo 'Loading data for table: part'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy"
\
+ -T "$SSB_DATA_DIR"/part.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load
-echo '============================================'
+echo 'Loading data for table: date'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy"
\
+ -T "$SSB_DATA_DIR"/date.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/dates/_stream_load
+
+echo 'Loading data for table: supplier'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
+ -T "$SSB_DATA_DIR"/supplier.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load
+
+echo 'Loading data for table: customer'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use"
\
+ -T "$SSB_DATA_DIR"/customer.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load
+
+echo "Loading data for table: lineorder, with $PARALLEL parallel"
+function load() {
+ echo "$@"
+ curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy"
\
+ -T "$@"
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineorder/_stream_load
+}
+
+# set parallelism
+[ -e /tmp/fd1 ] || mkfifo /tmp/fd1
+exec 3<>/tmp/fd1
+rm -rf /tmp/fd1
+
+for ((i = 1; i <= PARALLEL; i++)); do
+ echo >&3
+done
+
+date
+for file in "$SSB_DATA_DIR"/lineorder.tbl.*; do
+ read -r -u3
+ {
+ load "$file"
+ echo >&3
+ } &
+done
+
+# wait for child thread finished
+wait
+date
+
+echo "==========Start to insert data into ssb flat table=========="
echo "change some session variables before load, and then restore after load."
origin_query_timeout=$(run_sql 'select @@query_timeout;' | sed -n '3p')
origin_parallel=$(run_sql 'select @@parallel_fragment_exec_instance_num;' |
sed -n '3p')
# set parallel_fragment_exec_instance_num=1, loading maybe slow but stable.
run_sql "set global query_timeout=7200;"
run_sql "set global parallel_fragment_exec_instance_num=1;"
-
echo '============================================'
-echo $(date)
+date
load_lineitem_flat
-
+date
echo '============================================'
echo "restore session variables"
run_sql "set global query_timeout=${origin_query_timeout};"
run_sql "set global parallel_fragment_exec_instance_num=${origin_parallel};"
-
echo '============================================'
-echo $(date)
+
echo "DONE."
diff --git a/tools/ssb-tools/run-ssb-flat-queries.sh
b/tools/ssb-tools/bin/run-ssb-flat-queries.sh
similarity index 80%
rename from tools/ssb-tools/run-ssb-flat-queries.sh
rename to tools/ssb-tools/bin/run-ssb-flat-queries.sh
index e623aed770..6db5ba3303 100755
--- a/tools/ssb-tools/run-ssb-flat-queries.sh
+++ b/tools/ssb-tools/bin/run-ssb-flat-queries.sh
@@ -28,8 +28,8 @@ ROOT=$(
pwd
)
-CURDIR=${ROOT}
-QUERIES_DIR=$CURDIR/ssb-flat-queries
+CURDIR="${ROOT}"
+QUERIES_DIR="$CURDIR/../ssb-flat-queries"
usage() {
echo "
@@ -41,7 +41,7 @@ Usage: $0
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'h' \
-- "$@")
@@ -86,7 +86,8 @@ check_prerequest() {
check_prerequest "mysqlslap --version" "mysqlslap"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
@@ -96,8 +97,8 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
pre_set() {
- echo $@
- mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e
"$@"
+ echo "$@"
+ mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
}
pre_set "set global enable_vectorized_engine=1;"
@@ -105,14 +106,15 @@ pre_set "set global
parallel_fragment_exec_instance_num=8;"
pre_set "set global exec_mem_limit=8G;"
pre_set "set global batch_size=4096;"
echo '============================================'
-pre_set "show variables"
+pre_set "show variables;"
+echo '============================================'
+pre_set "show table status;"
echo '============================================'
for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1'
'4.2' '4.3'; do
# First run to prevent the affect of cold start
- mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D $DB
<$QUERIES_DIR/q${i}.sql >/dev/null 2>&1
+ mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D "$DB"
<"$QUERIES_DIR"/q${i}.sql >/dev/null 2>&1
# Then run 3 times and takes the average time
- res=$(mysqlslap -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT
--create-schema=$DB --query=$QUERIES_DIR/q${i}.sql -F '\r' -i 3 | sed -n '2p' |
cut -d ' ' -f 9,10)
+ res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER"
--create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n
'2p' | cut -d ' ' -f 9,10)
echo "q$i: $res"
- sleep 1
done
diff --git a/tools/ssb-tools/run-ssb-queries.sh
b/tools/ssb-tools/bin/run-ssb-queries.sh
similarity index 84%
rename from tools/ssb-tools/run-ssb-queries.sh
rename to tools/ssb-tools/bin/run-ssb-queries.sh
index 106c6e35e8..5a1db9a57e 100755
--- a/tools/ssb-tools/run-ssb-queries.sh
+++ b/tools/ssb-tools/bin/run-ssb-queries.sh
@@ -29,7 +29,7 @@ ROOT=$(
)
CURDIR=${ROOT}
-QUERIES_DIR=$CURDIR/ssb-queries
+QUERIES_DIR=$CURDIR/../ssb-queries
usage() {
echo "
@@ -41,7 +41,7 @@ Usage: $0
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'h' \
-- "$@")
@@ -86,7 +86,8 @@ check_prerequest() {
check_prerequest "mysqlslap --version" "mysql slap"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
@@ -96,8 +97,8 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
pre_set() {
- echo $@
- mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e
"$@"
+ echo "$@"
+ mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
}
pre_set "set global enable_vectorized_engine=1;"
@@ -108,11 +109,13 @@ pre_set "set global enable_projection=true;"
pre_set "set global runtime_filter_mode=global;"
# pre_set "set global enable_cost_based_join_reorder=1"
echo '============================================'
-pre_set "show variables"
+pre_set "show variables;"
+echo '============================================'
+pre_set "show table status;"
echo '============================================'
for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1'
'4.2' '4.3'; do
# Each query is executed 3 times and takes the average time
- res=$(mysqlslap -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT
--create-schema=$DB --query=$QUERIES_DIR/q${i}.sql -F '\r' -i 3 | sed -n '2p' |
cut -d ' ' -f 9,10)
+ res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER"
--create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n
'2p' | cut -d ' ' -f 9,10)
echo "q$i: $res"
done
diff --git a/tools/ssb-tools/doris-cluster.conf
b/tools/ssb-tools/conf/doris-cluster.conf
similarity index 100%
rename from tools/ssb-tools/doris-cluster.conf
rename to tools/ssb-tools/conf/doris-cluster.conf
diff --git a/tools/ssb-tools/create-ssb-flat-table.sh
b/tools/ssb-tools/create-ssb-flat-table.sh
deleted file mode 100755
index 32778a906d..0000000000
--- a/tools/ssb-tools/create-ssb-flat-table.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-##############################################################
-# This script is used to create ssb flat table
-##############################################################
-
-set -eo pipefail
-
-ROOT=$(dirname "$0")
-ROOT=$(
- cd "$ROOT"
- pwd
-)
-
-CURDIR=${ROOT}
-DDL="${CURDIR}/ddl/create-ssb-flat-table.sql"
-
-usage() {
- echo "
-This script is used to create ssb flat table,
-will use mysql client to connect Doris server which is specified in
doris-cluster.conf file.
-Usage: $0
- "
- exit 1
-}
-
-OPTS=$(getopt \
- -n $0 \
- -o '' \
- -o 'h' \
- -- "$@")
-
-eval set -- "$OPTS"
-HELP=0
-
-if [ $# == 0 ]; then
- usage
-fi
-
-while true; do
- case "$1" in
- -h)
- HELP=1
- shift
- ;;
- --)
- shift
- break
- ;;
- *)
- echo "Internal error"
- exit 1
- ;;
- esac
-done
-
-if [[ ${HELP} -eq 1 ]]; then
- usage
- exit
-fi
-
-check_prerequest() {
- local CMD=$1
- local NAME=$2
- if ! $CMD; then
- echo "$NAME is missing. This script depends on mysql to create tables in
Doris."
- exit 1
- fi
-}
-
-check_prerequest "mysql --version" "mysql"
-
-source $CURDIR/doris-cluster.conf
-export MYSQL_PWD=$PASSWORD
-
-echo "FE_HOST: $FE_HOST"
-echo "FE_QUERY_PORT: $FE_QUERY_PORT"
-echo "USER: $USER"
-echo "PASSWORD: $PASSWORD"
-echo "DB: $DB"
-
-mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE
DATABASE IF NOT EXISTS $DB"
-
-echo "Run DDL from ${DDL}"
-mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <${DDL}
diff --git a/tools/ssb-tools/ddl/create-ssb-flat-table.sql
b/tools/ssb-tools/ddl/create-ssb-flat-table.sql
index b0a4adf817..b1e1681a94 100644
--- a/tools/ssb-tools/ddl/create-ssb-flat-table.sql
+++ b/tools/ssb-tools/ddl/create-ssb-flat-table.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-CREATE TABLE `lineorder_flat` (
+CREATE TABLE IF NOT EXISTS `lineorder_flat` (
`LO_ORDERDATE` int(11) NOT NULL COMMENT "",
`LO_ORDERKEY` int(11) NOT NULL COMMENT "",
`LO_LINENUMBER` tinyint(4) NOT NULL COMMENT "",
diff --git a/tools/ssb-tools/load-ssb-dimension-data.sh
b/tools/ssb-tools/load-ssb-dimension-data.sh
deleted file mode 100755
index 81d3f900ed..0000000000
--- a/tools/ssb-tools/load-ssb-dimension-data.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-##############################################################
-# This script is used to load generated ssb data set to Doris
-# Only for 4 dimension tables: customer, part, supplier and date.
-# Usage:
-# sh load-dimension-data.sh
-##############################################################
-
-set -eo pipefail
-
-ROOT=$(dirname "$0")
-ROOT=$(
- cd "$ROOT"
- pwd
-)
-
-CURDIR=${ROOT}
-SSB_DATA_DIR=$CURDIR/ssb-data/
-
-# check if ssb-data exists
-if [[ ! -d $SSB_DATA_DIR/ ]]; then
- echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
- exit 1
-fi
-
-check_prerequest() {
- local CMD=$1
- local NAME=$2
- if ! $CMD; then
- echo "$NAME is missing. This script depends on cURL to load data to
Doris."
- exit 1
- fi
-}
-
-check_prerequest "curl --version" "curl"
-
-# load 4 small dimension tables
-
-source $CURDIR/doris-cluster.conf
-
-echo "FE_HOST: $FE_HOST"
-echo "FE_HTTP_PORT: $FE_HTTP_PORT"
-echo "USER: $USER"
-echo "PASSWORD: $PASSWORD"
-echo "DB: $DB"
-
-echo 'Loading data for table: part'
-curl --location-trusted -u $USER:$PASSWORD \
- -H "column_separator:|" \
- -H
"columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy"
\
- -T $SSB_DATA_DIR/part.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load
-
-echo 'Loading data for table: date'
-curl --location-trusted -u $USER:$PASSWORD \
- -H "column_separator:|" \
- -H
"columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy"
\
- -T $SSB_DATA_DIR/date.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/dates/_stream_load
-
-echo 'Loading data for table: supplier'
-curl --location-trusted -u $USER:$PASSWORD \
- -H "column_separator:|" \
- -H
"columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
- -T $SSB_DATA_DIR/supplier.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load
-
-echo 'Loading data for table: customer'
-curl --location-trusted -u $USER:$PASSWORD \
- -H "column_separator:|" \
- -H
"columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use"
\
- -T $SSB_DATA_DIR/customer.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load
diff --git a/tools/ssb-tools/load-ssb-fact-data.sh
b/tools/ssb-tools/load-ssb-fact-data.sh
deleted file mode 100755
index e6cd55e2aa..0000000000
--- a/tools/ssb-tools/load-ssb-fact-data.sh
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-##############################################################
-# This script is used to load generated ssb data set to Doris
-# Only for 1 fact table: lineorder
-##############################################################
-
-set -eo pipefail
-
-ROOT=$(dirname "$0")
-ROOT=$(
- cd "$ROOT"
- pwd
-)
-
-CURDIR=${ROOT}
-SSB_DATA_DIR=$CURDIR/ssb-data/
-
-usage() {
- echo "
-Usage: $0 <options>
- Optional options:
- -c parallelism to load data of lineorder table, default is 5.
-
- Eg.
- $0 load data using default value.
- $0 -c 10 load lineorder table data using parallelism 10.
- "
- exit 1
-}
-
-OPTS=$(getopt \
- -n $0 \
- -o '' \
- -o 'hc:' \
- -- "$@")
-
-eval set -- "$OPTS"
-
-PARALLEL=3
-HELP=0
-
-if [ $# == 0 ]; then
- usage
-fi
-
-while true; do
- case "$1" in
- -h)
- HELP=1
- shift
- ;;
- -c)
- PARALLEL=$2
- shift 2
- ;;
- --)
- shift
- break
- ;;
- *)
- echo "Internal error"
- exit 1
- ;;
- esac
-done
-
-if [[ ${HELP} -eq 1 ]]; then
- usage
- exit
-fi
-
-echo "Parallelism: $PARALLEL"
-
-# check if ssb-data exists
-if [[ ! -d $SSB_DATA_DIR/ ]]; then
- echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
- exit 1
-fi
-
-check_prerequest() {
- local CMD=$1
- local NAME=$2
- if ! $CMD; then
- echo "$NAME is missing. This script depends on cURL to load data to
Doris."
- exit 1
- fi
-}
-
-check_prerequest "curl --version" "curl"
-
-# load lineorder
-source $CURDIR/doris-cluster.conf
-
-echo "FE_HOST: $FE_HOST"
-echo "FE_HTTP_PORT: $FE_HTTP_PORT"
-echo "USER: $USER"
-echo "PASSWORD: $PASSWORD"
-echo "DB: $DB"
-
-function load() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD \
- -H "column_separator:|" \
- -H
"columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy"
\
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineorder/_stream_load
-}
-
-# set parallelism
-[ -e /tmp/fd1 ] || mkfifo /tmp/fd1
-exec 3<>/tmp/fd1
-rm -rf /tmp/fd1
-
-for ((i = 1; i <= $PARALLEL; i++)); do
- echo >&3
-done
-
-echo $(date)
-for file in $(ls $SSB_DATA_DIR/lineorder.tbl.*); do
- read -u3
- {
- load $file
- echo >&3
- } &
-done
-
-# wait for child thread finished
-wait
-
-echo $(date)
-echo "DONE."
diff --git a/tools/tpch-tools/README.md b/tools/tpch-tools/README.md
index dbef056fa7..bd9e483eec 100644
--- a/tools/tpch-tools/README.md
+++ b/tools/tpch-tools/README.md
@@ -24,23 +24,23 @@ follow the steps below:
### 1. build tpc-h dbgen tool.
- ./build-tpch-dbgen.sh
+ ./bin/build-tpch-dbgen.sh
### 2. generate tpc-h data. use -h for more information.
- ./gen-tpch-data.sh -s 1
+ ./bin/gen-tpch-data.sh -s 1
-### 3. create tpc-h tables. modify `doris-cluster.conf` to specify doris info,
then run script below.
+### 3. create tpc-h tables. modify `conf/doris-cluster.conf` to specify doris
info, then run script below.
- ./create-tpch-tables.sh
+ ./bin/create-tpch-tables.sh
### 4. load tpc-h data. use -h for help.
- ./load-tpch-data.sh
+ ./bin/load-tpch-data.sh
### 5. run tpc-h queries.
- ./run-tpch-queries.sh
+ ./bin/run-tpch-queries.sh
NOTICE: At present, Doris's query optimizer and statistical information
functions are not complete, so we rewrite some queries in TPC-H to adapt to
Doris' execution framework, but it does not affect the correctness of the
results. The rewritten SQL is marked with "Modified" in the corresponding .sql
file.
diff --git a/tools/tpch-tools/build-tpch-dbgen.sh
b/tools/tpch-tools/bin/build-tpch-dbgen.sh
similarity index 93%
rename from tools/tpch-tools/build-tpch-dbgen.sh
rename to tools/tpch-tools/bin/build-tpch-dbgen.sh
index 76da1ba890..b0cce4b1da 100755
--- a/tools/tpch-tools/build-tpch-dbgen.sh
+++ b/tools/tpch-tools/bin/build-tpch-dbgen.sh
@@ -51,13 +51,12 @@ if [[ -d $TPCH_DBGEN_DIR ]]; then
echo "If you want to download TPC-H_Tools_v3.0.0 again, please delete this
dir first."
else
wget
"https://tools-chengdu.oss-cn-chengdu.aliyuncs.com/TPC-H_Tools_v3.0.0.zip"
-
- unzip TPC-H_Tools_v3.0.0.zip -d $CURDIR/
+ unzip TPC-H_Tools_v3.0.0.zip -d "$CURDIR"/
fi
# modify tpcd.h
-cd $TPCH_DBGEN_DIR/
-echo '
+cd "$TPCH_DBGEN_DIR"/
+printf '%s' '
#ifdef MYSQL
#define GEN_QUERY_PLAN ""
#define START_TRAN "START TRANSACTION"
@@ -81,7 +80,11 @@ cd -
# check
if [[ -f $TPCH_DBGEN_DIR/dbgen ]]; then
- echo "Build succeed! Run $TPCH_DBGEN_DIR/dbgen -h"
+ echo "
+################
+Build succeed!
+################
+Run $TPCH_DBGEN_DIR/dbgen -h"
exit 0
else
echo "Build failed!"
diff --git a/tools/tpch-tools/create-tpch-tables.sh
b/tools/tpch-tools/bin/create-tpch-tables.sh
similarity index 87%
rename from tools/tpch-tools/create-tpch-tables.sh
rename to tools/tpch-tools/bin/create-tpch-tables.sh
index 79961dbba6..01ca7a1345 100755
--- a/tools/tpch-tools/create-tpch-tables.sh
+++ b/tools/tpch-tools/bin/create-tpch-tables.sh
@@ -40,7 +40,7 @@ Usage: $0
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-- "$@")
@@ -84,7 +84,8 @@ check_prerequest() {
check_prerequest "mysql --version" "mysql"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
@@ -93,7 +94,7 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
-mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE
DATABASE IF NOT EXISTS $DB"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT
EXISTS $DB"
echo "Run SQLs from $CURDIR/create-tpch-tables.sql"
-mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB
<$CURDIR/create-tpch-tables.sql
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB"
<"$CURDIR"/../ddl/create-tpch-tables.sql
diff --git a/tools/tpch-tools/gen-tpch-data.sh
b/tools/tpch-tools/bin/gen-tpch-data.sh
similarity index 80%
rename from tools/tpch-tools/gen-tpch-data.sh
rename to tools/tpch-tools/bin/gen-tpch-data.sh
index 0e7359d601..4202e3c58d 100755
--- a/tools/tpch-tools/gen-tpch-data.sh
+++ b/tools/tpch-tools/bin/gen-tpch-data.sh
@@ -48,7 +48,7 @@ Usage: $0 <options>
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'hs:c:' \
-- "$@")
@@ -107,40 +107,40 @@ if [[ -d $TPCH_DATA_DIR/ ]]; then
exit 1
fi
-mkdir $TPCH_DATA_DIR/
+mkdir "$TPCH_DATA_DIR"/
# gen data
-cd $TPCH_DBGEN_DIR
+cd "$TPCH_DBGEN_DIR"
echo "Begin to generate data for table: region"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T r
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T r
echo "Begin to generate data for table: nation"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T n
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T n
echo "Begin to generate data for table: supplier"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T s
echo "Begin to generate data for table: part"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T P
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T P
echo "Begin to generate data for table: customer"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T c
echo "Begin to generate data for table: partsupp"
-for i in $(seq 1 $PARALLEL); do
+for i in $(seq 1 "$PARALLEL"); do
{
- $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T S -C $PARALLEL -S ${i}
+ "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T S -C "$PARALLEL" -S "$i"
} &
done
wait
echo "Begin to generate data for table: orders"
-for i in $(seq 1 $PARALLEL); do
+for i in $(seq 1 "$PARALLEL"); do
{
- $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T O -C $PARALLEL -S ${i}
+ "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T O -C "$PARALLEL" -S "$i"
} &
done
wait
echo "Begin to generate data for table: lineitem"
-for i in $(seq 1 $PARALLEL); do
+for i in $(seq 1 "$PARALLEL"); do
{
- $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T L -C $PARALLEL -S ${i}
+ "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T L -C "$PARALLEL" -S "$i"
} &
done
wait
@@ -148,7 +148,7 @@ wait
cd -
# move data to $TPCH_DATA_DIR
-mv $TPCH_DBGEN_DIR/*.tbl* $TPCH_DATA_DIR/
+mv "$TPCH_DBGEN_DIR"/*.tbl* "$TPCH_DATA_DIR"/
# check data
-du -sh $TPCH_DATA_DIR/*.tbl*
+du -sh "$TPCH_DATA_DIR"/*.tbl*
diff --git a/tools/tpch-tools/load-tpch-data.sh b/tools/tpch-tools/bin/load-tpch-data.sh
similarity index 69%
rename from tools/tpch-tools/load-tpch-data.sh
rename to tools/tpch-tools/bin/load-tpch-data.sh
index 572bfb859c..7a250ef2f4 100755
--- a/tools/tpch-tools/load-tpch-data.sh
+++ b/tools/tpch-tools/bin/load-tpch-data.sh
@@ -46,7 +46,7 @@ Usage: $0 <options>
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'hc:' \
-- "$@")
@@ -89,7 +89,7 @@ fi
echo "Parallelism: $PARALLEL"
# check if tpch-data exists
-if [[ ! -d $TPCH_DATA_DIR/ ]]; then
+if [[ ! -d "$TPCH_DATA_DIR"/ ]]; then
echo "$TPCH_DATA_DIR does not exist. Run sh gen-tpch-data.sh first."
exit 1
fi
@@ -106,7 +106,9 @@ check_prerequest() {
check_prerequest "curl --version" "curl"
# load tables
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_HTTP_PORT: $FE_HTTP_PORT"
@@ -115,61 +117,62 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
function load_region() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: r_regionkey, r_name, r_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/region/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/region/_stream_load
}
function load_nation() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/nation/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/nation/_stream_load
}
function load_supplier() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone,
s_acctbal, s_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load
+        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load
}
function load_customer() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone,
c_acctbal, c_mktsegment, c_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load
+        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load
}
function load_part() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load
}
function load_partsupp() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost,
ps_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/partsupp/_stream_load
+        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/partsupp/_stream_load
}
function load_orders() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice,
o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/orders/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/orders/_stream_load
}
function load_lineitem() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber,
l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus,
l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp"
\
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineitem/_stream_load
+        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineitem/_stream_load
}
# start load
-load_region $TPCH_DATA_DIR/region.tbl
-load_nation $TPCH_DATA_DIR/nation.tbl
-load_supplier $TPCH_DATA_DIR/supplier.tbl
-load_customer $TPCH_DATA_DIR/customer.tbl
-load_part $TPCH_DATA_DIR/part.tbl
-
+date
+load_region "$TPCH_DATA_DIR"/region.tbl
+load_nation "$TPCH_DATA_DIR"/nation.tbl
+load_supplier "$TPCH_DATA_DIR"/supplier.tbl
+load_customer "$TPCH_DATA_DIR"/customer.tbl
+load_part "$TPCH_DATA_DIR"/part.tbl
+date
# set parallelism
# 以PID为名, 防止创建命名管道时与已有文件重名,从而失败
@@ -182,20 +185,21 @@ exec 3<>${fifo}
rm -rf ${fifo}
# 在fd3中放置$PARALLEL个空行作为令牌
-for ((i = 1; i <= $PARALLEL; i++)); do
+for ((i = 1; i <= PARALLEL; i++)); do
echo >&3
done
-for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do
+date
+for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do
# 领取令牌, 即从fd3中读取行, 每次一行
# 对管道,读一行便少一行,每次只能读取一行
# 所有行读取完毕, 执行挂起, 直到管道再次有可读行
# 因此实现了进程数量控制
- read -u3
+ read -r -u3
# 要批量执行的命令放在大括号内, 后台运行
{
- load_lineitem $file
+ load_lineitem "$file"
echo "----loaded $file"
sleep 2
# 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
@@ -203,20 +207,22 @@ for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do
} &
done
-for file in $(ls $TPCH_DATA_DIR/orders.tbl*); do
- read -u3
+date
+for file in "$TPCH_DATA_DIR"/orders.tbl*; do
+ read -r -u3
{
- load_orders $file
+ load_orders "$file"
echo "----loaded $file"
sleep 2
echo >&3
} &
done
-for file in $(ls $TPCH_DATA_DIR/partsupp.tbl*); do
- read -u3
+date
+for file in "$TPCH_DATA_DIR"/partsupp.tbl*; do
+ read -r -u3
{
- load_partsupp $file
+ load_partsupp "$file"
echo "----loaded $file"
sleep 2
echo >&3
@@ -227,3 +233,6 @@ done
wait
# 删除文件标识符
exec 3>&-
+date
+
+echo "DONE."
\ No newline at end of file
diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/tpch-tools/bin/run-tpch-queries.sh
similarity index 79%
rename from tools/tpch-tools/run-tpch-queries.sh
rename to tools/tpch-tools/bin/run-tpch-queries.sh
index 9d7a3f1e18..14d00e17bb 100755
--- a/tools/tpch-tools/run-tpch-queries.sh
+++ b/tools/tpch-tools/bin/run-tpch-queries.sh
@@ -29,7 +29,7 @@ ROOT=$(
)
CURDIR=${ROOT}
-QUERIES_DIR=$CURDIR/queries
+QUERIES_DIR=$CURDIR/../queries
usage() {
echo "
@@ -41,7 +41,7 @@ Usage: $0
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-- "$@")
@@ -85,7 +85,9 @@ check_prerequest() {
check_prerequest "mysql --version" "mysql"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
@@ -95,10 +97,16 @@ echo "DB: $DB"
echo "Time Unit: ms"
pre_set() {
- echo $@
- mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e
"$@"
+ echo "$*"
+ mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$*"
}
+echo '============================================'
+pre_set "show variables;"
+echo '============================================'
+pre_set "show table status;"
+echo '============================================'
+
sum=0
for i in $(seq 1 22); do
total=0
@@ -106,12 +114,12 @@ for i in $(seq 1 22); do
# Each query is executed ${run} times and takes the average time
for j in $(seq 1 ${run}); do
start=$(date +%s%3N)
-        mysql -h$FE_HOST -u $USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB --comments <$QUERIES_DIR/q$i.sql >/dev/null
+        mysql -h"$FE_HOST" -u "$USER" -P"$FE_QUERY_PORT" -D"$DB" --comments <"$QUERIES_DIR"/q"$i".sql >/dev/null
end=$(date +%s%3N)
total=$((total + end - start))
done
- cost=$((total / ${run}))
+ cost=$((total / run))
echo "q$i: ${cost}"
- sum=$((sum + $cost))
+ sum=$((sum + cost))
done
echo "Total cost: $sum"
diff --git a/tools/tpch-tools/doris-cluster.conf b/tools/tpch-tools/conf/doris-cluster.conf
similarity index 98%
rename from tools/tpch-tools/doris-cluster.conf
rename to tools/tpch-tools/conf/doris-cluster.conf
index 7367b144b2..9417bcb9e0 100644
--- a/tools/tpch-tools/doris-cluster.conf
+++ b/tools/tpch-tools/conf/doris-cluster.conf
@@ -26,4 +26,4 @@ export USER='root'
# Doris password
export PASSWORD=''
# The database where TPC-H tables located
-export DB='tpch1'
+export DB='tpch'
diff --git a/tools/tpch-tools/create-tpch-tables.sql b/tools/tpch-tools/ddl/create-tpch-tables.sql
similarity index 100%
rename from tools/tpch-tools/create-tpch-tables.sql
rename to tools/tpch-tools/ddl/create-tpch-tables.sql
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]