This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch dev-1.1.2
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.2 by this push:
new 68cdc20fc8 [tools](ssb and tpch) optimize tools (#11974)
68cdc20fc8 is described below
commit 68cdc20fc8499b485b1f6c1676fcb29dfc16afbd
Author: Dongyang Li <[email protected]>
AuthorDate: Tue Aug 23 09:10:49 2022 +0800
[tools](ssb and tpch) optimize tools (#11974)
---
tools/ssb-tools/README.md | 40 +++
tools/ssb-tools/{ => bin}/build-ssb-dbgen.sh | 21 +-
.../bin/create-ssb-tables.sh} | 24 +-
tools/ssb-tools/{ => bin}/gen-ssb-data.sh | 72 ++++--
tools/ssb-tools/bin/load-ssb-data.sh | 276 +++++++++++++++++++++
.../bin/run-ssb-flat-queries.sh} | 49 ++--
.../bin/run-ssb-queries.sh} | 44 ++--
tools/ssb-tools/{ => conf}/doris-cluster.conf | 6 +-
tools/ssb-tools/ddl/create-ssb-flat-table.sql | 74 ++++++
.../create-ssb-tables.sql} | 115 +++++----
tools/ssb-tools/load-dimension-data.sh | 68 -----
tools/ssb-tools/load-fact-data.sh | 126 ----------
.../q1.3.sql => ssb-flat-queries/q1.1.sql} | 17 +-
.../{queries => ssb-flat-queries}/q1.2.sql | 16 +-
.../{queries => ssb-flat-queries}/q1.3.sql | 18 +-
.../q1.2.sql => ssb-flat-queries/q2.1.sql} | 16 +-
.../q1.1.sql => ssb-flat-queries/q2.2.sql} | 19 +-
.../q1.1.sql => ssb-flat-queries/q2.3.sql} | 18 +-
.../q1.1.sql => ssb-flat-queries/q3.1.sql} | 21 +-
.../q1.1.sql => ssb-flat-queries/q3.2.sql} | 21 +-
.../q1.1.sql => ssb-flat-queries/q3.3.sql} | 21 +-
.../q1.1.sql => ssb-flat-queries/q3.4.sql} | 21 +-
.../q1.1.sql => ssb-flat-queries/q4.1.sql} | 19 +-
.../q1.1.sql => ssb-flat-queries/q4.2.sql} | 25 +-
.../q2.1.sql => ssb-flat-queries/q4.3.sql} | 23 +-
tools/ssb-tools/{queries => ssb-queries}/q1.1.sql | 15 +-
tools/ssb-tools/{queries => ssb-queries}/q1.2.sql | 15 +-
tools/ssb-tools/{queries => ssb-queries}/q1.3.sql | 18 +-
tools/ssb-tools/{queries => ssb-queries}/q2.1.sql | 16 +-
tools/ssb-tools/{queries => ssb-queries}/q2.2.sql | 17 +-
tools/ssb-tools/{queries => ssb-queries}/q2.3.sql | 16 +-
tools/ssb-tools/{queries => ssb-queries}/q3.1.sql | 26 +-
tools/ssb-tools/{queries => ssb-queries}/q3.2.sql | 26 +-
tools/ssb-tools/{queries => ssb-queries}/q3.3.sql | 34 ++-
tools/ssb-tools/{queries => ssb-queries}/q3.4.sql | 33 ++-
tools/ssb-tools/{queries => ssb-queries}/q4.1.sql | 30 ++-
tools/ssb-tools/{queries => ssb-queries}/q4.2.sql | 36 +--
tools/ssb-tools/{queries => ssb-queries}/q4.3.sql | 30 ++-
tools/tpch-tools/README.md | 25 +-
tools/tpch-tools/{ => bin}/build-tpch-dbgen.sh | 17 +-
tools/tpch-tools/{ => bin}/create-tpch-tables.sh | 12 +-
tools/tpch-tools/{ => bin}/gen-tpch-data.sh | 32 +--
tools/tpch-tools/{ => bin}/load-tpch-data.sh | 97 ++++----
tools/tpch-tools/{ => bin}/run-tpch-queries.sh | 36 +--
tools/tpch-tools/{ => conf}/doris-cluster.conf | 4 +-
tools/tpch-tools/create-tpch-tables.sql | 100 --------
tools/tpch-tools/ddl/create-tpch-tables.sql | 174 +++++++++++++
tools/tpch-tools/queries/q1.sql | 2 +-
tools/tpch-tools/queries/q10.sql | 22 +-
tools/tpch-tools/queries/q11.sql | 27 +-
tools/tpch-tools/queries/q12.sql | 2 +-
tools/tpch-tools/queries/q13.sql | 6 +-
tools/tpch-tools/queries/q14.sql | 8 +-
tools/tpch-tools/queries/q15.sql | 17 +-
tools/tpch-tools/queries/q16.sql | 2 +-
tools/tpch-tools/queries/q17.sql | 20 +-
tools/tpch-tools/queries/q18.sql | 58 +++--
tools/tpch-tools/queries/q19.sql | 2 +-
tools/tpch-tools/queries/q2.sql | 44 ++--
tools/tpch-tools/queries/q20.sql | 62 ++---
tools/tpch-tools/queries/q21.sql | 66 +++--
tools/tpch-tools/queries/q22.sql | 2 +-
tools/tpch-tools/queries/q3.sql | 22 +-
tools/tpch-tools/queries/q4.sql | 21 +-
tools/tpch-tools/queries/q5.sql | 2 +-
tools/tpch-tools/queries/q6.sql | 2 +-
tools/tpch-tools/queries/q7.sql | 2 +-
tools/tpch-tools/queries/q8.sql | 8 +-
tools/tpch-tools/queries/q9.sql | 26 +-
69 files changed, 1426 insertions(+), 976 deletions(-)
diff --git a/tools/ssb-tools/README.md b/tools/ssb-tools/README.md
new file mode 100644
index 0000000000..6d8e85e97d
--- /dev/null
+++ b/tools/ssb-tools/README.md
@@ -0,0 +1,40 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# Usage
+
+ These scripts are used to make ssb and ssb flat test.
+ The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT
...'.
+
+## ssb test, follow the steps below:
+### 1. build ssb dbgen tool.
+ ./bin/build-ssb-dbgen.sh
+### 2. generate ssb data. use -h for more infomations.
+ ./bin/gen-ssb-data.sh -s 1
+### 3. create ssb tables. modify `conf/doris-cluster.conf` to specify Doris
cluster info, then run script below.
+ ./bin/create-ssb-tables.sh
+### 4. load ssb data. use -h for help.
+ ./bin/load-ssb-data.sh
+### 5. run ssb queries.
+ ./bin/run-ssb-queries.sh
+
+## ssb flat test, follow the steps below:
+### 1. prepare ssb data, which means ssb test steps 1 to 4 have been done.
+### 2. run ssb flat queries.
+ ./bin/run-ssb-flat-queries.sh
diff --git a/tools/ssb-tools/build-ssb-dbgen.sh
b/tools/ssb-tools/bin/build-ssb-dbgen.sh
similarity index 80%
rename from tools/ssb-tools/build-ssb-dbgen.sh
rename to tools/ssb-tools/bin/build-ssb-dbgen.sh
index 59af467441..56fd2056e4 100755
--- a/tools/ssb-tools/build-ssb-dbgen.sh
+++ b/tools/ssb-tools/bin/build-ssb-dbgen.sh
@@ -19,14 +19,17 @@
##############################################################
# This script is used to build ssb-dbgen
# sssb-dbgen's source code is from https://github.com/electrum/ssb-dbgen.git
-# Usage:
+# Usage:
# sh build-ssb-dbgen.sh
##############################################################
set -eo pipefail
-ROOT=`dirname "$0"`
-ROOT=`cd "$ROOT"; pwd`
+ROOT=$(dirname "$0")
+ROOT=$(
+ cd "$ROOT"
+ pwd
+)
CURDIR=${ROOT}
SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
@@ -35,17 +38,23 @@ SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
if [[ -d $SSB_DBGEN_DIR ]]; then
echo "Dir $CURDIR/ssb-dbgen/ already exists. No need to download."
echo "If you want to download ssb-dbgen again, please delete this dir
first."
+ exit 1
else
- curl
https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz
| tar xz -C $CURDIR/
+ cd "$CURDIR"
+ wget
https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz
&& tar -xzvf ssb-dbgen-linux.tar.gz -C $CURDIR/
fi
# compile ssb-dbgen
-cd $SSB_DBGEN_DIR/ && make
+cd "$SSB_DBGEN_DIR/" && make
cd -
# check
if [[ -f $CURDIR/ssb-dbgen/dbgen ]]; then
- echo "Build succeed! Run $CURDIR/ssb-dbgen/dbgen -h"
+ echo -e "
+################
+Build succeed!
+################
+Run $CURDIR/ssb-dbgen/dbgen -h"
exit 0
else
echo "Build failed!"
diff --git a/tools/tpch-tools/create-tpch-tables.sh
b/tools/ssb-tools/bin/create-ssb-tables.sh
similarity index 73%
copy from tools/tpch-tools/create-tpch-tables.sh
copy to tools/ssb-tools/bin/create-ssb-tables.sh
index 2263f47e1b..41177843e2 100755
--- a/tools/tpch-tools/create-tpch-tables.sh
+++ b/tools/ssb-tools/bin/create-ssb-tables.sh
@@ -17,7 +17,7 @@
# under the License.
##############################################################
-# This script is used to create TPC-H tables
+# This script is used to create SSB tables
##############################################################
set -eo pipefail
@@ -29,19 +29,22 @@ ROOT=$(
)
CURDIR=${ROOT}
+SSB_DDL="${CURDIR}/../ddl/create-ssb-tables.sql"
+SSB_FLAT_DDL="${CURDIR}/../ddl/create-ssb-flat-table.sql"
usage() {
echo "
-This script is used to create TPC-H tables,
-will use mysql client to connect Doris server which is specified in
doris-cluster.conf file.
+This script is used to create SSB tables,
+will use mysql client to connect Doris server which is specified in
conf/doris-cluster.conf file.
Usage: $0
"
exit 1
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
+ -o 'h' \
-- "$@")
eval set -- "$OPTS"
@@ -84,7 +87,9 @@ check_prerequest() {
check_prerequest "mysql --version" "mysql"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
@@ -92,7 +97,10 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
-mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS
$DB"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT
EXISTS $DB"
-echo $CURDIR/create-tpch-tables.sql
-mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB <$CURDIR/create-tpch-tables.sql
+echo "Run DDL from $SSB_DDL"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_DDL"
+
+echo "Run DDL from $SSB_FLAT_DDL"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_FLAT_DDL"
diff --git a/tools/ssb-tools/gen-ssb-data.sh
b/tools/ssb-tools/bin/gen-ssb-data.sh
similarity index 71%
rename from tools/ssb-tools/gen-ssb-data.sh
rename to tools/ssb-tools/bin/gen-ssb-data.sh
index 594f1296a2..afc9e8bf68 100755
--- a/tools/ssb-tools/gen-ssb-data.sh
+++ b/tools/ssb-tools/bin/gen-ssb-data.sh
@@ -22,8 +22,11 @@
set -eo pipefail
-ROOT=`dirname "$0"`
-ROOT=`cd "$ROOT"; pwd`
+ROOT=$(dirname "$0")
+ROOT=$(
+ cd "$ROOT"
+ pwd
+)
CURDIR=${ROOT}
SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
@@ -45,7 +48,7 @@ Usage: $0 <options>
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'hs:c:' \
-- "$@")
@@ -56,23 +59,38 @@ SCALE_FACTOR=100
PARALLEL=10
HELP=0
-if [ $# == 0 ] ; then
- usage
+if [ $# == 0 ]; then
+ usage
fi
while true; do
- case "$1" in
- -h) HELP=1 ; shift ;;
- -s) SCALE_FACTOR=$2 ; shift 2 ;;
- -c) PARALLEL=$2 ; shift 2 ;;
- --) shift ; break ;;
- *) echo "Internal error" ; exit 1 ;;
- esac
+ case "$1" in
+ -h)
+ HELP=1
+ shift
+ ;;
+ -s)
+ SCALE_FACTOR=$2
+ shift 2
+ ;;
+ -c)
+ PARALLEL=$2
+ shift 2
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error"
+ exit 1
+ ;;
+ esac
done
if [[ ${HELP} -eq 1 ]]; then
- usage
- exit
+ usage
+ exit
fi
echo "Scale Factor: $SCALE_FACTOR"
@@ -80,33 +98,33 @@ echo "Parallelism: $PARALLEL"
# check if dbgen exists
if [[ ! -f $SSB_DBGEN_DIR/dbgen ]]; then
- echo "$SSB_DBGEN_DIR/dbgen does not exist. Run build-ssb-dbgen.sh first to
build it first."
- exit 1
+ echo "$SSB_DBGEN_DIR/dbgen does not exist. Run build-ssb-dbgen.sh first to
build it first."
+ exit 1
fi
if [[ -d $SSB_DATA_DIR/ ]]; then
- echo "$SSB_DATA_DIR exists. Remove it before generating data"
- exit 1
+ echo "$SSB_DATA_DIR exists. Remove it before generating data"
+ exit 1
fi
-mkdir $SSB_DATA_DIR/
+mkdir "$SSB_DATA_DIR/"
# gen data
-cd $SSB_DBGEN_DIR
+cd "$SSB_DBGEN_DIR"
echo "Begin to generate data for table: customer"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T c
echo "Begin to generate data for table: part"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T p
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T p
echo "Begin to generate data for table: supplier"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T s
echo "Begin to generate data for table: date"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T d
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T d
echo "Begin to generate data for table: lineorder"
-$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T l -C $PARALLEL
+"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T l -C "$PARALLEL"
cd -
# move data to $SSB_DATA_DIR
-mv $SSB_DBGEN_DIR/*.tbl* $SSB_DATA_DIR/
+mv "$SSB_DBGEN_DIR"/*.tbl* "$SSB_DATA_DIR/"
# check data
-du -sh $SSB_DATA_DIR/*.tbl*
+du -sh "$SSB_DATA_DIR"/*.tbl*
diff --git a/tools/ssb-tools/bin/load-ssb-data.sh
b/tools/ssb-tools/bin/load-ssb-data.sh
new file mode 100755
index 0000000000..f6be6bbe72
--- /dev/null
+++ b/tools/ssb-tools/bin/load-ssb-data.sh
@@ -0,0 +1,276 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# This script is used to load generated ssb data set to Doris
+# Only for 1 fact table: lineorder
+##############################################################
+
+set -eo pipefail
+
+ROOT=$(dirname "$0")
+ROOT=$(
+ cd "$ROOT"
+ pwd
+)
+
+CURDIR=${ROOT}
+SSB_DATA_DIR="$CURDIR/ssb-data/"
+
+usage() {
+ echo "
+Usage: $0 <options>
+ Optional options:
+ -c parallelism to load data of lineorder table, default is 5.
+
+ Eg.
+ $0 load data using default value.
+ $0 -c 10 load lineorder table data using parallelism 10.
+ "
+ exit 1
+}
+
+OPTS=$(getopt \
+ -n "$0" \
+ -o '' \
+ -o 'hc:' \
+ -- "$@")
+
+eval set -- "$OPTS"
+
+PARALLEL=5
+HELP=0
+
+if [ $# == 0 ]; then
+ usage
+fi
+
+while true; do
+ case "$1" in
+ -h)
+ HELP=1
+ shift
+ ;;
+ -c)
+ PARALLEL=$2
+ shift 2
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error"
+ exit 1
+ ;;
+ esac
+done
+
+if [[ ${HELP} -eq 1 ]]; then
+ usage
+ exit
+fi
+
+echo "Parallelism: $PARALLEL"
+
+# check if ssb-data exists
+if [[ ! -d $SSB_DATA_DIR/ ]]; then
+ echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
+ exit 1
+fi
+
+check_prerequest() {
+ local CMD=$1
+ local NAME=$2
+ if ! $CMD; then
+ echo "$NAME is missing. This script depends on cURL to load data to
Doris."
+ exit 1
+ fi
+}
+
+run_sql() {
+ sql="$*"
+ echo "$sql"
+ mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$@"
+}
+
+load_lineitem_flat() {
+ # Loading data in batches by year.
+ for con in 'lo_orderdate<19930101' 'lo_orderdate>=19930101 and
lo_orderdate<19940101' 'lo_orderdate>=19940101 and lo_orderdate<19950101'
'lo_orderdate>=19950101 and lo_orderdate<19960101' 'lo_orderdate>=19960101 and
lo_orderdate<19970101' 'lo_orderdate>=19970101 and lo_orderdate<19980101'
'lo_orderdate>=19980101'; do
+ echo -e "\n$con"
+ run_sql "
+INSERT INTO lineorder_flat
+SELECT
+ LO_ORDERDATE,
+ LO_ORDERKEY,
+ LO_LINENUMBER,
+ LO_CUSTKEY,
+ LO_PARTKEY,
+ LO_SUPPKEY,
+ LO_ORDERPRIORITY,
+ LO_SHIPPRIORITY,
+ LO_QUANTITY,
+ LO_EXTENDEDPRICE,
+ LO_ORDTOTALPRICE,
+ LO_DISCOUNT,
+ LO_REVENUE,
+ LO_SUPPLYCOST,
+ LO_TAX,
+ LO_COMMITDATE,
+ LO_SHIPMODE,
+ C_NAME,
+ C_ADDRESS,
+ C_CITY,
+ C_NATION,
+ C_REGION,
+ C_PHONE,
+ C_MKTSEGMENT,
+ S_NAME,
+ S_ADDRESS,
+ S_CITY,
+ S_NATION,
+ S_REGION,
+ S_PHONE,
+ P_NAME,
+ P_MFGR,
+ P_CATEGORY,
+ P_BRAND,
+ P_COLOR,
+ P_TYPE,
+ P_SIZE,
+ P_CONTAINER
+FROM (
+ SELECT
+ lo_orderkey,
+ lo_linenumber,
+ lo_custkey,
+ lo_partkey,
+ lo_suppkey,
+ lo_orderdate,
+ lo_orderpriority,
+ lo_shippriority,
+ lo_quantity,
+ lo_extendedprice,
+ lo_ordtotalprice,
+ lo_discount,
+ lo_revenue,
+ lo_supplycost,
+ lo_tax,
+ lo_commitdate,
+ lo_shipmode
+ FROM lineorder
+ WHERE ${con}
+) l
+INNER JOIN customer c
+ON (c.c_custkey = l.lo_custkey)
+INNER JOIN supplier s
+ON (s.s_suppkey = l.lo_suppkey)
+INNER JOIN part p
+ON (p.p_partkey = l.lo_partkey);
+"
+ done
+}
+
+check_prerequest "curl --version" "curl"
+
+# load lineorder
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
+
+echo "FE_HOST: $FE_HOST"
+echo "FE_HTTP_PORT: $FE_HTTP_PORT"
+echo "USER: $USER"
+echo "PASSWORD: $PASSWORD"
+echo "DB: $DB"
+
+date
+echo "==========Start to load data into ssb tables=========="
+echo 'Loading data for table: part'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy"
\
+ -T "$SSB_DATA_DIR"/part.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load
+
+echo 'Loading data for table: date'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy"
\
+ -T "$SSB_DATA_DIR"/date.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/dates/_stream_load
+
+echo 'Loading data for table: supplier'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
+ -T "$SSB_DATA_DIR"/supplier.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load
+
+echo 'Loading data for table: customer'
+curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use"
\
+ -T "$SSB_DATA_DIR"/customer.tbl
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load
+
+echo "Loading data for table: lineorder, with $PARALLEL parallel"
+function load() {
+ echo "$@"
+ curl --location-trusted -u "$USER":"$PASSWORD" \
+ -H "column_separator:|" \
+ -H
"columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy"
\
+ -T "$@"
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineorder/_stream_load
+}
+
+# set parallelism
+[ -e /tmp/fd1 ] || mkfifo /tmp/fd1
+exec 3<>/tmp/fd1
+rm -rf /tmp/fd1
+
+for ((i = 1; i <= PARALLEL; i++)); do
+ echo >&3
+done
+
+date
+for file in "$SSB_DATA_DIR"/lineorder.tbl.*; do
+ read -r -u3
+ {
+ load "$file"
+ echo >&3
+ } &
+done
+
+# wait for child thread finished
+wait
+date
+
+echo "==========Start to insert data into ssb flat table=========="
+echo "change some session variables before load, and then restore after load."
+origin_query_timeout=$(run_sql 'select @@query_timeout;' | sed -n '3p')
+origin_parallel=$(run_sql 'select @@parallel_fragment_exec_instance_num;' |
sed -n '3p')
+# set parallel_fragment_exec_instance_num=1, loading maybe slow but stable.
+run_sql "set global query_timeout=7200;"
+run_sql "set global parallel_fragment_exec_instance_num=1;"
+echo '============================================'
+date
+load_lineitem_flat
+date
+echo '============================================'
+echo "restore session variables"
+run_sql "set global query_timeout=${origin_query_timeout};"
+run_sql "set global parallel_fragment_exec_instance_num=${origin_parallel};"
+echo '============================================'
+
+echo "DONE."
diff --git a/tools/tpch-tools/run-tpch-queries.sh
b/tools/ssb-tools/bin/run-ssb-flat-queries.sh
similarity index 62%
copy from tools/tpch-tools/run-tpch-queries.sh
copy to tools/ssb-tools/bin/run-ssb-flat-queries.sh
index 5a0d06d512..6db5ba3303 100755
--- a/tools/tpch-tools/run-tpch-queries.sh
+++ b/tools/ssb-tools/bin/run-ssb-flat-queries.sh
@@ -17,7 +17,7 @@
# under the License.
##############################################################
-# This script is used to create TPC-H tables
+# This script is used to create ssb flat queries
##############################################################
set -eo pipefail
@@ -28,21 +28,22 @@ ROOT=$(
pwd
)
-CURDIR=${ROOT}
-QUERIES_DIR=$CURDIR/queries
+CURDIR="${ROOT}"
+QUERIES_DIR="$CURDIR/../ssb-flat-queries"
usage() {
echo "
-This script is used to run TPC-H 22queries,
-will use mysql client to connect Doris server which parameter is specified in
doris-cluster.conf file.
+This script is used to run SSB flat 13queries,
+will use mysql client to connect Doris server which parameter is specified in
'doris-cluster.conf' file.
Usage: $0
"
exit 1
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
+ -o 'h' \
-- "$@")
eval set -- "$OPTS"
@@ -83,9 +84,11 @@ check_prerequest() {
fi
}
-check_prerequest "mysql --version" "mysql"
+check_prerequest "mysqlslap --version" "mysqlslap"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
@@ -94,24 +97,24 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
pre_set() {
- echo $@
- mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@"
+ echo "$@"
+ mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
}
pre_set "set global enable_vectorized_engine=1;"
pre_set "set global parallel_fragment_exec_instance_num=8;"
-pre_set "set global exec_mem_limit=48G;"
+pre_set "set global exec_mem_limit=8G;"
pre_set "set global batch_size=4096;"
-# pre_set "show variables like 'batch_size';"
-
-for i in $(seq 1 22); do
- total=0
- # Each query is executed three times and takes the average time
- for j in $(seq 1 3); do
- start=$(date +%s%3N)
- mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql
>/dev/null
- end=$(date +%s%3N)
- total=$((total + end - start))
- done
- echo "q$i: $((total / 3))ms"
+echo '============================================'
+pre_set "show variables;"
+echo '============================================'
+pre_set "show table status;"
+echo '============================================'
+
+for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1'
'4.2' '4.3'; do
+ # First run to prevent the affect of cold start
+ mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D "$DB"
<"$QUERIES_DIR"/q${i}.sql >/dev/null 2>&1
+ # Then run 3 times and takes the average time
+ res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER"
--create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n
'2p' | cut -d ' ' -f 9,10)
+ echo "q$i: $res"
done
diff --git a/tools/tpch-tools/run-tpch-queries.sh
b/tools/ssb-tools/bin/run-ssb-queries.sh
similarity index 65%
copy from tools/tpch-tools/run-tpch-queries.sh
copy to tools/ssb-tools/bin/run-ssb-queries.sh
index 5a0d06d512..5a1db9a57e 100755
--- a/tools/tpch-tools/run-tpch-queries.sh
+++ b/tools/ssb-tools/bin/run-ssb-queries.sh
@@ -17,7 +17,7 @@
# under the License.
##############################################################
-# This script is used to create TPC-H tables
+# This script is used to create ssb queries
##############################################################
set -eo pipefail
@@ -29,11 +29,11 @@ ROOT=$(
)
CURDIR=${ROOT}
-QUERIES_DIR=$CURDIR/queries
+QUERIES_DIR=$CURDIR/../ssb-queries
usage() {
echo "
-This script is used to run TPC-H 22queries,
+This script is used to run SSB 13queries,
will use mysql client to connect Doris server which parameter is specified in
doris-cluster.conf file.
Usage: $0
"
@@ -41,8 +41,9 @@ Usage: $0
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
+ -o 'h' \
-- "$@")
eval set -- "$OPTS"
@@ -83,9 +84,11 @@ check_prerequest() {
fi
}
-check_prerequest "mysql --version" "mysql"
+check_prerequest "mysqlslap --version" "mysql slap"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
@@ -94,24 +97,25 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
pre_set() {
- echo $@
- mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@"
+ echo "$@"
+ mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
}
pre_set "set global enable_vectorized_engine=1;"
pre_set "set global parallel_fragment_exec_instance_num=8;"
pre_set "set global exec_mem_limit=48G;"
pre_set "set global batch_size=4096;"
-# pre_set "show variables like 'batch_size';"
-
-for i in $(seq 1 22); do
- total=0
- # Each query is executed three times and takes the average time
- for j in $(seq 1 3); do
- start=$(date +%s%3N)
- mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql
>/dev/null
- end=$(date +%s%3N)
- total=$((total + end - start))
- done
- echo "q$i: $((total / 3))ms"
+pre_set "set global enable_projection=true;"
+pre_set "set global runtime_filter_mode=global;"
+# pre_set "set global enable_cost_based_join_reorder=1"
+echo '============================================'
+pre_set "show variables;"
+echo '============================================'
+pre_set "show table status;"
+echo '============================================'
+
+for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1'
'4.2' '4.3'; do
+ # Each query is executed 3 times and takes the average time
+ res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER"
--create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n
'2p' | cut -d ' ' -f 9,10)
+ echo "q$i: $res"
done
diff --git a/tools/ssb-tools/doris-cluster.conf
b/tools/ssb-tools/conf/doris-cluster.conf
similarity index 91%
rename from tools/ssb-tools/doris-cluster.conf
rename to tools/ssb-tools/conf/doris-cluster.conf
index bef6c7b54c..5567a2f588 100644
--- a/tools/ssb-tools/doris-cluster.conf
+++ b/tools/ssb-tools/conf/doris-cluster.conf
@@ -19,9 +19,11 @@
export FE_HOST='127.0.0.1'
# http_port in fe.conf
export FE_HTTP_PORT=8030
+# query_port in fe.conf
+export FE_QUERY_PORT=9030
# Doris username
-export USER='admin'
+export USER='root'
# Doris password
export PASSWORD=''
# The database where SSB tables located
-export DB='db1'
+export DB='ssb'
diff --git a/tools/ssb-tools/ddl/create-ssb-flat-table.sql
b/tools/ssb-tools/ddl/create-ssb-flat-table.sql
new file mode 100644
index 0000000000..b1e1681a94
--- /dev/null
+++ b/tools/ssb-tools/ddl/create-ssb-flat-table.sql
@@ -0,0 +1,74 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+CREATE TABLE IF NOT EXISTS `lineorder_flat` (
+ `LO_ORDERDATE` int(11) NOT NULL COMMENT "",
+ `LO_ORDERKEY` int(11) NOT NULL COMMENT "",
+ `LO_LINENUMBER` tinyint(4) NOT NULL COMMENT "",
+ `LO_CUSTKEY` int(11) NOT NULL COMMENT "",
+ `LO_PARTKEY` int(11) NOT NULL COMMENT "",
+ `LO_SUPPKEY` int(11) NOT NULL COMMENT "",
+ `LO_ORDERPRIORITY` varchar(100) NOT NULL COMMENT "",
+ `LO_SHIPPRIORITY` tinyint(4) NOT NULL COMMENT "",
+ `LO_QUANTITY` tinyint(4) NOT NULL COMMENT "",
+ `LO_EXTENDEDPRICE` int(11) NOT NULL COMMENT "",
+ `LO_ORDTOTALPRICE` int(11) NOT NULL COMMENT "",
+ `LO_DISCOUNT` tinyint(4) NOT NULL COMMENT "",
+ `LO_REVENUE` int(11) NOT NULL COMMENT "",
+ `LO_SUPPLYCOST` int(11) NOT NULL COMMENT "",
+ `LO_TAX` tinyint(4) NOT NULL COMMENT "",
+ `LO_COMMITDATE` date NOT NULL COMMENT "",
+ `LO_SHIPMODE` varchar(100) NOT NULL COMMENT "",
+ `C_NAME` varchar(100) NOT NULL COMMENT "",
+ `C_ADDRESS` varchar(100) NOT NULL COMMENT "",
+ `C_CITY` varchar(100) NOT NULL COMMENT "",
+ `C_NATION` varchar(100) NOT NULL COMMENT "",
+ `C_REGION` varchar(100) NOT NULL COMMENT "",
+ `C_PHONE` varchar(100) NOT NULL COMMENT "",
+ `C_MKTSEGMENT` varchar(100) NOT NULL COMMENT "",
+ `S_NAME` varchar(100) NOT NULL COMMENT "",
+ `S_ADDRESS` varchar(100) NOT NULL COMMENT "",
+ `S_CITY` varchar(100) NOT NULL COMMENT "",
+ `S_NATION` varchar(100) NOT NULL COMMENT "",
+ `S_REGION` varchar(100) NOT NULL COMMENT "",
+ `S_PHONE` varchar(100) NOT NULL COMMENT "",
+ `P_NAME` varchar(100) NOT NULL COMMENT "",
+ `P_MFGR` varchar(100) NOT NULL COMMENT "",
+ `P_CATEGORY` varchar(100) NOT NULL COMMENT "",
+ `P_BRAND` varchar(100) NOT NULL COMMENT "",
+ `P_COLOR` varchar(100) NOT NULL COMMENT "",
+ `P_TYPE` varchar(100) NOT NULL COMMENT "",
+ `P_SIZE` tinyint(4) NOT NULL COMMENT "",
+ `P_CONTAINER` varchar(100) NOT NULL COMMENT ""
+) ENGINE=OLAP
+DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`)
+COMMENT "OLAP"
+PARTITION BY RANGE(`LO_ORDERDATE`)
+(PARTITION p1992 VALUES [("-2147483648"), ("19930101")),
+PARTITION p1993 VALUES [("19930101"), ("19940101")),
+PARTITION p1994 VALUES [("19940101"), ("19950101")),
+PARTITION p1995 VALUES [("19950101"), ("19960101")),
+PARTITION p1996 VALUES [("19960101"), ("19970101")),
+PARTITION p1997 VALUES [("19970101"), ("19980101")),
+PARTITION p1998 VALUES [("19980101"), ("19990101")))
+DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48
+PROPERTIES (
+"replication_num" = "1",
+"colocate_with" = "groupxx1",
+"in_memory" = "false",
+"storage_format" = "DEFAULT"
+);
\ No newline at end of file
diff --git a/tools/ssb-tools/create-tables.sql
b/tools/ssb-tools/ddl/create-ssb-tables.sql
similarity index 62%
rename from tools/ssb-tools/create-tables.sql
rename to tools/ssb-tools/ddl/create-ssb-tables.sql
index 5e99f89146..273d2d90b9 100644
--- a/tools/ssb-tools/create-tables.sql
+++ b/tools/ssb-tools/ddl/create-ssb-tables.sql
@@ -15,39 +15,44 @@
-- specific language governing permissions and limitations
-- under the License.
-CREATE TABLE `lineorder` (
- `lo_orderkey` bigint(20) NOT NULL COMMENT "",
- `lo_linenumber` bigint(20) NOT NULL COMMENT "",
+CREATE TABLE IF NOT EXISTS `lineorder` (
+ `lo_orderkey` int(11) NOT NULL COMMENT "",
+ `lo_linenumber` int(11) NOT NULL COMMENT "",
`lo_custkey` int(11) NOT NULL COMMENT "",
`lo_partkey` int(11) NOT NULL COMMENT "",
`lo_suppkey` int(11) NOT NULL COMMENT "",
`lo_orderdate` int(11) NOT NULL COMMENT "",
`lo_orderpriority` varchar(16) NOT NULL COMMENT "",
`lo_shippriority` int(11) NOT NULL COMMENT "",
- `lo_quantity` bigint(20) NOT NULL COMMENT "",
- `lo_extendedprice` bigint(20) NOT NULL COMMENT "",
- `lo_ordtotalprice` bigint(20) NOT NULL COMMENT "",
- `lo_discount` bigint(20) NOT NULL COMMENT "",
- `lo_revenue` bigint(20) NOT NULL COMMENT "",
- `lo_supplycost` bigint(20) NOT NULL COMMENT "",
- `lo_tax` bigint(20) NOT NULL COMMENT "",
- `lo_commitdate` bigint(20) NOT NULL COMMENT "",
+ `lo_quantity` int(11) NOT NULL COMMENT "",
+ `lo_extendedprice` int(11) NOT NULL COMMENT "",
+ `lo_ordtotalprice` int(11) NOT NULL COMMENT "",
+ `lo_discount` int(11) NOT NULL COMMENT "",
+ `lo_revenue` int(11) NOT NULL COMMENT "",
+ `lo_supplycost` int(11) NOT NULL COMMENT "",
+ `lo_tax` int(11) NOT NULL COMMENT "",
+ `lo_commitdate` int(11) NOT NULL COMMENT "",
`lo_shipmode` varchar(11) NOT NULL COMMENT ""
-)
+) ENGINE=OLAP
+DUPLICATE KEY(`lo_orderkey`)
+COMMENT "OLAP"
PARTITION BY RANGE(`lo_orderdate`)
-(PARTITION p1992 VALUES [("-2147483648"), ("19930101")),
-PARTITION p1993 VALUES [("19930101"), ("19940101")),
-PARTITION p1994 VALUES [("19940101"), ("19950101")),
-PARTITION p1995 VALUES [("19950101"), ("19960101")),
-PARTITION p1996 VALUES [("19960101"), ("19970101")),
-PARTITION p1997 VALUES [("19970101"), ("19980101")),
-PARTITION p1998 VALUES [("19980101"), ("19990101")))
+(PARTITION p1 VALUES [("-2147483648"), ("19930101")),
+PARTITION p2 VALUES [("19930101"), ("19940101")),
+PARTITION p3 VALUES [("19940101"), ("19950101")),
+PARTITION p4 VALUES [("19950101"), ("19960101")),
+PARTITION p5 VALUES [("19960101"), ("19970101")),
+PARTITION p6 VALUES [("19970101"), ("19980101")),
+PARTITION p7 VALUES [("19980101"), ("19990101")))
DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 48
PROPERTIES (
-"replication_num" = "1"
+"replication_num" = "1",
+"colocate_with" = "groupa1",
+"in_memory" = "false",
+"storage_format" = "DEFAULT"
);
-CREATE TABLE `customer` (
+CREATE TABLE IF NOT EXISTS `customer` (
`c_custkey` int(11) NOT NULL COMMENT "",
`c_name` varchar(26) NOT NULL COMMENT "",
`c_address` varchar(41) NOT NULL COMMENT "",
@@ -56,13 +61,18 @@ CREATE TABLE `customer` (
`c_region` varchar(13) NOT NULL COMMENT "",
`c_phone` varchar(16) NOT NULL COMMENT "",
`c_mktsegment` varchar(11) NOT NULL COMMENT ""
-)
-DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 10
+) ENGINE=OLAP
+DUPLICATE KEY(`c_custkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 12
PROPERTIES (
-"replication_num" = "1"
+"replication_num" = "1",
+"colocate_with" = "groupa2",
+"in_memory" = "false",
+"storage_format" = "DEFAULT"
);
-CREATE TABLE `date` (
+CREATE TABLE IF NOT EXISTS `dates` (
`d_datekey` int(11) NOT NULL COMMENT "",
`d_date` varchar(20) NOT NULL COMMENT "",
`d_dayofweek` varchar(10) NOT NULL COMMENT "",
@@ -80,13 +90,37 @@ CREATE TABLE `date` (
`d_lastdayinmonthfl` int(11) NOT NULL COMMENT "",
`d_holidayfl` int(11) NOT NULL COMMENT "",
`d_weekdayfl` int(11) NOT NULL COMMENT ""
-)
+) ENGINE=OLAP
+DUPLICATE KEY(`d_datekey`)
+COMMENT "OLAP"
DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1
PROPERTIES (
-"replication_num" = "1"
+"replication_num" = "1",
+"in_memory" = "false",
+"colocate_with" = "groupa3",
+"storage_format" = "DEFAULT"
);
-CREATE TABLE `part` (
+ CREATE TABLE IF NOT EXISTS `supplier` (
+ `s_suppkey` int(11) NOT NULL COMMENT "",
+ `s_name` varchar(26) NOT NULL COMMENT "",
+ `s_address` varchar(26) NOT NULL COMMENT "",
+ `s_city` varchar(11) NOT NULL COMMENT "",
+ `s_nation` varchar(16) NOT NULL COMMENT "",
+ `s_region` varchar(13) NOT NULL COMMENT "",
+ `s_phone` varchar(16) NOT NULL COMMENT ""
+) ENGINE=OLAP
+DUPLICATE KEY(`s_suppkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12
+PROPERTIES (
+"replication_num" = "1",
+"colocate_with" = "groupa4",
+"in_memory" = "false",
+"storage_format" = "DEFAULT"
+);
+
+CREATE TABLE IF NOT EXISTS `part` (
`p_partkey` int(11) NOT NULL COMMENT "",
`p_name` varchar(23) NOT NULL COMMENT "",
`p_mfgr` varchar(7) NOT NULL COMMENT "",
@@ -96,22 +130,13 @@ CREATE TABLE `part` (
`p_type` varchar(26) NOT NULL COMMENT "",
`p_size` int(11) NOT NULL COMMENT "",
`p_container` varchar(11) NOT NULL COMMENT ""
-)
-DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 10
-PROPERTIES (
-"replication_num" = "1"
-);
-
-CREATE TABLE `supplier` (
- `s_suppkey` int(11) NOT NULL COMMENT "",
- `s_name` varchar(26) NOT NULL COMMENT "",
- `s_address` varchar(26) NOT NULL COMMENT "",
- `s_city` varchar(11) NOT NULL COMMENT "",
- `s_nation` varchar(16) NOT NULL COMMENT "",
- `s_region` varchar(13) NOT NULL COMMENT "",
- `s_phone` varchar(16) NOT NULL COMMENT ""
-)
-DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 10
+) ENGINE=OLAP
+DUPLICATE KEY(`p_partkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 12
PROPERTIES (
-"replication_num" = "1"
+"replication_num" = "1",
+"colocate_with" = "groupa5",
+"in_memory" = "false",
+"storage_format" = "DEFAULT"
);
diff --git a/tools/ssb-tools/load-dimension-data.sh
b/tools/ssb-tools/load-dimension-data.sh
deleted file mode 100755
index f24ad3af59..0000000000
--- a/tools/ssb-tools/load-dimension-data.sh
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-##############################################################
-# This script is used to load generated ssb data set to Doris
-# Only for 4 dimension tables: customer, part, supplier and date.
-# Usage:
-# sh load-dimension-data.sh
-##############################################################
-
-set -eo pipefail
-
-ROOT=`dirname "$0"`
-ROOT=`cd "$ROOT"; pwd`
-
-CURDIR=${ROOT}
-SSB_DATA_DIR=$CURDIR/ssb-data/
-
-# check if ssb-data exists
-if [[ ! -d $SSB_DATA_DIR/ ]]; then
- echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
- exit 1
-fi
-
-check_prerequest() {
- local CMD=$1
- local NAME=$2
- if ! $CMD; then
- echo "$NAME is missing. This script depends on cURL to load data to
Doris."
- exit 1
- fi
-}
-
-check_prerequest "curl --version" "curl"
-
-# load 4 small dimension tables
-
-source $CURDIR/doris-cluster.conf
-
-echo "FE_HOST: $FE_HOST"
-echo "FE_HTTP_PORT: $FE_HTTP_PORT"
-echo "USER: $USER"
-echo "PASSWORD: $PASSWORD"
-echo "DB: $DB"
-
-echo 'Loading data for table: part'
-curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H
"columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy"
-T $SSB_DATA_DIR/part.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load
-echo 'Loading data for table: date'
-curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H
"columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy"
-T $SSB_DATA_DIR/date.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/date/_stream_load
-echo 'Loading data for table: supplier'
-curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H
"columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy"
-T $SSB_DATA_DIR/supplier.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load
-echo 'Loading data for table: customer'
-curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H
"columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use"
-T $SSB_DATA_DIR/customer.tbl
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load
diff --git a/tools/ssb-tools/load-fact-data.sh
b/tools/ssb-tools/load-fact-data.sh
deleted file mode 100755
index 24bc7f3bf4..0000000000
--- a/tools/ssb-tools/load-fact-data.sh
+++ /dev/null
@@ -1,126 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-##############################################################
-# This script is used to load generated ssb data set to Doris
-# Only for 1 fact table: lineorder
-##############################################################
-
-set -eo pipefail
-
-ROOT=`dirname "$0"`
-ROOT=`cd "$ROOT"; pwd`
-
-CURDIR=${ROOT}
-SSB_DATA_DIR=$CURDIR/ssb-data/
-
-usage() {
- echo "
-Usage: $0 <options>
- Optional options:
- -c parallelism to load data of lineorder table, default is 5.
-
- Eg.
- $0 load data using default value.
- $0 -c 10 load lineorder table data using parallelism 10.
- "
- exit 1
-}
-
-OPTS=$(getopt \
- -n $0 \
- -o '' \
- -o 'c:' \
- -- "$@")
-
-eval set -- "$OPTS"
-
-PARALLEL=3
-HELP=0
-
-if [ $# == 0 ] ; then
- usage
-fi
-
-while true; do
- case "$1" in
- -h) HELP=1 ; shift ;;
- -c) PARALLEL=$2 ; shift 2 ;;
- --) shift ; break ;;
- *) echo "Internal error" ; exit 1 ;;
- esac
-done
-
-if [[ ${HELP} -eq 1 ]]; then
- usage
- exit
-fi
-
-echo "Parallelism: $PARALLEL"
-
-# check if ssb-data exists
-if [[ ! -d $SSB_DATA_DIR/ ]]; then
- echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
- exit 1
-fi
-
-check_prerequest() {
- local CMD=$1
- local NAME=$2
- if ! $CMD; then
- echo "$NAME is missing. This script depends on cURL to load data to
Doris."
- exit 1
- fi
-}
-
-check_prerequest "curl --version" "curl"
-
-# load lineorder
-source $CURDIR/doris-cluster.conf
-
-echo "FE_HOST: $FE_HOST"
-echo "FE_HTTP_PORT: $FE_HTTP_PORT"
-echo "USER: $USER"
-echo "PASSWORD: $PASSWORD"
-echo "DB: $DB"
-
-function load()
-{
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" -H
"columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy"
-T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineorder/_stream_load
-}
-
-
-# set parallelism
-[ -e /tmp/fd1 ] || mkfifo /tmp/fd1
-exec 3<>/tmp/fd1
-rm -rf /tmp/fd1
-
-for ((i=1;i<=$PARALLEL;i++))
-do
- echo >&3
-done
-
-for file in `ls $SSB_DATA_DIR/lineorder.tbl.*`
-do
- read -u3
- {
- load $file
- echo >&3
- }&
-done
diff --git a/tools/ssb-tools/queries/q1.3.sql
b/tools/ssb-tools/ssb-flat-queries/q1.1.sql
similarity index 78%
copy from tools/ssb-tools/queries/q1.3.sql
copy to tools/ssb-tools/ssb-flat-queries/q1.1.sql
index ed6e51b1cf..fbc2c6e4a1 100644
--- a/tools/ssb-tools/queries/q1.3.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q1.1.sql
@@ -14,12 +14,11 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_weeknuminyear= 6
-AND d_year = 1994
-AND lo_discount BETWEEN 5 AND 7
-AND lo_quantity BETWEEN 26 AND 35;
+--Q1.1
+SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
+FROM lineorder_flat
+WHERE
+ LO_ORDERDATE >= 19930101
+ AND LO_ORDERDATE <= 19931231
+ AND LO_DISCOUNT BETWEEN 1 AND 3
+ AND LO_QUANTITY < 25;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.2.sql
b/tools/ssb-tools/ssb-flat-queries/q1.2.sql
similarity index 78%
copy from tools/ssb-tools/queries/q1.2.sql
copy to tools/ssb-tools/ssb-flat-queries/q1.2.sql
index 1b8442bd93..3a899c9344 100644
--- a/tools/ssb-tools/queries/q1.2.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q1.2.sql
@@ -14,11 +14,11 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_yearmonth = 'Jan1994'
-AND lo_discount BETWEEN 4 AND 6
-AND lo_quantity BETWEEN 26 AND 35;
+--Q1.2
+SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
+FROM lineorder_flat
+WHERE
+ LO_ORDERDATE >= 19940101
+ AND LO_ORDERDATE <= 19940131
+ AND LO_DISCOUNT BETWEEN 4 AND 6
+ AND LO_QUANTITY BETWEEN 26 AND 35;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.3.sql
b/tools/ssb-tools/ssb-flat-queries/q1.3.sql
similarity index 75%
copy from tools/ssb-tools/queries/q1.3.sql
copy to tools/ssb-tools/ssb-flat-queries/q1.3.sql
index ed6e51b1cf..5aaeff83a7 100644
--- a/tools/ssb-tools/queries/q1.3.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q1.3.sql
@@ -14,12 +14,12 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_weeknuminyear= 6
-AND d_year = 1994
-AND lo_discount BETWEEN 5 AND 7
-AND lo_quantity BETWEEN 26 AND 35;
+--Q1.3
+SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
+FROM lineorder_flat
+WHERE
+ weekofyear(LO_ORDERDATE) = 6
+ AND LO_ORDERDATE >= 19940101
+ AND LO_ORDERDATE <= 19941231
+ AND LO_DISCOUNT BETWEEN 5 AND 7
+ AND LO_QUANTITY BETWEEN 26 AND 35;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.2.sql
b/tools/ssb-tools/ssb-flat-queries/q2.1.sql
similarity index 79%
copy from tools/ssb-tools/queries/q1.2.sql
copy to tools/ssb-tools/ssb-flat-queries/q2.1.sql
index 1b8442bd93..254ea6481a 100644
--- a/tools/ssb-tools/queries/q1.2.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q2.1.sql
@@ -14,11 +14,11 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_yearmonth = 'Jan1994'
-AND lo_discount BETWEEN 4 AND 6
-AND lo_quantity BETWEEN 26 AND 35;
+--Q2.1
+SELECT
+ SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR,
+ P_BRAND
+FROM lineorder_flat
+WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
+GROUP BY YEAR, P_BRAND
+ORDER BY YEAR, P_BRAND;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q2.2.sql
similarity index 77%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q2.2.sql
index 4ef15e93ea..6a636f3a9e 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q2.2.sql
@@ -14,11 +14,14 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q2.2
+SELECT
+ SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR,
+ P_BRAND
+FROM lineorder_flat
+WHERE
+ P_BRAND >= 'MFGR#2221'
+ AND P_BRAND <= 'MFGR#2228'
+ AND S_REGION = 'ASIA'
+GROUP BY YEAR, P_BRAND
+ORDER BY YEAR, P_BRAND;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q2.3.sql
similarity index 79%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q2.3.sql
index 4ef15e93ea..a2ef0c6df3 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q2.3.sql
@@ -14,11 +14,13 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q2.3
+SELECT
+ SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR,
+ P_BRAND
+FROM lineorder_flat
+WHERE
+ P_BRAND = 'MFGR#2239'
+ AND S_REGION = 'EUROPE'
+GROUP BY YEAR, P_BRAND
+ORDER BY YEAR, P_BRAND;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q3.1.sql
similarity index 71%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q3.1.sql
index 4ef15e93ea..8df98222c4 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q3.1.sql
@@ -14,11 +14,16 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q3.1
+SELECT
+ C_NATION,
+ S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR,
+ SUM(LO_REVENUE) AS revenue
+FROM lineorder_flat
+WHERE
+ C_REGION = 'ASIA'
+ AND S_REGION = 'ASIA'
+ AND LO_ORDERDATE >= 19920101
+ AND LO_ORDERDATE <= 19971231
+GROUP BY C_NATION, S_NATION, YEAR
+ORDER BY YEAR ASC, revenue DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q3.2.sql
similarity index 71%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q3.2.sql
index 4ef15e93ea..c588b5bbce 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q3.2.sql
@@ -14,11 +14,16 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q3.2
+SELECT
+ C_CITY,
+ S_CITY, (LO_ORDERDATE DIV 10000) AS YEAR,
+ SUM(LO_REVENUE) AS revenue
+FROM lineorder_flat
+WHERE
+ C_NATION = 'UNITED STATES'
+ AND S_NATION = 'UNITED STATES'
+ AND LO_ORDERDATE >= 19920101
+ AND LO_ORDERDATE <= 19971231
+GROUP BY C_CITY, S_CITY, YEAR
+ORDER BY YEAR ASC, revenue DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q3.3.sql
similarity index 69%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q3.3.sql
index 4ef15e93ea..9a099d1732 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q3.3.sql
@@ -14,11 +14,16 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q3.3
+SELECT
+ C_CITY,
+ S_CITY, (LO_ORDERDATE DIV 10000) AS YEAR,
+ SUM(LO_REVENUE) AS revenue
+FROM lineorder_flat
+WHERE
+ C_CITY IN ('UNITED KI1', 'UNITED KI5')
+ AND S_CITY IN ('UNITED KI1', 'UNITED KI5')
+ AND LO_ORDERDATE >= 19920101
+ AND LO_ORDERDATE <= 19971231
+GROUP BY C_CITY, S_CITY, YEAR
+ORDER BY YEAR ASC, revenue DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q3.4.sql
similarity index 69%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q3.4.sql
index 4ef15e93ea..6bd71b5891 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q3.4.sql
@@ -14,11 +14,16 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q3.4
+SELECT
+ C_CITY,
+ S_CITY, (LO_ORDERDATE DIV 10000) AS YEAR,
+ SUM(LO_REVENUE) AS revenue
+FROM lineorder_flat
+WHERE
+ C_CITY IN ('UNITED KI1', 'UNITED KI5')
+ AND S_CITY IN ('UNITED KI1', 'UNITED KI5')
+ AND LO_ORDERDATE >= 19971201
+ AND LO_ORDERDATE <= 19971231
+GROUP BY C_CITY, S_CITY, YEAR
+ORDER BY YEAR ASC, revenue DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q4.1.sql
similarity index 73%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q4.1.sql
index 4ef15e93ea..aedd0e047e 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q4.1.sql
@@ -14,11 +14,14 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q4.1
+SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
+ C_NATION,
+ SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit
+FROM lineorder_flat
+WHERE
+ C_REGION = 'AMERICA'
+ AND S_REGION = 'AMERICA'
+ AND P_MFGR IN ('MFGR#1', 'MFGR#2')
+GROUP BY YEAR, C_NATION
+ORDER BY YEAR ASC, C_NATION ASC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-flat-queries/q4.2.sql
similarity index 66%
copy from tools/ssb-tools/queries/q1.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q4.2.sql
index 4ef15e93ea..b9891ee408 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q4.2.sql
@@ -14,11 +14,20 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+--Q4.2
+SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
+ S_NATION,
+ P_CATEGORY,
+ SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit
+FROM lineorder_flat
+WHERE
+ C_REGION = 'AMERICA'
+ AND S_REGION = 'AMERICA'
+ AND LO_ORDERDATE >= 19970101
+ AND LO_ORDERDATE <= 19981231
+ AND P_MFGR IN ('MFGR#1', 'MFGR#2')
+GROUP BY YEAR, S_NATION, P_CATEGORY
+ORDER BY
+ YEAR ASC,
+ S_NATION ASC,
+ P_CATEGORY ASC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q2.1.sql
b/tools/ssb-tools/ssb-flat-queries/q4.3.sql
similarity index 69%
copy from tools/ssb-tools/queries/q2.1.sql
copy to tools/ssb-tools/ssb-flat-queries/q4.3.sql
index e1a1f52d18..6871023137 100644
--- a/tools/ssb-tools/queries/q2.1.sql
+++ b/tools/ssb-tools/ssb-flat-queries/q4.3.sql
@@ -14,13 +14,16 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_revenue), d_year, p_brand
-FROM lineorder, date, part, supplier
-WHERE lo_orderdate = d_datekey
-AND lo_partkey = p_partkey
-AND lo_suppkey = s_suppkey
-AND p_category = 'MFGR#12'
-AND s_region = 'AMERICA'
-GROUP BY d_year, p_brand
-ORDER BY d_year, p_brand;
+--Q4.3
+SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
+ S_CITY,
+ P_BRAND,
+ SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit
+FROM lineorder_flat
+WHERE
+ S_NATION = 'UNITED STATES'
+ AND LO_ORDERDATE >= 19970101
+ AND LO_ORDERDATE <= 19981231
+ AND P_CATEGORY = 'MFGR#14'
+GROUP BY YEAR, S_CITY, P_BRAND
+ORDER BY YEAR ASC, S_CITY ASC, P_BRAND ASC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.1.sql
b/tools/ssb-tools/ssb-queries/q1.1.sql
similarity index 80%
rename from tools/ssb-tools/queries/q1.1.sql
rename to tools/ssb-tools/ssb-queries/q1.1.sql
index 4ef15e93ea..d8a2840ca7 100644
--- a/tools/ssb-tools/queries/q1.1.sql
+++ b/tools/ssb-tools/ssb-queries/q1.1.sql
@@ -14,11 +14,10 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_year = 1993
-AND lo_discount BETWEEN 1 AND 3
-AND lo_quantity < 25;
+SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+ lo_orderdate = d_datekey
+ AND d_year = 1993
+ AND lo_discount BETWEEN 1 AND 3
+ AND lo_quantity < 25;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.2.sql
b/tools/ssb-tools/ssb-queries/q1.2.sql
similarity index 78%
rename from tools/ssb-tools/queries/q1.2.sql
rename to tools/ssb-tools/ssb-queries/q1.2.sql
index 1b8442bd93..db6eb0c613 100644
--- a/tools/ssb-tools/queries/q1.2.sql
+++ b/tools/ssb-tools/ssb-queries/q1.2.sql
@@ -14,11 +14,10 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_yearmonth = 'Jan1994'
-AND lo_discount BETWEEN 4 AND 6
-AND lo_quantity BETWEEN 26 AND 35;
+SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+ lo_orderdate = d_datekey
+ AND d_yearmonth = 'Jan1994'
+ AND lo_discount BETWEEN 4 AND 6
+ AND lo_quantity BETWEEN 26 AND 35;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q1.3.sql
b/tools/ssb-tools/ssb-queries/q1.3.sql
similarity index 77%
rename from tools/ssb-tools/queries/q1.3.sql
rename to tools/ssb-tools/ssb-queries/q1.3.sql
index ed6e51b1cf..dbb91b0c46 100644
--- a/tools/ssb-tools/queries/q1.3.sql
+++ b/tools/ssb-tools/ssb-queries/q1.3.sql
@@ -14,12 +14,12 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT SUM(lo_extendedprice*lo_discount) AS
-REVENUE
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-AND d_weeknuminyear= 6
-AND d_year = 1994
-AND lo_discount BETWEEN 5 AND 7
-AND lo_quantity BETWEEN 26 AND 35;
+SELECT
+ SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+ lo_orderdate = d_datekey
+ AND d_weeknuminyear = 6
+ AND d_year = 1994
+ AND lo_discount BETWEEN 5 AND 7
+ AND lo_quantity BETWEEN 26 AND 35;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q2.1.sql
b/tools/ssb-tools/ssb-queries/q2.1.sql
similarity index 80%
rename from tools/ssb-tools/queries/q2.1.sql
rename to tools/ssb-tools/ssb-queries/q2.1.sql
index e1a1f52d18..70a8de9d42 100644
--- a/tools/ssb-tools/queries/q2.1.sql
+++ b/tools/ssb-tools/ssb-queries/q2.1.sql
@@ -14,13 +14,13 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
SELECT SUM(lo_revenue), d_year, p_brand
-FROM lineorder, date, part, supplier
-WHERE lo_orderdate = d_datekey
-AND lo_partkey = p_partkey
-AND lo_suppkey = s_suppkey
-AND p_category = 'MFGR#12'
-AND s_region = 'AMERICA'
+FROM lineorder, dates, part, supplier
+WHERE
+ lo_orderdate = d_datekey
+ AND lo_partkey = p_partkey
+ AND lo_suppkey = s_suppkey
+ AND p_category = 'MFGR#12'
+ AND s_region = 'AMERICA'
GROUP BY d_year, p_brand
-ORDER BY d_year, p_brand;
+ORDER BY p_brand;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q2.2.sql
b/tools/ssb-tools/ssb-queries/q2.2.sql
similarity index 78%
rename from tools/ssb-tools/queries/q2.2.sql
rename to tools/ssb-tools/ssb-queries/q2.2.sql
index 3db6170119..e283dbdb05 100644
--- a/tools/ssb-tools/queries/q2.2.sql
+++ b/tools/ssb-tools/ssb-queries/q2.2.sql
@@ -14,14 +14,13 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
SELECT SUM(lo_revenue), d_year, p_brand
-FROM lineorder, date, part, supplier
-WHERE lo_orderdate = d_datekey
-AND lo_partkey = p_partkey
-AND lo_suppkey = s_suppkey
-AND p_brand BETWEEN 'MFGR#2221'
-AND 'MFGR#2228'
-AND s_region = 'ASIA'
+FROM lineorder, dates, part, supplier
+WHERE
+ lo_orderdate = d_datekey
+ AND lo_partkey = p_partkey
+ AND lo_suppkey = s_suppkey
+ AND p_brand BETWEEN 'MFGR#2221' AND 'MFGR#2228'
+ AND s_region = 'ASIA'
GROUP BY d_year, p_brand
-ORDER BY d_year, p_brand;
+ORDER BY d_year, p_brand;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q2.3.sql
b/tools/ssb-tools/ssb-queries/q2.3.sql
similarity index 79%
rename from tools/ssb-tools/queries/q2.3.sql
rename to tools/ssb-tools/ssb-queries/q2.3.sql
index b70ca90666..22d2419621 100644
--- a/tools/ssb-tools/queries/q2.3.sql
+++ b/tools/ssb-tools/ssb-queries/q2.3.sql
@@ -14,13 +14,13 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
SELECT SUM(lo_revenue), d_year, p_brand
-FROM lineorder, date, part, supplier
-WHERE lo_orderdate = d_datekey
-AND lo_partkey = p_partkey
-AND lo_suppkey = s_suppkey
-AND p_brand = 'MFGR#2239'
-AND s_region = 'EUROPE'
+FROM lineorder, dates, part, supplier
+WHERE
+ lo_orderdate = d_datekey
+ AND lo_partkey = p_partkey
+ AND lo_suppkey = s_suppkey
+ AND p_brand = 'MFGR#2239'
+ AND s_region = 'EUROPE'
GROUP BY d_year, p_brand
-ORDER BY d_year, p_brand;
+ORDER BY d_year, p_brand;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q3.1.sql
b/tools/ssb-tools/ssb-queries/q3.1.sql
similarity index 70%
rename from tools/ssb-tools/queries/q3.1.sql
rename to tools/ssb-tools/ssb-queries/q3.1.sql
index 70f17d789b..d674337995 100644
--- a/tools/ssb-tools/queries/q3.1.sql
+++ b/tools/ssb-tools/ssb-queries/q3.1.sql
@@ -14,15 +14,19 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT c_nation, s_nation, d_year,
-SUM(lo_revenue) AS REVENUE
-FROM customer, lineorder, supplier, date
-WHERE lo_custkey = c_custkey
-AND lo_suppkey = s_suppkey
-AND lo_orderdate = d_datekey
-AND c_region = 'ASIA'
-AND s_region = 'ASIA'
-AND d_year >= 1992 AND d_year <= 1997
+SELECT
+ c_nation,
+ s_nation,
+ d_year,
+ SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+ lo_custkey = c_custkey
+ AND lo_suppkey = s_suppkey
+ AND lo_orderdate = d_datekey
+ AND c_region = 'ASIA'
+ AND s_region = 'ASIA'
+ AND d_year >= 1992
+ AND d_year <= 1997
GROUP BY c_nation, s_nation, d_year
-ORDER BY d_year ASC, REVENUE DESC;
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q3.2.sql
b/tools/ssb-tools/ssb-queries/q3.2.sql
similarity index 69%
rename from tools/ssb-tools/queries/q3.2.sql
rename to tools/ssb-tools/ssb-queries/q3.2.sql
index a416fbea8b..2969efb1a2 100644
--- a/tools/ssb-tools/queries/q3.2.sql
+++ b/tools/ssb-tools/ssb-queries/q3.2.sql
@@ -14,15 +14,19 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT c_city, s_city, d_year, sum(lo_revenue)
-AS REVENUE
-FROM customer, lineorder, supplier, date
-WHERE lo_custkey = c_custkey
-AND lo_suppkey = s_suppkey
-AND lo_orderdate = d_datekey
-AND c_nation = 'UNITED STATES'
-AND s_nation = 'UNITED STATES'
-AND d_year >= 1992 AND d_year <= 1997
+SELECT
+ c_city,
+ s_city,
+ d_year,
+ SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+ lo_custkey = c_custkey
+ AND lo_suppkey = s_suppkey
+ AND lo_orderdate = d_datekey
+ AND c_nation = 'UNITED STATES'
+ AND s_nation = 'UNITED STATES'
+ AND d_year >= 1992
+ AND d_year <= 1997
GROUP BY c_city, s_city, d_year
-ORDER BY d_year ASC, REVENUE DESC;
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q3.3.sql
b/tools/ssb-tools/ssb-queries/q3.3.sql
similarity index 64%
rename from tools/ssb-tools/queries/q3.3.sql
rename to tools/ssb-tools/ssb-queries/q3.3.sql
index 98e29b72e7..ac1cb324d0 100644
--- a/tools/ssb-tools/queries/q3.3.sql
+++ b/tools/ssb-tools/ssb-queries/q3.3.sql
@@ -14,17 +14,25 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT c_city, s_city, d_year, SUM(lo_revenue)
-AS REVENUE
-FROM customer, lineorder, supplier, date
-WHERE lo_custkey = c_custkey
-AND lo_suppkey = s_suppkey
-AND lo_orderdate = d_datekey
-AND (c_city='UNITED KI1'
-OR c_city='UNITED KI5')
-AND (s_city='UNITED KI1'
-OR s_city='UNITED KI5')
-AND d_year >= 1992 AND d_year <= 1997
+SELECT
+ c_city,
+ s_city,
+ d_year,
+ SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+ lo_custkey = c_custkey
+ AND lo_suppkey = s_suppkey
+ AND lo_orderdate = d_datekey
+ AND (
+ c_city = 'UNITED KI1'
+ OR c_city = 'UNITED KI5'
+ )
+ AND (
+ s_city = 'UNITED KI1'
+ OR s_city = 'UNITED KI5'
+ )
+ AND d_year >= 1992
+ AND d_year <= 1997
GROUP BY c_city, s_city, d_year
-ORDER BY d_year ASC, REVENUE DESC;
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q3.4.sql
b/tools/ssb-tools/ssb-queries/q3.4.sql
similarity index 65%
rename from tools/ssb-tools/queries/q3.4.sql
rename to tools/ssb-tools/ssb-queries/q3.4.sql
index 65fe992ca4..2be6a5cd70 100644
--- a/tools/ssb-tools/queries/q3.4.sql
+++ b/tools/ssb-tools/ssb-queries/q3.4.sql
@@ -14,17 +14,24 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT c_city, s_city, d_year, SUM(lo_revenue)
-AS REVENUE
-FROM customer, lineorder, supplier, date
-WHERE lo_custkey = c_custkey
-AND lo_suppkey = s_suppkey
-AND lo_orderdate = d_datekey
-AND (c_city='UNITED KI1'
-OR c_city='UNITED KI5')
-AND (s_city='UNITED KI1'
-OR s_city='UNITED KI5')
-AND d_yearmonth = 'Dec1997'
+SELECT
+ c_city,
+ s_city,
+ d_year,
+ SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+ lo_custkey = c_custkey
+ AND lo_suppkey = s_suppkey
+ AND lo_orderdate = d_datekey
+ AND (
+ c_city = 'UNITED KI1'
+ OR c_city = 'UNITED KI5'
+ )
+ AND (
+ s_city = 'UNITED KI1'
+ OR s_city = 'UNITED KI5'
+ )
+ AND d_yearmonth = 'Dec1997'
GROUP BY c_city, s_city, d_year
-ORDER BY d_year ASC, REVENUE DESC;
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q4.1.sql
b/tools/ssb-tools/ssb-queries/q4.1.sql
similarity index 66%
rename from tools/ssb-tools/queries/q4.1.sql
rename to tools/ssb-tools/ssb-queries/q4.1.sql
index bdcd730bf9..f0cfcdd403 100644
--- a/tools/ssb-tools/queries/q4.1.sql
+++ b/tools/ssb-tools/ssb-queries/q4.1.sql
@@ -14,17 +14,21 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT d_year, c_nation,
-SUM(lo_revenue - lo_supplycost) AS PROFIT
-FROM date, customer, supplier, part, lineorder
-WHERE lo_custkey = c_custkey
-AND lo_suppkey = s_suppkey
-AND lo_partkey = p_partkey
-AND lo_orderdate = d_datekey
-AND c_region = 'AMERICA'
-AND s_region = 'AMERICA'
-AND (p_mfgr = 'MFGR#1'
-OR p_mfgr = 'MFGR#2')
+SELECT
+ d_year,
+ c_nation,
+ SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+ lo_custkey = c_custkey
+ AND lo_suppkey = s_suppkey
+ AND lo_partkey = p_partkey
+ AND lo_orderdate = d_datekey
+ AND c_region = 'AMERICA'
+ AND s_region = 'AMERICA'
+ AND (
+ p_mfgr = 'MFGR#1'
+ OR p_mfgr = 'MFGR#2'
+ )
GROUP BY d_year, c_nation
-ORDER BY d_year, c_nation;
+ORDER BY d_year, c_nation;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q4.2.sql
b/tools/ssb-tools/ssb-queries/q4.2.sql
similarity index 62%
rename from tools/ssb-tools/queries/q4.2.sql
rename to tools/ssb-tools/ssb-queries/q4.2.sql
index 24c82cf682..fbbaef00e8 100644
--- a/tools/ssb-tools/queries/q4.2.sql
+++ b/tools/ssb-tools/ssb-queries/q4.2.sql
@@ -14,18 +14,26 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT d_year, s_nation, p_category,
-SUM(lo_revenue - lo_supplycost) AS PROFIT
-FROM date, customer, supplier, part, lineorder
-WHERE lo_custkey = c_custkey
-AND lo_suppkey = s_suppkey
-AND lo_partkey = p_partkey
-AND lo_orderdate = d_datekey
-AND c_region = 'AMERICA'
-AND s_region = 'AMERICA'
-AND (d_year = 1997 OR d_year = 1998)
-AND (p_mfgr = 'MFGR#1'
-OR p_mfgr = 'MFGR#2')
+SELECT
+ d_year,
+ s_nation,
+ p_category,
+ SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+ lo_custkey = c_custkey
+ AND lo_suppkey = s_suppkey
+ AND lo_partkey = p_partkey
+ AND lo_orderdate = d_datekey
+ AND c_region = 'AMERICA'
+ AND s_region = 'AMERICA'
+ AND (
+ d_year = 1997
+ OR d_year = 1998
+ )
+ AND (
+ p_mfgr = 'MFGR#1'
+ OR p_mfgr = 'MFGR#2'
+ )
GROUP BY d_year, s_nation, p_category
-ORDER BY d_year, s_nation, p_category;
+ORDER BY d_year, s_nation, p_category;
\ No newline at end of file
diff --git a/tools/ssb-tools/queries/q4.3.sql
b/tools/ssb-tools/ssb-queries/q4.3.sql
similarity index 66%
rename from tools/ssb-tools/queries/q4.3.sql
rename to tools/ssb-tools/ssb-queries/q4.3.sql
index 0dcc08bd26..64582cc6ac 100644
--- a/tools/ssb-tools/queries/q4.3.sql
+++ b/tools/ssb-tools/ssb-queries/q4.3.sql
@@ -14,16 +14,22 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-
-SELECT d_year, s_city, p_brand,
-SUM(lo_revenue - lo_supplycost) AS PROFIT
-FROM date, customer, supplier, part, lineorder
-WHERE lo_custkey = c_custkey
-AND lo_suppkey = s_suppkey
-AND lo_partkey = p_partkey
-AND lo_orderdate = d_datekey
-AND s_nation = 'UNITED STATES'
-AND (d_year = 1997 OR d_year = 1998)
-AND p_category = 'MFGR#14'
+SELECT
+ d_year,
+ s_city,
+ p_brand,
+ SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+ lo_custkey = c_custkey
+ AND lo_suppkey = s_suppkey
+ AND lo_partkey = p_partkey
+ AND lo_orderdate = d_datekey
+ AND s_nation = 'UNITED STATES'
+ AND (
+ d_year = 1997
+ OR d_year = 1998
+ )
+ AND p_category = 'MFGR#14'
GROUP BY d_year, s_city, p_brand
-ORDER BY d_year, s_city, p_brand;
+ORDER BY d_year, s_city, p_brand;
\ No newline at end of file
diff --git a/tools/tpch-tools/README.md b/tools/tpch-tools/README.md
index f06e96289b..bd9e483eec 100644
--- a/tools/tpch-tools/README.md
+++ b/tools/tpch-tools/README.md
@@ -23,12 +23,25 @@ These scripts are used to make tpc-h test.
follow the steps below:
### 1. build tpc-h dbgen tool.
- ./build-tpch-dbgen.sh
+
+ ./bin/build-tpch-dbgen.sh
+
### 2. generate tpc-h data. use -h for more infomations.
- ./gen-tpch-data.sh -s 1
-### 3. create tpc-h tables. modify `doris-cluster.conf` to specify doris info,
then run script below.
- ./create-tpch-tables.sh
+
+ ./bin/gen-tpch-data.sh -s 1
+
+### 3. create tpc-h tables. modify `conf/doris-cluster.conf` to specify doris
info, then run script below.
+
+ ./bin/create-tpch-tables.sh
+
### 4. load tpc-h data. use -h for help.
- ./load-tpch-data.sh
+
+ ./bin/load-tpch-data.sh
+
### 5. run tpc-h queries.
- ./run-tpch-queries.sh
+
+ ./bin/run-tpch-queries.sh
+
+ NOTICE: At present, Doris's query optimizer and statistical information
functions are not complete, so we rewrite some queries in TPC-H to adapt to
Doris' execution framework, but it does not affect the correctness of the
results. The rewritten SQL is marked with "Modified" in the corresponding .sql
file.
+
+ A new query optimizer will be released in subsequent releases.
diff --git a/tools/tpch-tools/build-tpch-dbgen.sh
b/tools/tpch-tools/bin/build-tpch-dbgen.sh
similarity index 92%
rename from tools/tpch-tools/build-tpch-dbgen.sh
rename to tools/tpch-tools/bin/build-tpch-dbgen.sh
index 9c2c63702c..b0cce4b1da 100755
--- a/tools/tpch-tools/build-tpch-dbgen.sh
+++ b/tools/tpch-tools/bin/build-tpch-dbgen.sh
@@ -51,13 +51,12 @@ if [[ -d $TPCH_DBGEN_DIR ]]; then
echo "If you want to download TPC-H_Tools_v3.0.0 again, please delete this
dir first."
else
wget
"https://tools-chengdu.oss-cn-chengdu.aliyuncs.com/TPC-H_Tools_v3.0.0.zip"
-
- unzip TPC-H_Tools_v3.0.0.zip -d $CURDIR/
+ unzip TPC-H_Tools_v3.0.0.zip -d "$CURDIR"/
fi
# modify tpcd.h
-cd $TPCH_DBGEN_DIR/
-echo '
+cd "$TPCH_DBGEN_DIR"/
+printf '%s' '
#ifdef MYSQL
#define GEN_QUERY_PLAN ""
#define START_TRAN "START TRANSACTION"
@@ -66,7 +65,7 @@ echo '
#define SET_ROWCOUNT "limit %d;\n"
#define SET_DBASE "use %s;\n"
#endif
-' >> tpcd.h
+' >>tpcd.h
# modify makefile
cp makefile.suite makefile
@@ -76,12 +75,16 @@ sed -i 's/^MACHINE =/MACHINE = LINUX/g' makefile
sed -i 's/^WORKLOAD =/WORKLOAD = TPCH/g' makefile
# compile tpch-dbgen
-make > /dev/null
+make >/dev/null
cd -
# check
if [[ -f $TPCH_DBGEN_DIR/dbgen ]]; then
- echo "Build succeed! Run $TPCH_DBGEN_DIR/dbgen -h"
+ echo "
+################
+Build succeed!
+################
+Run $TPCH_DBGEN_DIR/dbgen -h"
exit 0
else
echo "Build failed!"
diff --git a/tools/tpch-tools/create-tpch-tables.sh
b/tools/tpch-tools/bin/create-tpch-tables.sh
similarity index 84%
rename from tools/tpch-tools/create-tpch-tables.sh
rename to tools/tpch-tools/bin/create-tpch-tables.sh
index 2263f47e1b..01ca7a1345 100755
--- a/tools/tpch-tools/create-tpch-tables.sh
+++ b/tools/tpch-tools/bin/create-tpch-tables.sh
@@ -40,7 +40,7 @@ Usage: $0
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-- "$@")
@@ -84,7 +84,9 @@ check_prerequest() {
check_prerequest "mysql --version" "mysql"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
@@ -92,7 +94,7 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
-mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS
$DB"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT
EXISTS $DB"
-echo $CURDIR/create-tpch-tables.sql
-mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB <$CURDIR/create-tpch-tables.sql
+echo "Run SQLs from $CURDIR/create-tpch-tables.sql"
+mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB"
<"$CURDIR"/../ddl/create-tpch-tables.sql
diff --git a/tools/tpch-tools/gen-tpch-data.sh
b/tools/tpch-tools/bin/gen-tpch-data.sh
similarity index 80%
rename from tools/tpch-tools/gen-tpch-data.sh
rename to tools/tpch-tools/bin/gen-tpch-data.sh
index 0e7359d601..4202e3c58d 100755
--- a/tools/tpch-tools/gen-tpch-data.sh
+++ b/tools/tpch-tools/bin/gen-tpch-data.sh
@@ -48,7 +48,7 @@ Usage: $0 <options>
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-o 'hs:c:' \
-- "$@")
@@ -107,40 +107,40 @@ if [[ -d $TPCH_DATA_DIR/ ]]; then
exit 1
fi
-mkdir $TPCH_DATA_DIR/
+mkdir "$TPCH_DATA_DIR"/
# gen data
-cd $TPCH_DBGEN_DIR
+cd "$TPCH_DBGEN_DIR"
echo "Begin to generate data for table: region"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T r
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T r
echo "Begin to generate data for table: nation"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T n
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T n
echo "Begin to generate data for table: supplier"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T s
echo "Begin to generate data for table: part"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T P
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T P
echo "Begin to generate data for table: customer"
-$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c
+"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T c
echo "Begin to generate data for table: partsupp"
-for i in $(seq 1 $PARALLEL); do
+for i in $(seq 1 "$PARALLEL"); do
{
- $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T S -C $PARALLEL -S ${i}
+ "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T S -C "$PARALLEL" -S "$i"
} &
done
wait
echo "Begin to generate data for table: orders"
-for i in $(seq 1 $PARALLEL); do
+for i in $(seq 1 "$PARALLEL"); do
{
- $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T O -C $PARALLEL -S ${i}
+ "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T O -C "$PARALLEL" -S "$i"
} &
done
wait
echo "Begin to generate data for table: lineitem"
-for i in $(seq 1 $PARALLEL); do
+for i in $(seq 1 "$PARALLEL"); do
{
- $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T L -C $PARALLEL -S ${i}
+ "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T L -C "$PARALLEL" -S "$i"
} &
done
wait
@@ -148,7 +148,7 @@ wait
cd -
# move data to $TPCH_DATA_DIR
-mv $TPCH_DBGEN_DIR/*.tbl* $TPCH_DATA_DIR/
+mv "$TPCH_DBGEN_DIR"/*.tbl* "$TPCH_DATA_DIR"/
# check data
-du -sh $TPCH_DATA_DIR/*.tbl*
+du -sh "$TPCH_DATA_DIR"/*.tbl*
diff --git a/tools/tpch-tools/load-tpch-data.sh
b/tools/tpch-tools/bin/load-tpch-data.sh
similarity index 69%
rename from tools/tpch-tools/load-tpch-data.sh
rename to tools/tpch-tools/bin/load-tpch-data.sh
index f04e21bb9c..7a250ef2f4 100755
--- a/tools/tpch-tools/load-tpch-data.sh
+++ b/tools/tpch-tools/bin/load-tpch-data.sh
@@ -46,9 +46,9 @@ Usage: $0 <options>
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
- -o 'c:' \
+ -o 'hc:' \
-- "$@")
eval set -- "$OPTS"
@@ -89,7 +89,7 @@ fi
echo "Parallelism: $PARALLEL"
# check if tpch-data exists
-if [[ ! -d $TPCH_DATA_DIR/ ]]; then
+if [[ ! -d "$TPCH_DATA_DIR"/ ]]; then
echo "$TPCH_DATA_DIR does not exist. Run sh gen-tpch-data.sh first."
exit 1
fi
@@ -106,7 +106,9 @@ check_prerequest() {
check_prerequest "curl --version" "curl"
# load tables
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_HTTP_PORT: $FE_HTTP_PORT"
@@ -115,61 +117,62 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
function load_region() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: r_regionkey, r_name, r_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/region/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/region/_stream_load
}
function load_nation() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/nation/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/nation/_stream_load
}
function load_supplier() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone,
s_acctbal, s_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load
+ -T "$*"
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load
}
function load_customer() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone,
c_acctbal, c_mktsegment, c_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load
+ -T "$*"
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load
}
function load_part() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load
}
function load_partsupp() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost,
ps_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/partsupp/_stream_load
+ -T "$*"
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/partsupp/_stream_load
}
function load_orders() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice,
o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/orders/_stream_load
+ -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/orders/_stream_load
}
function load_lineitem() {
- echo $@
- curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
+ echo "$*"
+ curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber,
l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus,
l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp"
\
- -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineitem/_stream_load
+ -T "$*"
http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineitem/_stream_load
}
# start load
-load_region $TPCH_DATA_DIR/region.tbl
-load_nation $TPCH_DATA_DIR/nation.tbl
-load_supplier $TPCH_DATA_DIR/supplier.tbl
-load_customer $TPCH_DATA_DIR/customer.tbl
-load_part $TPCH_DATA_DIR/part.tbl
-
+date
+load_region "$TPCH_DATA_DIR"/region.tbl
+load_nation "$TPCH_DATA_DIR"/nation.tbl
+load_supplier "$TPCH_DATA_DIR"/supplier.tbl
+load_customer "$TPCH_DATA_DIR"/customer.tbl
+load_part "$TPCH_DATA_DIR"/part.tbl
+date
# set parallelism
# 以PID为名, 防止创建命名管道时与已有文件重名,从而失败
@@ -182,20 +185,21 @@ exec 3<>${fifo}
rm -rf ${fifo}
# 在fd3中放置$PARALLEL个空行作为令牌
-for ((i = 1; i <= $PARALLEL; i++)); do
+for ((i = 1; i <= PARALLEL; i++)); do
echo >&3
done
-for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do
+date
+for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do
# 领取令牌, 即从fd3中读取行, 每次一行
# 对管道,读一行便少一行,每次只能读取一行
# 所有行读取完毕, 执行挂起, 直到管道再次有可读行
# 因此实现了进程数量控制
- read -u3
+ read -r -u3
# 要批量执行的命令放在大括号内, 后台运行
{
- load_lineitem $file
+ load_lineitem "$file"
echo "----loaded $file"
sleep 2
# 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
@@ -203,20 +207,22 @@ for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do
} &
done
-for file in $(ls $TPCH_DATA_DIR/orders.tbl*); do
- read -u3
+date
+for file in "$TPCH_DATA_DIR"/orders.tbl*; do
+ read -r -u3
{
- load_orders $file
+ load_orders "$file"
echo "----loaded $file"
sleep 2
echo >&3
} &
done
-for file in $(ls $TPCH_DATA_DIR/partsupp.tbl*); do
- read -u3
+date
+for file in "$TPCH_DATA_DIR"/partsupp.tbl*; do
+ read -r -u3
{
- load_partsupp $file
+ load_partsupp "$file"
echo "----loaded $file"
sleep 2
echo >&3
@@ -227,3 +233,6 @@ done
wait
# 删除文件标识符
exec 3>&-
+date
+
+echo "DONE."
\ No newline at end of file
diff --git a/tools/tpch-tools/run-tpch-queries.sh
b/tools/tpch-tools/bin/run-tpch-queries.sh
similarity index 73%
rename from tools/tpch-tools/run-tpch-queries.sh
rename to tools/tpch-tools/bin/run-tpch-queries.sh
index 5a0d06d512..14d00e17bb 100755
--- a/tools/tpch-tools/run-tpch-queries.sh
+++ b/tools/tpch-tools/bin/run-tpch-queries.sh
@@ -29,7 +29,7 @@ ROOT=$(
)
CURDIR=${ROOT}
-QUERIES_DIR=$CURDIR/queries
+QUERIES_DIR=$CURDIR/../queries
usage() {
echo "
@@ -41,7 +41,7 @@ Usage: $0
}
OPTS=$(getopt \
- -n $0 \
+ -n "$0" \
-o '' \
-- "$@")
@@ -85,33 +85,41 @@ check_prerequest() {
check_prerequest "mysql --version" "mysql"
-source $CURDIR/doris-cluster.conf
+# shellcheck source=/dev/null
+source "$CURDIR/../conf/doris-cluster.conf"
+export MYSQL_PWD=$PASSWORD
echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
+echo "Time Unit: ms"
pre_set() {
- echo $@
- mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@"
+ echo "$*"
+ mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$*"
}
-pre_set "set global enable_vectorized_engine=1;"
-pre_set "set global parallel_fragment_exec_instance_num=8;"
-pre_set "set global exec_mem_limit=48G;"
-pre_set "set global batch_size=4096;"
-# pre_set "show variables like 'batch_size';"
+echo '============================================'
+pre_set "show variables;"
+echo '============================================'
+pre_set "show table status;"
+echo '============================================'
+sum=0
for i in $(seq 1 22); do
total=0
- # Each query is executed three times and takes the average time
- for j in $(seq 1 3); do
+ run=3
+ # Each query is executed ${run} times and takes the average time
+ for j in $(seq 1 ${run}); do
start=$(date +%s%3N)
- mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql
>/dev/null
+ mysql -h"$FE_HOST" -u "$USER" -P"$FE_QUERY_PORT" -D"$DB" --comments
<"$QUERIES_DIR"/q"$i".sql >/dev/null
end=$(date +%s%3N)
total=$((total + end - start))
done
- echo "q$i: $((total / 3))ms"
+ cost=$((total / run))
+ echo "q$i: ${cost}"
+ sum=$((sum + cost))
done
+echo "Total cost: $sum"
diff --git a/tools/tpch-tools/doris-cluster.conf
b/tools/tpch-tools/conf/doris-cluster.conf
similarity index 95%
rename from tools/tpch-tools/doris-cluster.conf
rename to tools/tpch-tools/conf/doris-cluster.conf
index 5d76264104..9417bcb9e0 100644
--- a/tools/tpch-tools/doris-cluster.conf
+++ b/tools/tpch-tools/conf/doris-cluster.conf
@@ -16,7 +16,7 @@
# under the License.
# Any of FE host
-export FE_HOST='172.20.194.235'
+export FE_HOST='127.0.0.1'
# http_port in fe.conf
export FE_HTTP_PORT=8030
# query_port in fe.conf
@@ -26,4 +26,4 @@ export USER='root'
# Doris password
export PASSWORD=''
# The database where TPC-H tables located
-export DB='tpch1'
+export DB='tpch'
diff --git a/tools/tpch-tools/create-tpch-tables.sql
b/tools/tpch-tools/create-tpch-tables.sql
deleted file mode 100644
index f5acc7e842..0000000000
--- a/tools/tpch-tools/create-tpch-tables.sql
+++ /dev/null
@@ -1,100 +0,0 @@
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements. See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership. The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License. You may obtain a copy of the License at
---
--- http://www.apache.org/licenses/LICENSE-2.0
---
--- Unless required by applicable law or agreed to in writing,
--- software distributed under the License is distributed on an
--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
--- KIND, either express or implied. See the License for the
--- specific language governing permissions and limitations
--- under the License.
-CREATE TABLE `region` (
- `r_regionkey` integer NOT NULL,
- `r_name` char(25) NOT NULL,
- `r_comment` varchar(152)
-) DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 PROPERTIES ("replication_num" =
"1");
-
-CREATE TABLE `nation` (
- `n_nationkey` integer NOT NULL,
- `n_name` char(25) NOT NULL,
- `n_regionkey` integer NOT NULL,
- `n_comment` varchar(152)
-) DISTRIBUTED BY HASH(`n_nationkey`) BUCKETS 1 PROPERTIES ("replication_num" =
"1");
-
-CREATE TABLE `part` (
- `p_partkey` integer NOT NULL,
- `p_name` varchar(55) NOT NULL,
- `p_mfgr` char(25) NOT NULL,
- `p_brand` char(10) NOT NULL,
- `p_type` varchar(25) NOT NULL,
- `p_size` integer NOT NULL,
- `p_container` char(10) NOT NULL,
- `p_retailprice` decimal(12, 2) NOT NULL,
- `p_comment` varchar(23) NOT NULL
-) DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 32 PROPERTIES ("replication_num" =
"1");
-
-CREATE TABLE `supplier` (
- `s_suppkey` integer NOT NULL,
- `s_name` char(25) NOT NULL,
- `s_address` varchar(40) NOT NULL,
- `s_nationkey` integer NOT NULL,
- `s_phone` char(15) NOT NULL,
- `s_acctbal` decimal(12, 2) NOT NULL,
- `s_comment` varchar(101) NOT NULL
-) DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 32 PROPERTIES ("replication_num" =
"1");
-
-CREATE TABLE `customer` (
- `c_custkey` integer NOT NULL,
- `c_name` varchar(25) NOT NULL,
- `c_address` varchar(40) NOT NULL,
- `c_nationkey` integer NOT NULL,
- `c_phone` char(15) NOT NULL,
- `c_acctbal` decimal(12, 2) NOT NULL,
- `c_mktsegment` char(10) NOT NULL,
- `c_comment` varchar(117) NOT NULL
-) DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 32 PROPERTIES ("replication_num" =
"1");
-
-CREATE TABLE `partsupp` (
- `ps_partkey` integer NOT NULL,
- `ps_suppkey` integer NOT NULL,
- `ps_availqty` integer NOT NULL,
- `ps_supplycost` decimal(12, 2) NOT NULL,
- `ps_comment` varchar(199) NOT NULL
-) DISTRIBUTED BY HASH(`ps_partkey`, `ps_suppkey`) BUCKETS 32 PROPERTIES
("replication_num" = "1");
-
-CREATE TABLE `orders` (
- `o_orderkey` integer NOT NULL,
- `o_custkey` integer NOT NULL,
- `o_orderstatus` char(1) NOT NULL,
- `o_totalprice` decimal(12, 2) NOT NULL,
- `o_orderdate` date NOT NULL,
- `o_orderpriority` char(15) NOT NULL,
- `o_clerk` char(15) NOT NULL,
- `o_shippriority` integer NOT NULL,
- `o_comment` varchar(79) NOT NULL
-) DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 32 PROPERTIES ("replication_num" =
"1");
-
-CREATE TABLE `lineitem` (
- `l_orderkey` integer NOT NULL,
- `l_linenumber` integer NOT NULL,
- `l_partkey` integer NOT NULL,
- `l_suppkey` integer NOT NULL,
- `l_quantity` decimal(12, 2) NOT NULL,
- `l_extendedprice` decimal(12, 2) NOT NULL,
- `l_discount` decimal(12, 2) NOT NULL,
- `l_tax` decimal(12, 2) NOT NULL,
- `l_returnflag` char(1) NOT NULL,
- `l_linestatus` char(1) NOT NULL,
- `l_shipdate` date NOT NULL,
- `l_commitdate` date NOT NULL,
- `l_receiptdate` date NOT NULL,
- `l_shipinstruct` char(25) NOT NULL,
- `l_shipmode` char(10) NOT NULL,
- `l_comment` varchar(44) NOT NULL
-) DISTRIBUTED BY HASH(`l_orderkey`, `l_linenumber`) BUCKETS 48 PROPERTIES
("replication_num" = "1");
diff --git a/tools/tpch-tools/ddl/create-tpch-tables.sql
b/tools/tpch-tools/ddl/create-tpch-tables.sql
new file mode 100644
index 0000000000..1627720899
--- /dev/null
+++ b/tools/tpch-tools/ddl/create-tpch-tables.sql
@@ -0,0 +1,174 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+drop table if exists lineitem;
+CREATE TABLE lineitem (
+ l_shipdate DATE NOT NULL,
+ l_orderkey bigint NOT NULL,
+ l_linenumber int not null,
+ l_partkey int NOT NULL,
+ l_suppkey int not null,
+ l_quantity decimal(15, 2) NOT NULL,
+ l_extendedprice decimal(15, 2) NOT NULL,
+ l_discount decimal(15, 2) NOT NULL,
+ l_tax decimal(15, 2) NOT NULL,
+ l_returnflag VARCHAR(1) NOT NULL,
+ l_linestatus VARCHAR(1) NOT NULL,
+ l_commitdate DATE NOT NULL,
+ l_receiptdate DATE NOT NULL,
+ l_shipinstruct VARCHAR(25) NOT NULL,
+ l_shipmode VARCHAR(10) NOT NULL,
+ l_comment VARCHAR(44) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`l_shipdate`, `l_orderkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96
+PROPERTIES (
+ "replication_num" = "1",
+ "colocate_with" = "lineitem_orders"
+);
+
+drop table if exists orders;
+CREATE TABLE orders (
+ o_orderkey bigint NOT NULL,
+ o_orderdate DATE NOT NULL,
+ o_custkey int NOT NULL,
+ o_orderstatus VARCHAR(1) NOT NULL,
+ o_totalprice decimal(15, 2) NOT NULL,
+ o_orderpriority VARCHAR(15) NOT NULL,
+ o_clerk VARCHAR(15) NOT NULL,
+ o_shippriority int NOT NULL,
+ o_comment VARCHAR(79) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`o_orderkey`, `o_orderdate`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96
+PROPERTIES (
+ "replication_num" = "1",
+ "colocate_with" = "lineitem_orders"
+);
+
+drop table if exists partsupp;
+CREATE TABLE partsupp (
+ ps_partkey int NOT NULL,
+ ps_suppkey int NOT NULL,
+ ps_availqty int NOT NULL,
+ ps_supplycost decimal(15, 2) NOT NULL,
+ ps_comment VARCHAR(199) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`ps_partkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 24
+PROPERTIES (
+ "replication_num" = "1",
+ "colocate_with" = "part_partsupp"
+);
+
+drop table if exists part;
+CREATE TABLE part (
+ p_partkey int NOT NULL,
+ p_name VARCHAR(55) NOT NULL,
+ p_mfgr VARCHAR(25) NOT NULL,
+ p_brand VARCHAR(10) NOT NULL,
+ p_type VARCHAR(25) NOT NULL,
+ p_size int NOT NULL,
+ p_container VARCHAR(10) NOT NULL,
+ p_retailprice decimal(15, 2) NOT NULL,
+ p_comment VARCHAR(23) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`p_partkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 24
+PROPERTIES (
+ "replication_num" = "1",
+ "colocate_with" = "part_partsupp"
+);
+
+drop table if exists customer;
+CREATE TABLE customer (
+ c_custkey int NOT NULL,
+ c_name VARCHAR(25) NOT NULL,
+ c_address VARCHAR(40) NOT NULL,
+ c_nationkey int NOT NULL,
+ c_phone VARCHAR(15) NOT NULL,
+ c_acctbal decimal(15, 2) NOT NULL,
+ c_mktsegment VARCHAR(10) NOT NULL,
+ c_comment VARCHAR(117) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`c_custkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 24
+PROPERTIES (
+ "replication_num" = "1"
+);
+
+drop table if exists supplier;
+CREATE TABLE supplier (
+ s_suppkey int NOT NULL,
+ s_name VARCHAR(25) NOT NULL,
+ s_address VARCHAR(40) NOT NULL,
+ s_nationkey int NOT NULL,
+ s_phone VARCHAR(15) NOT NULL,
+ s_acctbal decimal(15, 2) NOT NULL,
+ s_comment VARCHAR(101) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`s_suppkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12
+PROPERTIES (
+ "replication_num" = "1"
+);
+
+drop table if exists nation;
+CREATE TABLE `nation` (
+ `n_nationkey` int(11) NOT NULL,
+ `n_name` varchar(25) NOT NULL,
+ `n_regionkey` int(11) NOT NULL,
+ `n_comment` varchar(152) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`N_NATIONKEY`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1
+PROPERTIES (
+ "replication_num" = "1"
+);
+
+drop table if exists region;
+CREATE TABLE region (
+ r_regionkey int NOT NULL,
+ r_name VARCHAR(25) NOT NULL,
+ r_comment VARCHAR(152)
+)ENGINE=OLAP
+DUPLICATE KEY(`r_regionkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
+PROPERTIES (
+ "replication_num" = "1"
+);
+
+drop view if exists revenue0;
+create view revenue0 (supplier_no, total_revenue) as
+select
+ l_suppkey,
+ sum(l_extendedprice * (1 - l_discount))
+from
+ lineitem
+where
+ l_shipdate >= date '1996-01-01'
+ and l_shipdate < date '1996-01-01' + interval '3' month
+group by
+ l_suppkey;
diff --git a/tools/tpch-tools/queries/q1.sql b/tools/tpch-tools/queries/q1.sql
index a888c2bdb1..6075f47ccb 100644
--- a/tools/tpch-tools/queries/q1.sql
+++ b/tools/tpch-tools/queries/q1.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=false, enable_projection=false) */
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
diff --git a/tools/tpch-tools/queries/q10.sql b/tools/tpch-tools/queries/q10.sql
index 6a12c1f5b4..52a0d313d7 100644
--- a/tools/tpch-tools/queries/q10.sql
+++ b/tools/tpch-tools/queries/q10.sql
@@ -15,10 +15,12 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=false, enable_projection=true) */
c_custkey,
c_name,
- sum(l_extendedprice * (1 - l_discount)) as revenue,
+ sum(t1.l_extendedprice * (1 - t1.l_discount)) as revenue,
c_acctbal,
n_name,
c_address,
@@ -26,15 +28,16 @@ select
c_comment
from
customer,
- orders,
- lineitem,
+ (
+ select o_custkey,l_extendedprice,l_discount from lineitem, orders
+ where l_orderkey = o_orderkey
+ and o_orderdate >= date '1993-10-01'
+ and o_orderdate < date '1993-10-01' + interval '3' month
+ and l_returnflag = 'R'
+ ) t1,
nation
where
- c_custkey = o_custkey
- and l_orderkey = o_orderkey
- and o_orderdate >= date '1993-10-01'
- and o_orderdate < date '1993-10-01' + interval '3' month
- and l_returnflag = 'R'
+ c_custkey = t1.o_custkey
and c_nationkey = n_nationkey
group by
c_custkey,
@@ -47,3 +50,4 @@ group by
order by
revenue desc
limit 20;
+
diff --git a/tools/tpch-tools/queries/q11.sql b/tools/tpch-tools/queries/q11.sql
index f70fab2be9..99fbf49149 100644
--- a/tools/tpch-tools/queries/q11.sql
+++ b/tools/tpch-tools/queries/q11.sql
@@ -15,30 +15,31 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=true, enable_projection=true) */
ps_partkey,
sum(ps_supplycost * ps_availqty) as value
from
partsupp,
- supplier,
- nation
+ (
+ select s_suppkey
+ from supplier, nation
+ where s_nationkey = n_nationkey and n_name = 'GERMANY'
+ ) B
where
- ps_suppkey = s_suppkey
- and s_nationkey = n_nationkey
- and n_name = 'GERMANY'
+ ps_suppkey = B.s_suppkey
group by
ps_partkey having
sum(ps_supplycost * ps_availqty) > (
select
- sum(ps_supplycost * ps_availqty) * 0.0001000000
+ sum(ps_supplycost * ps_availqty) * 0.000002
from
partsupp,
- supplier,
- nation
+ (select s_suppkey
+ from supplier, nation
+ where s_nationkey = n_nationkey and n_name = 'GERMANY'
+ ) A
where
- ps_suppkey = s_suppkey
- and s_nationkey = n_nationkey
- and n_name = 'GERMANY'
+ ps_suppkey = A.s_suppkey
)
order by
- value desc;
+ value desc;
\ No newline at end of file
diff --git a/tools/tpch-tools/queries/q12.sql b/tools/tpch-tools/queries/q12.sql
index d53dd9c809..5fcd65c635 100644
--- a/tools/tpch-tools/queries/q12.sql
+++ b/tools/tpch-tools/queries/q12.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=true, enable_projection=true) */
l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
diff --git a/tools/tpch-tools/queries/q13.sql b/tools/tpch-tools/queries/q13.sql
index a35497cd57..cf8ef42817 100644
--- a/tools/tpch-tools/queries/q13.sql
+++ b/tools/tpch-tools/queries/q13.sql
@@ -15,7 +15,9 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=true, enable_projection=true) */
c_count,
count(*) as custdist
from
@@ -24,7 +26,7 @@ from
c_custkey,
count(o_orderkey) as c_count
from
- customer left outer join orders on
+ orders right outer join customer on
c_custkey = o_custkey
and o_comment not like '%special%requests%'
group by
diff --git a/tools/tpch-tools/queries/q14.sql b/tools/tpch-tools/queries/q14.sql
index 960cd15fb3..637808b261 100644
--- a/tools/tpch-tools/queries/q14.sql
+++ b/tools/tpch-tools/queries/q14.sql
@@ -15,15 +15,17 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=true, enable_projection=true) */
100.00 * sum(case
when p_type like 'PROMO%'
then l_extendedprice * (1 - l_discount)
else 0
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
- lineitem,
- part
+ part,
+ lineitem
where
l_partkey = p_partkey
and l_shipdate >= date '1995-09-01'
diff --git a/tools/tpch-tools/queries/q15.sql b/tools/tpch-tools/queries/q15.sql
index 1f7f19b34b..db621695cf 100644
--- a/tools/tpch-tools/queries/q15.sql
+++ b/tools/tpch-tools/queries/q15.sql
@@ -15,20 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-create view revenue0 (supplier_no, total_revenue) as
- select
- l_suppkey,
- sum(l_extendedprice * (1 - l_discount))
- from
- lineitem
- where
- l_shipdate >= date '1996-01-01'
- and l_shipdate < date '1996-01-01' + interval '3' month
- group by
- l_suppkey;
-
-
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=true, enable_projection=true) */
s_suppkey,
s_name,
s_address,
@@ -47,5 +34,3 @@ where
)
order by
s_suppkey;
-
-drop view revenue0;
diff --git a/tools/tpch-tools/queries/q16.sql b/tools/tpch-tools/queries/q16.sql
index 6027b5c2d0..a7e2d4ca74 100644
--- a/tools/tpch-tools/queries/q16.sql
+++ b/tools/tpch-tools/queries/q16.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=true, enable_projection=true) */
p_brand,
p_type,
p_size,
diff --git a/tools/tpch-tools/queries/q17.sql b/tools/tpch-tools/queries/q17.sql
index 7724caac8f..a13fbd89b6 100644
--- a/tools/tpch-tools/queries/q17.sql
+++ b/tools/tpch-tools/queries/q17.sql
@@ -15,20 +15,24 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=true, enable_projection=true) */
sum(l_extendedprice) / 7.0 as avg_yearly
from
- lineitem,
- part
+ lineitem join [broadcast]
+ part p1 on p1.p_partkey = l_partkey
where
- p_partkey = l_partkey
- and p_brand = 'Brand#23'
- and p_container = 'MED BOX'
+ p1.p_brand = 'Brand#23'
+ and p1.p_container = 'MED BOX'
and l_quantity < (
select
0.2 * avg(l_quantity)
from
- lineitem
+ lineitem join [broadcast]
+ part p2 on p2.p_partkey = l_partkey
where
- l_partkey = p_partkey
+ l_partkey = p1.p_partkey
+ and p2.p_brand = 'Brand#23'
+ and p2.p_container = 'MED BOX'
);
diff --git a/tools/tpch-tools/queries/q18.sql b/tools/tpch-tools/queries/q18.sql
index 352d8be84a..23d1513076 100644
--- a/tools/tpch-tools/queries/q18.sql
+++ b/tools/tpch-tools/queries/q18.sql
@@ -15,36 +15,44 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=true, enable_projection=true) */
c_name,
c_custkey,
- o_orderkey,
- o_orderdate,
- o_totalprice,
- sum(l_quantity)
+ t3.o_orderkey,
+ t3.o_orderdate,
+ t3.o_totalprice,
+ sum(t3.l_quantity)
from
- customer,
- orders,
- lineitem
-where
- o_orderkey in (
- select
- l_orderkey
- from
- lineitem
- group by
- l_orderkey having
- sum(l_quantity) > 300
- )
- and c_custkey = o_custkey
- and o_orderkey = l_orderkey
+customer join
+(
+ select * from
+ lineitem join
+ (
+ select * from
+ orders left semi join
+ (
+ select
+ l_orderkey
+ from
+ lineitem
+ group by
+ l_orderkey having sum(l_quantity) > 300
+ ) t1
+ on o_orderkey = t1.l_orderkey
+ ) t2
+ on t2.o_orderkey = l_orderkey
+) t3
+on c_custkey = t3.o_custkey
group by
c_name,
c_custkey,
- o_orderkey,
- o_orderdate,
- o_totalprice
+ t3.o_orderkey,
+ t3.o_orderdate,
+ t3.o_totalprice
order by
- o_totalprice desc,
- o_orderdate
+ t3.o_totalprice desc,
+ t3.o_orderdate
limit 100;
+
diff --git a/tools/tpch-tools/queries/q19.sql b/tools/tpch-tools/queries/q19.sql
index a20a62533a..857ffc9267 100644
--- a/tools/tpch-tools/queries/q19.sql
+++ b/tools/tpch-tools/queries/q19.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=false, enable_projection=true) */
sum(l_extendedprice* (1 - l_discount)) as revenue
from
lineitem,
diff --git a/tools/tpch-tools/queries/q2.sql b/tools/tpch-tools/queries/q2.sql
index d1ccca0933..99b049d25a 100644
--- a/tools/tpch-tools/queries/q2.sql
+++ b/tools/tpch-tools/queries/q2.sql
@@ -15,7 +15,9 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=false, enable_projection=true) */
s_acctbal,
s_name,
n_name,
@@ -25,24 +27,14 @@ select
s_phone,
s_comment
from
- part,
- supplier,
- partsupp,
- nation,
- region
-where
- p_partkey = ps_partkey
- and s_suppkey = ps_suppkey
- and p_size = 15
- and p_type like '%BRASS'
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
- and r_name = 'EUROPE'
- and ps_supplycost = (
+ partsupp join
+ (
select
- min(ps_supplycost)
+ ps_partkey as a_partkey,
+ min(ps_supplycost) as a_min
from
partsupp,
+ part,
supplier,
nation,
region
@@ -52,10 +44,26 @@ where
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
- )
+ and p_size = 15
+ and p_type like '%BRASS'
+ group by a_partkey
+ ) A on ps_partkey = a_partkey and ps_supplycost=a_min ,
+ part,
+ supplier,
+ nation,
+ region
+where
+ p_partkey = ps_partkey
+ and s_suppkey = ps_suppkey
+ and p_size = 15
+ and p_type like '%BRASS'
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'EUROPE'
+
order by
s_acctbal desc,
n_name,
s_name,
p_partkey
-limit 100;
+limit 100;
\ No newline at end of file
diff --git a/tools/tpch-tools/queries/q20.sql b/tools/tpch-tools/queries/q20.sql
index 6803b4e3c3..77a2b9de0a 100644
--- a/tools/tpch-tools/queries/q20.sql
+++ b/tools/tpch-tools/queries/q20.sql
@@ -15,40 +15,30 @@
-- specific language governing permissions and limitations
-- under the License.
-select
- s_name,
- s_address
-from
- supplier,
- nation
-where
- s_suppkey in (
- select
- ps_suppkey
- from
- partsupp
- where
- ps_partkey in (
- select
- p_partkey
- from
- part
- where
- p_name like 'forest%'
- )
- and ps_availqty > (
- select
- 0.5 * sum(l_quantity)
- from
- lineitem
- where
- l_partkey = ps_partkey
- and l_suppkey = ps_suppkey
- and l_shipdate >= date '1994-01-01'
- and l_shipdate < date '1994-01-01' + interval '1' year
- )
- )
- and s_nationkey = n_nationkey
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=true, enable_projection=true) */
+s_name, s_address from
+supplier left semi join
+(
+ select * from
+ (
+ select l_partkey,l_suppkey, 0.5 * sum(l_quantity) as l_q
+ from lineitem
+ where l_shipdate >= date '1994-01-01'
+ and l_shipdate < date '1994-01-01' + interval '1' year
+ group by l_partkey,l_suppkey
+ ) t2 join
+ (
+ select ps_partkey, ps_suppkey, ps_availqty
+ from partsupp left semi join part
+ on ps_partkey = p_partkey and p_name like 'forest%'
+ ) t1
+ on t2.l_partkey = t1.ps_partkey and t2.l_suppkey = t1.ps_suppkey
+ and t1.ps_availqty > t2.l_q
+) t3
+on s_suppkey = t3.ps_suppkey
+join nation
+where s_nationkey = n_nationkey
and n_name = 'CANADA'
-order by
- s_name;
+order by s_name;
diff --git a/tools/tpch-tools/queries/q21.sql b/tools/tpch-tools/queries/q21.sql
index f3cdf2228d..6b13b1ca77 100644
--- a/tools/tpch-tools/queries/q21.sql
+++ b/tools/tpch-tools/queries/q21.sql
@@ -15,43 +15,37 @@
-- specific language governing permissions and limitations
-- under the License.
-select
- s_name,
- count(*) as numwait
-from
- supplier,
- lineitem l1,
- orders,
- nation
-where
- s_suppkey = l1.l_suppkey
- and o_orderkey = l1.l_orderkey
- and o_orderstatus = 'F'
- and l1.l_receiptdate > l1.l_commitdate
- and exists (
- select
- *
- from
- lineitem l2
- where
- l2.l_orderkey = l1.l_orderkey
- and l2.l_suppkey <> l1.l_suppkey
- )
- and not exists (
- select
- *
- from
- lineitem l3
- where
- l3.l_orderkey = l1.l_orderkey
- and l3.l_suppkey <> l1.l_suppkey
- and l3.l_receiptdate > l3.l_commitdate
- )
- and s_nationkey = n_nationkey
- and n_name = 'SAUDI ARABIA'
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=true, enable_projection=true) */
+s_name, count(*) as numwait
+from orders join
+(
+ select * from
+ lineitem l2 right semi join
+ (
+ select * from
+ lineitem l3 right anti join
+ (
+ select * from
+ lineitem l1 join
+ (
+ select * from
+ supplier join nation
+ where s_nationkey = n_nationkey
+ and n_name = 'SAUDI ARABIA'
+ ) t1
+ where t1.s_suppkey = l1.l_suppkey and l1.l_receiptdate > l1.l_commitdate
+ ) t2
+ on l3.l_orderkey = t2.l_orderkey and l3.l_suppkey <> t2.l_suppkey and
l3.l_receiptdate > l3.l_commitdate
+ ) t3
+ on l2.l_orderkey = t3.l_orderkey and l2.l_suppkey <> t3.l_suppkey
+) t4
+on o_orderkey = t4.l_orderkey and o_orderstatus = 'F'
group by
- s_name
+ t4.s_name
order by
numwait desc,
- s_name
+ t4.s_name
limit 100;
+
diff --git a/tools/tpch-tools/queries/q22.sql b/tools/tpch-tools/queries/q22.sql
index 48d7384bb5..44a663a0b1 100644
--- a/tools/tpch-tools/queries/q22.sql
+++ b/tools/tpch-tools/queries/q22.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=16, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=true, enable_projection=true) */
cntrycode,
count(*) as numcust,
sum(c_acctbal) as totacctbal
diff --git a/tools/tpch-tools/queries/q3.sql b/tools/tpch-tools/queries/q3.sql
index 73fbb51ede..01561b56cc 100644
--- a/tools/tpch-tools/queries/q3.sql
+++ b/tools/tpch-tools/queries/q3.sql
@@ -15,21 +15,23 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=false, enable_projection=true) */
l_orderkey,
sum(l_extendedprice * (1 - l_discount)) as revenue,
o_orderdate,
o_shippriority
from
- customer,
- orders,
- lineitem
-where
- c_mktsegment = 'BUILDING'
- and c_custkey = o_custkey
- and l_orderkey = o_orderkey
- and o_orderdate < date '1995-03-15'
- and l_shipdate > date '1995-03-15'
+ (
+ select l_orderkey, l_extendedprice, l_discount, o_orderdate,
o_shippriority, o_custkey from
+ lineitem join orders
+ where l_orderkey = o_orderkey
+ and o_orderdate < date '1995-03-15'
+ and l_shipdate > date '1995-03-15'
+ ) t1 join customer c
+ on c.c_custkey = t1.o_custkey
+ where c_mktsegment = 'BUILDING'
group by
l_orderkey,
o_orderdate,
diff --git a/tools/tpch-tools/queries/q4.sql b/tools/tpch-tools/queries/q4.sql
index b6bc24d511..0fc4e80885 100644
--- a/tools/tpch-tools/queries/q4.sql
+++ b/tools/tpch-tools/queries/q4.sql
@@ -15,23 +15,24 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=false, enable_projection=true) */
o_orderpriority,
count(*) as order_count
from
- orders
-where
- o_orderdate >= date '1993-07-01'
- and o_orderdate < date '1993-07-01' + interval '3' month
- and exists (
+ (
select
*
from
lineitem
- where
- l_orderkey = o_orderkey
- and l_commitdate < l_receiptdate
- )
+ where l_commitdate < l_receiptdate
+ ) t1
+ right semi join orders
+ on t1.l_orderkey = o_orderkey
+where
+ o_orderdate >= date '1993-07-01'
+ and o_orderdate < date '1993-07-01' + interval '3' month
group by
o_orderpriority
order by
diff --git a/tools/tpch-tools/queries/q5.sql b/tools/tpch-tools/queries/q5.sql
index 5e3bc4116f..135f59801d 100644
--- a/tools/tpch-tools/queries/q5.sql
+++ b/tools/tpch-tools/queries/q5.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=false, enable_projection=true) */
n_name,
sum(l_extendedprice * (1 - l_discount)) as revenue
from
diff --git a/tools/tpch-tools/queries/q6.sql b/tools/tpch-tools/queries/q6.sql
index 2d62a6a4f1..819f4de56c 100644
--- a/tools/tpch-tools/queries/q6.sql
+++ b/tools/tpch-tools/queries/q6.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=false, enable_projection=true) */
sum(l_extendedprice * l_discount) as revenue
from
lineitem
diff --git a/tools/tpch-tools/queries/q7.sql b/tools/tpch-tools/queries/q7.sql
index 9a6e4a9b9f..f1044a31c2 100644
--- a/tools/tpch-tools/queries/q7.sql
+++ b/tools/tpch-tools/queries/q7.sql
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=false, enable_projection=true) */
supp_nation,
cust_nation,
l_year,
diff --git a/tools/tpch-tools/queries/q8.sql b/tools/tpch-tools/queries/q8.sql
index e61c8632d0..3ebdf57272 100644
--- a/tools/tpch-tools/queries/q8.sql
+++ b/tools/tpch-tools/queries/q8.sql
@@ -15,7 +15,9 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select /*+SET_VAR(exec_mem_limit=8589934592,
parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=true,
enable_cost_based_join_reorder=false, enable_projection=true) */
o_year,
sum(case
when nation = 'BRAZIL' then volume
@@ -28,11 +30,11 @@ from
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
from
- part,
- supplier,
lineitem,
orders,
customer,
+ supplier,
+ part,
nation n1,
nation n2,
region
diff --git a/tools/tpch-tools/queries/q9.sql b/tools/tpch-tools/queries/q9.sql
index 8e486d8fa3..2a5eebf54d 100644
--- a/tools/tpch-tools/queries/q9.sql
+++ b/tools/tpch-tools/queries/q9.sql
@@ -15,7 +15,9 @@
-- specific language governing permissions and limitations
-- under the License.
-select
+-- Modified
+
+select/*+SET_VAR(exec_mem_limit=17179869184,
parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true,
batch_size=4096, disable_join_reorder=false,
enable_cost_based_join_reorder=false, enable_projection=true) */
nation,
o_year,
sum(amount) as sum_profit
@@ -26,24 +28,18 @@ from
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as
amount
from
- part,
- supplier,
- lineitem,
- partsupp,
- orders,
- nation
+ lineitem join orders on o_orderkey = l_orderkey
+ join[shuffle] part on p_partkey = l_partkey
+ join[shuffle] partsupp on ps_partkey = l_partkey
+ join[shuffle] supplier on s_suppkey = l_suppkey
+ join[broadcast] nation on s_nationkey = n_nationkey
where
- s_suppkey = l_suppkey
- and ps_suppkey = l_suppkey
- and ps_partkey = l_partkey
- and p_partkey = l_partkey
- and o_orderkey = l_orderkey
- and s_nationkey = n_nationkey
- and p_name like '%green%'
+ ps_suppkey = l_suppkey and
+ p_name like '%green%'
) as profit
group by
nation,
o_year
order by
nation,
- o_year desc;
+ o_year desc;
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]