This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 8d1675049921dbbe0e9a3d023e7a8780f91cfb59 Author: Dongyang Li <[email protected]> AuthorDate: Sun Mar 20 23:06:10 2022 +0800 [feature](benchmark) Add TPC-H benchmark tools (#8408) --- tools/tpch-tools/README.md | 34 +++++ tools/tpch-tools/build-tpch-dbgen.sh | 89 +++++++++++++ tools/tpch-tools/create-tpch-tables.sh | 98 ++++++++++++++ tools/tpch-tools/create-tpch-tables.sql | 100 ++++++++++++++ tools/tpch-tools/doris-cluster.conf | 29 ++++ tools/tpch-tools/gen-tpch-data.sh | 154 +++++++++++++++++++++ tools/tpch-tools/load-tpch-data.sh | 229 ++++++++++++++++++++++++++++++++ tools/tpch-tools/queries/q1.sql | 38 ++++++ tools/tpch-tools/queries/q10.sql | 49 +++++++ tools/tpch-tools/queries/q11.sql | 44 ++++++ tools/tpch-tools/queries/q12.sql | 45 +++++++ tools/tpch-tools/queries/q13.sql | 37 ++++++ tools/tpch-tools/queries/q14.sql | 30 +++++ tools/tpch-tools/queries/q15.sql | 51 +++++++ tools/tpch-tools/queries/q16.sql | 47 +++++++ tools/tpch-tools/queries/q17.sql | 34 +++++ tools/tpch-tools/queries/q18.sql | 50 +++++++ tools/tpch-tools/queries/q19.sql | 52 ++++++++ tools/tpch-tools/queries/q2.sql | 61 +++++++++ tools/tpch-tools/queries/q20.sql | 54 ++++++++ tools/tpch-tools/queries/q21.sql | 57 ++++++++ tools/tpch-tools/queries/q22.sql | 54 ++++++++ tools/tpch-tools/queries/q3.sql | 40 ++++++ tools/tpch-tools/queries/q4.sql | 38 ++++++ tools/tpch-tools/queries/q5.sql | 41 ++++++ tools/tpch-tools/queries/q6.sql | 26 ++++ tools/tpch-tools/queries/q7.sql | 56 ++++++++ tools/tpch-tools/queries/q8.sql | 54 ++++++++ tools/tpch-tools/queries/q9.sql | 49 +++++++ tools/tpch-tools/run-tpch-queries.sh | 117 ++++++++++++++++ 30 files changed, 1857 insertions(+) diff --git a/tools/tpch-tools/README.md b/tools/tpch-tools/README.md new file mode 100644 index 0000000..f06e962 --- /dev/null +++ b/tools/tpch-tools/README.md @@ -0,0 +1,34 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## Usage + +These scripts are used to make tpc-h test. +follow the steps below: + +### 1. build tpc-h dbgen tool. + ./build-tpch-dbgen.sh +### 2. generate tpc-h data. use -h for more infomations. + ./gen-tpch-data.sh -s 1 +### 3. create tpc-h tables. modify `doris-cluster.conf` to specify doris info, then run script below. + ./create-tpch-tables.sh +### 4. load tpc-h data. use -h for help. + ./load-tpch-data.sh +### 5. run tpc-h queries. + ./run-tpch-queries.sh diff --git a/tools/tpch-tools/build-tpch-dbgen.sh b/tools/tpch-tools/build-tpch-dbgen.sh new file mode 100755 index 0000000..9c2c637 --- /dev/null +++ b/tools/tpch-tools/build-tpch-dbgen.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to build tpch-dbgen +# TPC-H_Tools_v3.0.0.zip is from https://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp +# Usage: +# sh build-tpch-dbgen.sh +############################################################## + +set -eo pipefail + +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) + +CURDIR=${ROOT} +TPCH_DBGEN_DIR=$CURDIR/TPC-H_Tools_v3.0.0/dbgen + +check_prerequest() { + local CMD=$1 + local NAME=$2 + if ! $CMD; then + echo "$NAME is missing. This script depends on unzip to extract files from TPC-H_Tools_v3.0.0.zip" + exit 1 + fi +} + +check_prerequest "unzip -h" "unzip" + +# download tpch tools pacage first +if [[ -d $TPCH_DBGEN_DIR ]]; then + echo "Dir $TPCH_DBGEN_DIR already exists. No need to download." + echo "If you want to download TPC-H_Tools_v3.0.0 again, please delete this dir first." +else + wget "https://tools-chengdu.oss-cn-chengdu.aliyuncs.com/TPC-H_Tools_v3.0.0.zip" + + unzip TPC-H_Tools_v3.0.0.zip -d $CURDIR/ +fi + +# modify tpcd.h +cd $TPCH_DBGEN_DIR/ +echo ' +#ifdef MYSQL +#define GEN_QUERY_PLAN "" +#define START_TRAN "START TRANSACTION" +#define END_TRAN "COMMIT" +#define SET_OUTPUT "" +#define SET_ROWCOUNT "limit %d;\n" +#define SET_DBASE "use %s;\n" +#endif +' >> tpcd.h + +# modify makefile +cp makefile.suite makefile +sed -i 's/^CC =/CC = gcc/g' makefile +sed -i 's/^DATABASE=/DATABASE = MYSQL/g' makefile +sed -i 's/^MACHINE =/MACHINE = LINUX/g' makefile +sed -i 's/^WORKLOAD =/WORKLOAD = TPCH/g' makefile + +# compile tpch-dbgen +make > /dev/null +cd - + +# check +if [[ -f $TPCH_DBGEN_DIR/dbgen ]]; then + echo "Build succeed! Run $TPCH_DBGEN_DIR/dbgen -h" + exit 0 +else + echo "Build failed!" + exit 1 +fi diff --git a/tools/tpch-tools/create-tpch-tables.sh b/tools/tpch-tools/create-tpch-tables.sh new file mode 100755 index 0000000..2263f47 --- /dev/null +++ b/tools/tpch-tools/create-tpch-tables.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to create TPC-H tables +############################################################## + +set -eo pipefail + +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) + +CURDIR=${ROOT} + +usage() { + echo " +This script is used to create TPC-H tables, +will use mysql client to connect Doris server which is specified in doris-cluster.conf file. +Usage: $0 + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -- "$@") + +eval set -- "$OPTS" +HELP=0 + +if [ $# == 0 ]; then + usage +fi + +while true; do + case "$1" in + -h) + HELP=1 + shift + ;; + --) + shift + break + ;; + *) + echo "Internal error" + exit 1 + ;; + esac +done + +if [[ ${HELP} -eq 1 ]]; then + usage + exit +fi + +check_prerequest() { + local CMD=$1 + local NAME=$2 + if ! $CMD; then + echo "$NAME is missing. This script depends on mysql to create tables in Doris." + exit 1 + fi +} + +check_prerequest "mysql --version" "mysql" + +source $CURDIR/doris-cluster.conf + +echo "FE_HOST: $FE_HOST" +echo "FE_QUERY_PORT: $FE_QUERY_PORT" +echo "USER: $USER" +echo "PASSWORD: $PASSWORD" +echo "DB: $DB" + +mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB" + +echo $CURDIR/create-tpch-tables.sql +mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB <$CURDIR/create-tpch-tables.sql diff --git a/tools/tpch-tools/create-tpch-tables.sql b/tools/tpch-tools/create-tpch-tables.sql new file mode 100644 index 0000000..f5acc7e --- /dev/null +++ b/tools/tpch-tools/create-tpch-tables.sql @@ -0,0 +1,100 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +CREATE TABLE `region` ( + `r_regionkey` integer NOT NULL, + `r_name` char(25) NOT NULL, + `r_comment` varchar(152) +) DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); + +CREATE TABLE `nation` ( + `n_nationkey` integer NOT NULL, + `n_name` char(25) NOT NULL, + `n_regionkey` integer NOT NULL, + `n_comment` varchar(152) +) DISTRIBUTED BY HASH(`n_nationkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); + +CREATE TABLE `part` ( + `p_partkey` integer NOT NULL, + `p_name` varchar(55) NOT NULL, + `p_mfgr` char(25) NOT NULL, + `p_brand` char(10) NOT NULL, + `p_type` varchar(25) NOT NULL, + `p_size` integer NOT NULL, + `p_container` char(10) NOT NULL, + `p_retailprice` decimal(12, 2) NOT NULL, + `p_comment` varchar(23) NOT NULL +) DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); + +CREATE TABLE `supplier` ( + `s_suppkey` integer NOT NULL, + `s_name` char(25) NOT NULL, + `s_address` varchar(40) NOT NULL, + `s_nationkey` integer NOT NULL, + `s_phone` char(15) NOT NULL, + `s_acctbal` decimal(12, 2) NOT NULL, + `s_comment` varchar(101) NOT NULL +) DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); + +CREATE TABLE `customer` ( + `c_custkey` integer NOT NULL, + `c_name` varchar(25) NOT NULL, + `c_address` varchar(40) NOT NULL, + `c_nationkey` integer NOT NULL, + `c_phone` char(15) NOT NULL, + `c_acctbal` decimal(12, 2) NOT NULL, + `c_mktsegment` char(10) NOT NULL, + `c_comment` varchar(117) NOT NULL +) DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); + +CREATE TABLE `partsupp` ( + `ps_partkey` integer NOT NULL, + `ps_suppkey` integer NOT NULL, + `ps_availqty` integer NOT NULL, + `ps_supplycost` decimal(12, 2) NOT NULL, + `ps_comment` varchar(199) NOT NULL +) DISTRIBUTED BY HASH(`ps_partkey`, `ps_suppkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); + +CREATE TABLE `orders` ( + `o_orderkey` integer NOT NULL, + `o_custkey` integer NOT NULL, + `o_orderstatus` char(1) NOT NULL, + `o_totalprice` decimal(12, 2) NOT NULL, + `o_orderdate` date NOT NULL, + `o_orderpriority` char(15) NOT NULL, + `o_clerk` char(15) NOT NULL, + `o_shippriority` integer NOT NULL, + `o_comment` varchar(79) NOT NULL +) DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); + +CREATE TABLE `lineitem` ( + `l_orderkey` integer NOT NULL, + `l_linenumber` integer NOT NULL, + `l_partkey` integer NOT NULL, + `l_suppkey` integer NOT NULL, + `l_quantity` decimal(12, 2) NOT NULL, + `l_extendedprice` decimal(12, 2) NOT NULL, + `l_discount` decimal(12, 2) NOT NULL, + `l_tax` decimal(12, 2) NOT NULL, + `l_returnflag` char(1) NOT NULL, + `l_linestatus` char(1) NOT NULL, + `l_shipdate` date NOT NULL, + `l_commitdate` date NOT NULL, + `l_receiptdate` date NOT NULL, + `l_shipinstruct` char(25) NOT NULL, + `l_shipmode` char(10) NOT NULL, + `l_comment` varchar(44) NOT NULL +) DISTRIBUTED BY HASH(`l_orderkey`, `l_linenumber`) BUCKETS 48 PROPERTIES ("replication_num" = "1"); diff --git a/tools/tpch-tools/doris-cluster.conf b/tools/tpch-tools/doris-cluster.conf new file mode 100644 index 0000000..5d76264 --- /dev/null +++ b/tools/tpch-tools/doris-cluster.conf @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Any of FE host +export FE_HOST='172.20.194.235' +# http_port in fe.conf +export FE_HTTP_PORT=8030 +# query_port in fe.conf +export FE_QUERY_PORT=9030 +# Doris username +export USER='root' +# Doris password +export PASSWORD='' +# The database where TPC-H tables located +export DB='tpch1' diff --git a/tools/tpch-tools/gen-tpch-data.sh b/tools/tpch-tools/gen-tpch-data.sh new file mode 100755 index 0000000..0e7359d --- /dev/null +++ b/tools/tpch-tools/gen-tpch-data.sh @@ -0,0 +1,154 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to generate TPC-H data set +############################################################## + +set -eo pipefail + +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) + +CURDIR=${ROOT} +TPCH_DBGEN_DIR=$CURDIR/TPC-H_Tools_v3.0.0/dbgen/ +TPCH_DATA_DIR=$CURDIR/tpch-data/ + +usage() { + echo " +Usage: $0 <options> + Optional options: + -s scale factor, default is 100 + -c parallelism to generate data of (lineitem, orders, partsupp) table, default is 10 + + Eg. + $0 generate data using default value. + $0 -s 10 generate data with scale factor 10. + $0 -s 10 -c 5 generate data with scale factor 10. And using 5 threads to generate data concurrently. + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -o 'hs:c:' \ + -- "$@") + +eval set -- "$OPTS" + +SCALE_FACTOR=100 +PARALLEL=10 +HELP=0 + +if [ $# == 0 ]; then + usage +fi + +while true; do + case "$1" in + -h) + HELP=1 + shift + ;; + -s) + SCALE_FACTOR=$2 + shift 2 + ;; + -c) + PARALLEL=$2 + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Internal error" + exit 1 + ;; + esac +done + +if [[ ${HELP} -eq 1 ]]; then + usage + exit +fi + +echo "Scale Factor: $SCALE_FACTOR" +echo "Parallelism: $PARALLEL" + +# check if dbgen exists +if [[ ! -f $TPCH_DBGEN_DIR/dbgen ]]; then + echo "$TPCH_DBGEN_DIR/dbgen does not exist. Run build-tpch-dbgen.sh first to build it first." + exit 1 +fi + +if [[ -d $TPCH_DATA_DIR/ ]]; then + echo "$TPCH_DATA_DIR exists. Remove it before generating data" + exit 1 +fi + +mkdir $TPCH_DATA_DIR/ + +# gen data +cd $TPCH_DBGEN_DIR +echo "Begin to generate data for table: region" +$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T r +echo "Begin to generate data for table: nation" +$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T n +echo "Begin to generate data for table: supplier" +$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s +echo "Begin to generate data for table: part" +$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T P +echo "Begin to generate data for table: customer" +$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c +echo "Begin to generate data for table: partsupp" +for i in $(seq 1 $PARALLEL); do + { + $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T S -C $PARALLEL -S ${i} + } & +done +wait + +echo "Begin to generate data for table: orders" +for i in $(seq 1 $PARALLEL); do + { + $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T O -C $PARALLEL -S ${i} + } & +done +wait + +echo "Begin to generate data for table: lineitem" +for i in $(seq 1 $PARALLEL); do + { + $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T L -C $PARALLEL -S ${i} + } & +done +wait + +cd - + +# move data to $TPCH_DATA_DIR +mv $TPCH_DBGEN_DIR/*.tbl* $TPCH_DATA_DIR/ + +# check data +du -sh $TPCH_DATA_DIR/*.tbl* diff --git a/tools/tpch-tools/load-tpch-data.sh b/tools/tpch-tools/load-tpch-data.sh new file mode 100755 index 0000000..f04e21b --- /dev/null +++ b/tools/tpch-tools/load-tpch-data.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to load generated TPC-H data set into Doris. +# for table lineitem, orders, partsupp, they will be loading in parallel +############################################################## + +set -eo pipefail + +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) + +CURDIR=${ROOT} +TPCH_DATA_DIR=$CURDIR/tpch-data + +usage() { + echo " +Usage: $0 <options> + Optional options: + -c parallelism to load data of lineitem, orders, partsupp, default is 5. + + Eg. + $0 load data using default value. + $0 -c 10 load lineitem, orders, partsupp table data using parallelism 10. + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -o 'c:' \ + -- "$@") + +eval set -- "$OPTS" + +PARALLEL=5 +HELP=0 + +if [ $# == 0 ]; then + usage +fi + +while true; do + case "$1" in + -h) + HELP=1 + shift + ;; + -c) + PARALLEL=$2 + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Internal error" + exit 1 + ;; + esac +done + +if [[ ${HELP} -eq 1 ]]; then + usage + exit +fi + +echo "Parallelism: $PARALLEL" + +# check if tpch-data exists +if [[ ! -d $TPCH_DATA_DIR/ ]]; then + echo "$TPCH_DATA_DIR does not exist. Run sh gen-tpch-data.sh first." + exit 1 +fi + +check_prerequest() { + local CMD=$1 + local NAME=$2 + if ! $CMD; then + echo "$NAME is missing. This script depends on cURL to load data to Doris." + exit 1 + fi +} + +check_prerequest "curl --version" "curl" + +# load tables +source $CURDIR/doris-cluster.conf + +echo "FE_HOST: $FE_HOST" +echo "FE_HTTP_PORT: $FE_HTTP_PORT" +echo "USER: $USER" +echo "PASSWORD: $PASSWORD" +echo "DB: $DB" + +function load_region() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: r_regionkey, r_name, r_comment, temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/region/_stream_load +} +function load_nation() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/nation/_stream_load +} +function load_supplier() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load +} +function load_customer() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load +} +function load_part() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load +} +function load_partsupp() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/partsupp/_stream_load +} +function load_orders() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/orders/_stream_load +} +function load_lineitem() { + echo $@ + curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \ + -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineitem/_stream_load +} + +# start load +load_region $TPCH_DATA_DIR/region.tbl +load_nation $TPCH_DATA_DIR/nation.tbl +load_supplier $TPCH_DATA_DIR/supplier.tbl +load_customer $TPCH_DATA_DIR/customer.tbl +load_part $TPCH_DATA_DIR/part.tbl + +# set parallelism + +# 以PID为名, 防止创建命名管道时与已有文件重名,从而失败 +fifo="/tmp/$$.fifo" +# 创建命名管道 +mkfifo ${fifo} +# 以读写方式打开命名管道,文件标识符fd为3,fd可取除0,1,2,5外0-9中的任意数字 +exec 3<>${fifo} +# 删除文件, 也可不删除, 不影响后面操作 +rm -rf ${fifo} + +# 在fd3中放置$PARALLEL个空行作为令牌 +for ((i = 1; i <= $PARALLEL; i++)); do + echo >&3 +done + +for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do + # 领取令牌, 即从fd3中读取行, 每次一行 + # 对管道,读一行便少一行,每次只能读取一行 + # 所有行读取完毕, 执行挂起, 直到管道再次有可读行 + # 因此实现了进程数量控制 + read -u3 + + # 要批量执行的命令放在大括号内, 后台运行 + { + load_lineitem $file + echo "----loaded $file" + sleep 2 + # 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行 + echo >&3 + } & +done + +for file in $(ls $TPCH_DATA_DIR/orders.tbl*); do + read -u3 + { + load_orders $file + echo "----loaded $file" + sleep 2 + echo >&3 + } & +done + +for file in $(ls $TPCH_DATA_DIR/partsupp.tbl*); do + read -u3 + { + load_partsupp $file + echo "----loaded $file" + sleep 2 + echo >&3 + } & +done + +# 等待所有的后台子进程结束 +wait +# 删除文件标识符 +exec 3>&- diff --git a/tools/tpch-tools/queries/q1.sql b/tools/tpch-tools/queries/q1.sql new file mode 100644 index 0000000..a888c2b --- /dev/null +++ b/tools/tpch-tools/queries/q1.sql @@ -0,0 +1,38 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '90' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; diff --git a/tools/tpch-tools/queries/q10.sql b/tools/tpch-tools/queries/q10.sql new file mode 100644 index 0000000..6a12c1f --- /dev/null +++ b/tools/tpch-tools/queries/q10.sql @@ -0,0 +1,49 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20; diff --git a/tools/tpch-tools/queries/q11.sql b/tools/tpch-tools/queries/q11.sql new file mode 100644 index 0000000..f70fab2 --- /dev/null +++ b/tools/tpch-tools/queries/q11.sql @@ -0,0 +1,44 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' + ) +order by + value desc; diff --git a/tools/tpch-tools/queries/q12.sql b/tools/tpch-tools/queries/q12.sql new file mode 100644 index 0000000..d53dd9c --- /dev/null +++ b/tools/tpch-tools/queries/q12.sql @@ -0,0 +1,45 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode; diff --git a/tools/tpch-tools/queries/q13.sql b/tools/tpch-tools/queries/q13.sql new file mode 100644 index 0000000..a35497c --- /dev/null +++ b/tools/tpch-tools/queries/q13.sql @@ -0,0 +1,37 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) as c_count + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%special%requests%' + group by + c_custkey + ) as c_orders +group by + c_count +order by + custdist desc, + c_count desc; diff --git a/tools/tpch-tools/queries/q14.sql b/tools/tpch-tools/queries/q14.sql new file mode 100644 index 0000000..960cd15 --- /dev/null +++ b/tools/tpch-tools/queries/q14.sql @@ -0,0 +1,30 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1995-09-01' + and l_shipdate < date '1995-09-01' + interval '1' month; diff --git a/tools/tpch-tools/queries/q15.sql b/tools/tpch-tools/queries/q15.sql new file mode 100644 index 0000000..1f7f19b --- /dev/null +++ b/tools/tpch-tools/queries/q15.sql @@ -0,0 +1,51 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +create view revenue0 (supplier_no, total_revenue) as + select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) + from + lineitem + where + l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '3' month + group by + l_suppkey; + + +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey; + +drop view revenue0; diff --git a/tools/tpch-tools/queries/q16.sql b/tools/tpch-tools/queries/q16.sql new file mode 100644 index 0000000..6027b5c --- /dev/null +++ b/tools/tpch-tools/queries/q16.sql @@ -0,0 +1,47 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; diff --git a/tools/tpch-tools/queries/q17.sql b/tools/tpch-tools/queries/q17.sql new file mode 100644 index 0000000..7724caa --- /dev/null +++ b/tools/tpch-tools/queries/q17.sql @@ -0,0 +1,34 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container = 'MED BOX' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); diff --git a/tools/tpch-tools/queries/q18.sql b/tools/tpch-tools/queries/q18.sql new file mode 100644 index 0000000..352d8be --- /dev/null +++ b/tools/tpch-tools/queries/q18.sql @@ -0,0 +1,50 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 300 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100; diff --git a/tools/tpch-tools/queries/q19.sql b/tools/tpch-tools/queries/q19.sql new file mode 100644 index 0000000..a20a625 --- /dev/null +++ b/tools/tpch-tools/queries/q19.sql @@ -0,0 +1,52 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 1 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#34' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); diff --git a/tools/tpch-tools/queries/q2.sql b/tools/tpch-tools/queries/q2.sql new file mode 100644 index 0000000..d1ccca0 --- /dev/null +++ b/tools/tpch-tools/queries/q2.sql @@ -0,0 +1,61 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +limit 100; diff --git a/tools/tpch-tools/queries/q20.sql b/tools/tpch-tools/queries/q20.sql new file mode 100644 index 0000000..6803b4e --- /dev/null +++ b/tools/tpch-tools/queries/q20.sql @@ -0,0 +1,54 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'forest%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'CANADA' +order by + s_name; diff --git a/tools/tpch-tools/queries/q21.sql b/tools/tpch-tools/queries/q21.sql new file mode 100644 index 0000000..f3cdf22 --- /dev/null +++ b/tools/tpch-tools/queries/q21.sql @@ -0,0 +1,57 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit 100; diff --git a/tools/tpch-tools/queries/q22.sql b/tools/tpch-tools/queries/q22.sql new file mode 100644 index 0000000..48d7384 --- /dev/null +++ b/tools/tpch-tools/queries/q22.sql @@ -0,0 +1,54 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone, 1, 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone, 1, 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone, 1, 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; diff --git a/tools/tpch-tools/queries/q3.sql b/tools/tpch-tools/queries/q3.sql new file mode 100644 index 0000000..73fbb51 --- /dev/null +++ b/tools/tpch-tools/queries/q3.sql @@ -0,0 +1,40 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 10; diff --git a/tools/tpch-tools/queries/q4.sql b/tools/tpch-tools/queries/q4.sql new file mode 100644 index 0000000..b6bc24d --- /dev/null +++ b/tools/tpch-tools/queries/q4.sql @@ -0,0 +1,38 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; diff --git a/tools/tpch-tools/queries/q5.sql b/tools/tpch-tools/queries/q5.sql new file mode 100644 index 0000000..5e3bc41 --- /dev/null +++ b/tools/tpch-tools/queries/q5.sql @@ -0,0 +1,41 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name +order by + revenue desc; diff --git a/tools/tpch-tools/queries/q6.sql b/tools/tpch-tools/queries/q6.sql new file mode 100644 index 0000000..2d62a6a --- /dev/null +++ b/tools/tpch-tools/queries/q6.sql @@ -0,0 +1,26 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between .06 - 0.01 and .06 + 0.01 + and l_quantity < 24; diff --git a/tools/tpch-tools/queries/q7.sql b/tools/tpch-tools/queries/q7.sql new file mode 100644 index 0000000..9a6e4a9 --- /dev/null +++ b/tools/tpch-tools/queries/q7.sql @@ -0,0 +1,56 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') + or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; diff --git a/tools/tpch-tools/queries/q8.sql b/tools/tpch-tools/queries/q8.sql new file mode 100644 index 0000000..e61c863 --- /dev/null +++ b/tools/tpch-tools/queries/q8.sql @@ -0,0 +1,54 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + o_year, + sum(case + when nation = 'BRAZIL' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' + ) as all_nations +group by + o_year +order by + o_year; diff --git a/tools/tpch-tools/queries/q9.sql b/tools/tpch-tools/queries/q9.sql new file mode 100644 index 0000000..8e486d8 --- /dev/null +++ b/tools/tpch-tools/queries/q9.sql @@ -0,0 +1,49 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/tpch-tools/run-tpch-queries.sh new file mode 100755 index 0000000..5a0d06d --- /dev/null +++ b/tools/tpch-tools/run-tpch-queries.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to create TPC-H tables +############################################################## + +set -eo pipefail + +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) + +CURDIR=${ROOT} +QUERIES_DIR=$CURDIR/queries + +usage() { + echo " +This script is used to run TPC-H 22queries, +will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file. +Usage: $0 + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -- "$@") + +eval set -- "$OPTS" +HELP=0 + +if [ $# == 0 ]; then + usage +fi + +while true; do + case "$1" in + -h) + HELP=1 + shift + ;; + --) + shift + break + ;; + *) + echo "Internal error" + exit 1 + ;; + esac +done + +if [[ ${HELP} -eq 1 ]]; then + usage + exit +fi + +check_prerequest() { + local CMD=$1 + local NAME=$2 + if ! $CMD; then + echo "$NAME is missing. This script depends on mysql to create tables in Doris." + exit 1 + fi +} + +check_prerequest "mysql --version" "mysql" + +source $CURDIR/doris-cluster.conf + +echo "FE_HOST: $FE_HOST" +echo "FE_QUERY_PORT: $FE_QUERY_PORT" +echo "USER: $USER" +echo "PASSWORD: $PASSWORD" +echo "DB: $DB" + +pre_set() { + echo $@ + mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@" +} + +pre_set "set global enable_vectorized_engine=1;" +pre_set "set global parallel_fragment_exec_instance_num=8;" +pre_set "set global exec_mem_limit=48G;" +pre_set "set global batch_size=4096;" +# pre_set "show variables like 'batch_size';" + +for i in $(seq 1 22); do + total=0 + # Each query is executed three times and takes the average time + for j in $(seq 1 3); do + start=$(date +%s%3N) + mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql >/dev/null + end=$(date +%s%3N) + total=$((total + end - start)) + done + echo "q$i: $((total / 3))ms" +done --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
