This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new b15b1a9f07 [tools](tpc-h) Modify tpch tools (#9982) b15b1a9f07 is described below commit b15b1a9f074087a640f06dca73d53f0f651a73c2 Author: Mingyu Chen <morningman....@gmail.com> AuthorDate: Wed Jun 8 11:21:57 2022 +0800 [tools](tpc-h) Modify tpch tools (#9982) 1. Modify the create table stmt. 2. Modify part of queries. --- tools/tpch-tools/README.md | 13 ++ tools/tpch-tools/create-tpch-tables.sql | 228 +++++++++++++++++++++----------- tools/tpch-tools/queries/q1.sql | 2 +- tools/tpch-tools/queries/q10.sql | 21 +-- tools/tpch-tools/queries/q11.sql | 2 +- tools/tpch-tools/queries/q12.sql | 2 +- tools/tpch-tools/queries/q13.sql | 6 +- tools/tpch-tools/queries/q14.sql | 8 +- tools/tpch-tools/queries/q15.sql | 17 +-- tools/tpch-tools/queries/q16.sql | 2 +- tools/tpch-tools/queries/q17.sql | 10 +- tools/tpch-tools/queries/q18.sql | 57 ++++---- tools/tpch-tools/queries/q19.sql | 2 +- tools/tpch-tools/queries/q2.sql | 43 +++--- tools/tpch-tools/queries/q20.sql | 62 ++++----- tools/tpch-tools/queries/q21.sql | 65 ++++----- tools/tpch-tools/queries/q22.sql | 2 +- tools/tpch-tools/queries/q3.sql | 23 ++-- tools/tpch-tools/queries/q4.sql | 21 +-- tools/tpch-tools/queries/q5.sql | 2 +- tools/tpch-tools/queries/q6.sql | 2 +- tools/tpch-tools/queries/q7.sql | 2 +- tools/tpch-tools/queries/q8.sql | 8 +- tools/tpch-tools/queries/q9.sql | 4 +- tools/tpch-tools/run-tpch-queries.sh | 22 ++- 25 files changed, 351 insertions(+), 275 deletions(-) diff --git a/tools/tpch-tools/README.md b/tools/tpch-tools/README.md index f06e96289b..dbef056fa7 100644 --- a/tools/tpch-tools/README.md +++ b/tools/tpch-tools/README.md @@ -23,12 +23,25 @@ These scripts are used to make tpc-h test. follow the steps below: ### 1. build tpc-h dbgen tool. + ./build-tpch-dbgen.sh + ### 2. generate tpc-h data. use -h for more infomations. + ./gen-tpch-data.sh -s 1 + ### 3. create tpc-h tables. modify `doris-cluster.conf` to specify doris info, then run script below. + ./create-tpch-tables.sh + ### 4. load tpc-h data. use -h for help. + ./load-tpch-data.sh + ### 5. run tpc-h queries. + ./run-tpch-queries.sh + + NOTICE: At present, Doris's query optimizer and statistical information functions are not complete, so we rewrite some queries in TPC-H to adapt to Doris' execution framework, but it does not affect the correctness of the results. The rewritten SQL is marked with "Modified" in the corresponding .sql file. + + A new query optimizer will be released in subsequent releases. diff --git a/tools/tpch-tools/create-tpch-tables.sql b/tools/tpch-tools/create-tpch-tables.sql index de3d936857..c86d7b6c51 100644 --- a/tools/tpch-tools/create-tpch-tables.sql +++ b/tools/tpch-tools/create-tpch-tables.sql @@ -14,87 +14,161 @@ -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. -CREATE TABLE `region` ( - `r_regionkey` integer NOT NULL, - `r_name` char(25) NOT NULL, - `r_comment` varchar(152) -) DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); -CREATE TABLE `nation` ( - `n_nationkey` integer NOT NULL, - `n_name` char(25) NOT NULL, - `n_regionkey` integer NOT NULL, - `n_comment` varchar(152) -) DISTRIBUTED BY HASH(`n_nationkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +drop table if exists lineitem; +CREATE TABLE lineitem ( + l_shipdate DATE NOT NULL, + l_orderkey bigint NOT NULL, + l_linenumber int not null, + l_partkey int NOT NULL, + l_suppkey int not null, + l_quantity decimal(15, 2) NOT NULL, + l_extendedprice decimal(15, 2) NOT NULL, + l_discount decimal(15, 2) NOT NULL, + l_tax decimal(15, 2) NOT NULL, + l_returnflag VARCHAR(1) NOT NULL, + l_linestatus VARCHAR(1) NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct VARCHAR(25) NOT NULL, + l_shipmode VARCHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`l_shipdate`, `l_orderkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders" +); + +drop table if exists orders; +CREATE TABLE orders ( + o_orderkey bigint NOT NULL, + o_orderdate DATE NOT NULL, + o_custkey int NOT NULL, + o_orderstatus VARCHAR(1) NOT NULL, + o_totalprice decimal(15, 2) NOT NULL, + o_orderpriority VARCHAR(15) NOT NULL, + o_clerk VARCHAR(15) NOT NULL, + o_shippriority int NOT NULL, + o_comment VARCHAR(79) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`o_orderkey`, `o_orderdate`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders" +); -CREATE TABLE `part` ( - `p_partkey` integer NOT NULL, - `p_name` varchar(55) NOT NULL, - `p_mfgr` char(25) NOT NULL, - `p_brand` char(10) NOT NULL, - `p_type` varchar(25) NOT NULL, - `p_size` integer NOT NULL, - `p_container` char(10) NOT NULL, - `p_retailprice` decimal(12, 2) NOT NULL, - `p_comment` varchar(23) NOT NULL -) DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); +drop table if exists partsupp; +CREATE TABLE partsupp ( + ps_partkey int NOT NULL, + ps_suppkey int NOT NULL, + ps_availqty int NOT NULL, + ps_supplycost decimal(15, 2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`ps_partkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 24 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "part_partsupp" +); -CREATE TABLE `supplier` ( - `s_suppkey` integer NOT NULL, - `s_name` char(25) NOT NULL, - `s_address` varchar(40) NOT NULL, - `s_nationkey` integer NOT NULL, - `s_phone` char(15) NOT NULL, - `s_acctbal` decimal(12, 2) NOT NULL, - `s_comment` varchar(101) NOT NULL -) DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); +drop table if exists part; +CREATE TABLE part ( + p_partkey int NOT NULL, + p_name VARCHAR(55) NOT NULL, + p_mfgr VARCHAR(25) NOT NULL, + p_brand VARCHAR(10) NOT NULL, + p_type VARCHAR(25) NOT NULL, + p_size int NOT NULL, + p_container VARCHAR(10) NOT NULL, + p_retailprice decimal(15, 2) NOT NULL, + p_comment VARCHAR(23) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`p_partkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 24 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "part_partsupp" +); -CREATE TABLE `customer` ( - `c_custkey` integer NOT NULL, - `c_name` varchar(25) NOT NULL, - `c_address` varchar(40) NOT NULL, - `c_nationkey` integer NOT NULL, - `c_phone` char(15) NOT NULL, - `c_acctbal` decimal(12, 2) NOT NULL, - `c_mktsegment` char(10) NOT NULL, - `c_comment` varchar(117) NOT NULL -) DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); +drop table if exists customer; +CREATE TABLE customer ( + c_custkey int NOT NULL, + c_name VARCHAR(25) NOT NULL, + c_address VARCHAR(40) NOT NULL, + c_nationkey int NOT NULL, + c_phone VARCHAR(15) NOT NULL, + c_acctbal decimal(15, 2) NOT NULL, + c_mktsegment VARCHAR(10) NOT NULL, + c_comment VARCHAR(117) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`c_custkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 24 +PROPERTIES ( + "replication_num" = "1" +); -CREATE TABLE `partsupp` ( - `ps_partkey` integer NOT NULL, - `ps_suppkey` integer NOT NULL, - `ps_availqty` integer NOT NULL, - `ps_supplycost` decimal(12, 2) NOT NULL, - `ps_comment` varchar(199) NOT NULL -) DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); +drop table if exists supplier; +CREATE TABLE supplier ( + s_suppkey int NOT NULL, + s_name VARCHAR(25) NOT NULL, + s_address VARCHAR(40) NOT NULL, + s_nationkey int NOT NULL, + s_phone VARCHAR(15) NOT NULL, + s_acctbal decimal(15, 2) NOT NULL, + s_comment VARCHAR(101) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`s_suppkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12 +PROPERTIES ( + "replication_num" = "1" +); + +drop table if exists nation; +CREATE TABLE `nation` ( + `n_nationkey` int(11) NOT NULL, + `n_name` varchar(25) NOT NULL, + `n_regionkey` int(11) NOT NULL, + `n_comment` varchar(152) NULL +) ENGINE=OLAP +DUPLICATE KEY(`N_NATIONKEY`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "3" +); -CREATE TABLE `orders` ( - `o_orderkey` integer NOT NULL, - `o_custkey` integer NOT NULL, - `o_orderstatus` char(1) NOT NULL, - `o_totalprice` decimal(12, 2) NOT NULL, - `o_orderdate` date NOT NULL, - `o_orderpriority` char(15) NOT NULL, - `o_clerk` char(15) NOT NULL, - `o_shippriority` integer NOT NULL, - `o_comment` varchar(79) NOT NULL -) DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); +drop table if exists region; +CREATE TABLE region ( + r_regionkey int NOT NULL, + r_name VARCHAR(25) NOT NULL, + r_comment VARCHAR(152) +)ENGINE=OLAP +DUPLICATE KEY(`r_regionkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "3" +); -CREATE TABLE `lineitem` ( - `l_orderkey` integer NOT NULL, - `l_linenumber` integer NOT NULL, - `l_partkey` integer NOT NULL, - `l_suppkey` integer NOT NULL, - `l_quantity` decimal(12, 2) NOT NULL, - `l_extendedprice` decimal(12, 2) NOT NULL, - `l_discount` decimal(12, 2) NOT NULL, - `l_tax` decimal(12, 2) NOT NULL, - `l_returnflag` char(1) NOT NULL, - `l_linestatus` char(1) NOT NULL, - `l_shipdate` date NOT NULL, - `l_commitdate` date NOT NULL, - `l_receiptdate` date NOT NULL, - `l_shipinstruct` char(25) NOT NULL, - `l_shipmode` char(10) NOT NULL, - `l_comment` varchar(44) NOT NULL -) DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1"); +drop view if exists revenue0; +create view revenue0 (supplier_no, total_revenue) as +select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) +from + lineitem +where + l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '3' month +group by + l_suppkey; diff --git a/tools/tpch-tools/queries/q1.sql b/tools/tpch-tools/queries/q1.sql index a888c2bdb1..dfa27c7446 100644 --- a/tools/tpch-tools/queries/q1.sql +++ b/tools/tpch-tools/queries/q1.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=false) */ l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, diff --git a/tools/tpch-tools/queries/q10.sql b/tools/tpch-tools/queries/q10.sql index 6a12c1f5b4..af68dd7ece 100644 --- a/tools/tpch-tools/queries/q10.sql +++ b/tools/tpch-tools/queries/q10.sql @@ -15,10 +15,12 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=10, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ c_custkey, c_name, - sum(l_extendedprice * (1 - l_discount)) as revenue, + sum(t1.l_extendedprice * (1 - t1.l_discount)) as revenue, c_acctbal, n_name, c_address, @@ -26,15 +28,16 @@ select c_comment from customer, - orders, - lineitem, + ( + select o_custkey,l_extendedprice,l_discount from lineitem, orders + where l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + ) t1, nation where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate >= date '1993-10-01' - and o_orderdate < date '1993-10-01' + interval '3' month - and l_returnflag = 'R' + c_custkey = t1.o_custkey and c_nationkey = n_nationkey group by c_custkey, diff --git a/tools/tpch-tools/queries/q11.sql b/tools/tpch-tools/queries/q11.sql index f70fab2be9..609798ab36 100644 --- a/tools/tpch-tools/queries/q11.sql +++ b/tools/tpch-tools/queries/q11.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ ps_partkey, sum(ps_supplycost * ps_availqty) as value from diff --git a/tools/tpch-tools/queries/q12.sql b/tools/tpch-tools/queries/q12.sql index d53dd9c809..4401f61163 100644 --- a/tools/tpch-tools/queries/q12.sql +++ b/tools/tpch-tools/queries/q12.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ l_shipmode, sum(case when o_orderpriority = '1-URGENT' diff --git a/tools/tpch-tools/queries/q13.sql b/tools/tpch-tools/queries/q13.sql index a35497cd57..60449da5c8 100644 --- a/tools/tpch-tools/queries/q13.sql +++ b/tools/tpch-tools/queries/q13.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ c_count, count(*) as custdist from @@ -24,7 +26,7 @@ from c_custkey, count(o_orderkey) as c_count from - customer left outer join orders on + orders right outer join customer on c_custkey = o_custkey and o_comment not like '%special%requests%' group by diff --git a/tools/tpch-tools/queries/q14.sql b/tools/tpch-tools/queries/q14.sql index 960cd15fb3..39829045a2 100644 --- a/tools/tpch-tools/queries/q14.sql +++ b/tools/tpch-tools/queries/q14.sql @@ -15,15 +15,17 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from - lineitem, - part + part, + lineitem where l_partkey = p_partkey and l_shipdate >= date '1995-09-01' diff --git a/tools/tpch-tools/queries/q15.sql b/tools/tpch-tools/queries/q15.sql index 1f7f19b34b..74ce5f0c3c 100644 --- a/tools/tpch-tools/queries/q15.sql +++ b/tools/tpch-tools/queries/q15.sql @@ -15,20 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -create view revenue0 (supplier_no, total_revenue) as - select - l_suppkey, - sum(l_extendedprice * (1 - l_discount)) - from - lineitem - where - l_shipdate >= date '1996-01-01' - and l_shipdate < date '1996-01-01' + interval '3' month - group by - l_suppkey; - - -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ s_suppkey, s_name, s_address, @@ -47,5 +34,3 @@ where ) order by s_suppkey; - -drop view revenue0; diff --git a/tools/tpch-tools/queries/q16.sql b/tools/tpch-tools/queries/q16.sql index 6027b5c2d0..0b1b93875b 100644 --- a/tools/tpch-tools/queries/q16.sql +++ b/tools/tpch-tools/queries/q16.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ p_brand, p_type, p_size, diff --git a/tools/tpch-tools/queries/q17.sql b/tools/tpch-tools/queries/q17.sql index 7724caac8f..3d3fc2c671 100644 --- a/tools/tpch-tools/queries/q17.sql +++ b/tools/tpch-tools/queries/q17.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, @@ -28,7 +30,9 @@ where select 0.2 * avg(l_quantity) from - lineitem + lineitem, part where l_partkey = p_partkey - ); + and p_brand = 'Brand#23' + and p_container = 'MED BOX' + ) diff --git a/tools/tpch-tools/queries/q18.sql b/tools/tpch-tools/queries/q18.sql index 352d8be84a..1b88388bb2 100644 --- a/tools/tpch-tools/queries/q18.sql +++ b/tools/tpch-tools/queries/q18.sql @@ -15,36 +15,43 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ c_name, c_custkey, - o_orderkey, - o_orderdate, - o_totalprice, - sum(l_quantity) + t3.o_orderkey, + t3.o_orderdate, + t3.o_totalprice, + sum(t3.l_quantity) from - customer, - orders, - lineitem -where - o_orderkey in ( - select - l_orderkey - from - lineitem - group by - l_orderkey having - sum(l_quantity) > 300 - ) - and c_custkey = o_custkey - and o_orderkey = l_orderkey +customer join +( + select * from + lineitem join + ( + select * from + orders left semi join + ( + select + l_orderkey + from + lineitem + group by + l_orderkey having sum(l_quantity) > 300 + ) t1 + on o_orderkey = t1.l_orderkey + ) t2 + on t2.o_orderkey = l_orderkey +) t3 +on c_custkey = t3.o_custkey group by c_name, c_custkey, - o_orderkey, - o_orderdate, - o_totalprice + t3.o_orderkey, + t3.o_orderdate, + t3.o_totalprice order by - o_totalprice desc, - o_orderdate + t3.o_totalprice desc, + t3.o_orderdate limit 100; diff --git a/tools/tpch-tools/queries/q19.sql b/tools/tpch-tools/queries/q19.sql index a20a62533a..9cf162eacf 100644 --- a/tools/tpch-tools/queries/q19.sql +++ b/tools/tpch-tools/queries/q19.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ sum(l_extendedprice* (1 - l_discount)) as revenue from lineitem, diff --git a/tools/tpch-tools/queries/q2.sql b/tools/tpch-tools/queries/q2.sql index d1ccca0933..1f69856266 100644 --- a/tools/tpch-tools/queries/q2.sql +++ b/tools/tpch-tools/queries/q2.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ s_acctbal, s_name, n_name, @@ -25,34 +27,29 @@ select s_phone, s_comment from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey +partsupp, +( + select ps_partkey, min(ps_supplycost) as ps_s from + partsupp, supplier, nation, region + where s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + group by ps_partkey +) t1, +supplier, +part, +nation, +region +where p_partkey = t1.ps_partkey + and p_partkey = partsupp.ps_partkey and s_suppkey = ps_suppkey and p_size = 15 and p_type like '%BRASS' and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - ) + and ps_supplycost = t1.ps_s order by s_acctbal desc, n_name, diff --git a/tools/tpch-tools/queries/q20.sql b/tools/tpch-tools/queries/q20.sql index 6803b4e3c3..7c621e2244 100644 --- a/tools/tpch-tools/queries/q20.sql +++ b/tools/tpch-tools/queries/q20.sql @@ -15,40 +15,30 @@ -- specific language governing permissions and limitations -- under the License. -select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'forest%' - ) - and ps_availqty > ( - select - 0.5 * sum(l_quantity) - from - lineitem - where - l_partkey = ps_partkey - and l_suppkey = ps_suppkey - and l_shipdate >= date '1994-01-01' - and l_shipdate < date '1994-01-01' + interval '1' year - ) - ) - and s_nationkey = n_nationkey +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ +s_name, s_address from +supplier left semi join +( + select * from + ( + select l_partkey,l_suppkey, 0.5 * sum(l_quantity) as l_q + from lineitem + where l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + group by l_partkey,l_suppkey + ) t2 join + ( + select ps_partkey, ps_suppkey, ps_availqty + from partsupp left semi join part + on ps_partkey = p_partkey and p_name like 'forest%' + ) t1 + on t2.l_partkey = t1.ps_partkey and t2.l_suppkey = t1.ps_suppkey + and t1.ps_availqty > t2.l_q +) t3 +on s_suppkey = t3.ps_suppkey +join nation +where s_nationkey = n_nationkey and n_name = 'CANADA' -order by - s_name; +order by s_name; diff --git a/tools/tpch-tools/queries/q21.sql b/tools/tpch-tools/queries/q21.sql index f3cdf2228d..4353a3a2cb 100644 --- a/tools/tpch-tools/queries/q21.sql +++ b/tools/tpch-tools/queries/q21.sql @@ -15,43 +15,36 @@ -- specific language governing permissions and limitations -- under the License. -select - s_name, - count(*) as numwait -from - supplier, - lineitem l1, - orders, - nation -where - s_suppkey = l1.l_suppkey - and o_orderkey = l1.l_orderkey - and o_orderstatus = 'F' - and l1.l_receiptdate > l1.l_commitdate - and exists ( - select - * - from - lineitem l2 - where - l2.l_orderkey = l1.l_orderkey - and l2.l_suppkey <> l1.l_suppkey - ) - and not exists ( - select - * - from - lineitem l3 - where - l3.l_orderkey = l1.l_orderkey - and l3.l_suppkey <> l1.l_suppkey - and l3.l_receiptdate > l3.l_commitdate - ) - and s_nationkey = n_nationkey - and n_name = 'SAUDI ARABIA' +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */ +s_name, count(*) as numwait +from orders join +( + select * from + lineitem l2 right semi join + ( + select * from + lineitem l3 right anti join + ( + select * from + lineitem l1 join + ( + select * from + supplier join nation + where s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' + ) t1 + where t1.s_suppkey = l1.l_suppkey and l1.l_receiptdate > l1.l_commitdate + ) t2 + on l3.l_orderkey = t2.l_orderkey and l3.l_suppkey <> t2.l_suppkey and l3.l_receiptdate > l3.l_commitdate + ) t3 + on l2.l_orderkey = t3.l_orderkey and l2.l_suppkey <> t3.l_suppkey +) t4 +on o_orderkey = t4.l_orderkey and o_orderstatus = 'F' group by - s_name + t4.s_name order by numwait desc, - s_name + t4.s_name limit 100; diff --git a/tools/tpch-tools/queries/q22.sql b/tools/tpch-tools/queries/q22.sql index 48d7384bb5..3077b7e2c8 100644 --- a/tools/tpch-tools/queries/q22.sql +++ b/tools/tpch-tools/queries/q22.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */ cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal diff --git a/tools/tpch-tools/queries/q3.sql b/tools/tpch-tools/queries/q3.sql index 73fbb51ede..316a728ee3 100644 --- a/tools/tpch-tools/queries/q3.sql +++ b/tools/tpch-tools/queries/q3.sql @@ -15,21 +15,24 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ l_orderkey, sum(l_extendedprice * (1 - l_discount)) as revenue, o_orderdate, o_shippriority from - customer, - orders, - lineitem -where - c_mktsegment = 'BUILDING' - and c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate < date '1995-03-15' - and l_shipdate > date '1995-03-15' + customer c join + ( + select l_orderkey, l_extendedprice, l_discount, o_orderdate, o_shippriority, o_custkey from + lineitem join orders + where l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' + ) t1 + where c_mktsegment = 'BUILDING' + and c.c_custkey = t1.o_custkey group by l_orderkey, o_orderdate, diff --git a/tools/tpch-tools/queries/q4.sql b/tools/tpch-tools/queries/q4.sql index b6bc24d511..1292883ec5 100644 --- a/tools/tpch-tools/queries/q4.sql +++ b/tools/tpch-tools/queries/q4.sql @@ -15,23 +15,24 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ o_orderpriority, count(*) as order_count from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and exists ( + ( select * from lineitem - where - l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) + where l_commitdate < l_receiptdate + ) t1 + right semi join orders + on t1.l_orderkey = o_orderkey +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month group by o_orderpriority order by diff --git a/tools/tpch-tools/queries/q5.sql b/tools/tpch-tools/queries/q5.sql index 5e3bc4116f..19372edf2b 100644 --- a/tools/tpch-tools/queries/q5.sql +++ b/tools/tpch-tools/queries/q5.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ n_name, sum(l_extendedprice * (1 - l_discount)) as revenue from diff --git a/tools/tpch-tools/queries/q6.sql b/tools/tpch-tools/queries/q6.sql index 2d62a6a4f1..417023b436 100644 --- a/tools/tpch-tools/queries/q6.sql +++ b/tools/tpch-tools/queries/q6.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ sum(l_extendedprice * l_discount) as revenue from lineitem diff --git a/tools/tpch-tools/queries/q7.sql b/tools/tpch-tools/queries/q7.sql index 9a6e4a9b9f..97ee9af11b 100644 --- a/tools/tpch-tools/queries/q7.sql +++ b/tools/tpch-tools/queries/q7.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -select +select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ supp_nation, cust_nation, l_year, diff --git a/tools/tpch-tools/queries/q8.sql b/tools/tpch-tools/queries/q8.sql index e61c8632d0..f27609ac2d 100644 --- a/tools/tpch-tools/queries/q8.sql +++ b/tools/tpch-tools/queries/q8.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */ o_year, sum(case when nation = 'BRAZIL' then volume @@ -28,11 +30,11 @@ from l_extendedprice * (1 - l_discount) as volume, n2.n_name as nation from - part, - supplier, lineitem, orders, customer, + supplier, + part, nation n1, nation n2, region diff --git a/tools/tpch-tools/queries/q9.sql b/tools/tpch-tools/queries/q9.sql index 8e486d8fa3..715d845bb7 100644 --- a/tools/tpch-tools/queries/q9.sql +++ b/tools/tpch-tools/queries/q9.sql @@ -15,7 +15,9 @@ -- specific language governing permissions and limitations -- under the License. -select +-- Modified + +select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */ nation, o_year, sum(amount) as sum_profit diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/tpch-tools/run-tpch-queries.sh index 457be4447f..c384a2132a 100755 --- a/tools/tpch-tools/run-tpch-queries.sh +++ b/tools/tpch-tools/run-tpch-queries.sh @@ -17,7 +17,7 @@ # under the License. ############################################################## -# This script is used to run TPC-H queries +# This script is used to create TPC-H tables ############################################################## set -eo pipefail @@ -43,7 +43,6 @@ Usage: $0 OPTS=$(getopt \ -n $0 \ -o '' \ - -o 'h' \ -- "$@") eval set -- "$OPTS" @@ -87,33 +86,32 @@ check_prerequest() { check_prerequest "mysql --version" "mysql" source $CURDIR/doris-cluster.conf -export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_QUERY_PORT: $FE_QUERY_PORT" echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" +echo "Time Unit: ms" pre_set() { echo $@ mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@" } -pre_set "set global enable_vectorized_engine=1;" -pre_set "set global parallel_fragment_exec_instance_num=8;" -pre_set "set global exec_mem_limit=48G;" -pre_set "set global batch_size=4096;" -# pre_set "show variables like 'batch_size';" - +sum=0 for i in $(seq 1 22); do total=0 - # Each query is executed three times and takes the average time - for j in $(seq 1 3); do + run=3 + # Each query is executed ${run} times and takes the average time + for j in $(seq 1 ${run}); do start=$(date +%s%3N) mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql >/dev/null end=$(date +%s%3N) total=$((total + end - start)) done - echo "q$i: $((total / 3))ms" + cost=$((total / ${run})) + echo "q$i: ${cost}" + sum=$((sum + $cost)) done +echo "Total cost: $sum" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org