Add TPC-H based planner tests for Kudu tables This commit adds a set of planner tests for Kudu tables based on the 22 TPC-H queries.
Change-Id: I6c40534b72b9aa1ee582b9679c2a63cad52df703 Reviewed-on: http://gerrit.cloudera.org:8080/3790 Reviewed-by: Dimitris Tsirogiannis <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0e88f0d7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0e88f0d7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0e88f0d7 Branch: refs/heads/master Commit: 0e88f0d7aa3450c6bf036a6559827c5152bfcb03 Parents: a997130 Author: Dimitris Tsirogiannis <[email protected]> Authored: Mon Jul 18 22:27:28 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Fri Jul 29 02:49:50 2016 +0000 ---------------------------------------------------------------------- .../impala/planner/KuduPlannerTest.java | 3 + .../queries/PlannerTest/tpch-kudu.test | 1265 ++++++++++++++++++ 2 files changed, 1268 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0e88f0d7/fe/src/test/java/com/cloudera/impala/planner/KuduPlannerTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/planner/KuduPlannerTest.java b/fe/src/test/java/com/cloudera/impala/planner/KuduPlannerTest.java index fa58f79..000359b 100644 --- a/fe/src/test/java/com/cloudera/impala/planner/KuduPlannerTest.java +++ b/fe/src/test/java/com/cloudera/impala/planner/KuduPlannerTest.java @@ -43,4 +43,7 @@ public class KuduPlannerTest extends PlannerTestBase { options.setExplain_level(TExplainLevel.VERBOSE); runPlannerTestFile("kudu-selectivity", options); } + + @Test + public void testTpch() { runPlannerTestFile("tpch-kudu"); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0e88f0d7/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test new file mode 100644 index 0000000..ddd37d4 --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test @@ -0,0 +1,1265 @@ +# Q1 - Pricing Summary Report Query +select + l_returnflag, + l_linestatus, + round(sum(l_quantity), 1) as sum_qty, + round(sum(l_extendedprice), 1) as sum_base_price, + round(sum(l_extendedprice * (1 - l_discount)), 1) as sum_disc_price, + round(sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), 1) as sum_charge, + round(avg(l_quantity), 1) as avg_qty, + round(avg(l_extendedprice), 1) as avg_price, + round(avg(l_discount), 1) as avg_disc, + count(*) as count_order +from + tpch_kudu.lineitem +where + l_shipdate <= '1998-09-02' +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus +---- PLAN +02:SORT +| order by: l_returnflag ASC, l_linestatus ASC +| +01:AGGREGATE [FINALIZE] +| output: sum(l_quantity), sum(l_extendedprice), sum(l_extendedprice * (1 - l_discount)), sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), avg(l_quantity), avg(l_extendedprice), avg(l_discount), count(*) +| group by: l_returnflag, l_linestatus +| +00:SCAN KUDU [tpch_kudu.lineitem] + kudu predicates: l_shipdate <= '1998-09-02' +==== +# Q2 - Minimum Cost Supplier Query +select + round(s_acctbal, 2), + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + tpch_kudu.part, + tpch_kudu.supplier, + tpch_kudu.partsupp, + tpch_kudu.nation, + tpch_kudu.region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + and ps_supplycost = ( + select + min(ps_supplycost) + from + tpch_kudu.partsupp, + tpch_kudu.supplier, + tpch_kudu.nation, + tpch_kudu.region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +limit 100 +---- PLAN +18:TOP-N [LIMIT=100] +| order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC +| +17:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: min(ps_supplycost) = ps_supplycost, ps_partkey = p_partkey +| +|--16:HASH JOIN [INNER JOIN] +| | hash predicates: n_regionkey = r_regionkey +| | +| |--04:SCAN KUDU [tpch_kudu.region] +| | kudu predicates: r_name = 'EUROPE' +| | +| 15:HASH JOIN [INNER JOIN] +| | hash predicates: s_nationkey = n_nationkey +| | +| |--03:SCAN KUDU [tpch_kudu.nation] +| | +| 14:HASH JOIN [INNER JOIN] +| | hash predicates: ps_suppkey = s_suppkey +| | +| |--01:SCAN KUDU [tpch_kudu.supplier] +| | +| 13:HASH JOIN [INNER JOIN] +| | hash predicates: ps_partkey = p_partkey +| | +| |--00:SCAN KUDU [tpch_kudu.part] +| | predicates: p_type LIKE '%BRASS' +| | kudu predicates: p_size = 15 +| | +| 02:SCAN KUDU [tpch_kudu.partsupp] +| +12:AGGREGATE [FINALIZE] +| output: min(ps_supplycost) +| group by: ps_partkey +| +11:HASH JOIN [INNER JOIN] +| hash predicates: n_regionkey = r_regionkey +| +|--08:SCAN KUDU [tpch_kudu.region] +| kudu predicates: r_name = 'EUROPE' +| +10:HASH JOIN [INNER JOIN] +| hash predicates: s_nationkey = n_nationkey +| +|--07:SCAN KUDU [tpch_kudu.nation] +| +09:HASH JOIN [INNER JOIN] +| hash predicates: ps_suppkey = s_suppkey +| +|--06:SCAN KUDU [tpch_kudu.supplier] +| +05:SCAN KUDU [tpch_kudu.partsupp] +==== +# Q3 - Shipping Priority Query +select + l_orderkey, + round(sum(l_extendedprice * (1 - l_discount)), 2) as revenue, + o_orderdate, + o_shippriority +from + tpch_kudu.customer, + tpch_kudu.orders, + tpch_kudu.lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < '1995-03-15' + and l_shipdate > '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 10 +---- PLAN +06:TOP-N [LIMIT=10] +| order by: round(sum(l_extendedprice * (1 - l_discount)), 2) DESC, o_orderdate ASC +| +05:AGGREGATE [FINALIZE] +| output: sum(l_extendedprice * (1 - l_discount)) +| group by: l_orderkey, o_orderdate, o_shippriority +| +04:HASH JOIN [INNER JOIN] +| hash predicates: o_custkey = c_custkey +| +|--00:SCAN KUDU [tpch_kudu.customer] +| kudu predicates: c_mktsegment = 'BUILDING' +| +03:HASH JOIN [INNER JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--01:SCAN KUDU [tpch_kudu.orders] +| predicates: o_orderdate < '1995-03-15' +| +02:SCAN KUDU [tpch_kudu.lineitem] + predicates: l_shipdate > '1995-03-15' +==== +# Q4 - Order Priority Checking Query +select + o_orderpriority, + count(*) as order_count +from + tpch_kudu.orders +where + o_orderdate >= '1993-07-01' + and o_orderdate < '1993-10-01' + and exists ( + select + * + from + tpch_kudu.lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority +---- PLAN +04:SORT +| order by: o_orderpriority ASC +| +03:AGGREGATE [FINALIZE] +| output: count(*) +| group by: o_orderpriority +| +02:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--00:SCAN KUDU [tpch_kudu.orders] +| predicates: o_orderdate < '1993-10-01' +| kudu predicates: o_orderdate >= '1993-07-01' +| +01:SCAN KUDU [tpch_kudu.lineitem] + predicates: l_commitdate < l_receiptdate +==== +# Q5 - Local Supplier Volume Query +select + n_name, + round(sum(l_extendedprice * (1 - l_discount)), 2) as revenue +from + tpch_kudu.customer, + tpch_kudu.orders, + tpch_kudu.lineitem, + tpch_kudu.supplier, + tpch_kudu.nation, + tpch_kudu.region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= '1994-01-01' + and o_orderdate < '1995-01-01' +group by + n_name +order by + revenue desc +---- PLAN +12:SORT +| order by: round(sum(l_extendedprice * (1 - l_discount)), 2) DESC +| +11:AGGREGATE [FINALIZE] +| output: sum(l_extendedprice * (1 - l_discount)) +| group by: n_name +| +10:HASH JOIN [INNER JOIN] +| hash predicates: n_regionkey = r_regionkey +| +|--05:SCAN KUDU [tpch_kudu.region] +| kudu predicates: r_name = 'ASIA' +| +09:HASH JOIN [INNER JOIN] +| hash predicates: s_nationkey = n_nationkey +| +|--04:SCAN KUDU [tpch_kudu.nation] +| +08:HASH JOIN [INNER JOIN] +| hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey +| +|--03:SCAN KUDU [tpch_kudu.supplier] +| +07:HASH JOIN [INNER JOIN] +| hash predicates: o_custkey = c_custkey +| +|--00:SCAN KUDU [tpch_kudu.customer] +| +06:HASH JOIN [INNER JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--01:SCAN KUDU [tpch_kudu.orders] +| predicates: o_orderdate < '1995-01-01' +| kudu predicates: o_orderdate >= '1994-01-01' +| +02:SCAN KUDU [tpch_kudu.lineitem] +==== +# Q6 - Forecasting Revenue Change Query +select + round(sum(l_extendedprice * l_discount), 2) as revenue +from + tpch_kudu.lineitem +where + l_shipdate >= '1994-01-01' + and l_shipdate < '1995-01-01' + and l_discount between 0.05 and 0.07 + and l_quantity < 24 +---- PLAN +01:AGGREGATE [FINALIZE] +| output: sum(l_extendedprice * l_discount) +| +00:SCAN KUDU [tpch_kudu.lineitem] + predicates: l_quantity < 24, l_shipdate < '1995-01-01' + kudu predicates: l_discount >= 0.05, l_discount <= 0.07, l_shipdate >= '1994-01-01' +==== +# Q7 - Volume Shipping Query +select + supp_nation, + cust_nation, + l_year, + round(sum(volume), 2) as revenue +from ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + year(l_shipdate) as l_year, + round(l_extendedprice * (1 - l_discount), 2) as volume + from + tpch_kudu.supplier, + tpch_kudu.lineitem, + tpch_kudu.orders, + tpch_kudu.customer, + tpch_kudu.nation n1, + tpch_kudu.nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') + or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') + ) + and l_shipdate between '1995-01-01' and '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year +---- PLAN +12:SORT +| order by: supp_nation ASC, cust_nation ASC, l_year ASC +| +11:AGGREGATE [FINALIZE] +| output: sum(round(l_extendedprice * (1 - l_discount), 2)) +| group by: n1.n_name, n2.n_name, year(l_shipdate) +| +10:HASH JOIN [INNER JOIN] +| hash predicates: c_nationkey = n2.n_nationkey +| other predicates: ((n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY') OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE')) +| +|--05:SCAN KUDU [tpch_kudu.nation n2] +| +09:HASH JOIN [INNER JOIN] +| hash predicates: s_nationkey = n1.n_nationkey +| +|--04:SCAN KUDU [tpch_kudu.nation n1] +| +08:HASH JOIN [INNER JOIN] +| hash predicates: o_custkey = c_custkey +| +|--03:SCAN KUDU [tpch_kudu.customer] +| +07:HASH JOIN [INNER JOIN] +| hash predicates: l_suppkey = s_suppkey +| +|--00:SCAN KUDU [tpch_kudu.supplier] +| +06:HASH JOIN [INNER JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--02:SCAN KUDU [tpch_kudu.orders] +| +01:SCAN KUDU [tpch_kudu.lineitem] + kudu predicates: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31' +==== +# Q8 - National Market Share Query +select + o_year, + round(sum(case + when nation = 'BRAZIL' + then volume + else 0 + end) / sum(volume), 4) as mkt_share +from ( + select + year(o_orderdate) as o_year, + round(l_extendedprice * (1 - l_discount), 2) as volume, + n2.n_name as nation + from + tpch_kudu.part, + tpch_kudu.supplier, + tpch_kudu.lineitem, + tpch_kudu.orders, + tpch_kudu.customer, + tpch_kudu.nation n1, + tpch_kudu.nation n2, + tpch_kudu.region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between '1995-01-01' and '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' + ) as all_nations +group by + o_year +order by + o_year +---- PLAN +16:SORT +| order by: o_year ASC +| +15:AGGREGATE [FINALIZE] +| output: sum(CASE WHEN n2.n_name = 'BRAZIL' THEN round(l_extendedprice * (1 - l_discount), 2) ELSE 0 END), sum(round(l_extendedprice * (1 - l_discount), 2)) +| group by: year(o_orderdate) +| +14:HASH JOIN [INNER JOIN] +| hash predicates: s_nationkey = n2.n_nationkey +| +|--06:SCAN KUDU [tpch_kudu.nation n2] +| +13:HASH JOIN [INNER JOIN] +| hash predicates: n1.n_regionkey = r_regionkey +| +|--07:SCAN KUDU [tpch_kudu.region] +| kudu predicates: r_name = 'AMERICA' +| +12:HASH JOIN [INNER JOIN] +| hash predicates: c_nationkey = n1.n_nationkey +| +|--05:SCAN KUDU [tpch_kudu.nation n1] +| +11:HASH JOIN [INNER JOIN] +| hash predicates: o_custkey = c_custkey +| +|--04:SCAN KUDU [tpch_kudu.customer] +| +10:HASH JOIN [INNER JOIN] +| hash predicates: l_suppkey = s_suppkey +| +|--01:SCAN KUDU [tpch_kudu.supplier] +| +09:HASH JOIN [INNER JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--03:SCAN KUDU [tpch_kudu.orders] +| kudu predicates: o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31' +| +08:HASH JOIN [INNER JOIN] +| hash predicates: l_partkey = p_partkey +| +|--00:SCAN KUDU [tpch_kudu.part] +| kudu predicates: p_type = 'ECONOMY ANODIZED STEEL' +| +02:SCAN KUDU [tpch_kudu.lineitem] +==== +# Q9 - Product Type Measure Query +select + nation, + o_year, + round(sum(amount), 2) as sum_profit +from( + select + n_name as nation, + year(o_orderdate) as o_year, + round(l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity, 2) as amount + from + tpch_kudu.part, + tpch_kudu.supplier, + tpch_kudu.lineitem, + tpch_kudu.partsupp, + tpch_kudu.orders, + tpch_kudu.nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc +---- PLAN +12:SORT +| order by: nation ASC, o_year DESC +| +11:AGGREGATE [FINALIZE] +| output: sum(round(l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity, 2)) +| group by: n_name, year(o_orderdate) +| +10:HASH JOIN [INNER JOIN] +| hash predicates: s_nationkey = n_nationkey +| +|--05:SCAN KUDU [tpch_kudu.nation] +| +09:HASH JOIN [INNER JOIN] +| hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey +| +|--03:SCAN KUDU [tpch_kudu.partsupp] +| +08:HASH JOIN [INNER JOIN] +| hash predicates: l_suppkey = s_suppkey +| +|--01:SCAN KUDU [tpch_kudu.supplier] +| +07:HASH JOIN [INNER JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--04:SCAN KUDU [tpch_kudu.orders] +| +06:HASH JOIN [INNER JOIN] +| hash predicates: l_partkey = p_partkey +| +|--00:SCAN KUDU [tpch_kudu.part] +| predicates: p_name LIKE '%green%' +| +02:SCAN KUDU [tpch_kudu.lineitem] +==== +# Q10 - Returned Item Reporting Query +# Converted select from multiple tables to joins +select + c_custkey, + c_name, + round(sum(l_extendedprice * (1 - l_discount)), 1) as revenue, + cast (c_acctbal as bigint), + n_name, + c_address, + c_phone, + c_comment +from + tpch_kudu.customer, + tpch_kudu.orders, + tpch_kudu.lineitem, + tpch_kudu.nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= '1993-10-01' + and o_orderdate < '1994-01-01' + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + cast(c_acctbal as bigint), + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20 +---- PLAN +08:TOP-N [LIMIT=20] +| order by: round(sum(l_extendedprice * (1 - l_discount)), 1) DESC +| +07:AGGREGATE [FINALIZE] +| output: sum(l_extendedprice * (1 - l_discount)) +| group by: c_custkey, c_name, CAST(c_acctbal AS BIGINT), c_phone, n_name, c_address, c_comment +| +06:HASH JOIN [INNER JOIN] +| hash predicates: c_nationkey = n_nationkey +| +|--03:SCAN KUDU [tpch_kudu.nation] +| +05:HASH JOIN [INNER JOIN] +| hash predicates: o_custkey = c_custkey +| +|--00:SCAN KUDU [tpch_kudu.customer] +| +04:HASH JOIN [INNER JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--01:SCAN KUDU [tpch_kudu.orders] +| predicates: o_orderdate < '1994-01-01' +| kudu predicates: o_orderdate >= '1993-10-01' +| +02:SCAN KUDU [tpch_kudu.lineitem] + kudu predicates: l_returnflag = 'R' +==== +# Q11 - Important Stock Identification +# Modifications: query was rewritten to not have a subquery in the having clause +select + * +from ( + select + ps_partkey, + round(sum(ps_supplycost * ps_availqty), 2) as value + from + tpch_kudu.partsupp, + tpch_kudu.supplier, + tpch_kudu.nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' + group by + ps_partkey +) as inner_query +where + value > ( + select + round(sum(ps_supplycost * ps_availqty) * 0.0001, 2) + from + tpch_kudu.partsupp, + tpch_kudu.supplier, + tpch_kudu.nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' + ) +order by + value desc +---- PLAN +13:SORT +| order by: value DESC +| +12:NESTED LOOP JOIN [INNER JOIN] +| predicates: round(sum(ps_supplycost * ps_availqty), 2) > round(sum(ps_supplycost * ps_availqty) * 0.0001, 2) +| +|--11:AGGREGATE [FINALIZE] +| | output: sum(ps_supplycost * ps_availqty) +| | +| 10:HASH JOIN [INNER JOIN] +| | hash predicates: s_nationkey = n_nationkey +| | +| |--08:SCAN KUDU [tpch_kudu.nation] +| | kudu predicates: n_name = 'GERMANY' +| | +| 09:HASH JOIN [INNER JOIN] +| | hash predicates: ps_suppkey = s_suppkey +| | +| |--07:SCAN KUDU [tpch_kudu.supplier] +| | +| 06:SCAN KUDU [tpch_kudu.partsupp] +| +05:AGGREGATE [FINALIZE] +| output: sum(ps_supplycost * ps_availqty) +| group by: ps_partkey +| +04:HASH JOIN [INNER JOIN] +| hash predicates: s_nationkey = n_nationkey +| +|--02:SCAN KUDU [tpch_kudu.nation] +| kudu predicates: n_name = 'GERMANY' +| +03:HASH JOIN [INNER JOIN] +| hash predicates: ps_suppkey = s_suppkey +| +|--01:SCAN KUDU [tpch_kudu.supplier] +| +00:SCAN KUDU [tpch_kudu.partsupp] +==== +# Q12 - Shipping Mode and Order Priority Query +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + tpch_kudu.orders, + tpch_kudu.lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= '1994-01-01' + and l_receiptdate < '1995-01-01' +group by + l_shipmode +order by + l_shipmode +---- PLAN +04:SORT +| order by: l_shipmode ASC +| +03:AGGREGATE [FINALIZE] +| output: sum(CASE WHEN o_orderpriority = '1-URGENT' OR o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END) +| group by: l_shipmode +| +02:HASH JOIN [INNER JOIN] +| hash predicates: o_orderkey = l_orderkey +| +|--01:SCAN KUDU [tpch_kudu.lineitem] +| predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate < '1995-01-01' +| kudu predicates: l_receiptdate >= '1994-01-01' +| +00:SCAN KUDU [tpch_kudu.orders] +==== +# Q13 - Customer Distribution Query +select + c_count, + count(*) as custdist +from ( + select + c_custkey, + count(o_orderkey) as c_count + from + tpch_kudu.customer left outer join tpch_kudu.orders on ( + c_custkey = o_custkey + and o_comment not like '%special%requests%' + ) + group by + c_custkey + ) as c_orders +group by + c_count +order by + custdist desc, + c_count desc +---- PLAN +05:SORT +| order by: count(*) DESC, c_count DESC +| +04:AGGREGATE [FINALIZE] +| output: count(*) +| group by: count(o_orderkey) +| +03:AGGREGATE [FINALIZE] +| output: count(o_orderkey) +| group by: c_custkey +| +02:HASH JOIN [RIGHT OUTER JOIN] +| hash predicates: o_custkey = c_custkey +| +|--00:SCAN KUDU [tpch_kudu.customer] +| +01:SCAN KUDU [tpch_kudu.orders] + predicates: NOT o_comment LIKE '%special%requests%' +==== +# Q14 - Promotion Effect +select + round(100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0.0 + end) / sum(l_extendedprice * (1 - l_discount)), 4) as promo_revenue +from + tpch_kudu.lineitem, + tpch_kudu.part +where + l_partkey = p_partkey + and l_shipdate >= '1995-09-01' + and l_shipdate < '1995-10-01' +---- PLAN +03:AGGREGATE [FINALIZE] +| output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount)) +| +02:HASH JOIN [INNER JOIN] +| hash predicates: l_partkey = p_partkey +| +|--01:SCAN KUDU [tpch_kudu.part] +| +00:SCAN KUDU [tpch_kudu.lineitem] + predicates: l_shipdate < '1995-10-01' + kudu predicates: l_shipdate >= '1995-09-01' +==== +# Q15 - Top Supplier Query +with revenue_view as ( + select + l_suppkey as supplier_no, + round(sum(l_extendedprice * (1 - l_discount)), 1) as total_revenue + from + tpch_kudu.lineitem + where + l_shipdate >= '1996-01-01' + and l_shipdate < '1996-04-01' + group by + l_suppkey) +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + tpch_kudu.supplier, + revenue_view +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue_view + ) +order by + s_suppkey +---- PLAN +08:SORT +| order by: s_suppkey ASC +| +07:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: round(sum(l_extendedprice * (1 - l_discount)), 1) = max(total_revenue) +| +|--05:AGGREGATE [FINALIZE] +| | output: max(round(sum(l_extendedprice * (1 - l_discount)), 1)) +| | +| 04:AGGREGATE [FINALIZE] +| | output: sum(l_extendedprice * (1 - l_discount)) +| | group by: l_suppkey +| | +| 03:SCAN KUDU [tpch_kudu.lineitem] +| predicates: l_shipdate < '1996-04-01' +| kudu predicates: l_shipdate >= '1996-01-01' +| +06:HASH JOIN [INNER JOIN] +| hash predicates: s_suppkey = l_suppkey +| +|--02:AGGREGATE [FINALIZE] +| | output: sum(l_extendedprice * (1 - l_discount)) +| | group by: l_suppkey +| | +| 01:SCAN KUDU [tpch_kudu.lineitem] +| predicates: l_shipdate < '1996-04-01' +| kudu predicates: l_shipdate >= '1996-01-01' +| +00:SCAN KUDU [tpch_kudu.supplier] +==== +# Q16 - Parts/Supplier Relation Query +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + tpch_kudu.partsupp, + tpch_kudu.part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + tpch_kudu.supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size +---- PLAN +07:SORT +| order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC +| +06:AGGREGATE [FINALIZE] +| output: count(ps_suppkey) +| group by: p_brand, p_type, p_size +| +05:AGGREGATE +| group by: p_brand, p_type, p_size, ps_suppkey +| +04:HASH JOIN [NULL AWARE LEFT ANTI JOIN] +| hash predicates: ps_suppkey = s_suppkey +| +|--02:SCAN KUDU [tpch_kudu.supplier] +| predicates: s_comment LIKE '%Customer%Complaints%' +| +03:HASH JOIN [INNER JOIN] +| hash predicates: ps_partkey = p_partkey +| +|--01:SCAN KUDU [tpch_kudu.part] +| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%' +| +00:SCAN KUDU [tpch_kudu.partsupp] +==== +# Q17 - Small-Quantity-Order Revenue Query +select + round(sum(l_extendedprice) / 7.0, 2) as avg_yearly +from + tpch_kudu.lineitem, + tpch_kudu.part +where + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container = 'MED BOX' + and l_quantity < ( + select + round(0.2 * avg(l_quantity), 2) + from + tpch_kudu.lineitem + where + l_partkey = p_partkey + ) +---- PLAN +06:AGGREGATE [FINALIZE] +| output: sum(l_extendedprice) +| +05:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: l_partkey = p_partkey +| other join predicates: l_quantity < round(0.2 * avg(l_quantity), 2) +| +|--04:HASH JOIN [INNER JOIN] +| | hash predicates: l_partkey = p_partkey +| | +| |--01:SCAN KUDU [tpch_kudu.part] +| | kudu predicates: p_container = 'MED BOX', p_brand = 'Brand#23' +| | +| 00:SCAN KUDU [tpch_kudu.lineitem] +| +03:AGGREGATE [FINALIZE] +| output: avg(l_quantity) +| group by: l_partkey +| +02:SCAN KUDU [tpch_kudu.lineitem] +==== +# Q18 - Large Value tpch_kudu.customer Query +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + round(sum(l_quantity), 2) +from + tpch_kudu.customer, + tpch_kudu.orders, + tpch_kudu.lineitem +where + o_orderkey in ( + select + l_orderkey + from + tpch_kudu.lineitem + group by + l_orderkey + having + sum(l_quantity) > 300 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100 +---- PLAN +09:TOP-N [LIMIT=100] +| order by: o_totalprice DESC, o_orderdate ASC +| +08:AGGREGATE [FINALIZE] +| output: sum(l_quantity) +| group by: c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice +| +07:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: o_orderkey = l_orderkey +| +|--04:AGGREGATE [FINALIZE] +| | output: sum(l_quantity) +| | group by: l_orderkey +| | having: sum(l_quantity) > 300 +| | +| 03:SCAN KUDU [tpch_kudu.lineitem] +| +06:HASH JOIN [INNER JOIN] +| hash predicates: o_custkey = c_custkey +| +|--00:SCAN KUDU [tpch_kudu.customer] +| +05:HASH JOIN [INNER JOIN] +| hash predicates: l_orderkey = o_orderkey +| +|--01:SCAN KUDU [tpch_kudu.orders] +| +02:SCAN KUDU [tpch_kudu.lineitem] +==== +# Q19 - Discounted Revenue Query +select + round(sum(l_extendedprice * (1 - l_discount)), 2) as revenue +from + tpch_kudu.lineitem, + tpch_kudu.part +where + p_partkey = l_partkey + and ( + ( + p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 11 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 20 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_brand = 'Brand#34' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 20 and l_quantity <= 30 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + ) +---- PLAN +03:AGGREGATE [FINALIZE] +| output: sum(l_extendedprice * (1 - l_discount)) +| +02:HASH JOIN [INNER JOIN] +| hash predicates: l_partkey = p_partkey +| other predicates: ((p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND l_quantity >= 1 AND l_quantity <= 11 AND p_size BETWEEN 1 AND 5 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND l_quantity >= 10 AND l_quantity <= 20 AND p_size BETWEEN 1 AND 10 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON') OR (p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND l_quantity >= 20 AND l_quantity <= 30 AND p_size BETWEEN 1 AND 15 AND l_shipmode IN ('AIR', 'AIR REG') AND l_shipinstruct = 'DELIVER IN PERSON')) +| +|--01:SCAN KUDU [tpch_kudu.part] +| +00:SCAN KUDU [tpch_kudu.lineitem] +==== +# Q20 - Potential Part Promotion Query +select + s_name, + s_address +from + tpch_kudu.supplier, tpch_kudu.nation +where + s_suppkey in ( + select + ps_suppkey + from + tpch_kudu.partsupp + where + ps_partkey in ( + select + p_partkey + from + tpch_kudu.part + where + p_name like 'forest%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + tpch_kudu.lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= '1994-01-01' + and l_shipdate < '1995-01-01' + ) + ) + and s_nationkey = n_nationkey + and n_name = 'CANADA' +order by + s_name +---- PLAN +10:SORT +| order by: s_name ASC +| +09:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: ps_suppkey = s_suppkey +| +|--08:HASH JOIN [INNER JOIN] +| | hash predicates: s_nationkey = n_nationkey +| | +| |--01:SCAN KUDU [tpch_kudu.nation] +| | kudu predicates: n_name = 'CANADA' +| | +| 00:SCAN KUDU [tpch_kudu.supplier] +| +07:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey +| other join predicates: ps_availqty > 0.5 * sum(l_quantity) +| +|--06:HASH JOIN [LEFT SEMI JOIN] +| | hash predicates: ps_partkey = p_partkey +| | +| |--03:SCAN KUDU [tpch_kudu.part] +| | predicates: p_name LIKE 'forest%' +| | +| 02:SCAN KUDU [tpch_kudu.partsupp] +| +05:AGGREGATE [FINALIZE] +| output: sum(l_quantity) +| group by: l_partkey, l_suppkey +| +04:SCAN KUDU [tpch_kudu.lineitem] + predicates: l_shipdate < '1995-01-01' + kudu predicates: l_shipdate >= '1994-01-01' +==== +# Q21 - Suppliers Who Kept Orders Waiting Query +select + s_name, + count(*) as numwait +from + tpch_kudu.supplier, + tpch_kudu.lineitem l1, + tpch_kudu.orders, + tpch_kudu.nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + tpch_kudu.lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + tpch_kudu.lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit 100 +---- PLAN +12:TOP-N [LIMIT=100] +| order by: count(*) DESC, s_name ASC +| +11:AGGREGATE [FINALIZE] +| output: count(*) +| group by: s_name +| +10:HASH JOIN [RIGHT ANTI JOIN] +| hash predicates: l3.l_orderkey = l1.l_orderkey +| other join predicates: l3.l_suppkey != l1.l_suppkey +| +|--09:HASH JOIN [RIGHT SEMI JOIN] +| | hash predicates: l2.l_orderkey = l1.l_orderkey +| | other join predicates: l2.l_suppkey != l1.l_suppkey +| | +| |--08:HASH JOIN [INNER JOIN] +| | | hash predicates: s_nationkey = n_nationkey +| | | +| | |--03:SCAN KUDU [tpch_kudu.nation] +| | | kudu predicates: n_name = 'SAUDI ARABIA' +| | | +| | 07:HASH JOIN [INNER JOIN] +| | | hash predicates: l1.l_suppkey = s_suppkey +| | | +| | |--00:SCAN KUDU [tpch_kudu.supplier] +| | | +| | 06:HASH JOIN [INNER JOIN] +| | | hash predicates: l1.l_orderkey = o_orderkey +| | | +| | |--02:SCAN KUDU [tpch_kudu.orders] +| | | kudu predicates: o_orderstatus = 'F' +| | | +| | 01:SCAN KUDU [tpch_kudu.lineitem l1] +| | predicates: l1.l_receiptdate > l1.l_commitdate +| | +| 04:SCAN KUDU [tpch_kudu.lineitem l2] +| +05:SCAN KUDU [tpch_kudu.lineitem l3] + predicates: l3.l_receiptdate > l3.l_commitdate +==== +# Q22 - Global Sales Opportunity Query +select + cntrycode, + count(*) as numcust, + round(sum(c_acctbal), 2) as totacctbal +from ( + select + substr(c_phone, 1, 2) as cntrycode, + c_acctbal + from + tpch_kudu.customer + where + substr(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + round(avg(c_acctbal), 1) + from + tpch_kudu.customer + where + c_acctbal > 0.00 + and substr(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + tpch_kudu.orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode +---- PLAN +07:SORT +| order by: cntrycode ASC +| +06:AGGREGATE [FINALIZE] +| output: count(*), sum(c_acctbal) +| group by: substr(c_phone, 1, 2) +| +05:HASH JOIN [RIGHT ANTI JOIN] +| hash predicates: o_custkey = c_custkey +| +|--04:NESTED LOOP JOIN [INNER JOIN] +| | predicates: c_acctbal > round(avg(c_acctbal), 1) +| | +| |--02:AGGREGATE [FINALIZE] +| | | output: avg(c_acctbal) +| | | +| | 01:SCAN KUDU [tpch_kudu.customer] +| | predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') +| | +| 00:SCAN KUDU [tpch_kudu.customer] +| predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') +| +03:SCAN KUDU [tpch_kudu.orders] +====
