This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new a9ee67471 MINOR: Benchmark regression tests (#3790)
a9ee67471 is described below
commit a9ee67471d1ab0ad11f12d024315b3a824d6101f
Author: Andy Grove <[email protected]>
AuthorDate: Tue Oct 11 07:13:34 2022 -0600
MINOR: Benchmark regression tests (#3790)
---
benchmarks/expected-plans/q1.txt | 6 ++
benchmarks/expected-plans/q10.txt | 12 +++
benchmarks/expected-plans/q11.txt | 19 ++++
benchmarks/expected-plans/q12.txt | 7 ++
benchmarks/expected-plans/q13.txt | 11 +++
benchmarks/expected-plans/q14.txt | 7 ++
benchmarks/expected-plans/q15.txt | 11 +++
benchmarks/expected-plans/q16.txt | 13 +++
benchmarks/expected-plans/q17.txt | 11 +++
benchmarks/expected-plans/q18.txt | 13 +++
benchmarks/expected-plans/q19.txt | 9 ++
benchmarks/expected-plans/q2.txt | 25 ++++++
benchmarks/expected-plans/q20.txt | 19 ++++
benchmarks/expected-plans/q21.txt | 21 +++++
benchmarks/expected-plans/q22.txt | 15 ++++
benchmarks/expected-plans/q3.txt | 11 +++
benchmarks/expected-plans/q4.txt | 8 ++
benchmarks/expected-plans/q5.txt | 16 ++++
benchmarks/expected-plans/q6.txt | 5 ++
benchmarks/expected-plans/q7.txt | 20 +++++
benchmarks/expected-plans/q8.txt | 25 ++++++
benchmarks/expected-plans/q9.txt | 17 ++++
benchmarks/src/bin/tpch.rs | 179 ++++++++++++++++++++++++++++++++++++++
dev/release/rat_exclude_files.txt | 1 +
24 files changed, 481 insertions(+)
diff --git a/benchmarks/expected-plans/q1.txt b/benchmarks/expected-plans/q1.txt
new file mode 100644
index 000000000..1a9d4b7c9
--- /dev/null
+++ b/benchmarks/expected-plans/q1.txt
@@ -0,0 +1,6 @@
+Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS
LAST
+ Projection: lineitem.l_returnflag, lineitem.l_linestatus,
SUM(lineitem.l_quantity) AS sum_qty, SUM(lineitem.l_extendedprice) AS
sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)
AS sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge,
AVG(lineitem.l_quantity) AS avg_qty, AVG(lineitem.l_extendedprice) AS
avg_price, AVG(lineitem.l_discount) AS avg_disc, COUNT(UInt8(1)) AS count_order
+ Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]],
aggr=[[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice),
SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) *
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) -
CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38,
4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2))CAST(lineitem.l_di [...]
+ Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) *
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38,
4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS
Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) -
CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38,
4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS [...]
+ Filter: lineitem.l_shipdate <= Date32("10471")
+ TableScan: lineitem projection=[l_quantity, l_extendedprice,
l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q10.txt
b/benchmarks/expected-plans/q10.txt
new file mode 100644
index 000000000..25189feb6
--- /dev/null
+++ b/benchmarks/expected-plans/q10.txt
@@ -0,0 +1,12 @@
+Sort: revenue DESC NULLS FIRST
+ Projection: customer.c_custkey, customer.c_name,
SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue,
customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone,
customer.c_comment
+ Aggregate: groupBy=[[customer.c_custkey, customer.c_name,
customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address,
customer.c_comment]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS
Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount
AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice *
Int64(1) - lineitem.l_discount)]]
+ Inner Join: customer.c_nationkey = nation.n_nationkey
+ Inner Join: orders.o_orderkey = lineitem.l_orderkey
+ Inner Join: customer.c_custkey = orders.o_custkey
+ TableScan: customer projection=[c_custkey, c_name, c_address,
c_nationkey, c_phone, c_acctbal, c_comment]
+ Filter: orders.o_orderdate >= Date32("8674") AND
orders.o_orderdate < Date32("8766")
+ TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
+ Filter: lineitem.l_returnflag = Utf8("R")
+ TableScan: lineitem projection=[l_orderkey, l_extendedprice,
l_discount, l_returnflag]
+ TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q11.txt
b/benchmarks/expected-plans/q11.txt
new file mode 100644
index 000000000..b408340a3
--- /dev/null
+++ b/benchmarks/expected-plans/q11.txt
@@ -0,0 +1,19 @@
+Sort: value DESC NULLS FIRST
+ Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost *
partsupp.ps_availqty) AS value
+ Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS
Decimal128(38, 17)) > __sq_1.__value
+ CrossJoin:
+ Aggregate: groupBy=[[partsupp.ps_partkey]],
aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) *
CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+ TableScan: partsupp projection=[ps_partkey, ps_suppkey,
ps_availqty, ps_supplycost]
+ TableScan: supplier projection=[s_suppkey, s_nationkey]
+ Filter: nation.n_name = Utf8("GERMANY")
+ TableScan: nation projection=[n_nationkey, n_name]
+ Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS
Decimal128(38, 17)) * Decimal128(Some(10000000000000),38,17) AS __value,
alias=__sq_1
+ Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS
Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+ TableScan: partsupp projection=[ps_partkey, ps_suppkey,
ps_availqty, ps_supplycost]
+ TableScan: supplier projection=[s_suppkey, s_nationkey]
+ Filter: nation.n_name = Utf8("GERMANY")
+ TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q12.txt
b/benchmarks/expected-plans/q12.txt
new file mode 100644
index 000000000..ef3bab4b7
--- /dev/null
+++ b/benchmarks/expected-plans/q12.txt
@@ -0,0 +1,7 @@
+Sort: lineitem.l_shipmode ASC NULLS LAST
+ Projection: lineitem.l_shipmode, SUM(CASE WHEN orders.o_orderpriority =
Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE
Int64(0) END) AS high_line_count, SUM(CASE WHEN orders.o_orderpriority !=
Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1)
ELSE Int64(0) END) AS low_line_count
+ Aggregate: groupBy=[[lineitem.l_shipmode]], aggr=[[SUM(CASE WHEN
orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority =
Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), SUM(CASE WHEN
orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority !=
Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)]]
+ Inner Join: lineitem.l_orderkey = orders.o_orderkey
+ Filter: lineitem.l_shipmode IN ([Utf8("MAIL"), Utf8("SHIP")]) AND
lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_shipdate <
lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("8766") AND
lineitem.l_receiptdate < Date32("9131")
+ TableScan: lineitem projection=[l_orderkey, l_shipdate,
l_commitdate, l_receiptdate, l_shipmode]
+ TableScan: orders projection=[o_orderkey, o_orderpriority]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q13.txt
b/benchmarks/expected-plans/q13.txt
new file mode 100644
index 000000000..12a9bf1da
--- /dev/null
+++ b/benchmarks/expected-plans/q13.txt
@@ -0,0 +1,11 @@
+Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST
+ Projection: c_orders.c_count, COUNT(UInt8(1)) AS custdist
+ Aggregate: groupBy=[[c_orders.c_count]], aggr=[[COUNT(UInt8(1))]]
+ Projection: c_orders.COUNT(orders.o_orderkey) AS c_count, alias=c_orders
+ Projection: c_orders.COUNT(orders.o_orderkey), alias=c_orders
+ Projection: COUNT(orders.o_orderkey), alias=c_orders
+ Aggregate: groupBy=[[customer.c_custkey]],
aggr=[[COUNT(orders.o_orderkey)]]
+ Left Join: customer.c_custkey = orders.o_custkey
+ TableScan: customer projection=[c_custkey]
+ Filter: orders.o_comment NOT LIKE Utf8("%special%requests%")
+ TableScan: orders projection=[o_orderkey, o_custkey,
o_comment]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q14.txt
b/benchmarks/expected-plans/q14.txt
new file mode 100644
index 000000000..c410363a5
--- /dev/null
+++ b/benchmarks/expected-plans/q14.txt
@@ -0,0 +1,7 @@
+Projection: CAST(Decimal128(Some(1000000000000000000000),38,19) *
CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN
lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS
Decimal128(38, 19)) AS Decimal128(38, 38)) / CAST(SUM(lineitem.l_extendedprice
* Int64(1) - lineitem.l_discount) AS Decimal128(38, 38)) AS promo_revenue
+ Aggregate: groupBy=[[]], aggr=[[SUM(CASE WHEN part.p_type LIKE
Utf8("PROMO%") THEN CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) *
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) -
CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38,
4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal12
[...]
+ Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) *
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38,
4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS
Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) -
CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38,
4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS D [...]
+ Inner Join: lineitem.l_partkey = part.p_partkey
+ Filter: lineitem.l_shipdate >= Date32("9374") AND lineitem.l_shipdate
< Date32("9404")
+ TableScan: lineitem projection=[l_partkey, l_extendedprice,
l_discount, l_shipdate]
+ TableScan: part projection=[p_partkey, p_type]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q15.txt
b/benchmarks/expected-plans/q15.txt
new file mode 100644
index 000000000..e2f59dc5c
--- /dev/null
+++ b/benchmarks/expected-plans/q15.txt
@@ -0,0 +1,11 @@
+EmptyRelation
+Sort: supplier.s_suppkey ASC NULLS LAST
+ Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address,
supplier.s_phone, revenue0.total_revenue
+ Inner Join: revenue0.total_revenue = __sq_1.__value
+ Inner Join: supplier.s_suppkey = revenue0.supplier_no
+ TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone]
+ TableScan: revenue0 projection=[supplier_no, total_revenue]
+ Projection: MAX(revenue0.total_revenue) AS __value, alias=__sq_1
+ Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]]
+ TableScan: revenue0 projection=[total_revenue]
+EmptyRelation
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q16.txt
b/benchmarks/expected-plans/q16.txt
new file mode 100644
index 000000000..11943cf24
--- /dev/null
+++ b/benchmarks/expected-plans/q16.txt
@@ -0,0 +1,13 @@
+Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type
ASC NULLS LAST, part.p_size ASC NULLS LAST
+ Projection: part.p_brand, part.p_type, part.p_size, COUNT(DISTINCT
partsupp.ps_suppkey) AS supplier_cnt
+ Projection: group_alias_0 AS p_brand, group_alias_1 AS p_type,
group_alias_2 AS p_size, COUNT(alias1) AS COUNT(DISTINCT partsupp.ps_suppkey)
+ Aggregate: groupBy=[[group_alias_0, group_alias_1, group_alias_2]],
aggr=[[COUNT(alias1)]]
+ Aggregate: groupBy=[[part.p_brand AS group_alias_0, part.p_type AS
group_alias_1, part.p_size AS group_alias_2, partsupp.ps_suppkey AS alias1]],
aggr=[[]]
+ Anti Join: partsupp.ps_suppkey = __sq_1.s_suppkey
+ Inner Join: partsupp.ps_partkey = part.p_partkey
+ TableScan: partsupp projection=[ps_partkey, ps_suppkey]
+ Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT
LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14),
Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])
+ TableScan: part projection=[p_partkey, p_brand, p_type, p_size]
+ Projection: supplier.s_suppkey AS s_suppkey, alias=__sq_1
+ Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%")
+ TableScan: supplier projection=[s_suppkey, s_comment]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q17.txt
b/benchmarks/expected-plans/q17.txt
new file mode 100644
index 000000000..17b8e9698
--- /dev/null
+++ b/benchmarks/expected-plans/q17.txt
@@ -0,0 +1,11 @@
+Projection: CAST(SUM(lineitem.l_extendedprice) AS Decimal128(38, 33)) /
Decimal128(Some(7000000000000000195487369212723200),38,33) AS avg_yearly
+ Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]]
+ Filter: CAST(lineitem.l_quantity AS Decimal128(38, 21)) < __sq_1.__value
+ Inner Join: part.p_partkey = __sq_1.l_partkey
+ Inner Join: lineitem.l_partkey = part.p_partkey
+ TableScan: lineitem projection=[l_partkey, l_quantity,
l_extendedprice]
+ Filter: part.p_brand = Utf8("Brand#23") AND part.p_container =
Utf8("MED BOX")
+ TableScan: part projection=[p_partkey, p_brand, p_container]
+ Projection: lineitem.l_partkey,
Decimal128(Some(200000000000000000000),38,21) * CAST(AVG(lineitem.l_quantity)
AS Decimal128(38, 21)) AS __value, alias=__sq_1
+ Aggregate: groupBy=[[lineitem.l_partkey]],
aggr=[[AVG(lineitem.l_quantity)]]
+ TableScan: lineitem projection=[l_partkey, l_quantity,
l_extendedprice]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q18.txt
b/benchmarks/expected-plans/q18.txt
new file mode 100644
index 000000000..ebc22ea5d
--- /dev/null
+++ b/benchmarks/expected-plans/q18.txt
@@ -0,0 +1,13 @@
+Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST
+ Projection: customer.c_name, customer.c_custkey, orders.o_orderkey,
orders.o_orderdate, orders.o_totalprice, SUM(lineitem.l_quantity)
+ Aggregate: groupBy=[[customer.c_name, customer.c_custkey,
orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]],
aggr=[[SUM(lineitem.l_quantity)]]
+ Semi Join: orders.o_orderkey = __sq_1.l_orderkey
+ Inner Join: orders.o_orderkey = lineitem.l_orderkey
+ Inner Join: customer.c_custkey = orders.o_custkey
+ TableScan: customer projection=[c_custkey, c_name]
+ TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice,
o_orderdate]
+ TableScan: lineitem projection=[l_orderkey, l_quantity]
+ Projection: lineitem.l_orderkey AS l_orderkey, alias=__sq_1
+ Filter: SUM(lineitem.l_quantity) > Decimal128(Some(30000),25,2)
+ Aggregate: groupBy=[[lineitem.l_orderkey]],
aggr=[[SUM(lineitem.l_quantity)]]
+ TableScan: lineitem projection=[l_orderkey, l_quantity]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q19.txt
b/benchmarks/expected-plans/q19.txt
new file mode 100644
index 000000000..902893ea9
--- /dev/null
+++ b/benchmarks/expected-plans/q19.txt
@@ -0,0 +1,9 @@
+Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS
revenue
+ Aggregate: groupBy=[[]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS
Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount
AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice *
Int64(1) - lineitem.l_discount)]]
+ Projection: lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") AS
lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON")Utf8("DELIVER IN
PERSON")lineitem.l_shipinstruct, lineitem.l_shipmode IN ([Utf8("AIR"),
Utf8("AIR REG")]) AS lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR
REG")])Utf8("AIR REG")Utf8("AIR")lineitem.l_shipmode, part.p_size >= Int32(1)
AS part.p_size >= Int32(1)Int32(1)part.p_size, lineitem.l_quantity,
lineitem.l_extendedprice, lineitem.l_discount, part.p_brand, part.p [...]
+ Filter: part.p_brand = Utf8("Brand#12") AND part.p_container IN
([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND
lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <=
Decimal128(Some(1100),15,2) AND part.p_size <= Int32(5) OR part.p_brand =
Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"),
Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >=
Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Dec [...]
+ Inner Join: lineitem.l_partkey = part.p_partkey
+ Filter: lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR REG")]) AND
lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON")
+ TableScan: lineitem projection=[l_partkey, l_quantity,
l_extendedprice, l_discount, l_shipinstruct, l_shipmode]
+ Filter: part.p_size >= Int32(1)
+ TableScan: part projection=[p_partkey, p_brand, p_size,
p_container]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q2.txt b/benchmarks/expected-plans/q2.txt
new file mode 100644
index 000000000..10d68cd37
--- /dev/null
+++ b/benchmarks/expected-plans/q2.txt
@@ -0,0 +1,25 @@
+Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST,
supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
+ Projection: supplier.s_acctbal, supplier.s_name, nation.n_name,
part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone,
supplier.s_comment
+ Filter: partsupp.ps_supplycost = __sq_1.__value
+ Inner Join: part.p_partkey = __sq_1.ps_partkey
+ Inner Join: nation.n_regionkey = region.r_regionkey
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+ Inner Join: part.p_partkey = partsupp.ps_partkey
+ Filter: part.p_size = Int32(15) AND part.p_type LIKE
Utf8("%BRASS")
+ TableScan: part projection=[p_partkey, p_mfgr, p_type,
p_size]
+ TableScan: partsupp projection=[ps_partkey, ps_suppkey,
ps_supplycost]
+ TableScan: supplier projection=[s_suppkey, s_name, s_address,
s_nationkey, s_phone, s_acctbal, s_comment]
+ TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
+ Filter: region.r_name = Utf8("EUROPE")
+ TableScan: region projection=[r_regionkey, r_name]
+ Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS
__value, alias=__sq_1
+ Aggregate: groupBy=[[partsupp.ps_partkey]],
aggr=[[MIN(partsupp.ps_supplycost)]]
+ Inner Join: nation.n_regionkey = region.r_regionkey
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+ TableScan: partsupp projection=[ps_partkey, ps_suppkey,
ps_supplycost]
+ TableScan: supplier projection=[s_suppkey, s_name,
s_address, s_nationkey, s_phone, s_acctbal, s_comment]
+ TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
+ Filter: region.r_name = Utf8("EUROPE")
+ TableScan: region projection=[r_regionkey, r_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q20.txt
b/benchmarks/expected-plans/q20.txt
new file mode 100644
index 000000000..6d3ef1f6c
--- /dev/null
+++ b/benchmarks/expected-plans/q20.txt
@@ -0,0 +1,19 @@
+Sort: supplier.s_name ASC NULLS LAST
+ Projection: supplier.s_name, supplier.s_address
+ Semi Join: supplier.s_suppkey = __sq_2.ps_suppkey
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ TableScan: supplier projection=[s_suppkey, s_name, s_address,
s_nationkey]
+ Filter: nation.n_name = Utf8("CANADA")
+ TableScan: nation projection=[n_nationkey, n_name]
+ Projection: partsupp.ps_suppkey AS ps_suppkey, alias=__sq_2
+ Filter: CAST(partsupp.ps_availqty AS Decimal128(38, 17)) >
__sq_3.__value
+ Inner Join: partsupp.ps_partkey = __sq_3.l_partkey,
partsupp.ps_suppkey = __sq_3.l_suppkey
+ Semi Join: partsupp.ps_partkey = __sq_1.p_partkey
+ TableScan: partsupp projection=[ps_partkey, ps_suppkey,
ps_availqty]
+ Projection: part.p_partkey AS p_partkey, alias=__sq_1
+ Filter: part.p_name LIKE Utf8("forest%")
+ TableScan: part projection=[p_partkey, p_name]
+ Projection: lineitem.l_partkey, lineitem.l_suppkey,
Decimal128(Some(50000000000000000),38,17) * CAST(SUM(lineitem.l_quantity) AS
Decimal128(38, 17)) AS __value, alias=__sq_3
+ Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]],
aggr=[[SUM(lineitem.l_quantity)]]
+ Filter: lineitem.l_shipdate >= Date32("8766") AND
lineitem.l_shipdate < Date32("9131")
+ TableScan: lineitem projection=[l_partkey, l_suppkey,
l_quantity, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q21.txt
b/benchmarks/expected-plans/q21.txt
new file mode 100644
index 000000000..5689fafc2
--- /dev/null
+++ b/benchmarks/expected-plans/q21.txt
@@ -0,0 +1,21 @@
+Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST
+ Projection: supplier.s_name, COUNT(UInt8(1)) AS numwait
+ Aggregate: groupBy=[[supplier.s_name]], aggr=[[COUNT(UInt8(1))]]
+ Anti Join: l1.l_orderkey = l3.l_orderkey Filter: l3.l_suppkey !=
l1.l_suppkey
+ Semi Join: l1.l_orderkey = l2.l_orderkey Filter: l2.l_suppkey !=
l1.l_suppkey
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ Inner Join: l1.l_orderkey = orders.o_orderkey
+ Inner Join: supplier.s_suppkey = l1.l_suppkey
+ TableScan: supplier projection=[s_suppkey, s_name, s_nationkey]
+ Filter: l1.l_receiptdate > l1.l_commitdate AND
l1.l_receiptdate > l1.l_commitdate
+ SubqueryAlias: l1
+ TableScan: lineitem projection=[l_orderkey, l_suppkey,
l_commitdate, l_receiptdate]
+ Filter: orders.o_orderstatus = Utf8("F") AND
orders.o_orderstatus = Utf8("F")
+ TableScan: orders projection=[o_orderkey, o_orderstatus]
+ Filter: nation.n_name = Utf8("SAUDI ARABIA") AND nation.n_name =
Utf8("SAUDI ARABIA")
+ TableScan: nation projection=[n_nationkey, n_name]
+ SubqueryAlias: l2
+ TableScan: lineitem projection=[l_orderkey, l_suppkey]
+ Filter: l3.l_receiptdate > l3.l_commitdate
+ SubqueryAlias: l3
+ TableScan: lineitem projection=[l_orderkey, l_suppkey,
l_commitdate, l_receiptdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q22.txt
b/benchmarks/expected-plans/q22.txt
new file mode 100644
index 000000000..4b0c1a59c
--- /dev/null
+++ b/benchmarks/expected-plans/q22.txt
@@ -0,0 +1,15 @@
+Sort: custsale.cntrycode ASC NULLS LAST
+ Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust,
SUM(custsale.c_acctbal) AS totacctbal
+ Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)),
SUM(custsale.c_acctbal)]]
+ Projection: custsale.cntrycode, custsale.c_acctbal, alias=custsale
+ Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode,
customer.c_acctbal, alias=custsale
+ Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) >
__sq_1.__value
+ CrossJoin:
+ Anti Join: customer.c_custkey = orders.o_custkey
+ Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN
([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"),
Utf8("17")])
+ TableScan: customer projection=[c_custkey, c_phone,
c_acctbal]
+ TableScan: orders projection=[o_custkey]
+ Projection: AVG(customer.c_acctbal) AS __value, alias=__sq_1
+ Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]]
+ Filter: CAST(customer.c_acctbal AS Decimal128(30, 15)) >
Decimal128(Some(0),30,15) AND substr(customer.c_phone, Int64(1), Int64(2)) IN
([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"),
Utf8("17")])
+ TableScan: customer projection=[c_phone, c_acctbal]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q3.txt b/benchmarks/expected-plans/q3.txt
new file mode 100644
index 000000000..7cd69b92a
--- /dev/null
+++ b/benchmarks/expected-plans/q3.txt
@@ -0,0 +1,11 @@
+Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST
+ Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority
+ Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate,
orders.o_shippriority]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS
Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount
AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice *
Int64(1) - lineitem.l_discount)]]
+ Inner Join: orders.o_orderkey = lineitem.l_orderkey
+ Inner Join: customer.c_custkey = orders.o_custkey
+ Filter: customer.c_mktsegment = Utf8("BUILDING")
+ TableScan: customer projection=[c_custkey, c_mktsegment]
+ Filter: orders.o_orderdate < Date32("9204")
+ TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate,
o_shippriority]
+ Filter: lineitem.l_shipdate > Date32("9204")
+ TableScan: lineitem projection=[l_orderkey, l_extendedprice,
l_discount, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q4.txt b/benchmarks/expected-plans/q4.txt
new file mode 100644
index 000000000..a4339732e
--- /dev/null
+++ b/benchmarks/expected-plans/q4.txt
@@ -0,0 +1,8 @@
+Sort: orders.o_orderpriority ASC NULLS LAST
+ Projection: orders.o_orderpriority, COUNT(UInt8(1)) AS order_count
+ Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[COUNT(UInt8(1))]]
+ Semi Join: orders.o_orderkey = lineitem.l_orderkey
+ Filter: orders.o_orderdate >= Date32("8582") AND orders.o_orderdate <
Date32("8674")
+ TableScan: orders projection=[o_orderkey, o_orderdate,
o_orderpriority]
+ Filter: lineitem.l_commitdate < lineitem.l_receiptdate
+ TableScan: lineitem projection=[l_orderkey, l_commitdate,
l_receiptdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q5.txt b/benchmarks/expected-plans/q5.txt
new file mode 100644
index 000000000..0c2e0c131
--- /dev/null
+++ b/benchmarks/expected-plans/q5.txt
@@ -0,0 +1,16 @@
+Sort: revenue DESC NULLS FIRST
+ Projection: nation.n_name, SUM(lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount) AS revenue
+ Aggregate: groupBy=[[nation.n_name]],
aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) *
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount)]]
+ Inner Join: nation.n_regionkey = region.r_regionkey
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ Inner Join: lineitem.l_suppkey = supplier.s_suppkey,
customer.c_nationkey = supplier.s_nationkey
+ Inner Join: orders.o_orderkey = lineitem.l_orderkey
+ Inner Join: customer.c_custkey = orders.o_custkey
+ TableScan: customer projection=[c_custkey, c_nationkey]
+ Filter: orders.o_orderdate >= Date32("8766") AND
orders.o_orderdate < Date32("9131")
+ TableScan: orders projection=[o_orderkey, o_custkey,
o_orderdate]
+ TableScan: lineitem projection=[l_orderkey, l_suppkey,
l_extendedprice, l_discount]
+ TableScan: supplier projection=[s_suppkey, s_nationkey]
+ TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
+ Filter: region.r_name = Utf8("ASIA")
+ TableScan: region projection=[r_regionkey, r_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q6.txt b/benchmarks/expected-plans/q6.txt
new file mode 100644
index 000000000..ad27ba2b9
--- /dev/null
+++ b/benchmarks/expected-plans/q6.txt
@@ -0,0 +1,5 @@
+Projection: SUM(lineitem.l_extendedprice * lineitem.l_discount) AS revenue
+ Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice *
lineitem.l_discount)]]
+ Projection: CAST(lineitem.l_discount AS Decimal128(30, 15)) AS
CAST(lineitem.l_discount AS Decimal128(30, 15))lineitem.l_discount,
lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount,
lineitem.l_shipdate
+ Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate <
Date32("9131") AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS
lineitem.l_discount >= Decimal128(Some(49999999999999),30,15) AND
CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount <=
Decimal128(Some(69999999999999),30,15) AND lineitem.l_quantity <
Decimal128(Some(2400),15,2)
+ TableScan: lineitem projection=[l_quantity, l_extendedprice,
l_discount, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q7.txt b/benchmarks/expected-plans/q7.txt
new file mode 100644
index 000000000..4a2866a42
--- /dev/null
+++ b/benchmarks/expected-plans/q7.txt
@@ -0,0 +1,20 @@
+Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS
LAST, shipping.l_year ASC NULLS LAST
+ Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year,
SUM(shipping.volume) AS revenue
+ Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation,
shipping.l_year]], aggr=[[SUM(shipping.volume)]]
+ Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year,
shipping.volume, alias=shipping
+ Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation,
datepart(Utf8("YEAR"), lineitem.l_shipdate) AS l_year,
CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) *
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2)) AS Decimal128(38, 4)) AS volume, alias=shipping
+ Filter: n1.n_name = Utf8("FRANCE") AND n2.n_name = Utf8("GERMANY")
OR n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("FRANCE")
+ Inner Join: customer.c_nationkey = n2.n_nationkey
+ Inner Join: supplier.s_nationkey = n1.n_nationkey
+ Inner Join: orders.o_custkey = customer.c_custkey
+ Inner Join: lineitem.l_orderkey = orders.o_orderkey
+ Inner Join: supplier.s_suppkey = lineitem.l_suppkey
+ TableScan: supplier projection=[s_suppkey, s_nationkey]
+ Filter: lineitem.l_shipdate >= Date32("9131") AND
lineitem.l_shipdate <= Date32("9861")
+ TableScan: lineitem projection=[l_orderkey, l_suppkey,
l_extendedprice, l_discount, l_shipdate]
+ TableScan: orders projection=[o_orderkey, o_custkey]
+ TableScan: customer projection=[c_custkey, c_nationkey]
+ SubqueryAlias: n1
+ TableScan: nation projection=[n_nationkey, n_name]
+ SubqueryAlias: n2
+ TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q8.txt b/benchmarks/expected-plans/q8.txt
new file mode 100644
index 000000000..20452d4bd
--- /dev/null
+++ b/benchmarks/expected-plans/q8.txt
@@ -0,0 +1,25 @@
+Sort: all_nations.o_year ASC NULLS LAST
+ Projection: all_nations.o_year, SUM(CASE WHEN all_nations.nation =
Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) /
SUM(all_nations.volume) AS mkt_share
+ Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN
all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE
Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation =
Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END),
SUM(all_nations.volume)]]
+ Projection: all_nations.o_year, all_nations.volume, all_nations.nation,
alias=all_nations
+ Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year,
CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) *
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23,
2)) AS Decimal128(38, 4)) AS volume, n2.n_name AS nation, alias=all_nations
+ Inner Join: n1.n_regionkey = region.r_regionkey
+ Inner Join: supplier.s_nationkey = n2.n_nationkey
+ Inner Join: customer.c_nationkey = n1.n_nationkey
+ Inner Join: orders.o_custkey = customer.c_custkey
+ Inner Join: lineitem.l_orderkey = orders.o_orderkey
+ Inner Join: lineitem.l_suppkey = supplier.s_suppkey
+ Inner Join: part.p_partkey = lineitem.l_partkey
+ Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL")
+ TableScan: part projection=[p_partkey, p_type]
+ TableScan: lineitem projection=[l_orderkey, l_partkey,
l_suppkey, l_extendedprice, l_discount]
+ TableScan: supplier projection=[s_suppkey, s_nationkey]
+ Filter: orders.o_orderdate >= Date32("9131") AND
orders.o_orderdate <= Date32("9861")
+ TableScan: orders projection=[o_orderkey, o_custkey,
o_orderdate]
+ TableScan: customer projection=[c_custkey, c_nationkey]
+ SubqueryAlias: n1
+ TableScan: nation projection=[n_nationkey, n_regionkey]
+ SubqueryAlias: n2
+ TableScan: nation projection=[n_nationkey, n_name]
+ Filter: region.r_name = Utf8("AMERICA")
+ TableScan: region projection=[r_regionkey, r_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q9.txt b/benchmarks/expected-plans/q9.txt
new file mode 100644
index 000000000..954c28a35
--- /dev/null
+++ b/benchmarks/expected-plans/q9.txt
@@ -0,0 +1,17 @@
+Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST
+ Projection: profit.nation, profit.o_year, SUM(profit.amount) AS sum_profit
+ Aggregate: groupBy=[[profit.nation, profit.o_year]],
aggr=[[SUM(profit.amount)]]
+ Projection: profit.nation, profit.o_year, profit.amount, alias=profit
+ Projection: nation.n_name AS nation, datepart(Utf8("YEAR"),
orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38,
4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS
Decimal128(23, 2)) AS Decimal128(38, 4)) - CAST(partsupp.ps_supplycost *
lineitem.l_quantity AS Decimal128(38, 4)) AS amount, alias=profit
+ Inner Join: supplier.s_nationkey = nation.n_nationkey
+ Inner Join: lineitem.l_orderkey = orders.o_orderkey
+ Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey,
lineitem.l_partkey = partsupp.ps_partkey
+ Inner Join: lineitem.l_suppkey = supplier.s_suppkey
+ Inner Join: part.p_partkey = lineitem.l_partkey
+ Filter: part.p_name LIKE Utf8("%green%")
+ TableScan: part projection=[p_partkey, p_name]
+ TableScan: lineitem projection=[l_orderkey, l_partkey,
l_suppkey, l_quantity, l_extendedprice, l_discount]
+ TableScan: supplier projection=[s_suppkey, s_nationkey]
+ TableScan: partsupp projection=[ps_partkey, ps_suppkey,
ps_supplycost]
+ TableScan: orders projection=[o_orderkey, o_orderdate]
+ TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs
index d3cab89a2..5205f37c1 100644
--- a/benchmarks/src/bin/tpch.rs
+++ b/benchmarks/src/bin/tpch.rs
@@ -590,6 +590,7 @@ struct QueryResult {
mod tests {
use super::*;
use std::env;
+ use std::io::{BufRead, BufReader};
use std::ops::{Div, Mul};
use std::sync::Arc;
@@ -598,6 +599,7 @@ mod tests {
use datafusion::logical_expr::Expr;
use datafusion::logical_expr::Expr::Cast;
use datafusion::logical_expr::Expr::ScalarFunction;
+ use datafusion::sql::TableReference;
const QUERY_LIMIT: [Option<usize>; 22] = [
None,
@@ -624,6 +626,183 @@ mod tests {
None,
];
+ #[tokio::test]
+ async fn q1_expected_plan() -> Result<()> {
+ expected_plan(1).await
+ }
+
+ #[tokio::test]
+ async fn q2_expected_plan() -> Result<()> {
+ expected_plan(2).await
+ }
+
+ #[tokio::test]
+ async fn q3_expected_plan() -> Result<()> {
+ expected_plan(3).await
+ }
+
+ #[tokio::test]
+ async fn q4_expected_plan() -> Result<()> {
+ expected_plan(4).await
+ }
+
+ #[tokio::test]
+ async fn q5_expected_plan() -> Result<()> {
+ expected_plan(5).await
+ }
+
+ #[tokio::test]
+ async fn q6_expected_plan() -> Result<()> {
+ expected_plan(6).await
+ }
+
+ #[tokio::test]
+ async fn q7_expected_plan() -> Result<()> {
+ expected_plan(7).await
+ }
+
+ #[tokio::test]
+ async fn q8_expected_plan() -> Result<()> {
+ expected_plan(8).await
+ }
+
+ #[tokio::test]
+ async fn q9_expected_plan() -> Result<()> {
+ expected_plan(9).await
+ }
+
+ #[tokio::test]
+ async fn q10_expected_plan() -> Result<()> {
+ expected_plan(10).await
+ }
+
+ #[tokio::test]
+ async fn q11_expected_plan() -> Result<()> {
+ expected_plan(11).await
+ }
+
+ #[tokio::test]
+ async fn q12_expected_plan() -> Result<()> {
+ expected_plan(12).await
+ }
+
+ #[tokio::test]
+ async fn q13_expected_plan() -> Result<()> {
+ expected_plan(13).await
+ }
+
+ #[tokio::test]
+ async fn q14_expected_plan() -> Result<()> {
+ expected_plan(14).await
+ }
+
+ #[tokio::test]
+ async fn q15_expected_plan() -> Result<()> {
+ expected_plan(15).await
+ }
+
+ #[tokio::test]
+ async fn q16_expected_plan() -> Result<()> {
+ expected_plan(16).await
+ }
+
+ /// This query produces different plans depending on operating system. The
difference is
+ /// due to re-writing the following expression:
+ ///
+ /// `sum(l_extendedprice) / 7.0 as avg_yearly`
+ ///
+ /// Linux: Decimal128(Some(7000000000000000195487369212723200),38,33)
+ /// Windows: Decimal128(Some(6999999999999999042565864605876224),38,33)
+ ///
+ /// See https://github.com/apache/arrow-datafusion/issues/3791
+ #[tokio::test]
+ #[ignore]
+ async fn q17_expected_plan() -> Result<()> {
+ expected_plan(17).await
+ }
+
+ #[tokio::test]
+ async fn q18_expected_plan() -> Result<()> {
+ expected_plan(18).await
+ }
+
+ #[tokio::test]
+ async fn q19_expected_plan() -> Result<()> {
+ expected_plan(19).await
+ }
+
+ #[tokio::test]
+ async fn q20_expected_plan() -> Result<()> {
+ expected_plan(20).await
+ }
+
+ #[tokio::test]
+ async fn q21_expected_plan() -> Result<()> {
+ expected_plan(21).await
+ }
+
+ #[tokio::test]
+ async fn q22_expected_plan() -> Result<()> {
+ expected_plan(22).await
+ }
+
+ async fn expected_plan(query: usize) -> Result<()> {
+ let ctx = SessionContext::new();
+ for table in TABLES {
+ let table = table.to_string();
+ let schema = get_schema(&table);
+ let mem_table = MemTable::try_new(Arc::new(schema), vec![])?;
+ ctx.register_table(
+ TableReference::from(table.as_str()),
+ Arc::new(mem_table),
+ )?;
+ }
+
+ let mut actual = String::new();
+ let sql = get_query_sql(query)?;
+ for sql in &sql {
+ let df = ctx.sql(sql.as_str()).await?;
+ let plan = df.to_logical_plan()?;
+ if !actual.is_empty() {
+ actual += "\n";
+ }
+ actual += &format!("{}", plan.display_indent());
+ }
+
+ let possibilities = vec![
+ format!("expected-plans/q{}.txt", query),
+ format!("benchmarks/expected-plans/q{}.txt", query),
+ ];
+
+ let mut found = false;
+ for path in &possibilities {
+ let path = Path::new(&path);
+ if let Ok(expected) = read_text_file(path) {
+ assert_eq!(expected, actual);
+ found = true;
+ break;
+ }
+ }
+ assert!(found);
+
+ Ok(())
+ }
+
+ /// we need to read line by line and add \n so tests work on Windows
+ fn read_text_file(path: &Path) -> Result<String> {
+ let file = File::open(path)?;
+ let reader = BufReader::new(file);
+ let mut str = String::new();
+ for line in reader.lines() {
+ let line = line?;
+ if !str.is_empty() {
+ str += "\n";
+ }
+ str += &line;
+ }
+ Ok(str)
+ }
+
#[tokio::test]
async fn q1() -> Result<()> {
verify_query(1).await
diff --git a/dev/release/rat_exclude_files.txt
b/dev/release/rat_exclude_files.txt
index 98e0c6a38..91ccef60f 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -113,6 +113,7 @@ python/rust-toolchain
python/requirements*.txt
**/testdata/*
benchmarks/queries/*
+benchmarks/expected-plans/*
benchmarks/data/*
ci/*
**/*.svg