wangyum commented on a change in pull request #28575:
URL: https://github.com/apache/spark/pull/28575#discussion_r432966520
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -1372,6 +1376,108 @@ object PushPredicateThroughJoin extends
Rule[LogicalPlan] with PredicateHelper {
}
}
+/**
+ * Rewriting join condition to conjunctive normal form expression so that we
can push
+ * more predicate.
+ */
+object PushPredicateThroughJoinByCNF extends Rule[LogicalPlan] with
PredicateHelper {
+ /**
+ * Rewrite pattern:
+ * 1. (a && b) || c --> (a || c) && (b || c)
+ * 2. a || (b && c) --> (a || b) && (a || c)
+ *
+ * To avoid generating too many predicates, we first group the filter
columns from the same table.
+ */
+ private def toCNF(condition: Expression, depth: Int = 0): Expression = {
+ if (depth < SQLConf.get.maxRewritingCNFDepth) {
+ condition match {
+ case or @ Or(left: And, right: And) =>
+ val lhs =
splitConjunctivePredicates(left).groupBy(_.references.map(_.qualifier))
Review comment:
The conjunctive predicates are grouped by the qualifiers of their references to
avoid generating too many predicates. For example, compare the plans for
TPC-DS q85:
Without grouping by qualifier:
```
== Physical Plan ==
TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#137
ASC NULLS FIRST,aggOrder#142 ASC NULLS FIRST,avg(wr_refunded_cash)#139 ASC
NULLS FIRST,avg(wr_fee)#140 ASC NULLS FIRST], output=[substr(r_reason_desc, 1,
20)#137,avg(ws_quantity)#138,avg(wr_refunded_cash)#139,avg(wr_fee)#140])
+- *(9) HashAggregate(keys=[r_reason_desc#124],
functions=[avg(cast(ws_quantity#18 as bigint)),
avg(UnscaledValue(wr_refunded_cash#54)), avg(UnscaledValue(wr_fee#52))])
+- Exchange hashpartitioning(r_reason_desc#124, 5), true, [id=#351]
+- *(8) HashAggregate(keys=[r_reason_desc#124],
functions=[partial_avg(cast(ws_quantity#18 as bigint)),
partial_avg(UnscaledValue(wr_refunded_cash#54)),
partial_avg(UnscaledValue(wr_fee#52))])
+- *(8) Project [ws_quantity#18, wr_fee#52, wr_refunded_cash#54,
r_reason_desc#124]
+- *(8) BroadcastHashJoin [wr_reason_sk#46L],
[cast(r_reason_sk#122 as bigint)], Inner, BuildRight
:- *(8) Project [ws_quantity#18, wr_reason_sk#46L, wr_fee#52,
wr_refunded_cash#54]
: +- *(8) BroadcastHashJoin [ws_sold_date_sk#0],
[d_date_sk#94], Inner, BuildRight
: :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18,
wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
: : +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#40L],
[cast(ca_address_sk#81 as bigint)], Inner, BuildRight, ((((ca_state#89 IN
(IN,OH,NJ) AND (ws_net_profit#33 >= 100.00)) AND (ws_net_profit#33 <= 200.00))
OR ((ca_state#89 IN (WI,CT,KY) AND (ws_net_profit#33 >= 150.00)) AND
(ws_net_profit#33 <= 300.00))) OR ((ca_state#89 IN (LA,IA,AR) AND
(ws_net_profit#33 >= 50.00)) AND (ws_net_profit#33 <= 250.00)))
: : :- *(8) Project [ws_sold_date_sk#0,
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_reason_sk#46L,
wr_fee#52, wr_refunded_cash#54]
: : : +- *(8) BroadcastHashJoin
[wr_returning_cdemo_sk#42L, cd_marital_status#74, cd_education_status#75],
[cast(cd_demo_sk#125 as bigint), cd_marital_status#127,
cd_education_status#128], Inner, BuildRight
: : : :- *(8) Project [ws_sold_date_sk#0,
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L,
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54,
cd_marital_status#74, cd_education_status#75]
: : : : +- *(8) BroadcastHashJoin
[wr_refunded_cdemo_sk#38L], [cast(cd_demo_sk#72 as bigint)], Inner, BuildRight,
((((((cd_marital_status#74 = M) AND (cd_education_status#75 = Advanced Degree))
AND (ws_sales_price#21 >= 100.00)) AND (ws_sales_price#21 <= 150.00)) OR
((((cd_marital_status#74 = S) AND (cd_education_status#75 = College)) AND
(ws_sales_price#21 >= 50.00)) AND (ws_sales_price#21 <= 100.00))) OR
((((cd_marital_status#74 = W) AND (cd_education_status#75 = 2 yr Degree)) AND
(ws_sales_price#21 >= 150.00)) AND (ws_sales_price#21 <= 200.00)))
: : : : :- *(8) Project [ws_sold_date_sk#0,
ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L,
wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L,
wr_fee#52, wr_refunded_cash#54]
: : : : : +- *(8) BroadcastHashJoin
[ws_web_page_sk#12], [wp_web_page_sk#58], Inner, BuildRight
: : : : : :- *(8) Project
[ws_sold_date_sk#0, ws_web_page_sk#12, ws_quantity#18, ws_sales_price#21,
ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L,
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
: : : : : : +- *(8) BroadcastHashJoin
[cast(ws_item_sk#3 as bigint), cast(ws_order_number#17 as bigint)],
[wr_item_sk#36L, wr_order_number#47L], Inner, BuildRight
: : : : : : :- *(8) Project
[ws_sold_date_sk#0, ws_item_sk#3, ws_web_page_sk#12, ws_order_number#17,
ws_quantity#18, ws_sales_price#21, ws_net_profit#33]
: : : : : : : +- *(8) Filter
(((((((((((((((((((isnotnull(ws_item_sk#3) AND isnotnull(ws_order_number#17))
AND isnotnull(ws_web_page_sk#12)) AND isnotnull(ws_sold_date_sk#0)) AND
(((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 >= 50.00)) OR
(ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 >= 100.00) OR
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 >= 150.00))) AND
(((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 >= 50.00)) OR
(ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 <= 150.00) OR
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 >= 150.00))) AND
(((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 >= 50.00)) OR
(ws_sales_price#21 <= 200.00))) AND (((ws_sales_price#21 >= 100.00) OR
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 <= 200.00))) AND
(((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 >= 50.00)) OR
(ws_sales_price#21 <= 200.00))) AND (((ws_sales_price#21 <= 150.00) OR
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 <= 200.00))) AND
(((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 >= 150.00)) OR
(ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 >= 100.00) OR
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 >= 50.00))) AND
(((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 >= 150.00)) OR
(ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 <= 200.00) OR
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 >= 50.00))) AND
(((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 >= 150.00)) OR
(ws_net_profit#33 <= 250.00))) AND (((ws_net_profit#33 >= 100.00) OR
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 <= 250.00))) AND
(((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 >= 150.00)) OR
(ws_net_profit#33 <= 250.00))) AND (((ws_net_profit#33 <= 200.00) OR
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 <= 250.00)))
: : : : : : : +- *(8)
ColumnarToRow
: : : : : : : +- FileScan
parquet
default.web_sales[ws_sold_date_sk#0,ws_item_sk#3,ws_web_page_sk#12,ws_order_number#17,ws_quantity#18,ws_sales_price#21,ws_net_profit#33]
Batched: true, DataFilters: [isnotnull(ws_item_sk#3),
isnotnull(ws_order_number#17), isnotnull(ws_web_page_sk#12), isnotnull(...,
Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk),
IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_...,
ReadSchema:
struct<ws_sold_date_sk:int,ws_item_sk:int,ws_web_page_sk:int,ws_order_number:int,ws_quantity:int,...
: : : : : : +- BroadcastExchange
HashedRelationBroadcastMode(List(input[0, bigint, true], input[5, bigint,
true])), [id=#291]
: : : : : : +- *(1) Project
[wr_item_sk#36L, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L,
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_order_number#47L, wr_fee#52,
wr_refunded_cash#54]
: : : : : : +- *(1) Filter
(((((isnotnull(wr_item_sk#36L) AND isnotnull(wr_order_number#47L)) AND
isnotnull(wr_refunded_cdemo_sk#38L)) AND isnotnull(wr_returning_cdemo_sk#42L))
AND isnotnull(wr_refunded_addr_sk#40L)) AND isnotnull(wr_reason_sk#46L))
: : : : : : +- *(1)
ColumnarToRow
: : : : : : +- FileScan
parquet
default.web_returns[wr_item_sk#36L,wr_refunded_cdemo_sk#38L,wr_refunded_addr_sk#40L,wr_returning_cdemo_sk#42L,wr_reason_sk#46L,wr_order_number#47L,wr_fee#52,wr_refunded_cash#54]
Batched: true, DataFilters: [isnotnull(wr_item_sk#36L),
isnotnull(wr_order_number#47L), isnotnull(wr_refunded_cdemo_sk#38L), ...,
Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk),
IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr...,
ReadSchema:
struct<wr_item_sk:bigint,wr_refunded_cdemo_sk:bigint,wr_refunded_addr_sk:bigint,wr_returning_cdem...
: : : : : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#300]
: : : : : +- *(2) Project
[wp_web_page_sk#58]
: : : : : +- *(2) Filter
isnotnull(wp_web_page_sk#58)
: : : : : +- *(2) ColumnarToRow
: : : : : +- FileScan parquet
default.web_page[wp_web_page_sk#58] Batched: true, DataFilters:
[isnotnull(wp_web_page_sk#58)], Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema:
struct<wp_web_page_sk:int>
: : : : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#309]
: : : : +- *(3) Project [cd_demo_sk#72,
cd_marital_status#74, cd_education_status#75]
: : : : +- *(3) Filter
((((((((((isnotnull(cd_demo_sk#72) AND isnotnull(cd_education_status#75)) AND
isnotnull(cd_marital_status#74)) AND (((cd_marital_status#74 = M) OR
(cd_marital_status#74 = S)) OR (cd_marital_status#74 = W))) AND
(((cd_marital_status#74 = M) OR (cd_marital_status#74 = S)) OR
(cd_education_status#75 = 2 yr Degree))) AND (((cd_marital_status#74 = M) OR
(cd_education_status#75 = College)) OR (cd_marital_status#74 = W))) AND
(((cd_marital_status#74 = M) OR (cd_education_status#75 = College)) OR
(cd_education_status#75 = 2 yr Degree))) AND (((cd_education_status#75 =
Advanced Degree) OR (cd_marital_status#74 = S)) OR (cd_marital_status#74 = W)))
AND (((cd_education_status#75 = Advanced Degree) OR (cd_marital_status#74 = S))
OR (cd_education_status#75 = 2 yr Degree))) AND (((cd_education_status#75 =
Advanced Degree) OR (cd_education_status#75 = College)) OR
(cd_marital_status#74 = W))) AND (((cd_education_status#75 = Advanced Degree)
OR (cd_education_status#75 = College)) OR (cd_education_status#75 = 2 yr
Degree)))
: : : : +- *(3) ColumnarToRow
: : : : +- FileScan parquet
default.customer_demographics[cd_demo_sk#72,cd_marital_status#74,cd_education_status#75]
Batched: true, DataFilters: [isnotnull(cd_demo_sk#72),
isnotnull(cd_education_status#75), isnotnull(cd_marital_status#74), ((...,
Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk),
IsNotNull(cd_education_status), IsNotNull(cd_marital_status), Or(Or(Equal...,
ReadSchema:
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
: : : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1,
string, true], input[2, string, true])), [id=#318]
: : : +- *(4) Project [cd_demo_sk#125,
cd_marital_status#127, cd_education_status#128]
: : : +- *(4) Filter
((isnotnull(cd_demo_sk#125) AND isnotnull(cd_education_status#128)) AND
isnotnull(cd_marital_status#127))
: : : +- *(4) ColumnarToRow
: : : +- FileScan parquet
default.customer_demographics[cd_demo_sk#125,cd_marital_status#127,cd_education_status#128]
Batched: true, DataFilters: [isnotnull(cd_demo_sk#125),
isnotnull(cd_education_status#128), isnotnull(cd_marital_status#127)], Format:
Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk),
IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema:
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
: : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#327]
: : +- *(5) Project [ca_address_sk#81, ca_state#89]
: : +- *(5) Filter (((isnotnull(ca_country#91)
AND (ca_country#91 = United States)) AND isnotnull(ca_address_sk#81)) AND
((ca_state#89 IN (IN,OH,NJ) OR ca_state#89 IN (WI,CT,KY)) OR ca_state#89 IN
(LA,IA,AR)))
: : +- *(5) ColumnarToRow
: : +- FileScan parquet
default.customer_address[ca_address_sk#81,ca_state#89,ca_country#91] Batched:
true, DataFilters: [isnotnull(ca_country#91), (ca_country#91 = United States),
isnotnull(ca_address_sk#81), ((ca_sta..., Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(ca_country),
EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_...,
ReadSchema: struct<ca_address_sk:int,ca_state:string,ca_country:string>
: +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#336]
: +- *(6) Project [d_date_sk#94]
: +- *(6) Filter ((isnotnull(d_year#100) AND
(d_year#100 = 2000)) AND isnotnull(d_date_sk#94))
: +- *(6) ColumnarToRow
: +- FileScan parquet
default.date_dim[d_date_sk#94,d_year#100] Batched: true, DataFilters:
[isnotnull(d_year#100), (d_year#100 = 2000), isnotnull(d_date_sk#94)], Format:
Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000),
IsNotNull(d_date_sk)], ReadSchema: struct<d_date_sk:int,d_year:int>
+- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#345]
+- *(7) Project [r_reason_sk#122, r_reason_desc#124]
+- *(7) Filter isnotnull(r_reason_sk#122)
+- *(7) ColumnarToRow
+- FileScan parquet
default.reason[r_reason_sk#122,r_reason_desc#124] Batched: true, DataFilters:
[isnotnull(r_reason_sk#122)], Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema:
struct<r_reason_sk:int,r_reason_desc:string>
```
With grouping by qualifier:
```
== Physical Plan ==
TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#137
ASC NULLS FIRST,aggOrder#142 ASC NULLS FIRST,avg(wr_refunded_cash)#139 ASC
NULLS FIRST,avg(wr_fee)#140 ASC NULLS FIRST], output=[substr(r_reason_desc, 1,
20)#137,avg(ws_quantity)#138,avg(wr_refunded_cash)#139,avg(wr_fee)#140])
+- *(9) HashAggregate(keys=[r_reason_desc#124],
functions=[avg(cast(ws_quantity#18 as bigint)),
avg(UnscaledValue(wr_refunded_cash#54)), avg(UnscaledValue(wr_fee#52))])
+- Exchange hashpartitioning(r_reason_desc#124, 5), true, [id=#351]
+- *(8) HashAggregate(keys=[r_reason_desc#124],
functions=[partial_avg(cast(ws_quantity#18 as bigint)),
partial_avg(UnscaledValue(wr_refunded_cash#54)),
partial_avg(UnscaledValue(wr_fee#52))])
+- *(8) Project [ws_quantity#18, wr_fee#52, wr_refunded_cash#54,
r_reason_desc#124]
+- *(8) BroadcastHashJoin [wr_reason_sk#46L],
[cast(r_reason_sk#122 as bigint)], Inner, BuildRight
:- *(8) Project [ws_quantity#18, wr_reason_sk#46L, wr_fee#52,
wr_refunded_cash#54]
: +- *(8) BroadcastHashJoin [ws_sold_date_sk#0],
[d_date_sk#94], Inner, BuildRight
: :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18,
wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
: : +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#40L],
[cast(ca_address_sk#81 as bigint)], Inner, BuildRight, ((((ca_state#89 IN
(IN,OH,NJ) AND (ws_net_profit#33 >= 100.00)) AND (ws_net_profit#33 <= 200.00))
OR ((ca_state#89 IN (WI,CT,KY) AND (ws_net_profit#33 >= 150.00)) AND
(ws_net_profit#33 <= 300.00))) OR ((ca_state#89 IN (LA,IA,AR) AND
(ws_net_profit#33 >= 50.00)) AND (ws_net_profit#33 <= 250.00)))
: : :- *(8) Project [ws_sold_date_sk#0,
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_reason_sk#46L,
wr_fee#52, wr_refunded_cash#54]
: : : +- *(8) BroadcastHashJoin
[wr_returning_cdemo_sk#42L, cd_marital_status#74, cd_education_status#75],
[cast(cd_demo_sk#125 as bigint), cd_marital_status#127,
cd_education_status#128], Inner, BuildRight
: : : :- *(8) Project [ws_sold_date_sk#0,
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L,
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54,
cd_marital_status#74, cd_education_status#75]
: : : : +- *(8) BroadcastHashJoin
[wr_refunded_cdemo_sk#38L], [cast(cd_demo_sk#72 as bigint)], Inner, BuildRight,
((((((cd_marital_status#74 = M) AND (cd_education_status#75 = Advanced Degree))
AND (ws_sales_price#21 >= 100.00)) AND (ws_sales_price#21 <= 150.00)) OR
((((cd_marital_status#74 = S) AND (cd_education_status#75 = College)) AND
(ws_sales_price#21 >= 50.00)) AND (ws_sales_price#21 <= 100.00))) OR
((((cd_marital_status#74 = W) AND (cd_education_status#75 = 2 yr Degree)) AND
(ws_sales_price#21 >= 150.00)) AND (ws_sales_price#21 <= 200.00)))
: : : : :- *(8) Project [ws_sold_date_sk#0,
ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L,
wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L,
wr_fee#52, wr_refunded_cash#54]
: : : : : +- *(8) BroadcastHashJoin
[ws_web_page_sk#12], [wp_web_page_sk#58], Inner, BuildRight
: : : : : :- *(8) Project
[ws_sold_date_sk#0, ws_web_page_sk#12, ws_quantity#18, ws_sales_price#21,
ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L,
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
: : : : : : +- *(8) BroadcastHashJoin
[cast(ws_item_sk#3 as bigint), cast(ws_order_number#17 as bigint)],
[wr_item_sk#36L, wr_order_number#47L], Inner, BuildRight
: : : : : : :- *(8) Project
[ws_sold_date_sk#0, ws_item_sk#3, ws_web_page_sk#12, ws_order_number#17,
ws_quantity#18, ws_sales_price#21, ws_net_profit#33]
: : : : : : : +- *(8) Filter
(((((isnotnull(ws_item_sk#3) AND isnotnull(ws_order_number#17)) AND
isnotnull(ws_web_page_sk#12)) AND isnotnull(ws_sold_date_sk#0)) AND
((((ws_sales_price#21 >= 100.00) AND (ws_sales_price#21 <= 150.00)) OR
((ws_sales_price#21 >= 50.00) AND (ws_sales_price#21 <= 100.00))) OR
((ws_sales_price#21 >= 150.00) AND (ws_sales_price#21 <= 200.00)))) AND
((((ws_net_profit#33 >= 100.00) AND (ws_net_profit#33 <= 200.00)) OR
((ws_net_profit#33 >= 150.00) AND (ws_net_profit#33 <= 300.00))) OR
((ws_net_profit#33 >= 50.00) AND (ws_net_profit#33 <= 250.00))))
: : : : : : : +- *(8)
ColumnarToRow
: : : : : : : +- FileScan
parquet
default.web_sales[ws_sold_date_sk#0,ws_item_sk#3,ws_web_page_sk#12,ws_order_number#17,ws_quantity#18,ws_sales_price#21,ws_net_profit#33]
Batched: true, DataFilters: [isnotnull(ws_item_sk#3),
isnotnull(ws_order_number#17), isnotnull(ws_web_page_sk#12), isnotnull(...,
Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk),
IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_...,
ReadSchema:
struct<ws_sold_date_sk:int,ws_item_sk:int,ws_web_page_sk:int,ws_order_number:int,ws_quantity:int,...
: : : : : : +- BroadcastExchange
HashedRelationBroadcastMode(List(input[0, bigint, true], input[5, bigint,
true])), [id=#291]
: : : : : : +- *(1) Project
[wr_item_sk#36L, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L,
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_order_number#47L, wr_fee#52,
wr_refunded_cash#54]
: : : : : : +- *(1) Filter
(((((isnotnull(wr_item_sk#36L) AND isnotnull(wr_order_number#47L)) AND
isnotnull(wr_refunded_cdemo_sk#38L)) AND isnotnull(wr_returning_cdemo_sk#42L))
AND isnotnull(wr_refunded_addr_sk#40L)) AND isnotnull(wr_reason_sk#46L))
: : : : : : +- *(1)
ColumnarToRow
: : : : : : +- FileScan
parquet
default.web_returns[wr_item_sk#36L,wr_refunded_cdemo_sk#38L,wr_refunded_addr_sk#40L,wr_returning_cdemo_sk#42L,wr_reason_sk#46L,wr_order_number#47L,wr_fee#52,wr_refunded_cash#54]
Batched: true, DataFilters: [isnotnull(wr_item_sk#36L),
isnotnull(wr_order_number#47L), isnotnull(wr_refunded_cdemo_sk#38L), ...,
Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk),
IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr...,
ReadSchema:
struct<wr_item_sk:bigint,wr_refunded_cdemo_sk:bigint,wr_refunded_addr_sk:bigint,wr_returning_cdem...
: : : : : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#300]
: : : : : +- *(2) Project
[wp_web_page_sk#58]
: : : : : +- *(2) Filter
isnotnull(wp_web_page_sk#58)
: : : : : +- *(2) ColumnarToRow
: : : : : +- FileScan parquet
default.web_page[wp_web_page_sk#58] Batched: true, DataFilters:
[isnotnull(wp_web_page_sk#58)], Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema:
struct<wp_web_page_sk:int>
: : : : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#309]
: : : : +- *(3) Project [cd_demo_sk#72,
cd_marital_status#74, cd_education_status#75]
: : : : +- *(3) Filter
(((isnotnull(cd_demo_sk#72) AND isnotnull(cd_education_status#75)) AND
isnotnull(cd_marital_status#74)) AND ((((cd_marital_status#74 = M) AND
(cd_education_status#75 = Advanced Degree)) OR ((cd_marital_status#74 = S) AND
(cd_education_status#75 = College))) OR ((cd_marital_status#74 = W) AND
(cd_education_status#75 = 2 yr Degree))))
: : : : +- *(3) ColumnarToRow
: : : : +- FileScan parquet
default.customer_demographics[cd_demo_sk#72,cd_marital_status#74,cd_education_status#75]
Batched: true, DataFilters: [isnotnull(cd_demo_sk#72),
isnotnull(cd_education_status#75), isnotnull(cd_marital_status#74), ((...,
Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk),
IsNotNull(cd_education_status), IsNotNull(cd_marital_status), Or(Or(And(E...,
ReadSchema:
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
: : : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1,
string, true], input[2, string, true])), [id=#318]
: : : +- *(4) Project [cd_demo_sk#125,
cd_marital_status#127, cd_education_status#128]
: : : +- *(4) Filter
((isnotnull(cd_demo_sk#125) AND isnotnull(cd_education_status#128)) AND
isnotnull(cd_marital_status#127))
: : : +- *(4) ColumnarToRow
: : : +- FileScan parquet
default.customer_demographics[cd_demo_sk#125,cd_marital_status#127,cd_education_status#128]
Batched: true, DataFilters: [isnotnull(cd_demo_sk#125),
isnotnull(cd_education_status#128), isnotnull(cd_marital_status#127)], Format:
Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk),
IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema:
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
: : +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#327]
: : +- *(5) Project [ca_address_sk#81, ca_state#89]
: : +- *(5) Filter (((isnotnull(ca_country#91)
AND (ca_country#91 = United States)) AND isnotnull(ca_address_sk#81)) AND
((ca_state#89 IN (IN,OH,NJ) OR ca_state#89 IN (WI,CT,KY)) OR ca_state#89 IN
(LA,IA,AR)))
: : +- *(5) ColumnarToRow
: : +- FileScan parquet
default.customer_address[ca_address_sk#81,ca_state#89,ca_country#91] Batched:
true, DataFilters: [isnotnull(ca_country#91), (ca_country#91 = United States),
isnotnull(ca_address_sk#81), ((ca_sta..., Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(ca_country),
EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_...,
ReadSchema: struct<ca_address_sk:int,ca_state:string,ca_country:string>
: +- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#336]
: +- *(6) Project [d_date_sk#94]
: +- *(6) Filter ((isnotnull(d_year#100) AND
(d_year#100 = 2000)) AND isnotnull(d_date_sk#94))
: +- *(6) ColumnarToRow
: +- FileScan parquet
default.date_dim[d_date_sk#94,d_year#100] Batched: true, DataFilters:
[isnotnull(d_year#100), (d_year#100 = 2000), isnotnull(d_date_sk#94)], Format:
Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000),
IsNotNull(d_date_sk)], ReadSchema: struct<d_date_sk:int,d_year:int>
+- BroadcastExchange
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))),
[id=#345]
+- *(7) Project [r_reason_sk#122, r_reason_desc#124]
+- *(7) Filter isnotnull(r_reason_sk#122)
+- *(7) ColumnarToRow
+- FileScan parquet
default.reason[r_reason_sk#122,r_reason_desc#124] Batched: true, DataFilters:
[isnotnull(r_reason_sk#122)], Format: Parquet, Location:
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema:
struct<r_reason_sk:int,r_reason_desc:string>
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]