wangyum commented on a change in pull request #28575:
URL: https://github.com/apache/spark/pull/28575#discussion_r432966520



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -1372,6 +1376,108 @@ object PushPredicateThroughJoin extends 
Rule[LogicalPlan] with PredicateHelper {
   }
 }
 
+/**
+ * Rewriting join condition to conjunctive normal form expression so that we 
can push
+ * more predicate.
+ */
+object PushPredicateThroughJoinByCNF extends Rule[LogicalPlan] with 
PredicateHelper {
+  /**
+   * Rewrite pattern:
+   * 1. (a && b) || c --> (a || c) && (b || c)
+   * 2. a || (b && c) --> (a || b) && (a || c)
+   *
+   * To avoid generating too many predicates, we first group the filter 
columns from the same table.
+   */
+  private def toCNF(condition: Expression, depth: Int = 0): Expression = {
+    if (depth < SQLConf.get.maxRewritingCNFDepth) {
+      condition match {
+        case or @ Or(left: And, right: And) =>
+          val lhs = 
splitConjunctivePredicates(left).groupBy(_.references.map(_.qualifier))

Review comment:
       We group by qualifier to avoid generating too many predicates. For example:
   TPCDS q85:
   Without grouping by qualifier:
   ```
   == Physical Plan ==
   TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#137 
ASC NULLS FIRST,aggOrder#142 ASC NULLS FIRST,avg(wr_refunded_cash)#139 ASC 
NULLS FIRST,avg(wr_fee)#140 ASC NULLS FIRST], output=[substr(r_reason_desc, 1, 
20)#137,avg(ws_quantity)#138,avg(wr_refunded_cash)#139,avg(wr_fee)#140])
   +- *(9) HashAggregate(keys=[r_reason_desc#124], 
functions=[avg(cast(ws_quantity#18 as bigint)), 
avg(UnscaledValue(wr_refunded_cash#54)), avg(UnscaledValue(wr_fee#52))])
      +- Exchange hashpartitioning(r_reason_desc#124, 5), true, [id=#351]
         +- *(8) HashAggregate(keys=[r_reason_desc#124], 
functions=[partial_avg(cast(ws_quantity#18 as bigint)), 
partial_avg(UnscaledValue(wr_refunded_cash#54)), 
partial_avg(UnscaledValue(wr_fee#52))])
            +- *(8) Project [ws_quantity#18, wr_fee#52, wr_refunded_cash#54, 
r_reason_desc#124]
               +- *(8) BroadcastHashJoin [wr_reason_sk#46L], 
[cast(r_reason_sk#122 as bigint)], Inner, BuildRight
                  :- *(8) Project [ws_quantity#18, wr_reason_sk#46L, wr_fee#52, 
wr_refunded_cash#54]
                  :  +- *(8) BroadcastHashJoin [ws_sold_date_sk#0], 
[d_date_sk#94], Inner, BuildRight
                  :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, 
wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :  +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#40L], 
[cast(ca_address_sk#81 as bigint)], Inner, BuildRight, ((((ca_state#89 IN 
(IN,OH,NJ) AND (ws_net_profit#33 >= 100.00)) AND (ws_net_profit#33 <= 200.00)) 
OR ((ca_state#89 IN (WI,CT,KY) AND (ws_net_profit#33 >= 150.00)) AND 
(ws_net_profit#33 <= 300.00))) OR ((ca_state#89 IN (LA,IA,AR) AND 
(ws_net_profit#33 >= 50.00)) AND (ws_net_profit#33 <= 250.00)))
                  :     :     :- *(8) Project [ws_sold_date_sk#0, 
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_reason_sk#46L, 
wr_fee#52, wr_refunded_cash#54]
                  :     :     :  +- *(8) BroadcastHashJoin 
[wr_returning_cdemo_sk#42L, cd_marital_status#74, cd_education_status#75], 
[cast(cd_demo_sk#125 as bigint), cd_marital_status#127, 
cd_education_status#128], Inner, BuildRight
                  :     :     :     :- *(8) Project [ws_sold_date_sk#0, 
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, 
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54, 
cd_marital_status#74, cd_education_status#75]
                  :     :     :     :  +- *(8) BroadcastHashJoin 
[wr_refunded_cdemo_sk#38L], [cast(cd_demo_sk#72 as bigint)], Inner, BuildRight, 
((((((cd_marital_status#74 = M) AND (cd_education_status#75 = Advanced Degree)) 
AND (ws_sales_price#21 >= 100.00)) AND (ws_sales_price#21 <= 150.00)) OR 
((((cd_marital_status#74 = S) AND (cd_education_status#75 = College)) AND 
(ws_sales_price#21 >= 50.00)) AND (ws_sales_price#21 <= 100.00))) OR 
((((cd_marital_status#74 = W) AND (cd_education_status#75 = 2 yr Degree)) AND 
(ws_sales_price#21 >= 150.00)) AND (ws_sales_price#21 <= 200.00)))
                  :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, 
ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L, 
wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, 
wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :  +- *(8) BroadcastHashJoin 
[ws_web_page_sk#12], [wp_web_page_sk#58], Inner, BuildRight
                  :     :     :     :     :     :- *(8) Project 
[ws_sold_date_sk#0, ws_web_page_sk#12, ws_quantity#18, ws_sales_price#21, 
ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, 
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :     :  +- *(8) BroadcastHashJoin 
[cast(ws_item_sk#3 as bigint), cast(ws_order_number#17 as bigint)], 
[wr_item_sk#36L, wr_order_number#47L], Inner, BuildRight
                  :     :     :     :     :     :     :- *(8) Project 
[ws_sold_date_sk#0, ws_item_sk#3, ws_web_page_sk#12, ws_order_number#17, 
ws_quantity#18, ws_sales_price#21, ws_net_profit#33]
                  :     :     :     :     :     :     :  +- *(8) Filter 
(((((((((((((((((((isnotnull(ws_item_sk#3) AND isnotnull(ws_order_number#17)) 
AND isnotnull(ws_web_page_sk#12)) AND isnotnull(ws_sold_date_sk#0)) AND 
(((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 >= 50.00)) OR 
(ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 >= 100.00) OR 
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 >= 150.00))) AND 
(((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 >= 50.00)) OR 
(ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 <= 150.00) OR 
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 >= 150.00))) AND 
(((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 >= 50.00)) OR 
(ws_sales_price#21 <= 200.00))) AND (((ws_sales_price#21 >= 100.00) OR 
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 <= 200.00))) AND 
(((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 >= 50.00)) OR 
(ws_sales_price#21 <= 200.00))) AND (((ws_sales_price#21 <= 150.00) OR 
(ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 <= 200.00))) AND 
(((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 >= 150.00)) OR 
(ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 >= 100.00) OR 
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 >= 50.00))) AND 
(((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 >= 150.00)) OR 
(ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 <= 200.00) OR 
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 >= 50.00))) AND 
(((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 >= 150.00)) OR 
(ws_net_profit#33 <= 250.00))) AND (((ws_net_profit#33 >= 100.00) OR 
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 <= 250.00))) AND 
(((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 >= 150.00)) OR 
(ws_net_profit#33 <= 250.00))) AND (((ws_net_profit#33 <= 200.00) OR 
(ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 <= 250.00)))
                  :     :     :     :     :     :     :     +- *(8) 
ColumnarToRow
                  :     :     :     :     :     :     :        +- FileScan 
parquet 
default.web_sales[ws_sold_date_sk#0,ws_item_sk#3,ws_web_page_sk#12,ws_order_number#17,ws_quantity#18,ws_sales_price#21,ws_net_profit#33]
 Batched: true, DataFilters: [isnotnull(ws_item_sk#3), 
isnotnull(ws_order_number#17), isnotnull(ws_web_page_sk#12), isnotnull(..., 
Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), 
IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., 
ReadSchema: 
struct<ws_sold_date_sk:int,ws_item_sk:int,ws_web_page_sk:int,ws_order_number:int,ws_quantity:int,...
                  :     :     :     :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(input[0, bigint, true], input[5, bigint, 
true])), [id=#291]
                  :     :     :     :     :     :        +- *(1) Project 
[wr_item_sk#36L, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, 
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_order_number#47L, wr_fee#52, 
wr_refunded_cash#54]
                  :     :     :     :     :     :           +- *(1) Filter 
(((((isnotnull(wr_item_sk#36L) AND isnotnull(wr_order_number#47L)) AND 
isnotnull(wr_refunded_cdemo_sk#38L)) AND isnotnull(wr_returning_cdemo_sk#42L)) 
AND isnotnull(wr_refunded_addr_sk#40L)) AND isnotnull(wr_reason_sk#46L))
                  :     :     :     :     :     :              +- *(1) 
ColumnarToRow
                  :     :     :     :     :     :                 +- FileScan 
parquet 
default.web_returns[wr_item_sk#36L,wr_refunded_cdemo_sk#38L,wr_refunded_addr_sk#40L,wr_returning_cdemo_sk#42L,wr_reason_sk#46L,wr_order_number#47L,wr_fee#52,wr_refunded_cash#54]
 Batched: true, DataFilters: [isnotnull(wr_item_sk#36L), 
isnotnull(wr_order_number#47L), isnotnull(wr_refunded_cdemo_sk#38L), ..., 
Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), 
IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr..., 
ReadSchema: 
struct<wr_item_sk:bigint,wr_refunded_cdemo_sk:bigint,wr_refunded_addr_sk:bigint,wr_returning_cdem...
                  :     :     :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#300]
                  :     :     :     :     :        +- *(2) Project 
[wp_web_page_sk#58]
                  :     :     :     :     :           +- *(2) Filter 
isnotnull(wp_web_page_sk#58)
                  :     :     :     :     :              +- *(2) ColumnarToRow
                  :     :     :     :     :                 +- FileScan parquet 
default.web_page[wp_web_page_sk#58] Batched: true, DataFilters: 
[isnotnull(wp_web_page_sk#58)], Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: 
struct<wp_web_page_sk:int>
                  :     :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#309]
                  :     :     :     :        +- *(3) Project [cd_demo_sk#72, 
cd_marital_status#74, cd_education_status#75]
                  :     :     :     :           +- *(3) Filter 
((((((((((isnotnull(cd_demo_sk#72) AND isnotnull(cd_education_status#75)) AND 
isnotnull(cd_marital_status#74)) AND (((cd_marital_status#74 = M) OR 
(cd_marital_status#74 = S)) OR (cd_marital_status#74 = W))) AND 
(((cd_marital_status#74 = M) OR (cd_marital_status#74 = S)) OR 
(cd_education_status#75 = 2 yr Degree))) AND (((cd_marital_status#74 = M) OR 
(cd_education_status#75 = College)) OR (cd_marital_status#74 = W))) AND 
(((cd_marital_status#74 = M) OR (cd_education_status#75 = College)) OR 
(cd_education_status#75 = 2 yr Degree))) AND (((cd_education_status#75 = 
Advanced Degree) OR (cd_marital_status#74 = S)) OR (cd_marital_status#74 = W))) 
AND (((cd_education_status#75 = Advanced Degree) OR (cd_marital_status#74 = S)) 
OR (cd_education_status#75 = 2 yr Degree))) AND (((cd_education_status#75 = 
Advanced Degree) OR (cd_education_status#75 = College)) OR 
(cd_marital_status#74 = W))) AND (((cd_education_status#75 = Advanced Degree) 
OR (cd_education_status#75 = College)) OR (cd_education_status#75 = 2 yr 
Degree)))
                  :     :     :     :              +- *(3) ColumnarToRow
                  :     :     :     :                 +- FileScan parquet 
default.customer_demographics[cd_demo_sk#72,cd_marital_status#74,cd_education_status#75]
 Batched: true, DataFilters: [isnotnull(cd_demo_sk#72), 
isnotnull(cd_education_status#75), isnotnull(cd_marital_status#74), ((..., 
Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), 
IsNotNull(cd_education_status), IsNotNull(cd_marital_status), Or(Or(Equal..., 
ReadSchema: 
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, 
string, true], input[2, string, true])), [id=#318]
                  :     :     :        +- *(4) Project [cd_demo_sk#125, 
cd_marital_status#127, cd_education_status#128]
                  :     :     :           +- *(4) Filter 
((isnotnull(cd_demo_sk#125) AND isnotnull(cd_education_status#128)) AND 
isnotnull(cd_marital_status#127))
                  :     :     :              +- *(4) ColumnarToRow
                  :     :     :                 +- FileScan parquet 
default.customer_demographics[cd_demo_sk#125,cd_marital_status#127,cd_education_status#128]
 Batched: true, DataFilters: [isnotnull(cd_demo_sk#125), 
isnotnull(cd_education_status#128), isnotnull(cd_marital_status#127)], Format: 
Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), 
IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: 
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#327]
                  :     :        +- *(5) Project [ca_address_sk#81, ca_state#89]
                  :     :           +- *(5) Filter (((isnotnull(ca_country#91) 
AND (ca_country#91 = United States)) AND isnotnull(ca_address_sk#81)) AND 
((ca_state#89 IN (IN,OH,NJ) OR ca_state#89 IN (WI,CT,KY)) OR ca_state#89 IN 
(LA,IA,AR)))
                  :     :              +- *(5) ColumnarToRow
                  :     :                 +- FileScan parquet 
default.customer_address[ca_address_sk#81,ca_state#89,ca_country#91] Batched: 
true, DataFilters: [isnotnull(ca_country#91), (ca_country#91 = United States), 
isnotnull(ca_address_sk#81), ((ca_sta..., Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), 
EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_..., 
ReadSchema: struct<ca_address_sk:int,ca_state:string,ca_country:string>
                  :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#336]
                  :        +- *(6) Project [d_date_sk#94]
                  :           +- *(6) Filter ((isnotnull(d_year#100) AND 
(d_year#100 = 2000)) AND isnotnull(d_date_sk#94))
                  :              +- *(6) ColumnarToRow
                  :                 +- FileScan parquet 
default.date_dim[d_date_sk#94,d_year#100] Batched: true, DataFilters: 
[isnotnull(d_year#100), (d_year#100 = 2000), isnotnull(d_date_sk#94)], Format: 
Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), 
IsNotNull(d_date_sk)], ReadSchema: struct<d_date_sk:int,d_year:int>
                  +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#345]
                     +- *(7) Project [r_reason_sk#122, r_reason_desc#124]
                        +- *(7) Filter isnotnull(r_reason_sk#122)
                           +- *(7) ColumnarToRow
                              +- FileScan parquet 
default.reason[r_reason_sk#122,r_reason_desc#124] Batched: true, DataFilters: 
[isnotnull(r_reason_sk#122)], Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: 
struct<r_reason_sk:int,r_reason_desc:string>
   
   ```
   
   With grouping by qualifier:
   ```
   == Physical Plan ==
   TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#137 
ASC NULLS FIRST,aggOrder#142 ASC NULLS FIRST,avg(wr_refunded_cash)#139 ASC 
NULLS FIRST,avg(wr_fee)#140 ASC NULLS FIRST], output=[substr(r_reason_desc, 1, 
20)#137,avg(ws_quantity)#138,avg(wr_refunded_cash)#139,avg(wr_fee)#140])
   +- *(9) HashAggregate(keys=[r_reason_desc#124], 
functions=[avg(cast(ws_quantity#18 as bigint)), 
avg(UnscaledValue(wr_refunded_cash#54)), avg(UnscaledValue(wr_fee#52))])
      +- Exchange hashpartitioning(r_reason_desc#124, 5), true, [id=#351]
         +- *(8) HashAggregate(keys=[r_reason_desc#124], 
functions=[partial_avg(cast(ws_quantity#18 as bigint)), 
partial_avg(UnscaledValue(wr_refunded_cash#54)), 
partial_avg(UnscaledValue(wr_fee#52))])
            +- *(8) Project [ws_quantity#18, wr_fee#52, wr_refunded_cash#54, 
r_reason_desc#124]
               +- *(8) BroadcastHashJoin [wr_reason_sk#46L], 
[cast(r_reason_sk#122 as bigint)], Inner, BuildRight
                  :- *(8) Project [ws_quantity#18, wr_reason_sk#46L, wr_fee#52, 
wr_refunded_cash#54]
                  :  +- *(8) BroadcastHashJoin [ws_sold_date_sk#0], 
[d_date_sk#94], Inner, BuildRight
                  :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, 
wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :  +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#40L], 
[cast(ca_address_sk#81 as bigint)], Inner, BuildRight, ((((ca_state#89 IN 
(IN,OH,NJ) AND (ws_net_profit#33 >= 100.00)) AND (ws_net_profit#33 <= 200.00)) 
OR ((ca_state#89 IN (WI,CT,KY) AND (ws_net_profit#33 >= 150.00)) AND 
(ws_net_profit#33 <= 300.00))) OR ((ca_state#89 IN (LA,IA,AR) AND 
(ws_net_profit#33 >= 50.00)) AND (ws_net_profit#33 <= 250.00)))
                  :     :     :- *(8) Project [ws_sold_date_sk#0, 
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_reason_sk#46L, 
wr_fee#52, wr_refunded_cash#54]
                  :     :     :  +- *(8) BroadcastHashJoin 
[wr_returning_cdemo_sk#42L, cd_marital_status#74, cd_education_status#75], 
[cast(cd_demo_sk#125 as bigint), cd_marital_status#127, 
cd_education_status#128], Inner, BuildRight
                  :     :     :     :- *(8) Project [ws_sold_date_sk#0, 
ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, 
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54, 
cd_marital_status#74, cd_education_status#75]
                  :     :     :     :  +- *(8) BroadcastHashJoin 
[wr_refunded_cdemo_sk#38L], [cast(cd_demo_sk#72 as bigint)], Inner, BuildRight, 
((((((cd_marital_status#74 = M) AND (cd_education_status#75 = Advanced Degree)) 
AND (ws_sales_price#21 >= 100.00)) AND (ws_sales_price#21 <= 150.00)) OR 
((((cd_marital_status#74 = S) AND (cd_education_status#75 = College)) AND 
(ws_sales_price#21 >= 50.00)) AND (ws_sales_price#21 <= 100.00))) OR 
((((cd_marital_status#74 = W) AND (cd_education_status#75 = 2 yr Degree)) AND 
(ws_sales_price#21 >= 150.00)) AND (ws_sales_price#21 <= 200.00)))
                  :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, 
ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L, 
wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, 
wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :  +- *(8) BroadcastHashJoin 
[ws_web_page_sk#12], [wp_web_page_sk#58], Inner, BuildRight
                  :     :     :     :     :     :- *(8) Project 
[ws_sold_date_sk#0, ws_web_page_sk#12, ws_quantity#18, ws_sales_price#21, 
ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, 
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :     :  +- *(8) BroadcastHashJoin 
[cast(ws_item_sk#3 as bigint), cast(ws_order_number#17 as bigint)], 
[wr_item_sk#36L, wr_order_number#47L], Inner, BuildRight
                  :     :     :     :     :     :     :- *(8) Project 
[ws_sold_date_sk#0, ws_item_sk#3, ws_web_page_sk#12, ws_order_number#17, 
ws_quantity#18, ws_sales_price#21, ws_net_profit#33]
                  :     :     :     :     :     :     :  +- *(8) Filter 
(((((isnotnull(ws_item_sk#3) AND isnotnull(ws_order_number#17)) AND 
isnotnull(ws_web_page_sk#12)) AND isnotnull(ws_sold_date_sk#0)) AND 
((((ws_sales_price#21 >= 100.00) AND (ws_sales_price#21 <= 150.00)) OR 
((ws_sales_price#21 >= 50.00) AND (ws_sales_price#21 <= 100.00))) OR 
((ws_sales_price#21 >= 150.00) AND (ws_sales_price#21 <= 200.00)))) AND 
((((ws_net_profit#33 >= 100.00) AND (ws_net_profit#33 <= 200.00)) OR 
((ws_net_profit#33 >= 150.00) AND (ws_net_profit#33 <= 300.00))) OR 
((ws_net_profit#33 >= 50.00) AND (ws_net_profit#33 <= 250.00))))
                  :     :     :     :     :     :     :     +- *(8) 
ColumnarToRow
                  :     :     :     :     :     :     :        +- FileScan 
parquet 
default.web_sales[ws_sold_date_sk#0,ws_item_sk#3,ws_web_page_sk#12,ws_order_number#17,ws_quantity#18,ws_sales_price#21,ws_net_profit#33]
 Batched: true, DataFilters: [isnotnull(ws_item_sk#3), 
isnotnull(ws_order_number#17), isnotnull(ws_web_page_sk#12), isnotnull(..., 
Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), 
IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., 
ReadSchema: 
struct<ws_sold_date_sk:int,ws_item_sk:int,ws_web_page_sk:int,ws_order_number:int,ws_quantity:int,...
                  :     :     :     :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(input[0, bigint, true], input[5, bigint, 
true])), [id=#291]
                  :     :     :     :     :     :        +- *(1) Project 
[wr_item_sk#36L, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, 
wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_order_number#47L, wr_fee#52, 
wr_refunded_cash#54]
                  :     :     :     :     :     :           +- *(1) Filter 
(((((isnotnull(wr_item_sk#36L) AND isnotnull(wr_order_number#47L)) AND 
isnotnull(wr_refunded_cdemo_sk#38L)) AND isnotnull(wr_returning_cdemo_sk#42L)) 
AND isnotnull(wr_refunded_addr_sk#40L)) AND isnotnull(wr_reason_sk#46L))
                  :     :     :     :     :     :              +- *(1) 
ColumnarToRow
                  :     :     :     :     :     :                 +- FileScan 
parquet 
default.web_returns[wr_item_sk#36L,wr_refunded_cdemo_sk#38L,wr_refunded_addr_sk#40L,wr_returning_cdemo_sk#42L,wr_reason_sk#46L,wr_order_number#47L,wr_fee#52,wr_refunded_cash#54]
 Batched: true, DataFilters: [isnotnull(wr_item_sk#36L), 
isnotnull(wr_order_number#47L), isnotnull(wr_refunded_cdemo_sk#38L), ..., 
Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), 
IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr..., 
ReadSchema: 
struct<wr_item_sk:bigint,wr_refunded_cdemo_sk:bigint,wr_refunded_addr_sk:bigint,wr_returning_cdem...
                  :     :     :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#300]
                  :     :     :     :     :        +- *(2) Project 
[wp_web_page_sk#58]
                  :     :     :     :     :           +- *(2) Filter 
isnotnull(wp_web_page_sk#58)
                  :     :     :     :     :              +- *(2) ColumnarToRow
                  :     :     :     :     :                 +- FileScan parquet 
default.web_page[wp_web_page_sk#58] Batched: true, DataFilters: 
[isnotnull(wp_web_page_sk#58)], Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: 
struct<wp_web_page_sk:int>
                  :     :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#309]
                  :     :     :     :        +- *(3) Project [cd_demo_sk#72, 
cd_marital_status#74, cd_education_status#75]
                  :     :     :     :           +- *(3) Filter 
(((isnotnull(cd_demo_sk#72) AND isnotnull(cd_education_status#75)) AND 
isnotnull(cd_marital_status#74)) AND ((((cd_marital_status#74 = M) AND 
(cd_education_status#75 = Advanced Degree)) OR ((cd_marital_status#74 = S) AND 
(cd_education_status#75 = College))) OR ((cd_marital_status#74 = W) AND 
(cd_education_status#75 = 2 yr Degree))))
                  :     :     :     :              +- *(3) ColumnarToRow
                  :     :     :     :                 +- FileScan parquet 
default.customer_demographics[cd_demo_sk#72,cd_marital_status#74,cd_education_status#75]
 Batched: true, DataFilters: [isnotnull(cd_demo_sk#72), 
isnotnull(cd_education_status#75), isnotnull(cd_marital_status#74), ((..., 
Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), 
IsNotNull(cd_education_status), IsNotNull(cd_marital_status), Or(Or(And(E..., 
ReadSchema: 
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, 
string, true], input[2, string, true])), [id=#318]
                  :     :     :        +- *(4) Project [cd_demo_sk#125, 
cd_marital_status#127, cd_education_status#128]
                  :     :     :           +- *(4) Filter 
((isnotnull(cd_demo_sk#125) AND isnotnull(cd_education_status#128)) AND 
isnotnull(cd_marital_status#127))
                  :     :     :              +- *(4) ColumnarToRow
                  :     :     :                 +- FileScan parquet 
default.customer_demographics[cd_demo_sk#125,cd_marital_status#127,cd_education_status#128]
 Batched: true, DataFilters: [isnotnull(cd_demo_sk#125), 
isnotnull(cd_education_status#128), isnotnull(cd_marital_status#127)], Format: 
Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), 
IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: 
struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#327]
                  :     :        +- *(5) Project [ca_address_sk#81, ca_state#89]
                  :     :           +- *(5) Filter (((isnotnull(ca_country#91) 
AND (ca_country#91 = United States)) AND isnotnull(ca_address_sk#81)) AND 
((ca_state#89 IN (IN,OH,NJ) OR ca_state#89 IN (WI,CT,KY)) OR ca_state#89 IN 
(LA,IA,AR)))
                  :     :              +- *(5) ColumnarToRow
                  :     :                 +- FileScan parquet 
default.customer_address[ca_address_sk#81,ca_state#89,ca_country#91] Batched: 
true, DataFilters: [isnotnull(ca_country#91), (ca_country#91 = United States), 
isnotnull(ca_address_sk#81), ((ca_sta..., Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), 
EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_..., 
ReadSchema: struct<ca_address_sk:int,ca_state:string,ca_country:string>
                  :     +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#336]
                  :        +- *(6) Project [d_date_sk#94]
                  :           +- *(6) Filter ((isnotnull(d_year#100) AND 
(d_year#100 = 2000)) AND isnotnull(d_date_sk#94))
                  :              +- *(6) ColumnarToRow
                  :                 +- FileScan parquet 
default.date_dim[d_date_sk#94,d_year#100] Batched: true, DataFilters: 
[isnotnull(d_year#100), (d_year#100 = 2000), isnotnull(d_date_sk#94)], Format: 
Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), 
IsNotNull(d_date_sk)], ReadSchema: struct<d_date_sk:int,d_year:int>
                  +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), 
[id=#345]
                     +- *(7) Project [r_reason_sk#122, r_reason_desc#124]
                        +- *(7) Filter isnotnull(r_reason_sk#122)
                           +- *(7) ColumnarToRow
                              +- FileScan parquet 
default.reason[r_reason_sk#122,r_reason_desc#124] Batched: true, DataFilters: 
[isnotnull(r_reason_sk#122)], Format: Parquet, Location: 
InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark...,
 PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: 
struct<r_reason_sk:int,r_reason_desc:string>
   
   
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to