AngersZhuuuu commented on a change in pull request #30278:
URL: https://github.com/apache/spark/pull/30278#discussion_r519696759



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -1269,6 +1269,7 @@ object PushPredicateThroughNonJoin extends 
Rule[LogicalPlan] with PredicateHelpe
     case _: Sort => true
     case _: BatchEvalPython => true
     case _: ArrowEvalPython => true
+    case _: Expand => true

Review comment:
       > > This change affects the PushDownLeftSemiAntiJoin rule, too. So, 
could you add tests for the case?
   > 
   > So can we add such a test?
   
   With test case in  `LeftSemiPushdownSuite`
   ```
     test("Unary: LeftSemi join push down through expand") {
       val expand = Expand(Seq(Seq('a, 'b, "null"), Seq('a, "null", 'c)),
         Seq('a, 'b, 'c), testRelation)
       val originalQuery = expand
         .join(testRelation1, joinType = LeftSemi, condition = Some('b === 'd 
&& 'b === 1))
   
       val optimized = Optimize.execute(originalQuery.analyze)
       val correctAnswer = Expand(Seq(Seq('a, 'b, "null"), Seq('a, "null", 'c)),
         Seq('a, 'b, 'c), Filter(EqualTo('b, 1), testRelation))
           .join(testRelation1, joinType = LeftSemi, condition = Some('b === 
'd))
           .analyze
   
       comparePlans(optimized, correctAnswer)
     }
   ```
   originalQuery is 
   ```
   'Join LeftSemi, (('b = 'd) AND ('b = 1))
   :- 'Expand [List('a, 'b, null), List('a, null, 'c)], ['a, 'b, 'c]
   :  +- LocalRelation <empty>, [a#0, b#1, c#2]
   +- LocalRelation <empty>, [d#3]
   ```
   
   Test result is
   ```
   == FAIL: Plans do not match ===
   !'Expand [List(a#0, b#0, null), List(a#0, null, c#0)], [a#0, b#0, c#0]   
'Join LeftSemi, (b#0 = d#0)
   !+- 'Join LeftSemi, ((b#0 = 1) AND (b#0 = d#0))                          :- 
Expand [List(a#0, b#0, null), List(a#0, null, c#0)], [a#0, b#0, c#0]
   !   :- LocalRelation <empty>, [a#0, b#0, c#0]                            :  
+- Filter (b#0 = 1)
   !   +- LocalRelation <empty>, [d#0]                                      :   
  +- LocalRelation <empty>, [a#0, b#0, c#0]
   !                                                                        +- 
LocalRelation <empty>, [d#0]
       
   ```
   
   Expand will be promoted below Join, so should we ignore this case or add a 
parameter  in  `canPushThrough` like below
   ```
    def canPushThrough(p: UnaryNode, isFilterPushDown: Boolean = false): 
Boolean = p match {
       // Note that some operators (e.g. project, aggregate, union) are being 
handled separately
       // (earlier in this rule).
       case _: AppendColumns => true
       case _: Distinct => true
       case _: Generate => true
       case _: Pivot => true
       case _: RepartitionByExpression => true
       case _: Repartition => true
       case _: ScriptTransformation => true
       case _: Sort => true
       case _: BatchEvalPython => true
       case _: ArrowEvalPython => true
       case _: Expand => isFilterPushDown
       case _ => false
     }
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to