Github user dongjoon-hyun commented on a diff in the pull request:
https://github.com/apache/spark/pull/20717#discussion_r176323419
--- Diff:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
---
@@ -204,4 +204,40 @@ class InferFiltersFromConstraintsSuite extends
PlanTest {
val optimized = Optimize.execute(originalQuery)
comparePlans(optimized, correctAnswer)
}
+
+ test("SPARK-23564: left anti join should filter out null join keys on
right side") {
+ val x = testRelation.subquery('x)
+ val y = testRelation.subquery('y)
+ val condition = Some("x.a".attr === "y.a".attr)
+ val originalQuery = x.join(y, LeftAnti, condition).analyze
+ val left = x
+ val right = y.where(IsNotNull('a))
+ val correctAnswer = left.join(right, LeftAnti, condition).analyze
+ val optimized = Optimize.execute(originalQuery)
+ comparePlans(optimized, correctAnswer)
+ }
+
+ test("SPARK-23564: left outer join should filter out null join keys on
right side") {
+ val x = testRelation.subquery('x)
+ val y = testRelation.subquery('y)
+ val condition = Some("x.a".attr === "y.a".attr)
+ val originalQuery = x.join(y, LeftOuter, condition).analyze
+ val left = x
+ val right = y.where(IsNotNull('a))
+ val correctAnswer = left.join(right, LeftOuter, condition).analyze
+ val optimized = Optimize.execute(originalQuery)
+ comparePlans(optimized, correctAnswer)
+ }
+
+ test("SPARK-23564: right outer join should filter out null join keys on
left side") {
+ val x = testRelation.subquery('x)
+ val y = testRelation.subquery('y)
+ val condition = Some("x.a".attr === "y.a".attr)
+ val originalQuery = x.join(y, RightOuter, condition).analyze
+ val left = x.where(IsNotNull('a))
+ val right = y
+ val correctAnswer = left.join(right, RightOuter, condition).analyze
+ val optimized = Optimize.execute(originalQuery)
+ comparePlans(optimized, correctAnswer)
+ }
--- End diff --
Since these are simple repetitions of the previous `test("SPARK-23405:
left-semi equal-join should filter out null join keys on both sides")`, how
about adding a helper test function and simplifying them all together now?
```scala
/**
 * Shared assertion for the join-constraint tests.
 *
 * Joins `x` with `y` on `x.a === y.a` using `joinType`, analyzes the plan and
 * runs it through `Optimize`. The result is compared, via `comparePlans`,
 * against the analyzed join of `left` and `right` built with the same join
 * type and condition.
 *
 * @param x        left input of the query under test
 * @param y        right input of the query under test
 * @param left     left input of the expected plan
 * @param right    right input of the expected plan
 * @param joinType join type used for both the tested and the expected plan
 */
private def testConstraints(
    x: LogicalPlan,
    y: LogicalPlan,
    left: LogicalPlan,
    right: LogicalPlan,
    joinType: JoinType) = {
  // The same equi-join condition is used on both sides of the comparison.
  val joinCondition = Some("x.a".attr === "y.a".attr)
  val analyzedInput = x.join(y, joinType, joinCondition).analyze
  val expectedPlan = left.join(right, joinType, joinCondition).analyze
  comparePlans(Optimize.execute(analyzedInput), expectedPlan)
}
// Left-semi join: the expected plan carries an IsNotNull('a) filter on both inputs.
// The test name is kept on a single line; a single-line string literal cannot span lines.
test("SPARK-23405: left-semi equal-join should filter out null join keys on both sides") {
  val x = testRelation.subquery('x)
  val y = testRelation.subquery('y)
  testConstraints(x, y, x.where(IsNotNull('a)), y.where(IsNotNull('a)), LeftSemi)
}
// Left-anti join: the expected plan carries an IsNotNull('a) filter on the right input only.
// The test name is kept on a single line; a single-line string literal cannot span lines.
test("SPARK-23564: left anti join should filter out null join keys on right side") {
  val x = testRelation.subquery('x)
  val y = testRelation.subquery('y)
  testConstraints(x, y, x, y.where(IsNotNull('a)), LeftAnti)
}
// Left-outer join: the expected plan carries an IsNotNull('a) filter on the right input only.
// The test name is kept on a single line; a single-line string literal cannot span lines.
test("SPARK-23564: left outer join should filter out null join keys on right side") {
  val x = testRelation.subquery('x)
  val y = testRelation.subquery('y)
  testConstraints(x, y, x, y.where(IsNotNull('a)), LeftOuter)
}
// Right-outer join: the expected plan carries an IsNotNull('a) filter on the left input only.
// The test name is kept on a single line; a single-line string literal cannot span lines.
test("SPARK-23564: right outer join should filter out null join keys on left side") {
  val x = testRelation.subquery('x)
  val y = testRelation.subquery('y)
  testConstraints(x, y, x.where(IsNotNull('a)), y, RightOuter)
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]