cloud-fan commented on code in PR #36530:
URL: https://github.com/apache/spark/pull/36530#discussion_r873520469
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OuterJoinEliminationSuite.scala:
##########
@@ -268,4 +268,54 @@ class OuterJoinEliminationSuite extends PlanTest {
comparePlans(optimized, originalQuery.analyze)
}
+
+ test("SPARK-39172: Remove outer join if all output come from streamed side and buffered side " +
+ "keys exist unique key") {
+ val x = testRelation.subquery(Symbol("x"))
+ val y = testRelation1.subquery(Symbol("y"))
+
+ // left outer
+ comparePlans(Optimize.execute(
+ x.join(y.groupBy($"d")($"d"), LeftOuter, Some($"a" === $"d"))
+ .select($"a", $"b", $"c").analyze),
+ x.select($"a", $"b", $"c").analyze
+ )
+
+ comparePlans(Optimize.execute(
+ x.join(y.groupBy($"d")($"d", count($"d").as("x")), LeftOuter,
+ Some($"a" === $"d" && $"b" === $"x"))
+ .select($"a", $"b", $"c").analyze),
+ x.select($"a", $"b", $"c").analyze
+ )
+
+ // right outer
+ comparePlans(Optimize.execute(
+ x.groupBy($"a")($"a").join(y, RightOuter, Some($"a" === $"d"))
+ .select($"d", $"e", $"f").analyze),
+ y.select($"d", $"e", $"f").analyze
+ )
+
+ comparePlans(Optimize.execute(
+ x.groupBy($"a")($"a", count($"a").as("x")).join(y, RightOuter,
+ Some($"a" === $"d" && $"x" === $"e"))
+ .select($"d", $"e", $"f").analyze),
+ y.select($"d", $"e", $"f").analyze
+ )
+
+ // negative case
+ // not a equi-join
+ val p1 = x.join(y.groupBy($"d")($"d"), LeftOuter, Some($"a" > $"d"))
+ .select($"a").analyze
+ comparePlans(Optimize.execute(p1), p1)
+
+ // do not exist unique key
+ val p2 = x.join(y.groupBy($"d", $"e")($"d", $"e"), LeftOuter, Some($"a" === $"d"))
+ .select($"a").analyze
+ comparePlans(Optimize.execute(p2), p2)
+
+ // output comes from buffered side
Review Comment:
```suggestion
// output comes from the right side of a left outer join
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]