holdenk commented on code in PR #46143:
URL: https://github.com/apache/spark/pull/46143#discussion_r2529181308
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala:
##########
@@ -152,20 +156,122 @@ class FilterPushdownSuite extends PlanTest {
test("can't push without rewrite") {
val originalQuery =
testRelation
- .select($"a" + $"b" as "e")
+ .select($"a" + $"b" as "e", $"a" - $"b" as "f")
.where($"e" === 1)
.analyze
val optimized = Optimize.execute(originalQuery.analyze)
val correctAnswer =
testRelation
.where($"a" + $"b" === 1)
- .select($"a" + $"b" as "e")
+ .select($"a" + $"b" as "e", $"a" - $"b" as "f")
+ .analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+
+ test("SPARK-47672: Avoid double evaluation with projections but push components that can be") {
+ val originalQuery = testStringRelation
+ .select($"a", $"e".rlike("magic") as "f", $"e".rlike("notmagic") as "j", $"b")
+ .where($"a" > 5 && $"f")
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery)
+
+ val correctAnswer = testStringRelation
+ .where($"a" > 5)
+ .select($"a", $"b", $"e", $"e".rlike("magic") as "f")
+ .where($"f")
Review Comment:
We could reduce how often we introduce this extra projection by only doing
it when an expensive expression is being computed. It does make the code even
more complex, but I think that saving the extra expensive evaluations is worth
the occasional introduction of a new projection node.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]