This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0c30983e663d [SPARK-51218][SQL] Avoid map/flatMap in
NondeterministicExpressionCollection
0c30983e663d is described below
commit 0c30983e663d65706fde2e5abbeff7f4ea57dcfa
Author: Vladimir Golubev <[email protected]>
AuthorDate: Mon Feb 17 13:06:11 2025 +0800
[SPARK-51218][SQL] Avoid map/flatMap in NondeterministicExpressionCollection
### What changes were proposed in this pull request?
Avoid `map`/`flatMap` in `NondeterministicExpressionCollection`, because
those unnecessarily create temporary collections.
### Why are the changes needed?
Optimization. Since https://github.com/apache/spark/pull/49935/files, `map`/`flatMap`
are no longer necessary, because we use a plain `java.util.LinkedHashMap`.
### Does this PR introduce _any_ user-facing change?
No, just an optimization.
### How was this patch tested?
Existing tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #49958 from
vladimirg-db/vladimirg-db/avoid-map-in-nondeterministic-expression-collection.
Authored-by: Vladimir Golubev <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../NondeterministicExpressionCollection.scala | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
index 933519fc6609..d530cfe5175b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
@@ -25,21 +25,24 @@ object NondeterministicExpressionCollection {
def getNondeterministicToAttributes(
expressions: Seq[Expression]): LinkedHashMap[Expression,
NamedExpression] = {
val nonDeterministicToAttributes = new LinkedHashMap[Expression,
NamedExpression]
- expressions
- .filterNot(_.deterministic)
- .flatMap { expr =>
+
+ for (expr <- expressions) {
+ if (!expr.deterministic) {
val leafNondeterministic = expr.collect {
- case n: Nondeterministic => n
+ case nondeterministicExpr: Nondeterministic => nondeterministicExpr
case udf: UserDefinedExpression if !udf.deterministic => udf
}
- leafNondeterministic.distinct.map { e =>
- val ne = e match {
- case n: NamedExpression => n
- case _ => Alias(e, "_nondeterministic")()
+
+ for (nondeterministicExpr <- leafNondeterministic.distinct) {
+ val namedExpression = nondeterministicExpr match {
+ case namedExpression: NamedExpression => namedExpression
+ case _ => Alias(nondeterministicExpr, "_nondeterministic")()
}
- nonDeterministicToAttributes.put(e, ne)
+ nonDeterministicToAttributes.put(nondeterministicExpr,
namedExpression)
}
}
+ }
+
nonDeterministicToAttributes
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]