This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c30983e663d [SPARK-51218][SQL] Avoid map/flatMap in 
NondeterministicExpressionCollection
0c30983e663d is described below

commit 0c30983e663d65706fde2e5abbeff7f4ea57dcfa
Author: Vladimir Golubev <[email protected]>
AuthorDate: Mon Feb 17 13:06:11 2025 +0800

    [SPARK-51218][SQL] Avoid map/flatMap in NondeterministicExpressionCollection
    
    ### What changes were proposed in this pull request?
    
    Avoid `map`/`flatMap` in `NondeterministicExpressionCollection`, because 
those unnecessarily create temporary collections.
    
    ### Why are the changes needed?
    
    Optimization. Since https://github.com/apache/spark/pull/49935/files using 
`map`/`flatMap` is no longer necessary, because we use a plain `java.util.LinkedHashMap`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, just an optimization.
    
    ### How was this patch tested?
    
    Existing tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #49958 from 
vladimirg-db/vladimirg-db/avoid-map-in-nondeterministic-expression-collection.
    
    Authored-by: Vladimir Golubev <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../NondeterministicExpressionCollection.scala      | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
index 933519fc6609..d530cfe5175b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NondeterministicExpressionCollection.scala
@@ -25,21 +25,24 @@ object NondeterministicExpressionCollection {
   def getNondeterministicToAttributes(
       expressions: Seq[Expression]): LinkedHashMap[Expression, 
NamedExpression] = {
     val nonDeterministicToAttributes = new LinkedHashMap[Expression, 
NamedExpression]
-    expressions
-      .filterNot(_.deterministic)
-      .flatMap { expr =>
+
+    for (expr <- expressions) {
+      if (!expr.deterministic) {
         val leafNondeterministic = expr.collect {
-          case n: Nondeterministic => n
+          case nondeterministicExpr: Nondeterministic => nondeterministicExpr
           case udf: UserDefinedExpression if !udf.deterministic => udf
         }
-        leafNondeterministic.distinct.map { e =>
-          val ne = e match {
-            case n: NamedExpression => n
-            case _ => Alias(e, "_nondeterministic")()
+
+        for (nondeterministicExpr <- leafNondeterministic.distinct) {
+          val namedExpression = nondeterministicExpr match {
+            case namedExpression: NamedExpression => namedExpression
+            case _ => Alias(nondeterministicExpr, "_nondeterministic")()
           }
-          nonDeterministicToAttributes.put(e, ne)
+          nonDeterministicToAttributes.put(nondeterministicExpr, 
namedExpression)
         }
       }
+    }
+
     nonDeterministicToAttributes
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to