This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new aa22199  [SPARK-35665][SQL] Resolve UnresolvedAlias in CollectMetrics
aa22199 is described below

commit aa221991b3f0f1b7c91e87337f9eb5d164167f5a
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Mon Jun 7 21:05:11 2021 +0900

    [SPARK-35665][SQL] Resolve UnresolvedAlias in CollectMetrics
    
    ### What changes were proposed in this pull request?
    
    It's a long-standing bug that we forgot to resolve `UnresolvedAlias` in
    `CollectMetrics`. It's a bit hard to trigger this bug before 3.2, as most
    likely people won't create `UnresolvedAlias` when calling `Dataset.observe`.
    However, things have changed since https://github.com/apache/spark/pull/30974
    
    This PR proposes to handle `CollectMetrics` in the rule `ResolveAliases`.
    
    ### Why are the changes needed?
    
    bug fix
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    updated test
    
    Closes #32803 from cloud-fan/minor.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit a70e66ecfa638cacc99b4e9a7c464e41ec92ad30)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala   | 3 +++
 .../test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala  | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 600a5af..899e723 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -420,6 +420,9 @@ class Analyzer(override val catalogManager: CatalogManager)
 
       case Project(projectList, child) if child.resolved && hasUnresolvedAlias(projectList) =>
         Project(assignAliases(projectList), child)
+
+      case c: CollectMetrics if c.child.resolved && hasUnresolvedAlias(c.metrics) =>
+        c.copy(metrics = assignAliases(c.metrics))
     }
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala
index b17c935..e3fcf8b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala
@@ -251,7 +251,8 @@ class DataFrameCallbackSuite extends QueryTest
           name = "my_event",
           min($"id").as("min_val"),
           max($"id").as("max_val"),
-          sum($"id").as("sum_val"),
+          // Test unresolved alias
+          sum($"id"),
           count(when($"id" % 2 === 0, 1)).as("num_even"))
         .observe(
           name = "other_event",

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to