This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 53c99a4270e [SPARK-42108][SQL] Make Analyzer transform `Count(*)` into 
`Count(1)`
53c99a4270e is described below

commit 53c99a4270eeaa431e18cfd7f053d8e0ae98734d
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Jan 19 10:25:07 2023 +0800

    [SPARK-42108][SQL] Make Analyzer transform `Count(*)` into `Count(1)`
    
    ### What changes were proposed in this pull request?
    Make Analyzer transform `Count(*)` into `Count(1)`
    
    ### Why are the changes needed?
    The existing `Count(*) -> Count(1)` transformation happens in 
[`AstBuilder.visitFunctionCall`](https://github.com/apache/spark/blob/97a6955278c55fa02cb9f039ae45e49e6f0f2bfd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala#L2105-L2132).
    
    The Analyzer assumes that `Count(*)` has already been converted to 
`Count(1)` by the Parser. When it is given a `Count(*)` expression directly, the 
Analyzer cannot handle it correctly, which causes a correctness issue in Spark Connect 
(see https://issues.apache.org/jira/browse/SPARK-41845).
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added a unit test and manually tested with Spark Connect.
    
    Closes #39636 from zhengruifeng/sql_move_count_star.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala  |  5 +++++
 .../spark/sql/catalyst/analysis/AnalysisSuite.scala    | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ba2c2759e2d..f0c22471afa 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1953,6 +1953,11 @@ class Analyzer(override val catalogManager: 
CatalogManager)
      */
     def expandStarExpression(expr: Expression, child: LogicalPlan): Expression 
= {
       expr.transformUp {
+        case f0: UnresolvedFunction if !f0.isDistinct &&
+          f0.nameParts.map(_.toLowerCase(Locale.ROOT)) == Seq("count") &&
+          f0.arguments == Seq(UnresolvedStar(None)) =>
+          // Transform COUNT(*) into COUNT(1).
+          f0.copy(nameParts = Seq("count"), arguments = Seq(Literal(1)))
         case f1: UnresolvedFunction if containsStar(f1.arguments) =>
           // SPECIAL CASE: We want to block count(tblName.*) because in spark, 
count(tblName.*) will
           // be expanded while count(*) will be converted to count(1). They 
will produce different
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index a91a0a44dd3..6dfbf12bbd7 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -120,6 +120,24 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       caseSensitive = false)
   }
 
+  test("SPARK-42108: transform count(*) to count(1)") {
+    val a = testRelation.output(0)
+
+    checkAnalysis(
+      Project(
+        Alias(UnresolvedFunction("count" :: Nil,
+          UnresolvedStar(None) :: Nil, isDistinct = false), "x")() :: Nil, 
testRelation),
+      Aggregate(Nil, count(Literal(1)).as("x") :: Nil, testRelation))
+
+    checkAnalysis(
+      Project(
+        Alias(UnresolvedFunction("count" :: Nil,
+          UnresolvedStar(None) :: Nil, isDistinct = false), "x")() ::
+          Alias(UnresolvedFunction("count" :: Nil,
+            UnresolvedAttribute("a") :: Nil, isDistinct = false), "y")() :: 
Nil, testRelation),
+      Aggregate(Nil, count(Literal(1)).as("x") :: count(a).as("y") :: Nil, 
testRelation))
+  }
+
   test("resolve sort references - filter/limit") {
     val a = testRelation2.output(0)
     val b = testRelation2.output(1)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to