This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 63eb910  [SPARK-28646][SQL][FOLLOWUP] Add legacy config for allowing 
parameterless count
63eb910 is described below

commit 63eb9101f2a7aba626d498413ec66f46310e3af2
Author: Gengliang Wang <gengliang.w...@databricks.com>
AuthorDate: Tue Jan 12 16:31:22 2021 +0900

    [SPARK-28646][SQL][FOLLOWUP] Add legacy config for allowing parameterless 
count
    
    ### What changes were proposed in this pull request?
    
    Add a legacy configuration `spark.sql.legacy.allowParameterlessCount` in 
case users need the parameterless count.
    This is a follow-up for https://github.com/apache/spark/pull/30541.
    
    ### Why are the changes needed?
    
    There can be some users who depend on the legacy behavior. We need a legacy 
flag for it.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, adding a legacy flag `spark.sql.legacy.allowParameterlessCount`.
    
    ### How was this patch tested?
    
    Unit tests
    
    Closes #31143 from gengliangwang/countLegacy.
    
    Authored-by: Gengliang Wang <gengliang.w...@databricks.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
    (cherry picked from commit 02a17e92f1d89735094385f333d87e7a53c73e30)
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
---
 .../sql/catalyst/expressions/aggregate/Count.scala |  7 ++++--
 .../org/apache/spark/sql/internal/SQLConf.scala    |  8 +++++++
 .../src/test/resources/sql-tests/inputs/count.sql  |  5 ++++
 .../test/resources/sql-tests/results/count.sql.out | 28 ++++++++++++++++++++--
 4 files changed, 44 insertions(+), 4 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
index e4488b2..189d216 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 // scalastyle:off line.size.limit
@@ -51,8 +52,10 @@ case class Count(children: Seq[Expression]) extends 
DeclarativeAggregate {
   override def dataType: DataType = LongType
 
   override def checkInputDataTypes(): TypeCheckResult = {
-    if (children.isEmpty) {
-      TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least one 
argument.")
+    if (children.isEmpty && 
!SQLConf.get.getConf(SQLConf.ALLOW_PARAMETERLESS_COUNT)) {
+      TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least one 
argument. " +
+        s"If you have to call the function $prettyName without arguments, set 
the legacy " +
+        s"configuration `${SQLConf.ALLOW_PARAMETERLESS_COUNT.key}` as true")
     } else {
       TypeCheckResult.TypeCheckSuccess
     }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 829e6e8..d5762a6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1522,6 +1522,14 @@ object SQLConf {
         "must be positive.")
       .createWithDefault(100)
 
+  val ALLOW_PARAMETERLESS_COUNT =
+    buildConf("spark.sql.legacy.allowParameterlessCount")
+      .internal()
+      .doc("When true, the SQL function 'count' is allowed to take no 
parameters.")
+      .version("3.1.1")
+      .booleanConf
+      .createWithDefault(false)
+
   val USE_CURRENT_SQL_CONFIGS_FOR_VIEW =
     buildConf("spark.sql.legacy.useCurrentConfigsForView")
       .internal()
diff --git a/sql/core/src/test/resources/sql-tests/inputs/count.sql 
b/sql/core/src/test/resources/sql-tests/inputs/count.sql
index fc0d662..b3e37d1 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/count.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/count.sql
@@ -36,5 +36,10 @@ SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 
2,3) FROM testData;
 SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 3,2) FROM testData;
 SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData;
 
+-- legacy behavior: allow calling function count without parameters
+set spark.sql.legacy.allowParameterlessCount=true;
+SELECT count() FROM testData;
+
 -- count without expressions
+set spark.sql.legacy.allowParameterlessCount=false;
 SELECT count() FROM testData;
diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out 
b/sql/core/src/test/resources/sql-tests/results/count.sql.out
index ffd75d6..016f17a 100644
--- a/sql/core/src/test/resources/sql-tests/results/count.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 14
+-- Number of queries: 17
 
 
 -- !query
@@ -120,9 +120,33 @@ struct<count(DISTINCT 
0.8):bigint,percentile_approx(DISTINCT a, CAST(0.8 AS DOUB
 
 
 -- !query
+set spark.sql.legacy.allowParameterlessCount=true
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.legacy.allowParameterlessCount       true
+
+
+-- !query
+SELECT count() FROM testData
+-- !query schema
+struct<count():bigint>
+-- !query output
+0
+
+
+-- !query
+set spark.sql.legacy.allowParameterlessCount=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.legacy.allowParameterlessCount       false
+
+
+-- !query
 SELECT count() FROM testData
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'count()' due to data type mismatch: count requires at least 
one argument.; line 1 pos 7
+cannot resolve 'count()' due to data type mismatch: count requires at least 
one argument. If you have to call the function count without arguments, set the 
legacy configuration `spark.sql.legacy.allowParameterlessCount` as true; line 1 
pos 7


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to