This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 63eb910 [SPARK-28646][SQL][FOLLOWUP] Add legacy config for allowing parameterless count 63eb910 is described below commit 63eb9101f2a7aba626d498413ec66f46310e3af2 Author: Gengliang Wang <gengliang.w...@databricks.com> AuthorDate: Tue Jan 12 16:31:22 2021 +0900 [SPARK-28646][SQL][FOLLOWUP] Add legacy config for allowing parameterless count ### What changes were proposed in this pull request? Add a legacy configuration `spark.sql.legacy.allowParameterlessCount` in case users need the parameterless count. This is a follow-up for https://github.com/apache/spark/pull/30541. ### Why are the changes needed? Some users may depend on the legacy behavior, so we need a legacy flag for it. ### Does this PR introduce _any_ user-facing change? Yes, adding a legacy flag `spark.sql.legacy.allowParameterlessCount`. ### How was this patch tested? Unit tests Closes #31143 from gengliangwang/countLegacy. Authored-by: Gengliang Wang <gengliang.w...@databricks.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> (cherry picked from commit 02a17e92f1d89735094385f333d87e7a53c73e30) Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- .../sql/catalyst/expressions/aggregate/Count.scala | 7 ++++-- .../org/apache/spark/sql/internal/SQLConf.scala | 8 +++++++ .../src/test/resources/sql-tests/inputs/count.sql | 5 ++++ .../test/resources/sql-tests/results/count.sql.out | 28 ++++++++++++++++++++-- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala index e4488b2..189d216 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala @@ -20,6 +20,7 @@ package 
org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ // scalastyle:off line.size.limit @@ -51,8 +52,10 @@ case class Count(children: Seq[Expression]) extends DeclarativeAggregate { override def dataType: DataType = LongType override def checkInputDataTypes(): TypeCheckResult = { - if (children.isEmpty) { - TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least one argument.") + if (children.isEmpty && !SQLConf.get.getConf(SQLConf.ALLOW_PARAMETERLESS_COUNT)) { + TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least one argument. " + + s"If you have to call the function $prettyName without arguments, set the legacy " + + s"configuration `${SQLConf.ALLOW_PARAMETERLESS_COUNT.key}` as true") } else { TypeCheckResult.TypeCheckSuccess } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 829e6e8..d5762a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1522,6 +1522,14 @@ object SQLConf { "must be positive.") .createWithDefault(100) + val ALLOW_PARAMETERLESS_COUNT = + buildConf("spark.sql.legacy.allowParameterlessCount") + .internal() + .doc("When true, the SQL function 'count' is allowed to take no parameters.") + .version("3.1.1") + .booleanConf + .createWithDefault(false) + val USE_CURRENT_SQL_CONFIGS_FOR_VIEW = buildConf("spark.sql.legacy.useCurrentConfigsForView") .internal() diff --git a/sql/core/src/test/resources/sql-tests/inputs/count.sql b/sql/core/src/test/resources/sql-tests/inputs/count.sql index fc0d662..b3e37d1 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/count.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/count.sql @@ -36,5 +36,10 @@ SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 2,3) FROM testData; SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 3,2) FROM testData; SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData; +-- legacy behavior: allow calling function count without parameters +set spark.sql.legacy.allowParameterlessCount=true; +SELECT count() FROM testData; + -- count without expressions +set spark.sql.legacy.allowParameterlessCount=false; SELECT count() FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/count.sql.out index ffd75d6..016f17a 100644 --- a/sql/core/src/test/resources/sql-tests/results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 14 +-- Number of queries: 17 -- !query @@ -120,9 +120,33 @@ struct<count(DISTINCT 0.8):bigint,percentile_approx(DISTINCT a, CAST(0.8 AS DOUB -- !query +set spark.sql.legacy.allowParameterlessCount=true +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.legacy.allowParameterlessCount true + + +-- !query +SELECT count() FROM testData +-- !query schema +struct<count():bigint> +-- !query output +0 + + +-- !query +set spark.sql.legacy.allowParameterlessCount=false +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.legacy.allowParameterlessCount false + + +-- !query SELECT count() FROM testData -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 +cannot resolve 'count()' due to data type mismatch: count requires at least one argument. 
If you have to call the function count without arguments, set the legacy configuration `spark.sql.legacy.allowParameterlessCount` as true; line 1 pos 7 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org