This is an automated email from the ASF dual-hosted git repository.
beliefer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f1283c126878 [SPARK-46009][SQL][CONNECT] Merge the parse rule of
PercentileCont and PercentileDisc into functionCall
f1283c126878 is described below
commit f1283c12687853f9cd190f8db69d97abe16a2d88
Author: Jiaan Geng <[email protected]>
AuthorDate: Tue Dec 5 09:28:30 2023 +0800
[SPARK-46009][SQL][CONNECT] Merge the parse rule of PercentileCont and
PercentileDisc into functionCall
### What changes were proposed in this pull request?
Spark SQL parser have a special rule to parse
`[percentile_cont|percentile_disc](percentage) WITHIN GROUP (ORDER BY v)`.
We should merge this rule into the `functionCall`.
### Why are the changes needed?
Merge the parse rule of `PercentileCont` and `PercentileDisc` into
`functionCall`.
### Does this PR introduce _any_ user-facing change?
'No'.
### How was this patch tested?
New test cases.
### Was this patch authored or co-authored using generative AI tooling?
'No'.
Closes #43910 from beliefer/SPARK-46009.
Authored-by: Jiaan Geng <[email protected]>
Signed-off-by: Jiaan Geng <[email protected]>
---
common/utils/src/main/resources/error/README.md | 1 +
.../src/main/resources/error/error-classes.json | 23 ++
.../sql/connect/planner/SparkConnectPlanner.scala | 7 +-
...id-inverse-distribution-function-error-class.md | 40 +++
docs/sql-error-conditions.md | 8 +
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 4 +-
.../spark/sql/catalyst/analysis/Analyzer.scala | 29 +-
.../sql/catalyst/analysis/FunctionRegistry.scala | 2 +
.../spark/sql/catalyst/analysis/unresolved.scala | 32 ++-
.../aggregate/SupportsOrderingWithinGroup.scala | 27 ++
.../expressions/aggregate/percentiles.scala | 81 +++++-
.../spark/sql/catalyst/parser/AstBuilder.scala | 34 +--
.../spark/sql/errors/QueryCompilationErrors.scala | 24 ++
.../sql/catalyst/parser/PlanParserSuite.scala | 49 +++-
.../sql-functions/sql-expression-schema.md | 2 +
.../sql-tests/analyzer-results/percentiles.sql.out | 275 +++++++++++++++++++
.../resources/sql-tests/inputs/percentiles.sql | 48 ++++
.../sql-tests/results/percentiles.sql.out | 299 +++++++++++++++++++++
18 files changed, 922 insertions(+), 63 deletions(-)
diff --git a/common/utils/src/main/resources/error/README.md
b/common/utils/src/main/resources/error/README.md
index c9fdd84e7442..556a634e9927 100644
--- a/common/utils/src/main/resources/error/README.md
+++ b/common/utils/src/main/resources/error/README.md
@@ -1309,6 +1309,7 @@ The following SQLSTATEs are collated from:
|HZ320 |HZ |RDA-specific condition |320
|version not supported |RDA/SQL |Y
|RDA/SQL
|
|HZ321 |HZ |RDA-specific condition |321
|TCP/IP error |RDA/SQL |Y
|RDA/SQL
|
|HZ322 |HZ |RDA-specific condition |322
|TLS alert |RDA/SQL |Y
|RDA/SQL
|
+|ID001 |IM |Invalid inverse distribution function |001
|Invalid inverse distribution function |SQL/Foundation |N
|SQL/Foundation PostgreSQL Oracle Snowflake Redshift H2
|
|IM001 |IM |ODBC driver |001
|Driver does not support this function |SQL Server |N
|SQL Server
|
|IM002 |IM |ODBC driver |002
|Data source name not found and no default driver specified |SQL Server |N
|SQL Server
|
|IM003 |IM |ODBC driver |003
|Specified driver could not be loaded |SQL Server |N
|SQL Server
|
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index 6795ebcb0bd0..a808be9510cf 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -1858,6 +1858,29 @@
},
"sqlState" : "42000"
},
+ "INVALID_INVERSE_DISTRIBUTION_FUNCTION" : {
+ "message" : [
+ "Invalid inverse distribution function <funcName>."
+ ],
+ "subClass" : {
+ "DISTINCT_UNSUPPORTED" : {
+ "message" : [
+ "Cannot use DISTINCT with WITHIN GROUP."
+ ]
+ },
+ "WITHIN_GROUP_MISSING" : {
+ "message" : [
+ "WITHIN GROUP is required for inverse distribution function."
+ ]
+ },
+ "WRONG_NUM_ORDERINGS" : {
+ "message" : [
+ "Requires <expectedNum> orderings in WITHIN GROUP but got
<actualNum>."
+ ]
+ }
+ },
+ "sqlState" : "ID001"
+ },
"INVALID_JSON_ROOT_FIELD" : {
"message" : [
"Cannot convert JSON root field to target Spark type."
diff --git
a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index dc1730c78267..5c73f07a3b0e 100644
---
a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -1714,7 +1714,7 @@ class SparkConnectPlanner(
children(2) match {
case Literal(b, BinaryType) if b != null =>
(Some(b.asInstanceOf[Array[Byte]]), Map.empty[String, String])
- case UnresolvedFunction(Seq("map"), arguments, _, _, _) =>
+ case UnresolvedFunction(Seq("map"), arguments, _, _, _, _) =>
(None, ExprUtils.convertToMapData(CreateMap(arguments)))
case other =>
throw InvalidPlanInput(
@@ -1730,7 +1730,7 @@ class SparkConnectPlanner(
s"DescFilePath in $functionName should be a literal binary, but
got $other")
}
val map = children(3) match {
- case UnresolvedFunction(Seq("map"), arguments, _, _, _) =>
+ case UnresolvedFunction(Seq("map"), arguments, _, _, _, _) =>
ExprUtils.convertToMapData(CreateMap(arguments))
case other =>
throw InvalidPlanInput(
@@ -2064,7 +2064,8 @@ class SparkConnectPlanner(
@scala.annotation.tailrec
private def extractMapData(expr: Expression, field: String): Map[String,
String] = expr match {
case map: CreateMap => ExprUtils.convertToMapData(map)
- case UnresolvedFunction(Seq("map"), args, _, _, _) =>
extractMapData(CreateMap(args), field)
+ case UnresolvedFunction(Seq("map"), args, _, _, _, _) =>
+ extractMapData(CreateMap(args), field)
case other => throw InvalidPlanInput(s"$field should be created by map,
but got $other")
}
diff --git
a/docs/sql-error-conditions-invalid-inverse-distribution-function-error-class.md
b/docs/sql-error-conditions-invalid-inverse-distribution-function-error-class.md
new file mode 100644
index 000000000000..d2091c72f43c
--- /dev/null
+++
b/docs/sql-error-conditions-invalid-inverse-distribution-function-error-class.md
@@ -0,0 +1,40 @@
+---
+layout: global
+title: INVALID_INVERSE_DISTRIBUTION_FUNCTION error class
+displayTitle: INVALID_INVERSE_DISTRIBUTION_FUNCTION error class
+license: |
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+---
+
+SQLSTATE: ID001
+
+Invalid inverse distribution function `<funcName>`.
+
+This error class has the following derived error classes:
+
+## DISTINCT_UNSUPPORTED
+
+Cannot use DISTINCT with WITHIN GROUP.
+
+## WITHIN_GROUP_MISSING
+
+WITHIN GROUP is required for inverse distribution function.
+
+## WRONG_NUM_ORDERINGS
+
+Requires `<expectedNum>` orderings in WITHIN GROUP but got `<actualNum>`.
+
+
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 1943f8359572..8074ba9233d0 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -1125,6 +1125,14 @@ Invalid inline table.
For more details see
[INVALID_INLINE_TABLE](sql-error-conditions-invalid-inline-table-error-class.html)
+###
[INVALID_INVERSE_DISTRIBUTION_FUNCTION](sql-error-conditions-invalid-inverse-distribution-function-error-class.html)
+
+SQLSTATE: ID001
+
+Invalid inverse distribution function `<funcName>`.
+
+For more details see
[INVALID_INVERSE_DISTRIBUTION_FUNCTION](sql-error-conditions-invalid-inverse-distribution-function-error-class.html)
+
### INVALID_JSON_ROOT_FIELD
[SQLSTATE: 22032](sql-error-conditions-sqlstates.html#class-22-data-exception)
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 439a12c30181..0ab3c5ac0c36 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -979,6 +979,7 @@ primaryExpression
| LEFT_PAREN query RIGHT_PAREN
#subqueryExpression
| functionName LEFT_PAREN (setQuantifier? argument+=functionArgument
(COMMA argument+=functionArgument)*)? RIGHT_PAREN
+ (WITHIN GROUP LEFT_PAREN ORDER BY sortItem (COMMA sortItem)*
RIGHT_PAREN)?
(FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)?
(nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)?
#functionCall
| identifier ARROW expression
#lambda
@@ -994,9 +995,6 @@ primaryExpression
FROM srcStr=valueExpression RIGHT_PAREN
#trim
| OVERLAY LEFT_PAREN input=valueExpression PLACING replace=valueExpression
FROM position=valueExpression (FOR length=valueExpression)? RIGHT_PAREN
#overlay
- | name=(PERCENTILE_CONT | PERCENTILE_DISC) LEFT_PAREN
percentage=valueExpression RIGHT_PAREN
- WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN
- (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? ( OVER
windowSpec)? #percentile
;
literalType
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 6ddc3cbbd80f..e5961b46e743 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -2047,7 +2047,7 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
val externalFunctionNameSet = new mutable.HashSet[Seq[String]]()
plan.resolveExpressionsWithPruning(_.containsAnyPattern(UNRESOLVED_FUNCTION)) {
- case f @ UnresolvedFunction(nameParts, _, _, _, _) =>
+ case f @ UnresolvedFunction(nameParts, _, _, _, _, _) =>
if
(ResolveFunctions.lookupBuiltinOrTempFunction(nameParts).isDefined) {
f
} else {
@@ -2207,7 +2207,7 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
q.transformExpressionsUpWithPruning(
_.containsAnyPattern(UNRESOLVED_FUNCTION, GENERATOR),
ruleId) {
- case u @ UnresolvedFunction(nameParts, arguments, _, _, _)
+ case u @ UnresolvedFunction(nameParts, arguments, _, _, _, _)
if hasLambdaAndResolvedArguments(arguments) => withPosition(u) {
resolveBuiltinOrTempFunction(nameParts, arguments, Some(u)).map {
case func: HigherOrderFunction => func
@@ -2236,7 +2236,7 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
}
}
- case u @ UnresolvedFunction(nameParts, arguments, _, _, _) =>
withPosition(u) {
+ case u @ UnresolvedFunction(nameParts, arguments, _, _, _, _) =>
withPosition(u) {
resolveBuiltinOrTempFunction(nameParts, arguments,
Some(u)).getOrElse {
val CatalogAndIdentifier(catalog, ident) =
expandIdentifier(nameParts)
if (CatalogV2Util.isSessionCatalog(catalog)) {
@@ -2329,6 +2329,16 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
numArgs: Int,
u: UnresolvedFunction): Expression = {
func match {
+ case owg: SupportsOrderingWithinGroup if u.isDistinct =>
+ throw
QueryCompilationErrors.distinctInverseDistributionFunctionUnsupportedError(
+ owg.prettyName)
+ case owg: SupportsOrderingWithinGroup if u.orderingWithinGroup.isEmpty
=>
+ throw
QueryCompilationErrors.inverseDistributionFunctionMissingWithinGroupError(
+ owg.prettyName)
+ case f
+ if !f.isInstanceOf[SupportsOrderingWithinGroup] &&
u.orderingWithinGroup.nonEmpty =>
+ throw QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+ func.prettyName, "WITHIN GROUP (ORDER BY ...)")
// AggregateWindowFunctions are AggregateFunctions that can only be
evaluated within
// the context of a Window clause. They do not need to be wrapped in an
// AggregateExpression.
@@ -2371,6 +2381,13 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
case agg: AggregateFunction =>
// Note: PythonUDAF does not support these advanced clauses.
if (agg.isInstanceOf[PythonUDAF])
checkUnsupportedAggregateClause(agg, u)
+ // After parse, the inverse distribution functions not set the
ordering within group yet.
+ val newAgg = agg match {
+ case owg: SupportsOrderingWithinGroup if
u.orderingWithinGroup.nonEmpty =>
+ owg.withOrderingWithinGroup(u.orderingWithinGroup)
+ case _ =>
+ agg
+ }
u.filter match {
case Some(filter) if !filter.deterministic =>
@@ -2384,17 +2401,17 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
case _ =>
}
if (u.ignoreNulls) {
- val aggFunc = agg match {
+ val aggFunc = newAgg match {
case first: First => first.copy(ignoreNulls = u.ignoreNulls)
case last: Last => last.copy(ignoreNulls = u.ignoreNulls)
case any_value: AnyValue => any_value.copy(ignoreNulls =
u.ignoreNulls)
case _ =>
throw
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
- agg.prettyName, "IGNORE NULLS")
+ newAgg.prettyName, "IGNORE NULLS")
}
aggFunc.toAggregateExpression(u.isDistinct, u.filter)
} else {
- agg.toAggregateExpression(u.isDistinct, u.filter)
+ newAgg.toAggregateExpression(u.isDistinct, u.filter)
}
// This function is not an aggregate function, just return the
resolved one.
case other =>
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index c8b0e8be4c07..016f764c7002 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -478,6 +478,8 @@ object FunctionRegistry {
expression[Min]("min"),
expression[MinBy]("min_by"),
expression[Percentile]("percentile"),
+ expressionBuilder("percentile_cont", PercentileContBuilder),
+ expressionBuilder("percentile_disc", PercentileDiscBuilder),
expression[Median]("median"),
expression[Skewness]("skewness"),
expression[ApproximatePercentile]("percentile_approx"),
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 9342d29245aa..97912fb5d592 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -300,11 +300,12 @@ case class UnresolvedFunction(
arguments: Seq[Expression],
isDistinct: Boolean,
filter: Option[Expression] = None,
- ignoreNulls: Boolean = false)
+ ignoreNulls: Boolean = false,
+ orderingWithinGroup: Seq[SortOrder] = Seq.empty)
extends Expression with Unevaluable {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
- override def children: Seq[Expression] = arguments ++ filter.toSeq
+ override def children: Seq[Expression] = arguments ++ filter.toSeq ++
orderingWithinGroup
override def dataType: DataType = throw new UnresolvedException("dataType")
override def nullable: Boolean = throw new UnresolvedException("nullable")
@@ -320,9 +321,22 @@ case class UnresolvedFunction(
override protected def withNewChildrenInternal(
newChildren: IndexedSeq[Expression]): UnresolvedFunction = {
if (filter.isDefined) {
- copy(arguments = newChildren.dropRight(1), filter =
Some(newChildren.last))
- } else {
+ if (orderingWithinGroup.isEmpty) {
+ copy(arguments = newChildren.dropRight(1), filter =
Some(newChildren.last))
+ } else {
+ val nonArgs = newChildren.takeRight(orderingWithinGroup.length + 1)
+ val newSortOrders = nonArgs.tail.asInstanceOf[Seq[SortOrder]]
+ val newFilter = Some(nonArgs.head)
+ val newArgs = newChildren.dropRight(orderingWithinGroup.length + 1)
+ copy(arguments = newArgs, filter = newFilter, orderingWithinGroup =
newSortOrders)
+ }
+ } else if (orderingWithinGroup.isEmpty) {
copy(arguments = newChildren)
+ } else {
+ val newSortOrders =
+
newChildren.takeRight(orderingWithinGroup.length).asInstanceOf[Seq[SortOrder]]
+ val newArgs = newChildren.dropRight(orderingWithinGroup.length)
+ copy(arguments = newArgs, orderingWithinGroup = newSortOrders)
}
}
}
@@ -854,3 +868,13 @@ case class TempResolvedColumn(
copy(child = newChild)
final override val nodePatterns: Seq[TreePattern] = Seq(TEMP_RESOLVED_COLUMN)
}
+
+/**
+ * A place holder expression used in inverse distribution functions,
+ * will be replaced after analyze.
+ */
+case object UnresolvedWithinGroup extends LeafExpression with Unevaluable {
+ override def nullable: Boolean = throw new UnresolvedException("nullable")
+ override def dataType: DataType = throw new UnresolvedException("dataType")
+ override lazy val resolved = false
+}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/SupportsOrderingWithinGroup.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/SupportsOrderingWithinGroup.scala
new file mode 100644
index 000000000000..16ba962dc16c
--- /dev/null
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/SupportsOrderingWithinGroup.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions.aggregate
+
+import org.apache.spark.sql.catalyst.expressions.SortOrder
+
+/**
+ * The trait used to set the [[SortOrder]] after inverse distribution
functions parsed.
+ */
+trait SupportsOrderingWithinGroup { self: AggregateFunction =>
+ def withOrderingWithinGroup(orderingWithinGroup: Seq[SortOrder]):
AggregateFunction
+}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
index da27ba4b128d..c074b19f2419 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
@@ -20,14 +20,14 @@ package org.apache.spark.sql.catalyst.expressions.aggregate
import java.util
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder,
TypeCheckResult, UnresolvedWithinGroup}
import
org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch,
TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.Cast._
import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike, UnaryLike}
import org.apache.spark.sql.catalyst.types.PhysicalDataType
import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.TypeCollection.NumericAndAnsiInterval
@@ -359,6 +359,7 @@ case class PercentileCont(left: Expression, right:
Expression, reverse: Boolean
extends AggregateFunction
with RuntimeReplaceableAggregate
with ImplicitCastInputTypes
+ with SupportsOrderingWithinGroup
with BinaryLike[Expression] {
private lazy val percentile = new Percentile(left, right, reverse)
override def replacement: Expression = percentile
@@ -374,6 +375,17 @@ case class PercentileCont(left: Expression, right:
Expression, reverse: Boolean
percentile.checkInputDataTypes()
}
+ override def withOrderingWithinGroup(orderingWithinGroup: Seq[SortOrder]):
AggregateFunction = {
+ if (orderingWithinGroup.length != 1) {
+ throw
QueryCompilationErrors.wrongNumOrderingsForInverseDistributionFunctionError(
+ nodeName, 1, orderingWithinGroup.length)
+ }
+ orderingWithinGroup.head match {
+ case SortOrder(child, Ascending, _, _) => this.copy(left = child)
+ case SortOrder(child, Descending, _, _) => this.copy(left = child,
reverse = true)
+ }
+ }
+
override protected def withNewChildrenInternal(
newLeft: Expression, newRight: Expression): PercentileCont =
this.copy(left = newLeft, right = newRight)
@@ -394,7 +406,7 @@ case class PercentileDisc(
mutableAggBufferOffset: Int = 0,
inputAggBufferOffset: Int = 0,
legacyCalculation: Boolean =
SQLConf.get.getConf(SQLConf.LEGACY_PERCENTILE_DISC_CALCULATION))
- extends PercentileBase with BinaryLike[Expression] {
+ extends PercentileBase with SupportsOrderingWithinGroup with
BinaryLike[Expression] {
val frequencyExpression: Expression = Literal(1L)
@@ -417,6 +429,17 @@ case class PercentileDisc(
s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY
${left.sql}$direction)"
}
+ override def withOrderingWithinGroup(orderingWithinGroup: Seq[SortOrder]):
AggregateFunction = {
+ if (orderingWithinGroup.length != 1) {
+ throw
QueryCompilationErrors.wrongNumOrderingsForInverseDistributionFunctionError(
+ nodeName, 1, orderingWithinGroup.length)
+ }
+ orderingWithinGroup.head match {
+ case SortOrder(expr, Ascending, _, _) => this.copy(child = expr)
+ case SortOrder(expr, Descending, _, _) => this.copy(child = expr,
reverse = true)
+ }
+ }
+
override protected def withNewChildrenInternal(
newLeft: Expression, newRight: Expression): PercentileDisc = copy(
child = newLeft,
@@ -444,3 +467,55 @@ case class PercentileDisc(
}
}
}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(percentage) WITHIN GROUP (ORDER BY col) - Return a
percentile value based on " +
+ "a continuous distribution of numeric or ANSI interval column `col` at the
given " +
+ "`percentage` (specified in ORDER BY clause).",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(0.25) WITHIN GROUP (ORDER BY col) FROM VALUES (0), (10)
AS tab(col);
+ 2.5
+ > SELECT _FUNC_(0.25) WITHIN GROUP (ORDER BY col) FROM VALUES (INTERVAL
'0' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+ 0-2
+ """,
+ group = "agg_funcs",
+ since = "4.0.0")
+// scalastyle:on line.size.limit
+object PercentileContBuilder extends ExpressionBuilder {
+ override def build(funcName: String, expressions: Seq[Expression]):
Expression = {
+ val numArgs = expressions.length
+ if (numArgs == 1) {
+ PercentileCont(UnresolvedWithinGroup, expressions(0))
+ } else {
+ throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(1), numArgs)
+ }
+ }
+}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(percentage) WITHIN GROUP (ORDER BY col) - Return a
percentile value based on " +
+ "a discrete distribution of numeric or ANSI interval column `col` at the
given " +
+ "`percentage` (specified in ORDER BY clause).",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(0.25) WITHIN GROUP (ORDER BY col) FROM VALUES (0), (10)
AS tab(col);
+ 0.0
+ > SELECT _FUNC_(0.25) WITHIN GROUP (ORDER BY col) FROM VALUES (INTERVAL
'0' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+ 0-0
+ """,
+ group = "agg_funcs",
+ since = "4.0.0")
+// scalastyle:on line.size.limit
+object PercentileDiscBuilder extends ExpressionBuilder {
+ override def build(funcName: String, expressions: Seq[Expression]):
Expression = {
+ val numArgs = expressions.length
+ if (numArgs == 1) {
+ PercentileDisc(UnresolvedWithinGroup, expressions(0))
+ } else {
+ throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(1), numArgs)
+ }
+ }
+}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 49bde72c48db..4af1801aaa18 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier,
SQLConfHelper, TableId
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.catalog.{BucketSpec,
CatalogStorageFormat, ClusterBySpec}
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.{AnyValue, First,
Last, PercentileCont, PercentileDisc}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AnyValue, First,
Last}
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
@@ -2206,35 +2206,6 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
UnresolvedFunction("extract", arguments, isDistinct = false)
}
- /**
- * Create a Percentile expression.
- */
- override def visitPercentile(ctx: PercentileContext): Expression =
withOrigin(ctx) {
- val percentage = expression(ctx.percentage)
- val sortOrder = visitSortItem(ctx.sortItem)
- val percentile = ctx.name.getType match {
- case SqlBaseParser.PERCENTILE_CONT =>
- sortOrder.direction match {
- case Ascending => PercentileCont(sortOrder.child, percentage)
- case Descending => PercentileCont(sortOrder.child, percentage, true)
- }
- case SqlBaseParser.PERCENTILE_DISC =>
- sortOrder.direction match {
- case Ascending => PercentileDisc(sortOrder.child, percentage)
- case Descending => PercentileDisc(sortOrder.child, percentage, true)
- }
- }
- val filter = Option(ctx.where).map(expression(_))
- val aggregateExpression = percentile.toAggregateExpression(false, filter)
- ctx.windowSpec match {
- case spec: WindowRefContext =>
- UnresolvedWindowExpression(aggregateExpression, visitWindowRef(spec))
- case spec: WindowDefContext =>
- WindowExpression(aggregateExpression, visitWindowDef(spec))
- case _ => aggregateExpression
- }
- }
-
/**
* Create a Substring/Substr expression.
*/
@@ -2296,6 +2267,7 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
case expressions =>
expressions
}
+ val order = ctx.sortItem.asScala.map(visitSortItem)
val filter = Option(ctx.where).map(expression(_))
val ignoreNulls =
Option(ctx.nullsOption).map(_.getType ==
SqlBaseParser.IGNORE).getOrElse(false)
@@ -2313,7 +2285,7 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
val funcCtx = ctx.functionName
val func = withFuncIdentClause(
funcCtx,
- ident => UnresolvedFunction(ident, arguments, isDistinct, filter,
ignoreNulls)
+ ident => UnresolvedFunction(ident, arguments, isDistinct, filter,
ignoreNulls, order.toSeq)
)
// Check if the function is evaluated in a windowed context.
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index bbc51e5cbd67..b7e10dc194a0 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -639,6 +639,30 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
messageParameters = Map.empty)
}
+ def distinctInverseDistributionFunctionUnsupportedError(funcName: String):
Throwable = {
+ new AnalysisException(
+ errorClass =
"INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED",
+ messageParameters = Map("funcName" -> toSQLId(funcName)))
+ }
+
+ def inverseDistributionFunctionMissingWithinGroupError(funcName: String):
Throwable = {
+ new AnalysisException(
+ errorClass =
"INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING",
+ messageParameters = Map("funcName" -> toSQLId(funcName)))
+ }
+
+ def wrongNumOrderingsForInverseDistributionFunctionError(
+ funcName: String,
+ validOrderingsNumber: Int,
+ actualOrderingsNumber: Int): Throwable = {
+ new AnalysisException(
+ errorClass = "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS",
+ messageParameters = Map(
+ "funcName" -> toSQLId(funcName),
+ "expectedNum" -> validOrderingsNumber.toString,
+ "actualNum" -> actualOrderingsNumber.toString))
+ }
+
def aliasNumberNotMatchColumnNumberError(
columnSize: Int, outputSize: Int, t: TreeNode[_]): Throwable = {
new AnalysisException(
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index c483aa1997fc..17dd7349e7be 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -23,7 +23,6 @@ import org.apache.spark.SparkThrowable
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, NamedParameter,
PosParameter, RelationTimeTravel, UnresolvedAlias, UnresolvedAttribute,
UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable,
UnresolvedRelation, UnresolvedStar, UnresolvedSubqueryColumnAliases,
UnresolvedTableValuedFunction, UnresolvedTVFAliases}
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.{PercentileCont,
PercentileDisc}
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf
@@ -1813,38 +1812,62 @@ class PlanParserSuite extends AnalysisTest {
assertPercentilePlans(
"SELECT PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY col)",
- PercentileCont(UnresolvedAttribute("col"), Literal(Decimal(0.1),
DecimalType(1, 1)))
- .toAggregateExpression()
+ UnresolvedFunction(
+ Seq("PERCENTILE_CONT"),
+ Seq(Literal(Decimal(0.1), DecimalType(1, 1))),
+ false,
+ None,
+ orderingWithinGroup = Seq(SortOrder(UnresolvedAttribute("col"),
Ascending)))
)
assertPercentilePlans(
"SELECT PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY col DESC)",
- PercentileCont(UnresolvedAttribute("col"),
- Literal(Decimal(0.1), DecimalType(1, 1)), true).toAggregateExpression()
+ UnresolvedFunction(
+ Seq("PERCENTILE_CONT"),
+ Seq(Literal(Decimal(0.1), DecimalType(1, 1))),
+ false,
+ None,
+ orderingWithinGroup = Seq(SortOrder(UnresolvedAttribute("col"),
Descending)))
)
assertPercentilePlans(
"SELECT PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY col) FILTER (WHERE
id > 10)",
- PercentileCont(UnresolvedAttribute("col"), Literal(Decimal(0.1),
DecimalType(1, 1)))
- .toAggregateExpression(false,
Some(GreaterThan(UnresolvedAttribute("id"), Literal(10))))
+ UnresolvedFunction(
+ Seq("PERCENTILE_CONT"),
+ Seq(Literal(Decimal(0.1), DecimalType(1, 1))),
+ false,
+ Some(GreaterThan(UnresolvedAttribute("id"), Literal(10))),
+ orderingWithinGroup = Seq(SortOrder(UnresolvedAttribute("col"),
Ascending)))
)
assertPercentilePlans(
"SELECT PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY col)",
- PercentileDisc(UnresolvedAttribute("col"), Literal(Decimal(0.1),
DecimalType(1, 1)))
- .toAggregateExpression()
+ UnresolvedFunction(
+ Seq("PERCENTILE_DISC"),
+ Seq(Literal(Decimal(0.1), DecimalType(1, 1))),
+ false,
+ None,
+ orderingWithinGroup = Seq(SortOrder(UnresolvedAttribute("col"),
Ascending)))
)
assertPercentilePlans(
"SELECT PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY col DESC)",
- PercentileDisc(UnresolvedAttribute("col"),
- Literal(Decimal(0.1), DecimalType(1, 1)), true).toAggregateExpression()
+ UnresolvedFunction(
+ Seq("PERCENTILE_DISC"),
+ Seq(Literal(Decimal(0.1), DecimalType(1, 1))),
+ false,
+ None,
+ orderingWithinGroup = Seq(SortOrder(UnresolvedAttribute("col"),
Descending)))
)
assertPercentilePlans(
"SELECT PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY col) FILTER (WHERE
id > 10)",
- PercentileDisc(UnresolvedAttribute("col"), Literal(Decimal(0.1),
DecimalType(1, 1)))
- .toAggregateExpression(false,
Some(GreaterThan(UnresolvedAttribute("id"), Literal(10))))
+ UnresolvedFunction(
+ Seq("PERCENTILE_DISC"),
+ Seq(Literal(Decimal(0.1), DecimalType(1, 1))),
+ false,
+ Some(GreaterThan(UnresolvedAttribute("id"), Literal(10))),
+ orderingWithinGroup = Seq(SortOrder(UnresolvedAttribute("col"),
Ascending)))
)
}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 8e6bad11c09a..1cdd061e1d3d 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -409,6 +409,8 @@
| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT
min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y) |
struct<min_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Mode | mode | SELECT
mode(col) FROM VALUES (0), (10), (10) AS tab(col) | struct<mode(col,
false):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile
| SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) |
struct<percentile(col, 0.3, 1):double> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.PercentileContBuilder |
percentile_cont | SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY col) FROM
VALUES (0), (10) AS tab(col) | struct<percentile_cont(0.25) WITHIN GROUP (ORDER
BY col):double> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.PercentileDiscBuilder |
percentile_disc | SELECT percentile_disc(0.25) WITHIN GROUP (ORDER BY col) FROM
VALUES (0), (10) AS tab(col) | struct<percentile_disc(0.25) WITHIN GROUP (ORDER
BY col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.RegrAvgX | regr_avgx |
SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x)
| struct<regr_avgx(y, x):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.RegrAvgY | regr_avgy |
SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x)
| struct<regr_avgy(y, x):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.RegrCount | regr_count |
SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x)
| struct<regr_count(y, x):bigint> |
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out
index 1ab3ecec920d..90ef5ac35a1e 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out
@@ -149,6 +149,281 @@ Aggregate [median(v#x) AS median(v)#x, percentile(v#x,
cast(0.5 as double), 1, 0
+- LocalRelation [k#x, v#x]
+-- !query
+SELECT
+ round(v, 0) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "round",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 46,
+ "fragment" : "round(v, 0) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ round(v, 0) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "round",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 68,
+ "fragment" : "round(v, 0) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile(v, 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "percentile",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 53,
+ "fragment" : "percentile(v, 0.5) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile(v, 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "percentile",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 75,
+ "fragment" : "percentile(v, 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION
BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 64,
+ "fragment" : "percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 86,
+ "fragment" : "percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v) OVER
(PARTITION BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont() WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 52,
+ "fragment" : "percentile_cont() WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont() WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 74,
+ "fragment" : "percentile_cont() WITHIN GROUP (ORDER BY v) OVER (PARTITION
BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(0.5)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 29,
+ "fragment" : "percentile_cont(0.5)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(0.5) OVER (PARTITION BY k)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 51,
+ "fragment" : "percentile_cont(0.5) OVER (PARTITION BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY k, v)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "actualNum" : "2",
+ "expectedNum" : "1",
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 58,
+ "fragment" : "percentile_cont(0.5) WITHIN GROUP (ORDER BY k, v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(k, 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "2",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 58,
+ "fragment" : "percentile_cont(k, 0.5) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
-- !query
SELECT
k,
diff --git a/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql
b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql
index eae8a71be7e5..4b3e8708222a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql
@@ -63,6 +63,54 @@ SELECT
percentile_cont(0.5) WITHIN GROUP (ORDER BY v)
FROM aggr;
+SELECT
+ round(v, 0) WITHIN GROUP (ORDER BY v)
+FROM aggr;
+
+SELECT
+ round(v, 0) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr;
+
+SELECT
+ percentile(v, 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr;
+
+SELECT
+ percentile(v, 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr;
+
+SELECT
+ percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr;
+
+SELECT
+ percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr;
+
+SELECT
+ percentile_cont() WITHIN GROUP (ORDER BY v)
+FROM aggr;
+
+SELECT
+ percentile_cont() WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr;
+
+SELECT
+ percentile_cont(0.5)
+FROM aggr;
+
+SELECT
+ percentile_cont(0.5) OVER (PARTITION BY k)
+FROM aggr;
+
+SELECT
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY k, v)
+FROM aggr;
+
+SELECT
+ percentile_cont(k, 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr;
+
SELECT
k,
median(v),
diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
index 70d3155d3173..5dc430522e51 100644
--- a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
@@ -113,6 +113,305 @@ struct<median(v):double,percentile(v, 0.5,
1):double,percentile_cont(0.5) WITHIN
20.0 20.0 20.0
+-- !query
+SELECT
+ round(v, 0) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "round",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 46,
+ "fragment" : "round(v, 0) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ round(v, 0) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "round",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 68,
+ "fragment" : "round(v, 0) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile(v, 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "percentile",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 53,
+ "fragment" : "percentile(v, 0.5) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile(v, 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1023",
+ "messageParameters" : {
+ "prettyName" : "percentile",
+ "syntax" : "WITHIN GROUP (ORDER BY ...)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 75,
+ "fragment" : "percentile(v, 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION
BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 64,
+ "fragment" : "percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 86,
+ "fragment" : "percentile_cont(DISTINCT 0.5) WITHIN GROUP (ORDER BY v) OVER
(PARTITION BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont() WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 52,
+ "fragment" : "percentile_cont() WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont() WITHIN GROUP (ORDER BY v) OVER (PARTITION BY k)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 74,
+ "fragment" : "percentile_cont() WITHIN GROUP (ORDER BY v) OVER (PARTITION
BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(0.5)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 29,
+ "fragment" : "percentile_cont(0.5)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(0.5) OVER (PARTITION BY k)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 51,
+ "fragment" : "percentile_cont(0.5) OVER (PARTITION BY k)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY k, v)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS",
+ "sqlState" : "ID001",
+ "messageParameters" : {
+ "actualNum" : "2",
+ "expectedNum" : "1",
+ "funcName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 58,
+ "fragment" : "percentile_cont(0.5) WITHIN GROUP (ORDER BY k, v)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ percentile_cont(k, 0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "2",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`percentile_cont`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 10,
+ "stopIndex" : 58,
+ "fragment" : "percentile_cont(k, 0.5) WITHIN GROUP (ORDER BY v)"
+ } ]
+}
+
+
-- !query
SELECT
k,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]