This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fbb4019d70d1 [SPARK-55279][SQL] Add `sketch_funcs` group for
DataSketches SQL functions
fbb4019d70d1 is described below
commit fbb4019d70d119ac397bc444adf16a3d51480b8e
Author: Kent Yao <[email protected]>
AuthorDate: Sat Jan 31 02:22:21 2026 +0800
[SPARK-55279][SQL] Add `sketch_funcs` group for DataSketches SQL functions
### What changes were proposed in this pull request?
All DataSketches-related expression functions should have their own
`sketch_funcs` group instead of being grouped under `misc_funcs`.
Move all sketch-related expression functions from `misc_funcs` to
`sketch_funcs`:
- **HLL sketch functions**: `hll_sketch_estimate`, `hll_union`
- **Theta sketch functions**: `theta_sketch_estimate`, `theta_union`,
`theta_difference`, `theta_intersection`
- **KLL sketch functions**: `kll_sketch_to_string_*`, `kll_sketch_get_n_*`,
`kll_sketch_get_rank_*`, `kll_sketch_get_quantile_*`, `kll_sketch_get_pmf_*`,
`kll_sketch_get_cdf_*`, `kll_sketch_merge_*`
- **Tuple sketch functions**: `tuple_sketch_*` expression functions
- **ApproxTopK**: `approx_top_k_estimate`
Add `sketch_funcs` to the valid groups in `ExpressionInfo.java` and to the
groups set in `gen-sql-functions-docs.py`.
Note: Aggregate functions (like `hll_sketch_agg`, `theta_sketch_agg`,
`kll_sketch_agg_*`, etc.) remain in `agg_funcs`.
### Why are the changes needed?
This PR moves **34 DataSketches-related expression functions** from
`misc_funcs` to a dedicated `sketch_funcs` group. These 34 functions represent
over 60% of all `misc_funcs`, making `misc_funcs` a catch-all bucket that
reduces documentation clarity. By creating `sketch_funcs`, we achieve
consistency with other specialized function groups (`avro_funcs`, `json_funcs`,
`csv_funcs`, `xml_funcs`, etc.) and make it easier for users to discover and
understand DataSketches functionality in the generated SQL function
documentation.
### Does this PR introduce _any_ user-facing change?
No functional changes. The only difference is in how functions are grouped
in documentation.
### How was this patch tested?
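Existing tests; `ExpressionInfoSuite` is updated to expect the new
`sketch_funcs` group in the list of valid groups.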
### Was this patch authored or co-authored using generative AI tooling?
Yes, GitHub Copilot was used to assist with this change.
Closes #54061 from yaooqinn/SPARK-55279-sketch-funcs-group.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../sql/catalyst/expressions/ExpressionInfo.java | 4 +--
.../expressions/ApproxTopKExpressions.scala | 2 +-
.../expressions/datasketchesExpressions.scala | 4 +--
.../sql/catalyst/expressions/kllExpressions.scala | 30 +++++++++++-----------
.../expressions/thetasketchesExpressions.scala | 8 +++---
.../sql/catalyst/expressions/tupleDifference.scala | 4 +--
.../catalyst/expressions/tupleIntersection.scala | 4 +--
.../catalyst/expressions/tupleSketchEstimate.scala | 8 +++---
.../catalyst/expressions/tupleSketchSummary.scala | 4 +--
.../sql/catalyst/expressions/tupleUnion.scala | 4 +--
.../sql/expressions/ExpressionInfoSuite.scala | 5 ++--
sql/gen-sql-functions-docs.py | 2 +-
12 files changed, 40 insertions(+), 39 deletions(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
index 8588c8e5633f..325462f82c69 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
@@ -48,8 +48,8 @@ public class ExpressionInfo {
"bitwise_funcs", "collection_funcs", "predicate_funcs",
"conditional_funcs",
"conversion_funcs", "csv_funcs", "datetime_funcs",
"generator_funcs", "hash_funcs",
"json_funcs", "lambda_funcs", "map_funcs", "math_funcs",
"misc_funcs",
- "protobuf_funcs", "string_funcs", "struct_funcs", "window_funcs",
"xml_funcs",
- "table_funcs", "url_funcs", "variant_funcs", "vector_funcs",
"st_funcs"));
+ "protobuf_funcs", "sketch_funcs", "string_funcs", "struct_funcs",
"window_funcs",
+ "xml_funcs", "table_funcs", "url_funcs", "variant_funcs",
"vector_funcs", "st_funcs"));
private static final Set<String> validSources =
new HashSet<>(Arrays.asList("built-in", "hive", "python_udf",
"scala_udf", "sql_udf",
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
index 53c37f0a5491..7bcfef6bcd65 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
@@ -50,7 +50,7 @@ import org.apache.spark.sql.types._
> SELECT _FUNC_(approx_top_k_accumulate(expr), 2) FROM VALUES 'a', 'b',
'c', 'c', 'c', 'c', 'd', 'd' tab(expr);
[{"item":"c","count":4},{"item":"d","count":2}]
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
// scalastyle:on line.size.limit
case class ApproxTopKEstimate(state: Expression, k: Expression)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
index 1880d71e7d54..a9baf473c822 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.types.{AbstractDataType,
BinaryType, BooleanType, Da
> SELECT _FUNC_(hll_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3)
tab(col);
3
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "3.5.0")
case class HllSketchEstimate(child: Expression)
extends UnaryExpression
@@ -75,7 +75,7 @@ case class HllSketchEstimate(child: Expression)
> SELECT hll_sketch_estimate(_FUNC_(hll_sketch_agg(col1),
hll_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6)
tab(col1, col2);
6
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "3.5.0")
// scalastyle:on line.size.limit
case class HllUnion(first: Expression, second: Expression, third: Expression)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
index af6c1a32e229..01481050f280 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
@@ -38,7 +38,7 @@ import org.apache.spark.unsafe.types.UTF8String
> SELECT LENGTH(_FUNC_(kll_sketch_agg_bigint(col))) > 0 FROM VALUES (1),
(2), (3), (4), (5) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchToStringBigint(child: Expression) extends
KllSketchToStringBase {
override protected def withNewChildInternal(newChild: Expression):
KllSketchToStringBigint =
@@ -66,7 +66,7 @@ case class KllSketchToStringBigint(child: Expression) extends
KllSketchToStringB
> SELECT LENGTH(_FUNC_(kll_sketch_agg_float(col))) > 0 FROM VALUES
(CAST(1.0 AS FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS
FLOAT)), (CAST(5.0 AS FLOAT)) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchToStringFloat(child: Expression) extends
KllSketchToStringBase {
override protected def withNewChildInternal(newChild: Expression):
KllSketchToStringFloat =
@@ -94,7 +94,7 @@ case class KllSketchToStringFloat(child: Expression) extends
KllSketchToStringBa
> SELECT LENGTH(_FUNC_(kll_sketch_agg_double(col))) > 0 FROM VALUES
(CAST(1.0 AS DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0
AS DOUBLE)), (CAST(5.0 AS DOUBLE)) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchToStringDouble(child: Expression) extends
KllSketchToStringBase {
override protected def withNewChildInternal(newChild: Expression):
KllSketchToStringDouble =
@@ -132,7 +132,7 @@ abstract class KllSketchToStringBase
> SELECT _FUNC_(kll_sketch_agg_bigint(col)) FROM VALUES (1), (2), (3),
(4), (5) tab(col);
5
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetNBigint(child: Expression) extends KllSketchGetNBase {
override protected def withNewChildInternal(newChild: Expression):
KllSketchGetNBigint =
@@ -160,7 +160,7 @@ case class KllSketchGetNBigint(child: Expression) extends
KllSketchGetNBase {
> SELECT _FUNC_(kll_sketch_agg_float(col)) FROM VALUES (CAST(1.0 AS
FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS FLOAT)),
(CAST(5.0 AS FLOAT)) tab(col);
5
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetNFloat(child: Expression) extends KllSketchGetNBase {
override protected def withNewChildInternal(newChild: Expression):
KllSketchGetNFloat =
@@ -188,7 +188,7 @@ case class KllSketchGetNFloat(child: Expression) extends
KllSketchGetNBase {
> SELECT _FUNC_(kll_sketch_agg_double(col)) FROM VALUES (CAST(1.0 AS
DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0 AS DOUBLE)),
(CAST(5.0 AS DOUBLE)) tab(col);
5
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetNDouble(child: Expression) extends KllSketchGetNBase {
override protected def withNewChildInternal(newChild: Expression):
KllSketchGetNDouble =
@@ -226,7 +226,7 @@ abstract class KllSketchGetNBase
> SELECT
LENGTH(kll_sketch_to_string_bigint(_FUNC_(kll_sketch_agg_bigint(col),
kll_sketch_agg_bigint(col)))) > 0 FROM VALUES (1), (2), (3), (4), (5) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchMergeBigint(left: Expression, right: Expression) extends
KllSketchMergeBase {
override def withNewChildrenInternal(newLeft: Expression, newRight:
Expression): Expression =
@@ -257,7 +257,7 @@ case class KllSketchMergeBigint(left: Expression, right:
Expression) extends Kll
> SELECT
LENGTH(kll_sketch_to_string_float(_FUNC_(kll_sketch_agg_float(col),
kll_sketch_agg_float(col)))) > 0 FROM VALUES (CAST(1.0 AS FLOAT)), (CAST(2.0 AS
FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS FLOAT)), (CAST(5.0 AS FLOAT))
tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchMergeFloat(left: Expression, right: Expression) extends
KllSketchMergeBase {
override def withNewChildrenInternal(newLeft: Expression, newRight:
Expression): Expression =
@@ -288,7 +288,7 @@ case class KllSketchMergeFloat(left: Expression, right:
Expression) extends KllS
> SELECT
LENGTH(kll_sketch_to_string_double(_FUNC_(kll_sketch_agg_double(col),
kll_sketch_agg_double(col)))) > 0 FROM VALUES (CAST(1.0 AS DOUBLE)), (CAST(2.0
AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0 AS DOUBLE)), (CAST(5.0 AS
DOUBLE)) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchMergeDouble(left: Expression, right: Expression) extends
KllSketchMergeBase {
override def withNewChildrenInternal(newLeft: Expression, newRight:
Expression): Expression =
@@ -332,7 +332,7 @@ abstract class KllSketchMergeBase
> SELECT _FUNC_(kll_sketch_agg_bigint(col), 0.5) > 1 FROM VALUES (1),
(2), (3), (4), (5) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetQuantileBigint(left: Expression, right: Expression)
extends KllSketchGetQuantileBase {
@@ -364,7 +364,7 @@ case class KllSketchGetQuantileBigint(left: Expression,
right: Expression)
> SELECT _FUNC_(kll_sketch_agg_float(col), 0.5) > 1 FROM VALUES
(CAST(1.0 AS FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS
FLOAT)), (CAST(5.0 AS FLOAT)) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetQuantileFloat(left: Expression, right: Expression)
extends KllSketchGetQuantileBase {
@@ -396,7 +396,7 @@ case class KllSketchGetQuantileFloat(left: Expression,
right: Expression)
> SELECT _FUNC_(kll_sketch_agg_double(col), 0.5) > 1 FROM VALUES
(CAST(1.0 AS DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0
AS DOUBLE)), (CAST(5.0 AS DOUBLE)) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetQuantileDouble(left: Expression, right: Expression)
extends KllSketchGetQuantileBase {
@@ -528,7 +528,7 @@ abstract class KllSketchGetQuantileBase
> SELECT _FUNC_(kll_sketch_agg_bigint(col), 3) > 0.3 FROM VALUES (1),
(2), (3), (4), (5) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetRankBigint(left: Expression, right: Expression)
extends KllSketchGetRankBase {
@@ -556,7 +556,7 @@ case class KllSketchGetRankBigint(left: Expression, right:
Expression)
> SELECT _FUNC_(kll_sketch_agg_float(col), 3.0) > 0.3 FROM VALUES
(CAST(1.0 AS FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS
FLOAT)), (CAST(5.0 AS FLOAT)) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetRankFloat(left: Expression, right: Expression)
extends KllSketchGetRankBase {
@@ -584,7 +584,7 @@ case class KllSketchGetRankFloat(left: Expression, right:
Expression)
> SELECT _FUNC_(kll_sketch_agg_double(col), 3.0) > 0.3 FROM VALUES
(CAST(1.0 AS DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0
AS DOUBLE)), (CAST(5.0 AS DOUBLE)) tab(col);
true
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class KllSketchGetRankDouble(left: Expression, right: Expression)
extends KllSketchGetRankBase {
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
index f662f405297b..8ac40a3fe2a5 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.types.{AbstractDataType,
BinaryType, DataType, Integ
> SELECT _FUNC_(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2),
(3) tab(col);
3
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
case class ThetaSketchEstimate(child: Expression)
extends UnaryExpression
@@ -71,7 +71,7 @@ case class ThetaSketchEstimate(child: Expression)
> SELECT theta_sketch_estimate(_FUNC_(theta_sketch_agg(col1),
theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6)
tab(col1, col2);
6
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
// scalastyle:on line.size.limit
case class ThetaUnion(first: Expression, second: Expression, third: Expression)
@@ -130,7 +130,7 @@ case class ThetaUnion(first: Expression, second:
Expression, third: Expression)
> SELECT theta_sketch_estimate(_FUNC_(theta_sketch_agg(col1),
theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1)
tab(col1, col2);
2
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
// scalastyle:on line.size.limit
case class ThetaDifference(first: Expression, second: Expression)
@@ -178,7 +178,7 @@ case class ThetaDifference(first: Expression, second:
Expression)
> SELECT theta_sketch_estimate(_FUNC_(theta_sketch_agg(col1),
theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1)
tab(col1, col2);
2
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.1.0")
// scalastyle:on line.size.limit
case class ThetaIntersection(first: Expression, second: Expression)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
index bfa480100cd1..ee9c3ab22067 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.types.{AbstractDataType,
BinaryType, DataType}
> SELECT
tuple_sketch_estimate_double(_FUNC_(tuple_sketch_agg_double(col1, val1),
tuple_sketch_agg_double(col2, val2))) FROM VALUES (5, 5.0D, 4, 4.0D), (1, 1.0D,
4, 4.0D), (2, 2.0D, 5, 5.0D), (3, 3.0D, 1, 1.0D) tab(col1, val1, col2, val2);
2.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleDifferenceDouble(left: Expression, right: Expression)
@@ -72,7 +72,7 @@ case class TupleDifferenceDouble(left: Expression, right:
Expression)
> SELECT
tuple_sketch_estimate_integer(_FUNC_(tuple_sketch_agg_integer(col1, val1),
tuple_sketch_agg_integer(col2, val2))) FROM VALUES (5, 5, 4, 4), (1, 1, 4, 4),
(2, 2, 5, 5), (3, 3, 1, 1) tab(col1, val1, col2, val2);
2.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleDifferenceInteger(left: Expression, right: Expression)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
index bc7cb6a42d5d..b98d1fcb8034 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
@@ -39,7 +39,7 @@ import org.apache.spark.unsafe.types.UTF8String
> SELECT
tuple_sketch_estimate_double(_FUNC_(tuple_sketch_agg_double(col1, val1),
tuple_sketch_agg_double(col2, val2))) FROM VALUES (1, 1.0D, 1, 4.0D), (2, 2.0D,
2, 5.0D), (3, 3.0D, 4, 6.0D) tab(col1, val1, col2, val2);
2.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleIntersectionDouble(first: Expression, second: Expression,
third: Expression)
@@ -84,7 +84,7 @@ case class TupleIntersectionDouble(first: Expression, second:
Expression, third:
> SELECT
tuple_sketch_estimate_integer(_FUNC_(tuple_sketch_agg_integer(col1, val1),
tuple_sketch_agg_integer(col2, val2))) FROM VALUES (1, 1, 1, 4), (2, 2, 2, 5),
(3, 3, 4, 6) tab(col1, val1, col2, val2);
2.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleIntersectionInteger(first: Expression, second: Expression,
third: Expression)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
index 999be1d03754..fcd0a048479b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.types.{AbstractDataType,
BinaryType, DataType, Doubl
> SELECT _FUNC_(tuple_sketch_agg_double(key, summary)) FROM VALUES (1,
1.0D), (1, 2.0D), (2, 3.0D) tab(key, summary);
2.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleSketchEstimateDouble(child: Expression)
@@ -69,7 +69,7 @@ case class TupleSketchEstimateDouble(child: Expression)
> SELECT _FUNC_(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1,
1), (1, 2), (2, 3) tab(key, summary);
2.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleSketchEstimateInteger(child: Expression)
@@ -106,7 +106,7 @@ case class TupleSketchEstimateInteger(child: Expression)
> SELECT _FUNC_(tuple_sketch_agg_double(key, summary)) FROM VALUES (1,
1.0D), (2, 2.0D), (3, 3.0D) tab(key, summary);
1.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleSketchThetaDouble(child: Expression)
@@ -143,7 +143,7 @@ case class TupleSketchThetaDouble(child: Expression)
> SELECT _FUNC_(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1,
1), (2, 2), (3, 3) tab(key, summary);
1.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleSketchThetaInteger(child: Expression)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
index 876240b576bc..a37a03e41e53 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
@@ -39,7 +39,7 @@ import org.apache.spark.unsafe.types.UTF8String
> SELECT _FUNC_(tuple_sketch_agg_double(key, summary)) FROM VALUES (1,
1.0D), (1, 2.0D), (2, 3.0D) tab(key, summary);
6.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleSketchSummaryDouble(left: Expression, right: Expression)
@@ -103,7 +103,7 @@ case class TupleSketchSummaryDouble(left: Expression,
right: Expression)
> SELECT _FUNC_(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1,
1), (1, 2), (2, 3) tab(key, summary);
6
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
case class TupleSketchSummaryInteger(left: Expression, right: Expression)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
index e0eb6b89b9e6..f9bd1fc62222 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
@@ -241,7 +241,7 @@ abstract class TupleUnionBase[S <: Summary]
> SELECT
tuple_sketch_estimate_double(_FUNC_(tuple_sketch_agg_double(col1, val1),
tuple_sketch_agg_double(col2, val2))) FROM VALUES (1, 1.0D, 4, 4.0D), (2, 2.0D,
5, 5.0D), (3, 3.0D, 6, 6.0D) tab(col1, val1, col2, val2);
6.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
object TupleUnionDoubleExpressionBuilder extends ExpressionBuilder {
@@ -271,7 +271,7 @@ object TupleUnionDoubleExpressionBuilder extends
ExpressionBuilder {
> SELECT
tuple_sketch_estimate_integer(_FUNC_(tuple_sketch_agg_integer(col1, val1),
tuple_sketch_agg_integer(col2, val2))) FROM VALUES (1, 1, 4, 4), (2, 2, 5, 5),
(3, 3, 6, 6) tab(col1, val1, col2, val2);
6.0
""",
- group = "misc_funcs",
+ group = "sketch_funcs",
since = "4.2.0")
// scalastyle:on line.size.limit
object TupleUnionIntegerExpressionBuilder extends ExpressionBuilder {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
index 8601148024a1..d063c00bd43a 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
@@ -59,8 +59,9 @@ class ExpressionInfoSuite extends SparkFunSuite with
SharedSparkSession {
"agg_funcs", "array_funcs", "avro_funcs", "binary_funcs",
"bitwise_funcs", "collection_funcs",
"predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs",
"datetime_funcs",
"generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs",
"map_funcs", "math_funcs",
- "misc_funcs", "protobuf_funcs", "string_funcs", "struct_funcs",
"window_funcs", "xml_funcs",
- "table_funcs", "url_funcs", "variant_funcs", "vector_funcs",
"st_funcs").sorted
+ "misc_funcs", "protobuf_funcs", "sketch_funcs", "string_funcs",
"struct_funcs",
+ "window_funcs", "xml_funcs", "table_funcs", "url_funcs",
"variant_funcs", "vector_funcs",
+ "st_funcs").sorted
val invalidGroupName = "invalid_group_funcs"
checkError(
exception = intercept[SparkIllegalArgumentException] {
diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py
index b43b26e03b9c..13f9ae055fa7 100644
--- a/sql/gen-sql-functions-docs.py
+++ b/sql/gen-sql-functions-docs.py
@@ -36,7 +36,7 @@ groups = {
"bitwise_funcs", "conversion_funcs", "csv_funcs",
"xml_funcs", "lambda_funcs", "collection_funcs",
"url_funcs", "hash_funcs", "struct_funcs",
- "table_funcs", "variant_funcs", "protobuf_funcs"
+ "table_funcs", "variant_funcs", "protobuf_funcs", "sketch_funcs"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]