This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new fbb4019d70d1 [SPARK-55279][SQL] Add `sketch_funcs` group for 
DataSketches SQL functions
fbb4019d70d1 is described below

commit fbb4019d70d119ac397bc444adf16a3d51480b8e
Author: Kent Yao <[email protected]>
AuthorDate: Sat Jan 31 02:22:21 2026 +0800

    [SPARK-55279][SQL] Add `sketch_funcs` group for DataSketches SQL functions
    
    ### What changes were proposed in this pull request?
    
    All DataSketches-related expression functions should have their own 
`sketch_funcs` group instead of being grouped under `misc_funcs`.
    
    Move all sketch-related expression functions from `misc_funcs` to 
`sketch_funcs`:
    - **HLL sketch functions**: `hll_sketch_estimate`, `hll_union`
    - **Theta sketch functions**: `theta_sketch_estimate`, `theta_union`, 
`theta_difference`, `theta_intersection`
    - **KLL sketch functions**: `kll_sketch_to_string_*`, `kll_sketch_get_n_*`, `kll_sketch_merge_*`, `kll_sketch_get_quantile_*`, `kll_sketch_get_rank_*`
    - **Tuple sketch functions**: `tuple_sketch_*` expression functions, plus the tuple sketch set operations (difference, intersection, union)
    - **ApproxTopK**: `approx_top_k_estimate`
    
    Register `sketch_funcs` as a valid group in `ExpressionInfo.java` and add it to the groups set in `gen-sql-functions-docs.py`.
    
    Note: Aggregate functions (like `hll_sketch_agg`, `theta_sketch_agg`, 
`kll_sketch_agg_*`, etc.) remain in `agg_funcs`.
    
    ### Why are the changes needed?
    
    This PR moves **34 DataSketches-related expression functions** from 
`misc_funcs` to a dedicated `sketch_funcs` group. These 34 functions represent 
over 60% of all `misc_funcs`, making `misc_funcs` a catch-all bucket that 
reduces documentation clarity. By creating `sketch_funcs`, we achieve 
consistency with other specialized function groups (`avro_funcs`, `json_funcs`, 
`csv_funcs`, `xml_funcs`, etc.) and make it easier for users to discover and 
understand DataSketches functionality in the documentation.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No functional changes. The only difference is in how functions are grouped 
in documentation.
    
    ### How was this patch tested?
    
    Existing tests; the expected group list in `ExpressionInfoSuite` is updated to include `sketch_funcs`.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Yes, GitHub Copilot was used to assist with this change.
    
    Closes #54061 from yaooqinn/SPARK-55279-sketch-funcs-group.
    
    Authored-by: Kent Yao <[email protected]>
    Signed-off-by: Kent Yao <[email protected]>
---
 .../sql/catalyst/expressions/ExpressionInfo.java   |  4 +--
 .../expressions/ApproxTopKExpressions.scala        |  2 +-
 .../expressions/datasketchesExpressions.scala      |  4 +--
 .../sql/catalyst/expressions/kllExpressions.scala  | 30 +++++++++++-----------
 .../expressions/thetasketchesExpressions.scala     |  8 +++---
 .../sql/catalyst/expressions/tupleDifference.scala |  4 +--
 .../catalyst/expressions/tupleIntersection.scala   |  4 +--
 .../catalyst/expressions/tupleSketchEstimate.scala |  8 +++---
 .../catalyst/expressions/tupleSketchSummary.scala  |  4 +--
 .../sql/catalyst/expressions/tupleUnion.scala      |  4 +--
 .../sql/expressions/ExpressionInfoSuite.scala      |  5 ++--
 sql/gen-sql-functions-docs.py                      |  2 +-
 12 files changed, 40 insertions(+), 39 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
index 8588c8e5633f..325462f82c69 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
@@ -48,8 +48,8 @@ public class ExpressionInfo {
             "bitwise_funcs", "collection_funcs", "predicate_funcs", 
"conditional_funcs",
             "conversion_funcs", "csv_funcs", "datetime_funcs", 
"generator_funcs", "hash_funcs",
             "json_funcs", "lambda_funcs", "map_funcs", "math_funcs", 
"misc_funcs",
-            "protobuf_funcs", "string_funcs", "struct_funcs", "window_funcs", 
"xml_funcs",
-            "table_funcs", "url_funcs", "variant_funcs", "vector_funcs", 
"st_funcs"));
+            "protobuf_funcs", "sketch_funcs", "string_funcs", "struct_funcs", 
"window_funcs",
+            "xml_funcs", "table_funcs", "url_funcs", "variant_funcs", 
"vector_funcs", "st_funcs"));
 
     private static final Set<String> validSources =
             new HashSet<>(Arrays.asList("built-in", "hive", "python_udf", 
"scala_udf", "sql_udf",
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
index 53c37f0a5491..7bcfef6bcd65 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApproxTopKExpressions.scala
@@ -50,7 +50,7 @@ import org.apache.spark.sql.types._
       > SELECT _FUNC_(approx_top_k_accumulate(expr), 2) FROM VALUES 'a', 'b', 
'c', 'c', 'c', 'c', 'd', 'd' tab(expr);
        [{"item":"c","count":4},{"item":"d","count":2}]
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 // scalastyle:on line.size.limit
 case class ApproxTopKEstimate(state: Expression, k: Expression)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
index 1880d71e7d54..a9baf473c822 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.types.{AbstractDataType, 
BinaryType, BooleanType, Da
       > SELECT _FUNC_(hll_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) 
tab(col);
        3
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "3.5.0")
 case class HllSketchEstimate(child: Expression)
   extends UnaryExpression
@@ -75,7 +75,7 @@ case class HllSketchEstimate(child: Expression)
       > SELECT hll_sketch_estimate(_FUNC_(hll_sketch_agg(col1), 
hll_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) 
tab(col1, col2);
        6
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "3.5.0")
 // scalastyle:on line.size.limit
 case class HllUnion(first: Expression, second: Expression, third: Expression)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
index af6c1a32e229..01481050f280 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala
@@ -38,7 +38,7 @@ import org.apache.spark.unsafe.types.UTF8String
       > SELECT LENGTH(_FUNC_(kll_sketch_agg_bigint(col))) > 0 FROM VALUES (1), 
(2), (3), (4), (5) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchToStringBigint(child: Expression) extends 
KllSketchToStringBase {
   override protected def withNewChildInternal(newChild: Expression): 
KllSketchToStringBigint =
@@ -66,7 +66,7 @@ case class KllSketchToStringBigint(child: Expression) extends 
KllSketchToStringB
       > SELECT LENGTH(_FUNC_(kll_sketch_agg_float(col))) > 0 FROM VALUES 
(CAST(1.0 AS FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS 
FLOAT)), (CAST(5.0 AS FLOAT)) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchToStringFloat(child: Expression) extends 
KllSketchToStringBase {
   override protected def withNewChildInternal(newChild: Expression): 
KllSketchToStringFloat =
@@ -94,7 +94,7 @@ case class KllSketchToStringFloat(child: Expression) extends 
KllSketchToStringBa
       > SELECT LENGTH(_FUNC_(kll_sketch_agg_double(col))) > 0 FROM VALUES 
(CAST(1.0 AS DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0 
AS DOUBLE)), (CAST(5.0 AS DOUBLE)) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchToStringDouble(child: Expression) extends 
KllSketchToStringBase {
   override protected def withNewChildInternal(newChild: Expression): 
KllSketchToStringDouble =
@@ -132,7 +132,7 @@ abstract class KllSketchToStringBase
       > SELECT _FUNC_(kll_sketch_agg_bigint(col)) FROM VALUES (1), (2), (3), 
(4), (5) tab(col);
        5
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetNBigint(child: Expression) extends KllSketchGetNBase {
   override protected def withNewChildInternal(newChild: Expression): 
KllSketchGetNBigint =
@@ -160,7 +160,7 @@ case class KllSketchGetNBigint(child: Expression) extends 
KllSketchGetNBase {
       > SELECT _FUNC_(kll_sketch_agg_float(col)) FROM VALUES (CAST(1.0 AS 
FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS FLOAT)), 
(CAST(5.0 AS FLOAT)) tab(col);
        5
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetNFloat(child: Expression) extends KllSketchGetNBase {
   override protected def withNewChildInternal(newChild: Expression): 
KllSketchGetNFloat =
@@ -188,7 +188,7 @@ case class KllSketchGetNFloat(child: Expression) extends 
KllSketchGetNBase {
       > SELECT _FUNC_(kll_sketch_agg_double(col)) FROM VALUES (CAST(1.0 AS 
DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0 AS DOUBLE)), 
(CAST(5.0 AS DOUBLE)) tab(col);
        5
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetNDouble(child: Expression) extends KllSketchGetNBase {
   override protected def withNewChildInternal(newChild: Expression): 
KllSketchGetNDouble =
@@ -226,7 +226,7 @@ abstract class KllSketchGetNBase
       > SELECT 
LENGTH(kll_sketch_to_string_bigint(_FUNC_(kll_sketch_agg_bigint(col), 
kll_sketch_agg_bigint(col)))) > 0 FROM VALUES (1), (2), (3), (4), (5) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchMergeBigint(left: Expression, right: Expression) extends 
KllSketchMergeBase {
   override def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): Expression =
@@ -257,7 +257,7 @@ case class KllSketchMergeBigint(left: Expression, right: 
Expression) extends Kll
       > SELECT 
LENGTH(kll_sketch_to_string_float(_FUNC_(kll_sketch_agg_float(col), 
kll_sketch_agg_float(col)))) > 0 FROM VALUES (CAST(1.0 AS FLOAT)), (CAST(2.0 AS 
FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS FLOAT)), (CAST(5.0 AS FLOAT)) 
tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchMergeFloat(left: Expression, right: Expression) extends 
KllSketchMergeBase {
   override def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): Expression =
@@ -288,7 +288,7 @@ case class KllSketchMergeFloat(left: Expression, right: 
Expression) extends KllS
       > SELECT 
LENGTH(kll_sketch_to_string_double(_FUNC_(kll_sketch_agg_double(col), 
kll_sketch_agg_double(col)))) > 0 FROM VALUES (CAST(1.0 AS DOUBLE)), (CAST(2.0 
AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0 AS DOUBLE)), (CAST(5.0 AS 
DOUBLE)) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchMergeDouble(left: Expression, right: Expression) extends 
KllSketchMergeBase {
   override def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): Expression =
@@ -332,7 +332,7 @@ abstract class KllSketchMergeBase
       > SELECT _FUNC_(kll_sketch_agg_bigint(col), 0.5) > 1 FROM VALUES (1), 
(2), (3), (4), (5) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetQuantileBigint(left: Expression, right: Expression)
     extends KllSketchGetQuantileBase {
@@ -364,7 +364,7 @@ case class KllSketchGetQuantileBigint(left: Expression, 
right: Expression)
       > SELECT _FUNC_(kll_sketch_agg_float(col), 0.5) > 1 FROM VALUES 
(CAST(1.0 AS FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS 
FLOAT)), (CAST(5.0 AS FLOAT)) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetQuantileFloat(left: Expression, right: Expression)
     extends KllSketchGetQuantileBase {
@@ -396,7 +396,7 @@ case class KllSketchGetQuantileFloat(left: Expression, 
right: Expression)
       > SELECT _FUNC_(kll_sketch_agg_double(col), 0.5) > 1 FROM VALUES 
(CAST(1.0 AS DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0 
AS DOUBLE)), (CAST(5.0 AS DOUBLE)) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetQuantileDouble(left: Expression, right: Expression)
     extends KllSketchGetQuantileBase {
@@ -528,7 +528,7 @@ abstract class KllSketchGetQuantileBase
       > SELECT _FUNC_(kll_sketch_agg_bigint(col), 3) > 0.3 FROM VALUES (1), 
(2), (3), (4), (5) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetRankBigint(left: Expression, right: Expression)
     extends KllSketchGetRankBase {
@@ -556,7 +556,7 @@ case class KllSketchGetRankBigint(left: Expression, right: 
Expression)
       > SELECT _FUNC_(kll_sketch_agg_float(col), 3.0) > 0.3 FROM VALUES 
(CAST(1.0 AS FLOAT)), (CAST(2.0 AS FLOAT)), (CAST(3.0 AS FLOAT)), (CAST(4.0 AS 
FLOAT)), (CAST(5.0 AS FLOAT)) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetRankFloat(left: Expression, right: Expression)
     extends KllSketchGetRankBase {
@@ -584,7 +584,7 @@ case class KllSketchGetRankFloat(left: Expression, right: 
Expression)
       > SELECT _FUNC_(kll_sketch_agg_double(col), 3.0) > 0.3 FROM VALUES 
(CAST(1.0 AS DOUBLE)), (CAST(2.0 AS DOUBLE)), (CAST(3.0 AS DOUBLE)), (CAST(4.0 
AS DOUBLE)), (CAST(5.0 AS DOUBLE)) tab(col);
        true
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class KllSketchGetRankDouble(left: Expression, right: Expression)
     extends KllSketchGetRankBase {
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
index f662f405297b..8ac40a3fe2a5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/thetasketchesExpressions.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.types.{AbstractDataType, 
BinaryType, DataType, Integ
       > SELECT _FUNC_(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), 
(3) tab(col);
        3
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 case class ThetaSketchEstimate(child: Expression)
     extends UnaryExpression
@@ -71,7 +71,7 @@ case class ThetaSketchEstimate(child: Expression)
       > SELECT theta_sketch_estimate(_FUNC_(theta_sketch_agg(col1), 
theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) 
tab(col1, col2);
        6
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 // scalastyle:on line.size.limit
 case class ThetaUnion(first: Expression, second: Expression, third: Expression)
@@ -130,7 +130,7 @@ case class ThetaUnion(first: Expression, second: 
Expression, third: Expression)
       > SELECT theta_sketch_estimate(_FUNC_(theta_sketch_agg(col1), 
theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) 
tab(col1, col2);
        2
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 // scalastyle:on line.size.limit
 case class ThetaDifference(first: Expression, second: Expression)
@@ -178,7 +178,7 @@ case class ThetaDifference(first: Expression, second: 
Expression)
       > SELECT theta_sketch_estimate(_FUNC_(theta_sketch_agg(col1), 
theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) 
tab(col1, col2);
        2
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.1.0")
 // scalastyle:on line.size.limit
 case class ThetaIntersection(first: Expression, second: Expression)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
index bfa480100cd1..ee9c3ab22067 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleDifference.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.types.{AbstractDataType, 
BinaryType, DataType}
       > SELECT 
tuple_sketch_estimate_double(_FUNC_(tuple_sketch_agg_double(col1, val1), 
tuple_sketch_agg_double(col2, val2))) FROM VALUES (5, 5.0D, 4, 4.0D), (1, 1.0D, 
4, 4.0D), (2, 2.0D, 5, 5.0D), (3, 3.0D, 1, 1.0D) tab(col1, val1, col2, val2);
        2.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleDifferenceDouble(left: Expression, right: Expression)
@@ -72,7 +72,7 @@ case class TupleDifferenceDouble(left: Expression, right: 
Expression)
       > SELECT 
tuple_sketch_estimate_integer(_FUNC_(tuple_sketch_agg_integer(col1, val1), 
tuple_sketch_agg_integer(col2, val2))) FROM VALUES (5, 5, 4, 4), (1, 1, 4, 4), 
(2, 2, 5, 5), (3, 3, 1, 1) tab(col1, val1, col2, val2);
        2.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleDifferenceInteger(left: Expression, right: Expression)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
index bc7cb6a42d5d..b98d1fcb8034 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleIntersection.scala
@@ -39,7 +39,7 @@ import org.apache.spark.unsafe.types.UTF8String
       > SELECT 
tuple_sketch_estimate_double(_FUNC_(tuple_sketch_agg_double(col1, val1), 
tuple_sketch_agg_double(col2, val2))) FROM VALUES (1, 1.0D, 1, 4.0D), (2, 2.0D, 
2, 5.0D), (3, 3.0D, 4, 6.0D) tab(col1, val1, col2, val2);
        2.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleIntersectionDouble(first: Expression, second: Expression, 
third: Expression)
@@ -84,7 +84,7 @@ case class TupleIntersectionDouble(first: Expression, second: 
Expression, third:
       > SELECT 
tuple_sketch_estimate_integer(_FUNC_(tuple_sketch_agg_integer(col1, val1), 
tuple_sketch_agg_integer(col2, val2))) FROM VALUES (1, 1, 1, 4), (2, 2, 2, 5), 
(3, 3, 4, 6) tab(col1, val1, col2, val2);
        2.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleIntersectionInteger(first: Expression, second: Expression, 
third: Expression)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
index 999be1d03754..fcd0a048479b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchEstimate.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.types.{AbstractDataType, 
BinaryType, DataType, Doubl
       > SELECT _FUNC_(tuple_sketch_agg_double(key, summary)) FROM VALUES (1, 
1.0D), (1, 2.0D), (2, 3.0D) tab(key, summary);
        2.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleSketchEstimateDouble(child: Expression)
@@ -69,7 +69,7 @@ case class TupleSketchEstimateDouble(child: Expression)
       > SELECT _FUNC_(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1, 
1), (1, 2), (2, 3) tab(key, summary);
        2.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleSketchEstimateInteger(child: Expression)
@@ -106,7 +106,7 @@ case class TupleSketchEstimateInteger(child: Expression)
       > SELECT _FUNC_(tuple_sketch_agg_double(key, summary)) FROM VALUES (1, 
1.0D), (2, 2.0D), (3, 3.0D) tab(key, summary);
        1.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleSketchThetaDouble(child: Expression)
@@ -143,7 +143,7 @@ case class TupleSketchThetaDouble(child: Expression)
       > SELECT _FUNC_(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1, 
1), (2, 2), (3, 3) tab(key, summary);
        1.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleSketchThetaInteger(child: Expression)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
index 876240b576bc..a37a03e41e53 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleSketchSummary.scala
@@ -39,7 +39,7 @@ import org.apache.spark.unsafe.types.UTF8String
       > SELECT _FUNC_(tuple_sketch_agg_double(key, summary)) FROM VALUES (1, 
1.0D), (1, 2.0D), (2, 3.0D) tab(key, summary);
        6.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleSketchSummaryDouble(left: Expression, right: Expression)
@@ -103,7 +103,7 @@ case class TupleSketchSummaryDouble(left: Expression, 
right: Expression)
       > SELECT _FUNC_(tuple_sketch_agg_integer(key, summary)) FROM VALUES (1, 
1), (1, 2), (2, 3) tab(key, summary);
        6
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 case class TupleSketchSummaryInteger(left: Expression, right: Expression)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
index e0eb6b89b9e6..f9bd1fc62222 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/tupleUnion.scala
@@ -241,7 +241,7 @@ abstract class TupleUnionBase[S <: Summary]
       > SELECT 
tuple_sketch_estimate_double(_FUNC_(tuple_sketch_agg_double(col1, val1), 
tuple_sketch_agg_double(col2, val2))) FROM VALUES (1, 1.0D, 4, 4.0D), (2, 2.0D, 
5, 5.0D), (3, 3.0D, 6, 6.0D) tab(col1, val1, col2, val2);
        6.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 object TupleUnionDoubleExpressionBuilder extends ExpressionBuilder {
@@ -271,7 +271,7 @@ object TupleUnionDoubleExpressionBuilder extends 
ExpressionBuilder {
       > SELECT 
tuple_sketch_estimate_integer(_FUNC_(tuple_sketch_agg_integer(col1, val1), 
tuple_sketch_agg_integer(col2, val2))) FROM VALUES (1, 1, 4, 4), (2, 2, 5, 5), 
(3, 3, 6, 6) tab(col1, val1, col2, val2);
        6.0
   """,
-  group = "misc_funcs",
+  group = "sketch_funcs",
   since = "4.2.0")
 // scalastyle:on line.size.limit
 object TupleUnionIntegerExpressionBuilder extends ExpressionBuilder {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
index 8601148024a1..d063c00bd43a 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
@@ -59,8 +59,9 @@ class ExpressionInfoSuite extends SparkFunSuite with 
SharedSparkSession {
       "agg_funcs", "array_funcs", "avro_funcs", "binary_funcs", 
"bitwise_funcs", "collection_funcs",
       "predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs", 
"datetime_funcs",
       "generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs", 
"map_funcs", "math_funcs",
-      "misc_funcs", "protobuf_funcs", "string_funcs", "struct_funcs", 
"window_funcs", "xml_funcs",
-      "table_funcs", "url_funcs", "variant_funcs", "vector_funcs", 
"st_funcs").sorted
+      "misc_funcs", "protobuf_funcs", "sketch_funcs", "string_funcs", 
"struct_funcs",
+      "window_funcs", "xml_funcs", "table_funcs", "url_funcs", 
"variant_funcs", "vector_funcs",
+      "st_funcs").sorted
     val invalidGroupName = "invalid_group_funcs"
     checkError(
       exception = intercept[SparkIllegalArgumentException] {
diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py
index b43b26e03b9c..13f9ae055fa7 100644
--- a/sql/gen-sql-functions-docs.py
+++ b/sql/gen-sql-functions-docs.py
@@ -36,7 +36,7 @@ groups = {
     "bitwise_funcs", "conversion_funcs", "csv_funcs",
     "xml_funcs", "lambda_funcs", "collection_funcs",
     "url_funcs", "hash_funcs", "struct_funcs",
-    "table_funcs", "variant_funcs", "protobuf_funcs"
+    "table_funcs", "variant_funcs", "protobuf_funcs", "sketch_funcs"
 }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to