This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 17da43803fd [SPARK-45355][SQL][DOCS] Fix function groups in Scala Doc
17da43803fd is described below
commit 17da43803fd4c405fda00ffc2c7f4ff835ab24aa
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Wed Oct 4 18:52:15 2023 +0800
[SPARK-45355][SQL][DOCS] Fix function groups in Scala Doc
### What changes were proposed in this pull request?
Regroup the functions in the Scala API docs to make them consistent with the SQL function reference.
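For context, Scaladoc grouping is driven by `@groupname` tags on the enclosing object (declaring each group and its display title) and a `@group` tag on every member. A minimal sketch of the mechanism (the object name is illustrative, not the actual Spark source):

```scala
import org.apache.spark.sql.Column

/**
 * Each @groupname declares a group id and the section title shown in the docs.
 *
 * @groupname array_funcs Array functions
 * @groupname map_funcs Map functions
 */
object MyFunctions { // hypothetical object, for illustration only
  /**
   * Creates a new array column. The input columns must all have the same data type.
   *
   * @group array_funcs
   * @since 1.4.0
   */
  @scala.annotation.varargs
  def array(cols: Column*): Column = ???
}
```

Scaladoc only renders these sections when invoked with the `-groups` flag; members without a `@group` tag land in the default group.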
### Why are the changes needed?
Many functions in
https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html
were not listed under the proper groups.
### Does this PR introduce _any_ user-facing change?
Yes. For example:
before:
(screenshot omitted in the mail archive)

after:
(screenshot omitted in the mail archive)
### How was this patch tested?
Manually checked.
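(To inspect the regrouped pages locally, the Scala API docs can be regenerated; assuming the standard Spark build, something like `./build/sbt unidoc` produces them. The exact task name is an assumption, not part of this patch.)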
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43148 from zhengruifeng/scala_functions_group.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
.../sql/catalyst/expressions/predicates.scala | 2 +-
.../scala/org/apache/spark/sql/functions.scala | 325 +++++++++++----------
2 files changed, 171 insertions(+), 156 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 419d11b13a2..9eefcef8e17 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -1150,7 +1150,7 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
true
""",
since = "3.4.0",
- group = "misc_funcs")
+ group = "predicate_funcs")
case class EqualNull(left: Expression, right: Expression, replacement: Expression)
extends RuntimeReplaceable with InheritAnalysisRules {
def this(left: Expression, right: Expression) = this(left, right, EqualNullSafe(left, right))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 58a994a0ea2..b698eafa7fc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -62,16 +62,28 @@ import org.apache.spark.util.Utils
* only `Column` but also other types such as a native string. The other variants currently exist
* for historical reasons.
*
- * @groupname udf_funcs UDF functions
+ * @groupname udf_funcs UDF, UDAF and UDT
* @groupname agg_funcs Aggregate functions
- * @groupname datetime_funcs Date time functions
- * @groupname sort_funcs Sorting functions
- * @groupname normal_funcs Non-aggregate functions
- * @groupname math_funcs Math functions
+ * @groupname datetime_funcs Date and Timestamp functions
+ * @groupname sort_funcs Sort functions
+ * @groupname normal_funcs Normal functions
+ * @groupname math_funcs Mathematical functions
+ * @groupname bitwise_funcs Bitwise functions
+ * @groupname predicate_funcs Predicate functions
+ * @groupname conditional_funcs Conditional functions
+ * @groupname hash_funcs Hash functions
* @groupname misc_funcs Misc functions
* @groupname window_funcs Window functions
+ * @groupname generator_funcs Generator functions
* @groupname string_funcs String functions
* @groupname collection_funcs Collection functions
+ * @groupname array_funcs Array functions
+ * @groupname map_funcs Map functions
+ * @groupname struct_funcs Struct functions
+ * @groupname csv_funcs CSV functions
+ * @groupname json_funcs JSON functions
+ * @groupname xml_funcs XML functions
+ * @groupname url_funcs URL functions
* @groupname partition_transforms Partition transform functions
* @groupname Ungrouped Support functions for DataFrames
* @since 1.3.0
@@ -1613,7 +1625,7 @@ object functions {
/**
* Creates a new array column. The input columns must all have the same data type.
*
- * @group normal_funcs
+ * @group array_funcs
* @since 1.4.0
*/
@scala.annotation.varargs
@@ -1622,7 +1634,7 @@ object functions {
/**
* Creates a new array column. The input columns must all have the same data type.
*
- * @group normal_funcs
+ * @group array_funcs
* @since 1.4.0
*/
@scala.annotation.varargs
@@ -1635,7 +1647,7 @@ object functions {
* (key1, value1, key2, value2, ...). The key columns must all have the same data type, and can't
* be null. The value columns must all have the same data type.
*
- * @group normal_funcs
+ * @group map_funcs
* @since 2.0
*/
@scala.annotation.varargs
@@ -1644,7 +1656,7 @@ object functions {
/**
* Creates a struct with the given field names and values.
*
- * @group normal_funcs
+ * @group struct_funcs
* @since 3.5.0
*/
def named_struct(cols: Column*): Column = Column.fn("named_struct", cols: _*)
@@ -1653,7 +1665,7 @@ object functions {
* Creates a new map column. The array in the first column is used for keys. The array in the
* second column is used for values. All elements in the array for key should not be null.
*
- * @group normal_funcs
+ * @group map_funcs
* @since 2.4
*/
def map_from_arrays(keys: Column, values: Column): Column =
@@ -1710,7 +1722,7 @@ object functions {
* For example, `coalesce(a, b, c)` will return a if a is not null,
* or b if a is null and b is not null, or c if both a and b are null but c is not null.
*
- * @group normal_funcs
+ * @group conditional_funcs
* @since 1.3.0
*/
@scala.annotation.varargs
@@ -1719,7 +1731,7 @@ object functions {
/**
* Creates a string column for the file name of the current Spark task.
*
- * @group normal_funcs
+ * @group misc_funcs
* @since 1.6.0
*/
def input_file_name(): Column = Column.fn("input_file_name")
@@ -1727,7 +1739,7 @@ object functions {
/**
* Return true iff the column is NaN.
*
- * @group normal_funcs
+ * @group predicate_funcs
* @since 1.6.0
*/
def isnan(e: Column): Column = e.isNaN
@@ -1735,7 +1747,7 @@ object functions {
/**
* Return true iff the column is null.
*
- * @group normal_funcs
+ * @group predicate_funcs
* @since 1.6.0
*/
def isnull(e: Column): Column = e.isNull
@@ -1755,7 +1767,7 @@ object functions {
* 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
* }}}
*
- * @group normal_funcs
+ * @group misc_funcs
* @since 1.4.0
*/
@deprecated("Use monotonically_increasing_id()", "2.0.0")
@@ -1776,7 +1788,7 @@ object functions {
* 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
* }}}
*
- * @group normal_funcs
+ * @group misc_funcs
* @since 1.6.0
*/
def monotonically_increasing_id(): Column = Column.fn("monotonically_increasing_id")
@@ -1786,7 +1798,7 @@ object functions {
*
* Both inputs should be floating point columns (DoubleType or FloatType).
*
- * @group normal_funcs
+ * @group conditional_funcs
* @since 1.5.0
*/
def nanvl(col1: Column, col2: Column): Column = Column.fn("nanvl", col1, col2)
@@ -1802,7 +1814,7 @@ object functions {
* df.select( negate(df.col("amount")) );
* }}}
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.3.0
*/
def negate(e: Column): Column = -e
@@ -1817,7 +1829,7 @@ object functions {
* df.filter( not(df.col("isActive")) );
* }}}
*
- * @group normal_funcs
+ * @group predicate_funcs
* @since 1.3.0
*/
def not(e: Column): Column = !e
@@ -1828,7 +1840,7 @@ object functions {
*
* @note The function is non-deterministic in general case.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.4.0
*/
def rand(seed: Long): Column = withExpr { Rand(seed) }
@@ -1839,7 +1851,7 @@ object functions {
*
* @note The function is non-deterministic in general case.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.4.0
*/
def rand(): Column = rand(Utils.random.nextLong)
@@ -1850,7 +1862,7 @@ object functions {
*
* @note The function is non-deterministic in general case.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.4.0
*/
def randn(seed: Long): Column = withExpr { Randn(seed) }
@@ -1861,7 +1873,7 @@ object functions {
*
* @note The function is non-deterministic in general case.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.4.0
*/
def randn(): Column = randn(Utils.random.nextLong)
@@ -1871,7 +1883,7 @@ object functions {
*
* @note This is non-deterministic because it depends on data partitioning and task scheduling.
*
- * @group normal_funcs
+ * @group misc_funcs
* @since 1.6.0
*/
def spark_partition_id(): Column = Column.fn("spark_partition_id")
@@ -1951,7 +1963,7 @@ object functions {
* otherwise, the newly generated StructField's name would be auto generated as
* `col` with a suffix `index + 1`, i.e. col1, col2, col3, ...
*
- * @group normal_funcs
+ * @group struct_funcs
* @since 1.4.0
*/
@scala.annotation.varargs
@@ -1960,7 +1972,7 @@ object functions {
/**
* Creates a new struct column that composes multiple input columns.
*
- * @group normal_funcs
+ * @group struct_funcs
* @since 1.4.0
*/
@scala.annotation.varargs
@@ -1986,7 +1998,7 @@ object functions {
* .otherwise(2))
* }}}
*
- * @group normal_funcs
+ * @group conditional_funcs
* @since 1.4.0
*/
def when(condition: Column, value: Any): Column = withExpr {
@@ -1996,7 +2008,7 @@ object functions {
/**
* Computes bitwise NOT (~) of a number.
*
- * @group normal_funcs
+ * @group bitwise_funcs
* @since 1.4.0
*/
@deprecated("Use bitwise_not", "3.2.0")
@@ -2005,7 +2017,7 @@ object functions {
/**
* Computes bitwise NOT (~) of a number.
*
- * @group normal_funcs
+ * @group bitwise_funcs
* @since 3.2.0
*/
def bitwise_not(e: Column): Column = Column.fn("~", e)
@@ -2490,7 +2502,7 @@ object functions {
* Returns the greatest value of the list of values, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.5.0
*/
@scala.annotation.varargs
@@ -2500,7 +2512,7 @@ object functions {
* Returns the greatest value of the list of column names, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.5.0
*/
@scala.annotation.varargs
@@ -2594,7 +2606,7 @@ object functions {
* Returns the least value of the list of values, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.5.0
*/
@scala.annotation.varargs
@@ -2604,7 +2616,7 @@ object functions {
* Returns the least value of the list of column names, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
- * @group normal_funcs
+ * @group math_funcs
* @since 1.5.0
*/
@scala.annotation.varargs
@@ -2887,7 +2899,7 @@ object functions {
* Shift the given value numBits left. If the given value is a long value, this function
* will return a long value else it will return an integer value.
*
- * @group math_funcs
+ * @group bitwise_funcs
* @since 1.5.0
*/
@deprecated("Use shiftleft", "3.2.0")
@@ -2897,7 +2909,7 @@ object functions {
* Shift the given value numBits left. If the given value is a long value, this function
* will return a long value else it will return an integer value.
*
- * @group math_funcs
+ * @group bitwise_funcs
* @since 3.2.0
*/
def shiftleft(e: Column, numBits: Int): Column = Column.fn("shiftleft", e, lit(numBits))
@@ -2906,7 +2918,7 @@ object functions {
* (Signed) shift the given value numBits right. If the given value is a long value, it will
* return a long value else it will return an integer value.
*
- * @group math_funcs
+ * @group bitwise_funcs
* @since 1.5.0
*/
@deprecated("Use shiftright", "3.2.0")
@@ -2916,7 +2928,7 @@ object functions {
* (Signed) shift the given value numBits right. If the given value is a long value, it will
* return a long value else it will return an integer value.
*
- * @group math_funcs
+ * @group bitwise_funcs
* @since 3.2.0
*/
def shiftright(e: Column, numBits: Int): Column = Column.fn("shiftright", e, lit(numBits))
@@ -2925,7 +2937,7 @@ object functions {
* Unsigned shift the given value numBits right. If the given value is a long value,
* it will return a long value else it will return an integer value.
*
- * @group math_funcs
+ * @group bitwise_funcs
* @since 1.5.0
*/
@deprecated("Use shiftrightunsigned", "3.2.0")
@@ -2935,7 +2947,7 @@ object functions {
* Unsigned shift the given value numBits right. If the given value is a long value,
* it will return a long value else it will return an integer value.
*
- * @group math_funcs
+ * @group bitwise_funcs
* @since 3.2.0
*/
def shiftrightunsigned(e: Column, numBits: Int): Column =
@@ -3165,7 +3177,7 @@ object functions {
* Calculates the MD5 digest of a binary column and returns the value
* as a 32 character hex string.
*
- * @group misc_funcs
+ * @group hash_funcs
* @since 1.5.0
*/
def md5(e: Column): Column = Column.fn("md5", e)
@@ -3174,7 +3186,7 @@ object functions {
* Calculates the SHA-1 digest of a binary column and returns the value
* as a 40 character hex string.
*
- * @group misc_funcs
+ * @group hash_funcs
* @since 1.5.0
*/
def sha1(e: Column): Column = Column.fn("sha1", e)
@@ -3186,7 +3198,7 @@ object functions {
* @param e column to compute SHA-2 on.
* @param numBits one of 224, 256, 384, or 512.
*
- * @group misc_funcs
+ * @group hash_funcs
* @since 1.5.0
*/
def sha2(e: Column, numBits: Int): Column = {
@@ -3200,7 +3212,7 @@ object functions {
* Calculates the cyclic redundancy check value (CRC32) of a binary column and
* returns the value as a bigint.
*
- * @group misc_funcs
+ * @group hash_funcs
* @since 1.5.0
*/
def crc32(e: Column): Column = Column.fn("crc32", e)
@@ -3208,7 +3220,7 @@ object functions {
/**
* Calculates the hash code of given columns, and returns the result as an int column.
*
- * @group misc_funcs
+ * @group hash_funcs
* @since 2.0.0
*/
@scala.annotation.varargs
@@ -3219,7 +3231,7 @@ object functions {
* variant of the xxHash algorithm, and returns the result as a long
* column. The hash computation uses an initial seed of 42.
*
- * @group misc_funcs
+ * @group hash_funcs
* @since 3.0.0
*/
@scala.annotation.varargs
@@ -3578,7 +3590,7 @@ object functions {
/**
* Returns a sha1 hash value as a hex string of the `col`.
*
- * @group misc_funcs
+ * @group hash_funcs
* @since 3.5.0
*/
def sha(col: Column): Column = Column.fn("sha", col)
@@ -3645,7 +3657,7 @@ object functions {
* Separates `col1`, ..., `colk` into `n` rows. Uses column names col0, col1, etc. by default
* unless specified otherwise.
*
- * @group misc_funcs
+ * @group generator_funcs
* @since 3.5.0
*/
def stack(cols: Column*): Column = Column.fn("stack", cols: _*)
@@ -3654,7 +3666,7 @@ object functions {
* Returns a random value with independent and identically distributed (i.i.d.) uniformly
* distributed values in [0, 1).
*
- * @group misc_funcs
+ * @group math_funcs
* @since 3.5.0
*/
def random(seed: Column): Column = call_function("random", seed)
@@ -3663,7 +3675,7 @@ object functions {
* Returns a random value with independent and identically distributed (i.i.d.) uniformly
* distributed values in [0, 1).
*
- * @group misc_funcs
+ * @group math_funcs
* @since 3.5.0
*/
def random(): Column = random(lit(Utils.random.nextLong))
@@ -3690,7 +3702,7 @@ object functions {
* Returns a bitmap with the positions of the bits set from all the values from the input column.
* The input column will most likely be bitmap_bit_position().
*
- * @group misc_funcs
+ * @group agg_funcs
* @since 3.5.0
*/
def bitmap_construct_agg(col: Column): Column =
@@ -3708,7 +3720,7 @@ object functions {
* Returns a bitmap that is the bitwise OR of all of the bitmaps from the input column.
* The input column should be bitmaps created from bitmap_construct_agg().
*
- * @group misc_funcs
+ * @group agg_funcs
* @since 3.5.0
*/
def bitmap_or_agg(col: Column): Column = Column.fn("bitmap_or_agg", col)
@@ -3937,7 +3949,7 @@ object functions {
/**
* Returns true if `str` matches `regexp`, or false otherwise.
*
- * @group string_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def rlike(str: Column, regexp: Column): Column = Column.fn("rlike", str, regexp)
@@ -3945,7 +3957,7 @@ object functions {
/**
* Returns true if `str` matches `regexp`, or false otherwise.
*
- * @group string_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def regexp(str: Column, regexp: Column): Column = Column.fn("regexp", str, regexp)
@@ -3953,7 +3965,7 @@ object functions {
/**
* Returns true if `str` matches `regexp`, or false otherwise.
*
- * @group string_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def regexp_like(str: Column, regexp: Column): Column = Column.fn("regexp_like", str, regexp)
@@ -4423,7 +4435,7 @@ object functions {
/**
* Extracts a part from a URL.
*
- * @group string_funcs
+ * @group url_funcs
* @since 3.5.0
*/
def parse_url(url: Column, partToExtract: Column, key: Column): Column =
@@ -4432,7 +4444,7 @@ object functions {
/**
* Extracts a part from a URL.
*
- * @group string_funcs
+ * @group url_funcs
* @since 3.5.0
*/
def parse_url(url: Column, partToExtract: Column): Column =
@@ -4451,7 +4463,7 @@ object functions {
* Decodes a `str` in 'application/x-www-form-urlencoded' format
* using a specific encoding scheme.
*
- * @group string_funcs
+ * @group url_funcs
* @since 3.5.0
*/
def url_decode(str: Column): Column = Column.fn("url_decode", str)
@@ -4460,7 +4472,7 @@ object functions {
* Translates a string into 'application/x-www-form-urlencoded' format
* using a specific encoding scheme.
*
- * @group string_funcs
+ * @group url_funcs
* @since 3.5.0
*/
def url_encode(str: Column): Column = Column.fn("url_encode", str)
@@ -4625,7 +4637,7 @@ object functions {
* Returns true if str matches `pattern` with `escapeChar`, null if any arguments are null,
* false otherwise.
*
- * @group string_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def like(str: Column, pattern: Column, escapeChar: Column): Column = withExpr {
@@ -4641,7 +4653,7 @@ object functions {
* Returns true if str matches `pattern` with `escapeChar`('\'), null if any arguments are null,
* false otherwise.
*
- * @group string_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def like(str: Column, pattern: Column): Column = Column.fn("like", str, pattern)
@@ -4650,7 +4662,7 @@ object functions {
* Returns true if str matches `pattern` with `escapeChar` case-insensitively, null if any
* arguments are null, false otherwise.
*
- * @group string_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def ilike(str: Column, pattern: Column, escapeChar: Column): Column = withExpr {
@@ -4666,7 +4678,7 @@ object functions {
* Returns true if str matches `pattern` with `escapeChar`('\') case-insensitively, null if any
* arguments are null, false otherwise.
*
- * @group string_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def ilike(str: Column, pattern: Column): Column = Column.fn("ilike", str, pattern)
@@ -5732,7 +5744,7 @@ object functions {
/**
* Returns null if the array is null, true if the array contains `value`, and false otherwise.
- * @group collection_funcs
+ * @group array_funcs
* @since 1.5.0
*/
def array_contains(column: Column, value: Any): Column =
@@ -5742,7 +5754,7 @@ object functions {
* Returns an ARRAY containing all elements from the source ARRAY as well as the new element.
* The new element/column is located at end of the ARRAY.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 3.4.0
*/
def array_append(column: Column, element: Any): Column =
@@ -5752,7 +5764,7 @@ object functions {
* Returns `true` if `a1` and `a2` have at least one non-null element in common. If not and both
* the arrays are non-empty and any of them contains a `null`, it returns `null`. It returns
* `false` otherwise.
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def arrays_overlap(a1: Column, a2: Column): Column = Column.fn("arrays_overlap", a1, a2)
@@ -5765,7 +5777,7 @@ object functions {
* @param start the starting index
* @param length the length of the slice
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def slice(x: Column, start: Int, length: Int): Column =
@@ -5779,7 +5791,7 @@ object functions {
* @param start the starting index
* @param length the length of the slice
*
- * @group collection_funcs
+ * @group array_funcs
* @since 3.1.0
*/
def slice(x: Column, start: Column, length: Column): Column =
@@ -5788,7 +5800,7 @@ object functions {
/**
* Concatenates the elements of `column` using the `delimiter`. Null values are replaced with
* `nullReplacement`.
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_join(column: Column, delimiter: String, nullReplacement: String): Column =
@@ -5796,7 +5808,7 @@ object functions {
/**
* Concatenates the elements of `column` using the `delimiter`.
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_join(column: Column, delimiter: String): Column =
@@ -5821,7 +5833,7 @@ object functions {
* @note The position is not zero based, but 1 based index. Returns 0 if value
* could not be found in array.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_position(column: Column, value: Any): Column =
@@ -5844,7 +5856,7 @@ object functions {
* (map, key) - Returns value for given key. The function always returns NULL if the key is not
* contained in the map.
*
- * @group map_funcs
+ * @group collection_funcs
* @since 3.5.0
*/
def try_element_at(column: Column, value: Column): Column =
@@ -5854,7 +5866,7 @@ object functions {
* Returns element of array at given (0-based) index. If the index points
* outside of the array boundaries, then this function returns NULL.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 3.4.0
*/
def get(column: Column, index: Column): Column = Column.fn("get", column, index)
@@ -5884,7 +5896,7 @@ object functions {
/**
* Remove all elements that equal to element from the given array.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_remove(column: Column, element: Any): Column =
@@ -5893,7 +5905,7 @@ object functions {
/**
* Remove all null elements from the given array.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 3.4.0
*/
def array_compact(column: Column): Column = Column.fn("array_compact", column)
@@ -5902,7 +5914,7 @@ object functions {
* Returns an array containing value as well as all elements from array. The new element is
* positioned at the beginning of the array.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 3.5.0
*/
def array_prepend(column: Column, element: Any): Column =
@@ -5910,7 +5922,7 @@ object functions {
/**
* Removes duplicate values from the array.
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_distinct(e: Column): Column = Column.fn("array_distinct", e)
@@ -5919,7 +5931,7 @@ object functions {
* Returns an array of the elements in the intersection of the given two arrays,
* without duplicates.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_intersect(col1: Column, col2: Column): Column =
@@ -5928,7 +5940,7 @@ object functions {
/**
* Adds an item into a given array at a specified position
*
- * @group collection_funcs
+ * @group array_funcs
* @since 3.4.0
*/
def array_insert(arr: Column, pos: Column, value: Column): Column =
@@ -5937,7 +5949,7 @@ object functions {
/**
* Returns an array of the elements in the union of the given two arrays, without duplicates.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_union(col1: Column, col2: Column): Column =
@@ -5947,7 +5959,7 @@ object functions {
* Returns an array of the elements in the first array but not in the second array,
* without duplicates. The order of elements in the result is not determined
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_except(col1: Column, col2: Column): Column =
@@ -6239,7 +6251,7 @@ object functions {
* Uses the default column name `col` for elements in the array and
* `key` and `value` for elements in the map unless specified otherwise.
*
- * @group collection_funcs
+ * @group generator_funcs
* @since 1.3.0
*/
def explode(e: Column): Column = Column.fn("explode", e)
@@ -6250,7 +6262,7 @@ object functions {
* `key` and `value` for elements in the map unless specified otherwise.
* Unlike explode, if the array/map is null or empty then null is produced.
*
- * @group collection_funcs
+ * @group generator_funcs
* @since 2.2.0
*/
def explode_outer(e: Column): Column = Column.fn("explode_outer", e)
@@ -6260,7 +6272,7 @@ object functions {
* Uses the default column name `pos` for position, and `col` for elements
in the array
* and `key` and `value` for elements in the map unless specified otherwise.
*
- * @group collection_funcs
+ * @group generator_funcs
* @since 2.1.0
*/
def posexplode(e: Column): Column = Column.fn("posexplode", e)
@@ -6271,7 +6283,7 @@ object functions {
* and `key` and `value` for elements in the map unless specified otherwise.
* Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced.
*
- * @group collection_funcs
+ * @group generator_funcs
* @since 2.2.0
*/
def posexplode_outer(e: Column): Column = Column.fn("posexplode_outer", e)
@@ -6279,7 +6291,7 @@ object functions {
/**
* Creates a new row for each element in the given array of structs.
*
- * @group collection_funcs
+ * @group generator_funcs
* @since 3.4.0
*/
def inline(e: Column): Column = Column.fn("inline", e)
@@ -6288,7 +6300,7 @@ object functions {
* Creates a new row for each element in the given array of structs.
* Unlike inline, if the array is null or empty then null is produced for each nested column.
*
- * @group collection_funcs
+ * @group generator_funcs
* @since 3.4.0
*/
def inline_outer(e: Column): Column = Column.fn("inline_outer", e)
@@ -6297,7 +6309,7 @@ object functions {
* Extracts json object from a json string based on json path specified, and returns json string
* of the extracted json object. It will return null if the input json string is invalid.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 1.6.0
*/
def get_json_object(e: Column, path: String): Column =
@@ -6306,7 +6318,7 @@ object functions {
/**
* Creates a new row for a json column according to the given field names.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 1.6.0
*/
@scala.annotation.varargs
@@ -6329,7 +6341,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.1.0
*/
// scalastyle:on line.size.limit
@@ -6351,7 +6363,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.2.0
*/
// scalastyle:on line.size.limit
@@ -6373,7 +6385,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.1.0
*/
// scalastyle:on line.size.limit
@@ -6395,7 +6407,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.2.0
*/
// scalastyle:on line.size.limit
@@ -6410,7 +6422,7 @@ object functions {
* @param e a string column containing JSON data.
* @param schema the schema to use when parsing the json string
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.1.0
*/
def from_json(e: Column, schema: StructType): Column =
@@ -6424,7 +6436,7 @@ object functions {
* @param e a string column containing JSON data.
* @param schema the schema to use when parsing the json string
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.2.0
*/
def from_json(e: Column, schema: DataType): Column =
@@ -6445,7 +6457,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.1.0
*/
// scalastyle:on line.size.limit
@@ -6468,7 +6480,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.3.0
*/
// scalastyle:on line.size.limit
@@ -6488,7 +6500,7 @@ object functions {
* @param e a string column containing JSON data.
* @param schema the schema to use when parsing the json string
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.4.0
*/
def from_json(e: Column, schema: Column): Column = {
@@ -6510,7 +6522,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.4.0
*/
// scalastyle:on line.size.limit
@@ -6549,7 +6561,7 @@ object functions {
*
* @param json a JSON string.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.4.0
*/
def schema_of_json(json: String): Column = schema_of_json(lit(json))
@@ -6559,7 +6571,7 @@ object functions {
*
* @param json a foldable string column containing a JSON string.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.4.0
*/
def schema_of_json(json: Column): Column = Column.fn("schema_of_json", json)
@@ -6577,7 +6589,7 @@ object functions {
* Data Source Option</a> in the version you use.
* @return a column with string literal containing schema in DDL format.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 3.0.0
*/
// scalastyle:on line.size.limit
@@ -6588,7 +6600,7 @@ object functions {
* Returns the number of elements in the outermost JSON array. `NULL` is returned in case of
* any other valid JSON string, `NULL` or an invalid JSON.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 3.5.0
*/
def json_array_length(e: Column): Column = Column.fn("json_array_length", e)
@@ -6598,7 +6610,7 @@ object functions {
* given, all the keys of the outermost object will be returned as an array. If it is any
* other valid JSON string, an invalid JSON string or an empty string, the function returns null.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 3.5.0
*/
def json_object_keys(e: Column): Column = Column.fn("json_object_keys", e)
@@ -6619,7 +6631,7 @@ object functions {
* Additionally the function supports the `pretty` option which enables
* pretty JSON generation.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.1.0
*/
// scalastyle:on line.size.limit
@@ -6642,7 +6654,7 @@ object functions {
* Additionally the function supports the `pretty` option which enables
* pretty JSON generation.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.1.0
*/
// scalastyle:on line.size.limit
@@ -6656,7 +6668,7 @@ object functions {
*
* @param e a column containing a struct, an array or a map.
*
- * @group collection_funcs
+ * @group json_funcs
* @since 2.1.0
*/
def to_json(e: Column): Column =
@@ -6783,7 +6795,7 @@ object functions {
* according to the natural ordering of the array elements.
* Null elements will be placed at the beginning of the returned array.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 1.5.0
*/
def sort_array(e: Column): Column = sort_array(e, asc = true)
@@ -6795,7 +6807,7 @@ object functions {
* array in ascending order or
* at the end of the returned array in descending order.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 1.5.0
*/
def sort_array(e: Column, asc: Boolean): Column = Column.fn("sort_array", e, lit(asc))
@@ -6804,7 +6816,7 @@ object functions {
* Returns the minimum value in the array. NaN is greater than any non-NaN elements for
* double/float type. NULL elements are skipped.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_min(e: Column): Column = Column.fn("array_min", e)
@@ -6813,7 +6825,7 @@ object functions {
* Returns the maximum value in the array. NaN is greater than any non-NaN elements for
* double/float type. NULL elements are skipped.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_max(e: Column): Column = Column.fn("array_max", e)
@@ -6821,7 +6833,7 @@ object functions {
/**
* Returns the total number of elements in the array. The function returns null for null input.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 3.5.0
*/
def array_size(e: Column): Column = Column.fn("array_size", e)
@@ -6841,7 +6853,7 @@ object functions {
*
* @note The function is non-deterministic.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def shuffle(e: Column): Column = withExpr { Shuffle(e.expr, Some(Utils.random.nextLong)) }
@@ -6856,7 +6868,7 @@ object functions {
/**
* Creates a single array from an array of arrays. If a structure of nested arrays is deeper than
* two levels, only one level of nesting is removed.
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def flatten(e: Column): Column = Column.fn("flatten", e)
@@ -6864,7 +6876,7 @@ object functions {
/**
* Generate a sequence of integers from start to stop, incrementing by step.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def sequence(start: Column, stop: Column, step: Column): Column =
@@ -6874,7 +6886,7 @@ object functions {
* Generate a sequence of integers from start to stop,
* incrementing by 1 if start is less than or equal to stop, otherwise -1.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def sequence(start: Column, stop: Column): Column = Column.fn("sequence", start, stop)
@@ -6883,7 +6895,7 @@ object functions {
* Creates an array containing the left argument repeated the number of times given by the
* right argument.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_repeat(left: Column, right: Column): Column = Column.fn("array_repeat", left, right)
@@ -6892,14 +6904,14 @@ object functions {
* Creates an array containing the left argument repeated the number of times given by the
* right argument.
*
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
def array_repeat(e: Column, count: Int): Column = array_repeat(e, lit(count))
/**
* Returns true if the map contains the key.
- * @group collection_funcs
+ * @group map_funcs
* @since 3.3.0
*/
def map_contains_key(column: Column, key: Any): Column =
@@ -6907,28 +6919,28 @@ object functions {
/**
* Returns an unordered array containing the keys of the map.
- * @group collection_funcs
+ * @group map_funcs
* @since 2.3.0
*/
def map_keys(e: Column): Column = Column.fn("map_keys", e)
/**
* Returns an unordered array containing the values of the map.
- * @group collection_funcs
+ * @group map_funcs
* @since 2.3.0
*/
def map_values(e: Column): Column = Column.fn("map_values", e)
/**
* Returns an unordered array of all entries in the given map.
- * @group collection_funcs
+ * @group map_funcs
* @since 3.0.0
*/
def map_entries(e: Column): Column = Column.fn("map_entries", e)
/**
* Returns a map created from the given array of entries.
- * @group collection_funcs
+ * @group map_funcs
* @since 2.4.0
*/
def map_from_entries(e: Column): Column = Column.fn("map_from_entries", e)
@@ -6936,7 +6948,7 @@ object functions {
/**
* Returns a merged array of structs in which the N-th struct contains all N-th values of input
* arrays.
- * @group collection_funcs
+ * @group array_funcs
* @since 2.4.0
*/
@scala.annotation.varargs
@@ -6944,7 +6956,7 @@ object functions {
/**
* Returns the union of all the given maps.
- * @group collection_funcs
+ * @group map_funcs
* @since 2.4.0
*/
@scala.annotation.varargs
@@ -6964,7 +6976,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-csv.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group csv_funcs
* @since 3.0.0
*/
// scalastyle:on line.size.limit
@@ -6985,7 +6997,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-csv.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group csv_funcs
* @since 3.0.0
*/
// scalastyle:on line.size.limit
@@ -7000,7 +7012,7 @@ object functions {
*
* @param csv a CSV string.
*
- * @group collection_funcs
+ * @group csv_funcs
* @since 3.0.0
*/
def schema_of_csv(csv: String): Column = schema_of_csv(lit(csv))
@@ -7010,7 +7022,7 @@ object functions {
*
* @param csv a foldable string column containing a CSV string.
*
- * @group collection_funcs
+ * @group csv_funcs
* @since 3.0.0
*/
def schema_of_csv(csv: Column): Column = schema_of_csv(csv, Collections.emptyMap())
@@ -7028,7 +7040,7 @@ object functions {
* Data Source Option</a> in the version you use.
* @return a column with string literal containing schema in DDL format.
*
- * @group collection_funcs
+ * @group csv_funcs
* @since 3.0.0
*/
// scalastyle:on line.size.limit
@@ -7048,7 +7060,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-csv.html#data-source-option">
* Data Source Option</a> in the version you use.
*
- * @group collection_funcs
+ * @group csv_funcs
* @since 3.0.0
*/
// scalastyle:on line.size.limit
@@ -7061,7 +7073,7 @@ object functions {
*
* @param e a column containing a struct.
*
- * @group collection_funcs
+ * @group csv_funcs
* @since 3.0.0
*/
def to_csv(e: Column): Column = to_csv(e, Map.empty[String, String].asJava)
@@ -7079,7 +7091,7 @@ object functions {
* <a href=
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option">
* Data Source Option</a> in the version you use.
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
@@ -7100,7 +7112,7 @@ object functions {
* <a href=
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option">
* Data Source Option</a> in the version you use.
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
@@ -7121,7 +7133,7 @@ object functions {
*
* @param e a string column containing XML data.
* @param schema the schema to use when parsing the XML string
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
@@ -7142,7 +7154,7 @@ object functions {
* <a href=
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option">
* Data Source Option</a> in the version you use.
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
@@ -7157,7 +7169,7 @@ object functions {
* @param e a string column containing XML data.
* @param schema the schema to use when parsing the XML string
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
def from_xml(e: Column, schema: StructType): Column =
@@ -7171,7 +7183,7 @@ object functions {
* Parses a XML string and infers its schema in DDL format.
*
* @param xml a XML string.
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
def schema_of_xml(xml: String): Column = schema_of_xml(lit(xml))
@@ -7180,7 +7192,7 @@ object functions {
* Parses a XML string and infers its schema in DDL format.
*
* @param xml a foldable string column containing a XML string.
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
def schema_of_xml(xml: Column): Column = withExpr(new SchemaOfXml(xml.expr))
@@ -7198,7 +7210,7 @@ object functions {
* "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option">
* Data Source Option</a> in the version you use.
* @return a column with string literal containing schema in DDL format.
- * @group collection_funcs
+ * @group xml_funcs
* @since 4.0.0
*/
// scalastyle:on line.size.limit
@@ -7233,7 +7245,7 @@ object functions {
/**
* Returns a string array of values within the nodes of xml that match the XPath expression.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath(xml: Column, path: Column): Column =
@@ -7242,7 +7254,7 @@ object functions {
/**
* Returns true if the XPath expression evaluates to true, or if a matching node is found.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_boolean(xml: Column, path: Column): Column =
@@ -7252,7 +7264,7 @@ object functions {
* Returns a double value, the value zero if no match is found,
* or NaN if a match is found but the value is non-numeric.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_double(xml: Column, path: Column): Column =
@@ -7262,7 +7274,7 @@ object functions {
* Returns a double value, the value zero if no match is found,
* or NaN if a match is found but the value is non-numeric.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_number(xml: Column, path: Column): Column =
@@ -7272,7 +7284,7 @@ object functions {
* Returns a float value, the value zero if no match is found,
* or NaN if a match is found but the value is non-numeric.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_float(xml: Column, path: Column): Column =
@@ -7282,7 +7294,7 @@ object functions {
* Returns an integer value, or the value zero if no match is found,
* or a match is found but the value is non-numeric.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_int(xml: Column, path: Column): Column =
@@ -7292,7 +7304,7 @@ object functions {
* Returns a long integer value, or the value zero if no match is found,
* or a match is found but the value is non-numeric.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_long(xml: Column, path: Column): Column =
@@ -7302,7 +7314,7 @@ object functions {
* Returns a short integer value, or the value zero if no match is found,
* or a match is found but the value is non-numeric.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_short(xml: Column, path: Column): Column =
@@ -7311,7 +7323,7 @@ object functions {
/**
* Returns the text contents of the first xml node that matches the XPath expression.
*
- * @group "xml_funcs"
+ * @group xml_funcs
* @since 3.5.0
*/
def xpath_string(xml: Column, path: Column): Column =
@@ -7632,7 +7644,7 @@ object functions {
/**
* Returns `col2` if `col1` is null, or `col1` otherwise.
*
- * @group predicates_funcs
+ * @group conditional_funcs
* @since 3.5.0
*/
def ifnull(col1: Column, col2: Column): Column = Column.fn("ifnull", col1, col2)
@@ -7640,7 +7652,7 @@ object functions {
/**
* Returns true if `col` is not null, or false otherwise.
*
- * @group predicates_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def isnotnull(col: Column): Column = Column.fn("isnotnull", col)
@@ -7649,7 +7661,7 @@ object functions {
* Returns same result as the EQUAL(=) operator for non-null operands,
* but returns true if both are null, false if one of the them is null.
*
- * @group predicates_funcs
+ * @group predicate_funcs
* @since 3.5.0
*/
def equal_null(col1: Column, col2: Column): Column = Column.fn("equal_null", col1, col2)
@@ -7657,7 +7669,7 @@ object functions {
/**
* Returns null if `col1` equals to `col2`, or `col1` otherwise.
*
- * @group predicates_funcs
+ * @group conditional_funcs
* @since 3.5.0
*/
def nullif(col1: Column, col2: Column): Column = Column.fn("nullif", col1, col2)
@@ -7665,7 +7677,7 @@ object functions {
/**
* Returns `col2` if `col1` is null, or `col1` otherwise.
*
- * @group predicates_funcs
+ * @group conditional_funcs
* @since 3.5.0
*/
def nvl(col1: Column, col2: Column): Column = Column.fn("nvl", col1, col2)
@@ -7673,7 +7685,7 @@ object functions {
/**
* Returns `col2` if `col1` is not null, or `col3` otherwise.
*
- * @group predicates_funcs
+ * @group conditional_funcs
* @since 3.5.0
*/
def nvl2(col1: Column, col2: Column, col3: Column): Column = Column.fn("nvl2", col1, col2, col3)
@@ -7756,6 +7768,7 @@ object functions {
*
* @return a UserDefinedFunction that can be used as an aggregating expression.
*
+ * @group udf_funcs
* @note The input encoder is inferred from the input type IN.
*/
def udaf[IN: TypeTag, BUF, OUT](agg: Aggregator[IN, BUF, OUT]): UserDefinedFunction = {
@@ -7786,6 +7799,7 @@ object functions {
*
* @return a UserDefinedFunction that can be used as an aggregating expression
*
+ * @group udf_funcs
* @note This overloading takes an explicit input encoder, to support UDAF
* declarations in Java.
*/
@@ -8207,6 +8221,7 @@ object functions {
* @param funcName function name that follows the SQL identifier syntax
* (can be quoted, can be qualified)
* @param cols the expression parameters of function
+ * @group normal_funcs
* @since 3.5.0
*/
@scala.annotation.varargs
@@ -8224,7 +8239,7 @@ object functions {
/**
* Unwrap UDT data type column into its underlying type.
- *
+ * @group udf_funcs
* @since 3.4.0
*/
def unwrap_udt(column: Column): Column = withExpr {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]