This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 368ee62 [SPARK-27297][DOC][FOLLOW-UP] Improve documentation for
various Scala functions
368ee62 is described below
commit 368ee62a5dce83682ccaec92feeea8428af5a8cf
Author: yi.wu <[email protected]>
AuthorDate: Thu Feb 6 20:34:29 2020 +0800
[SPARK-27297][DOC][FOLLOW-UP] Improve documentation for various Scala
functions
### What changes were proposed in this pull request?
Add examples and parameter description for these Scala functions:
* transform
* exists
* forall
* aggregate
* zip_with
* transform_keys
* transform_values
* map_filter
* map_zip_with
### Why are the changes needed?
Better documentation for UX.
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
Pass Jenkins.
Closes #27449 from Ngone51/doc-funcs.
Authored-by: yi.wu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../scala/org/apache/spark/sql/functions.scala | 93 +++++++++++++++++++---
1 file changed, 83 insertions(+), 10 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index da26c5a..d125581 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3410,6 +3410,12 @@ object functions {
/**
* Returns an array of elements after applying a transformation to each
element
* in the input array.
+ * {{{
+ * df.select(transform(col("i"), x => x + 1))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f col => transformed_col, the lambda function to transform the
input column
*
* @group collection_funcs
* @since 3.0.0
@@ -3421,6 +3427,13 @@ object functions {
/**
* Returns an array of elements after applying a transformation to each
element
* in the input array.
+ * {{{
+ * df.select(transform(col("i"), (x, i) => x + i))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f (col, index) => transformed_col, the lambda function to transform
the input column
+ * given the index. Indices start at 0.
*
* @group collection_funcs
* @since 3.0.0
@@ -3431,6 +3444,12 @@ object functions {
/**
* Returns whether a predicate holds for one or more elements in the array.
+ * {{{
+ * df.select(exists(col("i"), _ % 2 === 0))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f col => predicate, the Boolean predicate to check the input column
*
* @group collection_funcs
* @since 3.0.0
@@ -3441,6 +3460,12 @@ object functions {
/**
* Returns whether a predicate holds for every element in the array.
+ * {{{
+ * df.select(forall(col("i"), x => x % 2 === 0))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f col => predicate, the Boolean predicate to check the input column
*
* @group collection_funcs
* @since 3.0.0
@@ -3453,11 +3478,10 @@ object functions {
* Returns an array of elements for which a predicate holds in a given array.
* {{{
* df.select(filter(col("s"), x => x % 2 === 0))
- * df.selectExpr("filter(col, x -> x % 2 == 0)")
* }}}
*
- * @param column: the input array column
- * @param f: col => predicate, the Boolean predicate to filter the input
column
+ * @param column the input array column
+ * @param f col => predicate, the Boolean predicate to filter the input
column
*
* @group collection_funcs
* @since 3.0.0
@@ -3470,11 +3494,10 @@ object functions {
* Returns an array of elements for which a predicate holds in a given array.
* {{{
* df.select(filter(col("s"), (x, i) => i % 2 === 0))
- * df.selectExpr("filter(col, (x, i) -> i % 2 == 0)")
* }}}
*
- * @param column: the input array column
- * @param f: (col, index) => predicate, the Boolean predicate to filter the
input column
+ * @param column the input array column
+ * @param f (col, index) => predicate, the Boolean predicate to filter the
input column
* given the index. Indices start at 0.
*
* @group collection_funcs
@@ -3488,18 +3511,28 @@ object functions {
* Applies a binary operator to an initial state and all elements in the
array,
* and reduces this to a single state. The final state is converted into the
final result
* by applying a finish function.
+ * {{{
+ * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x, _ * 10))
+ * }}}
+ *
+ * @param expr the input array column
+ * @param initialValue the initial value
+ * @param merge (combined_value, input_value) => combined_value, the merge
function to merge
+ * an input value to the combined_value
+ * @param finish combined_value => final_value, the lambda function to
convert the combined value
+ * of all inputs to the final result
*
* @group collection_funcs
* @since 3.0.0
*/
def aggregate(
expr: Column,
- zero: Column,
+ initialValue: Column,
merge: (Column, Column) => Column,
finish: Column => Column): Column = withExpr {
ArrayAggregate(
expr.expr,
- zero.expr,
+ initialValue.expr,
createLambda(merge),
createLambda(finish)
)
@@ -3508,17 +3541,31 @@ object functions {
/**
* Applies a binary operator to an initial state and all elements in the
array,
* and reduces this to a single state.
+ * {{{
+ * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x))
+ * }}}
*
+ * @param expr the input array column
+ * @param initialValue the initial value
+ * @param merge (combined_value, input_value) => combined_value, the merge
function to merge
+ * an input value to the combined_value
* @group collection_funcs
* @since 3.0.0
*/
- def aggregate(expr: Column, zero: Column, merge: (Column, Column) =>
Column): Column =
- aggregate(expr, zero, merge, c => c)
+ def aggregate(expr: Column, initialValue: Column, merge: (Column, Column) =>
Column): Column =
+ aggregate(expr, initialValue, merge, c => c)
/**
* Merge two given arrays, element-wise, into a single array using a
function.
* If one array is shorter, nulls are appended at the end to match the
length of the longer
* array, before applying the function.
+ * {{{
+ * df.select(zip_with(df("val1"), df("val2"), (x, y) => x + y))
+ * }}}
+ *
+ * @param left the left input array column
+ * @param right the right input array column
+ * @param f (lCol, rCol) => col, the lambda function to merge two input
columns into one column
*
* @group collection_funcs
* @since 3.0.0
@@ -3530,6 +3577,12 @@ object functions {
/**
* Applies a function to every key-value pair in a map and returns
* a map with the results of those applications as the new keys for the
pairs.
+ * {{{
+ * df.select(transform_keys(col("i"), (k, v) => k + v))
+ * }}}
+ *
+ * @param expr the input map column
+ * @param f (key, value) => new_key, the lambda function to transform the
key of input map column
*
* @group collection_funcs
* @since 3.0.0
@@ -3541,6 +3594,13 @@ object functions {
/**
* Applies a function to every key-value pair in a map and returns
* a map with the results of those applications as the new values for the
pairs.
+ * {{{
+ * df.select(transform_values(col("i"), (k, v) => k + v))
+ * }}}
+ *
+ * @param expr the input map column
+ * @param f (key, value) => new_value, the lambda function to transform the
value of input map
+ * column
*
* @group collection_funcs
* @since 3.0.0
@@ -3551,6 +3611,12 @@ object functions {
/**
* Returns a map whose key-value pairs satisfy a predicate.
+ * {{{
+ * df.select(map_filter(col("m"), (k, v) => k * 10 === v))
+ * }}}
+ *
+ * @param expr the input map column
+ * @param f (key, value) => predicate, the Boolean predicate to filter the
input map column
*
* @group collection_funcs
* @since 3.0.0
@@ -3561,6 +3627,13 @@ object functions {
/**
* Merge two given maps, key-wise into a single map using a function.
+ * {{{
+ * df.select(map_zip_with(df("m1"), df("m2"), (k, v1, v2) => k === v1 +
v2))
+ * }}}
+ *
+ * @param left the left input map column
+ * @param right the right input map column
+ * @param f (key, value1, value2) => new_value, the lambda function to merge
the map values
*
* @group collection_funcs
* @since 3.0.0
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]