This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 659f8c8 [SPARK-27297][DOC][FOLLOW-UP] Improve documentation for various Scala functions 659f8c8 is described below commit 659f8c8ef549fd040596978478898266d24e88ff Author: yi.wu <yi...@databricks.com> AuthorDate: Thu Feb 6 20:34:29 2020 +0800 [SPARK-27297][DOC][FOLLOW-UP] Improve documentation for various Scala functions ### What changes were proposed in this pull request? Add examples and parameter description for these Scala functions: * transform * exists * forall * aggregate * zip_with * transform_keys * transform_values * map_filter * map_zip_with ### Why are the changes needed? Better documentation for UX. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Pass Jenkins. Closes #27449 from Ngone51/doc-funcs. Authored-by: yi.wu <yi...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 368ee62a5dce83682ccaec92feeea8428af5a8cf) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../scala/org/apache/spark/sql/functions.scala | 93 +++++++++++++++++++--- 1 file changed, 83 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index da26c5a..d125581 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3410,6 +3410,12 @@ object functions { /** * Returns an array of elements after applying a transformation to each element * in the input array. 
+ * {{{ + * df.select(transform(col("i"), x => x + 1)) + * }}} + * + * @param column the input array column + * @param f col => transformed_col, the lambda function to transform the input column * * @group collection_funcs * @since 3.0.0 @@ -3421,6 +3427,13 @@ object functions { /** * Returns an array of elements after applying a transformation to each element * in the input array. + * {{{ + * df.select(transform(col("i"), (x, i) => x + i)) + * }}} + * + * @param column the input array column + * @param f (col, index) => transformed_col, the lambda function to transform the input column + * given the index. Indices start at 0. * * @group collection_funcs * @since 3.0.0 @@ -3431,6 +3444,12 @@ object functions { /** * Returns whether a predicate holds for one or more elements in the array. + * {{{ + * df.select(exists(col("i"), _ % 2 === 0)) + * }}} + * + * @param column the input array column + * @param f col => predicate, the Boolean predicate to check the input column * * @group collection_funcs * @since 3.0.0 @@ -3441,6 +3460,12 @@ object functions { /** * Returns whether a predicate holds for every element in the array. + * {{{ + * df.select(forall(col("i"), x => x % 2 === 0)) + * }}} + * + * @param column the input array column + * @param f col => predicate, the Boolean predicate to check the input column * * @group collection_funcs * @since 3.0.0 @@ -3453,11 +3478,10 @@ object functions { * Returns an array of elements for which a predicate holds in a given array. 
* {{{ * df.select(filter(col("s"), x => x % 2 === 0)) - * df.selectExpr("filter(col, x -> x % 2 == 0)") * }}} * - * @param column: the input array column - * @param f: col => predicate, the Boolean predicate to filter the input column + * @param column the input array column + * @param f col => predicate, the Boolean predicate to filter the input column * * @group collection_funcs * @since 3.0.0 @@ -3470,11 +3494,10 @@ object functions { * Returns an array of elements for which a predicate holds in a given array. * {{{ * df.select(filter(col("s"), (x, i) => i % 2 === 0)) - * df.selectExpr("filter(col, (x, i) -> i % 2 == 0)") * }}} * - * @param column: the input array column - * @param f: (col, index) => predicate, the Boolean predicate to filter the input column + * @param column the input array column + * @param f (col, index) => predicate, the Boolean predicate to filter the input column * given the index. Indices start at 0. * * @group collection_funcs @@ -3488,18 +3511,28 @@ object functions { * Applies a binary operator to an initial state and all elements in the array, * and reduces this to a single state. The final state is converted into the final result * by applying a finish function. 
+ * {{{ + * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x, _ * 10)) + * }}} + * + * @param expr the input array column + * @param initialValue the initial value + * @param merge (combined_value, input_value) => combined_value, the merge function to merge + * an input value to the combined_value + * @param finish combined_value => final_value, the lambda function to convert the combined value + * of all inputs to final result * * @group collection_funcs * @since 3.0.0 */ def aggregate( expr: Column, - zero: Column, + initialValue: Column, merge: (Column, Column) => Column, finish: Column => Column): Column = withExpr { ArrayAggregate( expr.expr, - zero.expr, + initialValue.expr, createLambda(merge), createLambda(finish) ) @@ -3508,17 +3541,31 @@ object functions { /** * Applies a binary operator to an initial state and all elements in the array, * and reduces this to a single state. + * {{{ + * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x)) + * }}} * + * @param expr the input array column + * @param initialValue the initial value + * @param merge (combined_value, input_value) => combined_value, the merge function to merge + * an input value to the combined_value * @group collection_funcs * @since 3.0.0 */ - def aggregate(expr: Column, zero: Column, merge: (Column, Column) => Column): Column = - aggregate(expr, zero, merge, c => c) + def aggregate(expr: Column, initialValue: Column, merge: (Column, Column) => Column): Column = + aggregate(expr, initialValue, merge, c => c) /** * Merge two given arrays, element-wise, into a single array using a function. * If one array is shorter, nulls are appended at the end to match the length of the longer * array, before applying the function. 
+ * {{{ + * df.select(zip_with(df("val1"), df("val2"), (x, y) => x + y)) + * }}} + * + * @param left the left input array column + * @param right the right input array column + * @param f (lCol, rCol) => col, the lambda function to merge two input columns into one column * * @group collection_funcs * @since 3.0.0 @@ -3530,6 +3577,12 @@ object functions { /** * Applies a function to every key-value pair in a map and returns * a map with the results of those applications as the new keys for the pairs. + * {{{ + * df.select(transform_keys(col("i"), (k, v) => k + v)) + * }}} + * + * @param expr the input map column + * @param f (key, value) => new_key, the lambda function to transform the key of input map column * * @group collection_funcs * @since 3.0.0 @@ -3541,6 +3594,13 @@ object functions { /** * Applies a function to every key-value pair in a map and returns * a map with the results of those applications as the new values for the pairs. + * {{{ + * df.select(transform_values(col("i"), (k, v) => k + v)) + * }}} + * + * @param expr the input map column + * @param f (key, value) => new_value, the lambda function to transform the value of input map + * column * * @group collection_funcs * @since 3.0.0 @@ -3551,6 +3611,12 @@ object functions { /** * Returns a map whose key-value pairs satisfy a predicate. + * {{{ + * df.select(map_filter(col("m"), (k, v) => k * 10 === v)) + * }}} + * + * @param expr the input map column + * @param f (key, value) => predicate, the Boolean predicate to filter the input map column * * @group collection_funcs * @since 3.0.0 @@ -3561,6 +3627,13 @@ object functions { /** * Merge two given maps, key-wise into a single map using a function. 
+ * {{{ + * df.select(map_zip_with(df("m1"), df("m2"), (k, v1, v2) => k === v1 + v2)) + * }}} + * + * @param left the left input map column + * @param right the right input map column + * @param f (key, value1, value2) => new_value, the lambda function to merge the map values * * @group collection_funcs * @since 3.0.0 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org