This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6b931530d75 [SPARK-46472][PYTHON][DOCS] Refine docstring of `array_prepend/array_append/array_insert`
6b931530d75 is described below

commit 6b931530d75cb4f00236f9c6283de8ef450963ad
Author: yangjie01 <yangji...@baidu.com>
AuthorDate: Fri Dec 22 11:01:05 2023 +0800

    [SPARK-46472][PYTHON][DOCS] Refine docstring of `array_prepend/array_append/array_insert`
    
    ### What changes were proposed in this pull request?
    This PR refines the docstrings of `array_prepend/array_append/array_insert` and adds some new examples.
    
    ### Why are the changes needed?
    To improve PySpark documentation
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Pass GitHub Actions
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #44436 from LuciferYang/SPARK-46472.
    
    Authored-by: yangjie01 <yangji...@baidu.com>
    Signed-off-by: yangjie01 <yangji...@baidu.com>
---
 python/pyspark/sql/functions/builtin.py | 217 ++++++++++++++++++++++++++++----
 1 file changed, 191 insertions(+), 26 deletions(-)

diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 54a91792404..571572df30a 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -12875,9 +12875,8 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
 @_try_remote_functions
 def array_prepend(col: "ColumnOrName", value: Any) -> Column:
     """
-    Collection function: Returns an array containing element as
-    well as all elements from array. The new element is positioned
-    at the beginning of the array.
+    Array function: Returns an array containing the given element as
+    the first element and the rest of the elements from the original array.
 
     .. versionadded:: 3.5.0
 
@@ -12891,13 +12890,72 @@ def array_prepend(col: "ColumnOrName", value: Any) -> Column:
     Returns
     -------
     :class:`~pyspark.sql.Column`
-        an array excluding given value.
+        an array with the given value prepended.
 
     Examples
     --------
-    >>> df = spark.createDataFrame([([2, 3, 4],), ([],)], ['data'])
-    >>> df.select(array_prepend(df.data, 1)).collect()
-    [Row(array_prepend(data, 1)=[1, 2, 3, 4]), Row(array_prepend(data, 1)=[1])]
+    Example 1: Prepending a column value to an array column
+
+    >>> from pyspark.sql import Row, functions as sf
+    >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2="c")])
+    >>> df.select(sf.array_prepend(df.c1, df.c2)).show()
+    +---------------------+
+    |array_prepend(c1, c2)|
+    +---------------------+
+    |         [c, b, a, c]|
+    +---------------------+
+
+    Example 2: Prepending a numeric value to an array column
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([([1, 2, 3],)], ['data'])
+    >>> df.select(sf.array_prepend(df.data, 4)).show()
+    +----------------------+
+    |array_prepend(data, 4)|
+    +----------------------+
+    |          [4, 1, 2, 3]|
+    +----------------------+
+
+    Example 3: Prepending a null value to an array column
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([([1, 2, 3],)], ['data'])
+    >>> df.select(sf.array_prepend(df.data, None)).show()
+    +-------------------------+
+    |array_prepend(data, NULL)|
+    +-------------------------+
+    |          [NULL, 1, 2, 3]|
+    +-------------------------+
+
+    Example 4: Prepending a value to a NULL array column
+
+    >>> from pyspark.sql import functions as sf
+    >>> from pyspark.sql.types import ArrayType, IntegerType, StructType, StructField
+    >>> schema = StructType([
+    ...   StructField("data", ArrayType(IntegerType()), True)
+    ... ])
+    >>> df = spark.createDataFrame([(None,)], schema=schema)
+    >>> df.select(sf.array_prepend(df.data, 4)).show()
+    +----------------------+
+    |array_prepend(data, 4)|
+    +----------------------+
+    |                  NULL|
+    +----------------------+
+
+    Example 5: Prepending a value to an empty array
+
+    >>> from pyspark.sql import functions as sf
+    >>> from pyspark.sql.types import ArrayType, IntegerType, StructType, StructField
+    >>> schema = StructType([
+    ...   StructField("data", ArrayType(IntegerType()), True)
+    ... ])
+    >>> df = spark.createDataFrame([([],)], schema=schema)
+    >>> df.select(sf.array_prepend(df.data, 1)).show()
+    +----------------------+
+    |array_prepend(data, 1)|
+    +----------------------+
+    |                   [1]|
+    +----------------------+
     """
     return _invoke_function_over_columns("array_prepend", col, lit(value))
 
@@ -12965,7 +13023,7 @@ def array_distinct(col: "ColumnOrName") -> Column:
 @_try_remote_functions
 def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: Any) -> Column:
     """
-    Collection function: adds an item into a given array at a specified array index.
+    Array function: Inserts an item into a given array at a specified array index.
     Array indices start at 1, or start from the end if index is negative.
     Index above array size appends the array, or prepends the array if index is negative,
     with 'null' elements.
@@ -12993,14 +13051,66 @@ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: An
 
     Examples
     --------
-    >>> df = spark.createDataFrame(
-    ...     [(['a', 'b', 'c'], 2, 'd'), (['c', 'b', 'a'], -2, 'd')],
-    ...     ['data', 'pos', 'val']
-    ... )
-    >>> df.select(array_insert(df.data, df.pos.cast('integer'), df.val).alias('data')).collect()
-    [Row(data=['a', 'd', 'b', 'c']), Row(data=['c', 'b', 'd', 'a'])]
-    >>> df.select(array_insert(df.data, 5, 'hello').alias('data')).collect()
-    [Row(data=['a', 'b', 'c', None, 'hello']), Row(data=['c', 'b', 'a', None, 'hello'])]
+    Example 1: Inserting a value at a specific position
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([(['a', 'b', 'c'],)], ['data'])
+    >>> df.select(sf.array_insert(df.data, 2, 'd')).show()
+    +------------------------+
+    |array_insert(data, 2, d)|
+    +------------------------+
+    |            [a, d, b, c]|
+    +------------------------+
+
+    Example 2: Inserting a value at a negative position
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([(['a', 'b', 'c'],)], ['data'])
+    >>> df.select(sf.array_insert(df.data, -2, 'd')).show()
+    +-------------------------+
+    |array_insert(data, -2, d)|
+    +-------------------------+
+    |             [a, b, d, c]|
+    +-------------------------+
+
+    Example 3: Inserting a value at a position greater than the array size
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([(['a', 'b', 'c'],)], ['data'])
+    >>> df.select(sf.array_insert(df.data, 5, 'e')).show()
+    +------------------------+
+    |array_insert(data, 5, e)|
+    +------------------------+
+    |      [a, b, c, NULL, e]|
+    +------------------------+
+
+    Example 4: Inserting a NULL value
+
+    >>> from pyspark.sql import functions as sf
+    >>> from pyspark.sql.types import StringType
+    >>> df = spark.createDataFrame([(['a', 'b', 'c'],)], ['data'])
+    >>> df.select(sf.array_insert(df.data, 2, sf.lit(None).cast(StringType()))
+    ...   .alias("result")).show()
+    +---------------+
+    |         result|
+    +---------------+
+    |[a, NULL, b, c]|
+    +---------------+
+
+    Example 5: Inserting a value into a NULL array
+
+    >>> from pyspark.sql import functions as sf
+    >>> from pyspark.sql.types import ArrayType, IntegerType, StructType, StructField
+    >>> schema = StructType([
+    ...   StructField("data", ArrayType(IntegerType()), True)
+    ... ])
+    >>> df = spark.createDataFrame([(None,)], schema=schema)
+    >>> df.select(sf.array_insert(df.data, 1, 5)).show()
+    +------------------------+
+    |array_insert(data, 1, 5)|
+    +------------------------+
+    |                    NULL|
+    +------------------------+
     """
     pos = lit(pos) if isinstance(pos, int) else pos
 
@@ -13139,22 +13249,21 @@ def array_compact(col: "ColumnOrName") -> Column:
 @_try_remote_functions
 def array_append(col: "ColumnOrName", value: Any) -> Column:
     """
-    Collection function: returns an array of the elements in col1 along
-    with the added element in col2 at the last of the array.
+    Array function: returns a new array column by appending `value` to the existing array `col`.
 
     .. versionadded:: 3.4.0
 
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
-        name of column containing array
+        The name of the column containing the array.
     value :
-        a literal value, or a :class:`~pyspark.sql.Column` expression.
+        A literal value, or a :class:`~pyspark.sql.Column` expression to be appended to the array.
 
     Returns
     -------
     :class:`~pyspark.sql.Column`
-        an array of values from first array along with the element.
+        A new array column with `value` appended to the original array.
 
     Notes
     -----
@@ -13162,12 +13271,68 @@ def array_append(col: "ColumnOrName", value: Any) -> Column:
 
     Examples
     --------
-    >>> from pyspark.sql import Row
+    Example 1: Appending a column value to an array column
+
+    >>> from pyspark.sql import Row, functions as sf
     >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2="c")])
-    >>> df.select(array_append(df.c1, df.c2)).collect()
-    [Row(array_append(c1, c2)=['b', 'a', 'c', 'c'])]
-    >>> df.select(array_append(df.c1, 'x')).collect()
-    [Row(array_append(c1, x)=['b', 'a', 'c', 'x'])]
+    >>> df.select(sf.array_append(df.c1, df.c2)).show()
+    +--------------------+
+    |array_append(c1, c2)|
+    +--------------------+
+    |        [b, a, c, c]|
+    +--------------------+
+
+    Example 2: Appending a numeric value to an array column
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([([1, 2, 3],)], ['data'])
+    >>> df.select(sf.array_append(df.data, 4)).show()
+    +---------------------+
+    |array_append(data, 4)|
+    +---------------------+
+    |         [1, 2, 3, 4]|
+    +---------------------+
+
+    Example 3: Appending a null value to an array column
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([([1, 2, 3],)], ['data'])
+    >>> df.select(sf.array_append(df.data, None)).show()
+    +------------------------+
+    |array_append(data, NULL)|
+    +------------------------+
+    |         [1, 2, 3, NULL]|
+    +------------------------+
+
+    Example 4: Appending a value to a NULL array column
+
+    >>> from pyspark.sql import functions as sf
+    >>> from pyspark.sql.types import ArrayType, IntegerType, StructType, StructField
+    >>> schema = StructType([
+    ...   StructField("data", ArrayType(IntegerType()), True)
+    ... ])
+    >>> df = spark.createDataFrame([(None,)], schema=schema)
+    >>> df.select(sf.array_append(df.data, 4)).show()
+    +---------------------+
+    |array_append(data, 4)|
+    +---------------------+
+    |                 NULL|
+    +---------------------+
+
+    Example 5: Appending a value to an empty array
+
+    >>> from pyspark.sql import functions as sf
+    >>> from pyspark.sql.types import ArrayType, IntegerType, StructType, StructField
+    >>> schema = StructType([
+    ...   StructField("data", ArrayType(IntegerType()), True)
+    ... ])
+    >>> df = spark.createDataFrame([([],)], schema=schema)
+    >>> df.select(sf.array_append(df.data, 1)).show()
+    +---------------------+
+    |array_append(data, 1)|
+    +---------------------+
+    |                  [1]|
+    +---------------------+
     """
     return _invoke_function_over_columns("array_append", col, lit(value))
 


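Editor's note: below is a minimal, self-contained sketch (not part of the patch) showing the three refined functions side by side outside of doctest form. It assumes PySpark >= 3.5 installed locally; the app name, column name, and aliases are illustrative only.

    # Minimal sketch: array_prepend/array_append/array_insert side by side.
    # Assumes PySpark >= 3.5 on a local machine; names below are illustrative.
    from pyspark.sql import SparkSession, functions as sf

    spark = SparkSession.builder.master("local[1]").appName("array-demo").getOrCreate()

    df = spark.createDataFrame([(["b", "c"],)], ["data"])
    df.select(
        sf.array_prepend(df.data, "a").alias("prepended"),   # [a, b, c]
        sf.array_append(df.data, "d").alias("appended"),     # [b, c, d]
        sf.array_insert(df.data, 2, "x").alias("inserted"),  # [b, x, c]
    ).show()

    spark.stop()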