ntjohnson1 commented on code in PR #1468:
URL:
https://github.com/apache/datafusion-python/pull/1468#discussion_r3039072951
##########
python/datafusion/functions.py:
##########
@@ -3354,6 +3470,209 @@ def list_resize(array: Expr, size: Expr, value: Expr) -> Expr:
return array_resize(array, size, value)
+def array_any_value(array: Expr) -> Expr:
+ """Returns the first non-null element in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[None, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_any_value(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 2
+ """
+ return Expr(f.array_any_value(array.expr))
+
+
+def list_any_value(array: Expr) -> Expr:
+ """Returns the first non-null element in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_any_value`.
+ """
+ return array_any_value(array)
+
+
+def array_distance(array1: Expr, array2: Expr) -> Expr:
+ """Returns the Euclidean distance between two numeric arrays.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1.0, 2.0]], "b": [[1.0, 4.0]]})
+ >>> result = df.select(
+ ... dfn.functions.array_distance(
+ ... dfn.col("a"), dfn.col("b"),
+ ... ).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 2.0
+ """
+ return Expr(f.array_distance(array1.expr, array2.expr))
+
+
+def list_distance(array1: Expr, array2: Expr) -> Expr:
+ """Returns the Euclidean distance between two numeric arrays.
+
+ See Also:
+ This is an alias for :py:func:`array_distance`.
+ """
+ return array_distance(array1, array2)
+
+
+def array_max(array: Expr) -> Expr:
+ """Returns the maximum value in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_max(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 3
+ """
+ return Expr(f.array_max(array.expr))
+
+
+def list_max(array: Expr) -> Expr:
+ """Returns the maximum value in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_max`.
+ """
+ return array_max(array)
+
+
+def array_min(array: Expr) -> Expr:
+ """Returns the minimum value in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_min(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 1
+ """
+ return Expr(f.array_min(array.expr))
+
+
+def list_min(array: Expr) -> Expr:
+ """Returns the minimum value in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_min`.
+ """
+ return array_min(array)
+
+
+def array_reverse(array: Expr) -> Expr:
+ """Reverses the order of elements in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_reverse(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ [3, 2, 1]
+ """
+ return Expr(f.array_reverse(array.expr))
+
+
+def list_reverse(array: Expr) -> Expr:
+ """Reverses the order of elements in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_reverse`.
+ """
+ return array_reverse(array)
+
+
+def arrays_zip(*arrays: Expr) -> Expr:
+ """Combines multiple arrays into a single array of structs.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
+ >>> result = df.select(
+ ... dfn.functions.arrays_zip(dfn.col("a"), dfn.col("b")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ [{'c0': 1, 'c1': 3}, {'c0': 2, 'c1': 4}]
+ """
+ args = [a.expr for a in arrays]
+ return Expr(f.arrays_zip(args))
+
+
+def list_zip(*arrays: Expr) -> Expr:
+ """Combines multiple arrays into a single array of structs.
+
+ See Also:
+ This is an alias for :py:func:`arrays_zip`.
+ """
+ return arrays_zip(*arrays)
+
+
+def string_to_array(
+ string: Expr, delimiter: Expr, null_string: Expr | None = None
+) -> Expr:
+ """Splits a string based on a delimiter and returns an array of parts.
+
+ Any parts matching the optional ``null_string`` will be replaced with ``NULL``.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["hello,world"]})
+ >>> result = df.select(
+ ... dfn.functions.string_to_array(
+ ... dfn.col("a"), dfn.lit(","),
+ ... ).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ ['hello', 'world']
+ """
+ null_expr = null_string.expr if null_string is not None else None
+ return Expr(f.string_to_array(string.expr, delimiter.expr, null_expr))
+
+
+def string_to_list(
+ string: Expr, delimiter: Expr, null_string: Expr | None = None
+) -> Expr:
+ """Splits a string based on a delimiter and returns an array of parts.
+
+ See Also:
+ This is an alias for :py:func:`string_to_array`.
+ """
+ return string_to_array(string, delimiter, null_string)
+
+
+def gen_series(start: Expr, stop: Expr, step: Expr | None = None) -> Expr:
+ """Creates a list of values in the range between start and stop.
+
+ Unlike :py:func:`range`, this includes the upper bound.
+
+ Examples:
Review Comment:
The examples are missing one that demonstrates the optional `step` parameter.
##########
python/datafusion/functions.py:
##########
@@ -3354,6 +3470,209 @@ def list_resize(array: Expr, size: Expr, value: Expr) -> Expr:
return array_resize(array, size, value)
+def array_any_value(array: Expr) -> Expr:
+ """Returns the first non-null element in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[None, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_any_value(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 2
+ """
+ return Expr(f.array_any_value(array.expr))
+
+
+def list_any_value(array: Expr) -> Expr:
+ """Returns the first non-null element in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_any_value`.
+ """
+ return array_any_value(array)
+
+
+def array_distance(array1: Expr, array2: Expr) -> Expr:
+ """Returns the Euclidean distance between two numeric arrays.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1.0, 2.0]], "b": [[1.0, 4.0]]})
+ >>> result = df.select(
+ ... dfn.functions.array_distance(
+ ... dfn.col("a"), dfn.col("b"),
+ ... ).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 2.0
+ """
+ return Expr(f.array_distance(array1.expr, array2.expr))
+
+
+def list_distance(array1: Expr, array2: Expr) -> Expr:
+ """Returns the Euclidean distance between two numeric arrays.
+
+ See Also:
+ This is an alias for :py:func:`array_distance`.
+ """
+ return array_distance(array1, array2)
+
+
+def array_max(array: Expr) -> Expr:
+ """Returns the maximum value in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_max(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 3
+ """
+ return Expr(f.array_max(array.expr))
+
+
+def list_max(array: Expr) -> Expr:
+ """Returns the maximum value in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_max`.
+ """
+ return array_max(array)
+
+
+def array_min(array: Expr) -> Expr:
+ """Returns the minimum value in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_min(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ 1
+ """
+ return Expr(f.array_min(array.expr))
+
+
+def list_min(array: Expr) -> Expr:
+ """Returns the minimum value in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_min`.
+ """
+ return array_min(array)
+
+
+def array_reverse(array: Expr) -> Expr:
+ """Reverses the order of elements in the array.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+ >>> result = df.select(
+ ... dfn.functions.array_reverse(dfn.col("a")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ [3, 2, 1]
+ """
+ return Expr(f.array_reverse(array.expr))
+
+
+def list_reverse(array: Expr) -> Expr:
+ """Reverses the order of elements in the array.
+
+ See Also:
+ This is an alias for :py:func:`array_reverse`.
+ """
+ return array_reverse(array)
+
+
+def arrays_zip(*arrays: Expr) -> Expr:
+ """Combines multiple arrays into a single array of structs.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
+ >>> result = df.select(
+ ... dfn.functions.arrays_zip(dfn.col("a"), dfn.col("b")).alias("result"))
+ >>> result.collect_column("result")[0].as_py()
+ [{'c0': 1, 'c1': 3}, {'c0': 2, 'c1': 4}]
+ """
+ args = [a.expr for a in arrays]
+ return Expr(f.arrays_zip(args))
+
+
+def list_zip(*arrays: Expr) -> Expr:
+ """Combines multiple arrays into a single array of structs.
+
+ See Also:
+ This is an alias for :py:func:`arrays_zip`.
+ """
+ return arrays_zip(*arrays)
+
+
+def string_to_array(
+ string: Expr, delimiter: Expr, null_string: Expr | None = None
+) -> Expr:
+ """Splits a string based on a delimiter and returns an array of parts.
+
+ Any parts matching the optional ``null_string`` will be replaced with ``NULL``.
+
+ Examples:
Review Comment:
The example doesn't demonstrate the optional `null_string` parameter.
We can probably update the Copilot rules to require that each function has examples
covering its base functionality, plus extra examples for any optional arguments.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]