This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 58d1a892faf8 [SPARK-48555][PYTHON][FOLLOW-UP] Simplify the support of
`Any` parameters
58d1a892faf8 is described below
commit 58d1a892faf87939edd85c5dc39a96db95813dde
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Jun 27 12:37:19 2024 +0800
[SPARK-48555][PYTHON][FOLLOW-UP] Simplify the support of `Any` parameters
### What changes were proposed in this pull request?
Simplify the support of column type `Any`
### Why are the changes needed?
I checked all the `Any` parameters, and all of them support the Column
type now.
But there are two kinds of implementations. I think the approach of
`array_append` is much simpler, so I tried to unify the implementations:
```
@_try_remote_functions
def array_append(col: "ColumnOrName", value: Any) -> Column:
return _invoke_function_over_columns("array_append", col, lit(value))
```
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #47110 from zhengruifeng/py_func_any.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/functions/builtin.py | 34 ++++++++++++---------------------
1 file changed, 12 insertions(+), 22 deletions(-)
diff --git a/python/pyspark/sql/functions/builtin.py
b/python/pyspark/sql/functions/builtin.py
index ed66ca8684ef..b496cdaf0955 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -10938,11 +10938,15 @@ def substring(
target column to work on.
pos : :class:`~pyspark.sql.Column` or str or int
starting position in str.
+
+ .. versionchanged:: 4.0.0
+ `pos` now accepts column and column name.
+
len : :class:`~pyspark.sql.Column` or str or int
length of chars.
.. versionchanged:: 4.0.0
- `pos` and `len` now also accept Columns or names of Columns.
+ `len` now accepts column and column name.
Returns
-------
@@ -10962,11 +10966,9 @@ def substring(
>>> df.select(substring(df.s, df.p, df.l).alias('s')).collect()
[Row(s='par')]
"""
- from pyspark.sql.classic.column import _to_java_column
-
- pos = _to_java_column(lit(pos) if isinstance(pos, int) else pos)
- len = _to_java_column(lit(len) if isinstance(len, int) else len)
- return _invoke_function("substring", _to_java_column(str), pos, len)
+ pos = lit(pos) if isinstance(pos, int) else pos
+ len = lit(len) if isinstance(len, int) else len
+ return _invoke_function_over_columns("substring", str, pos, len)
@_try_remote_functions
@@ -13618,10 +13620,7 @@ def array_contains(col: "ColumnOrName", value: Any) ->
Column:
| true|
+----------+
"""
- from pyspark.sql.classic.column import _to_java_column
-
- value = value._jc if isinstance(value, Column) else value
- return _invoke_function("array_contains", _to_java_column(col), value)
+ return _invoke_function_over_columns("array_contains", col, lit(value))
@_try_remote_functions
@@ -14064,10 +14063,7 @@ def array_position(col: "ColumnOrName", value: Any) ->
Column:
+-------------------------+
"""
- from pyspark.sql.classic.column import _to_java_column
-
- value = _to_java_column(value) if isinstance(value, Column) else value
- return _invoke_function("array_position", _to_java_column(col), value)
+ return _invoke_function_over_columns("array_position", col, lit(value))
@_try_remote_functions
@@ -14515,10 +14511,7 @@ def array_remove(col: "ColumnOrName", element: Any) ->
Column:
| [2, 3]|
+-----------------------+
"""
- from pyspark.sql.classic.column import _to_java_column
-
- element = _to_java_column(element) if isinstance(element, Column) else
element
- return _invoke_function("array_remove", _to_java_column(col), element)
+ return _invoke_function_over_columns("array_remove", col, lit(element))
@_try_remote_functions
@@ -17327,10 +17320,7 @@ def map_contains_key(col: "ColumnOrName", value: Any)
-> Column:
| true|
+---------------------------+
"""
- from pyspark.sql.classic.column import _to_java_column
-
- value = _to_java_column(value) if isinstance(value, Column) else value
- return _invoke_function("map_contains_key", _to_java_column(col), value)
+ return _invoke_function_over_columns("map_contains_key", col, lit(value))
@_try_remote_functions
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]