itholic commented on code in PR #41514:
URL: https://github.com/apache/spark/pull/41514#discussion_r1228837088
##########
python/pyspark/sql/utils.py:
##########
@@ -237,17 +237,25 @@ def wrapped(*args: Any, **kwargs: Any) -> Any:
return cast(FuncT, wrapped)
-def pyspark_column_op(func_name: str) -> Callable[..., "SeriesOrIndex"]:
+def pyspark_column_op(
+ func_name: str, left: "IndexOpsLike", right: Any, fillna: Any = None
+) -> Union["SeriesOrIndex", None]:
"""
Wrapper function for column_op to get proper Column class.
"""
from pyspark.pandas.base import column_op
from pyspark.sql.column import Column as PySparkColumn
+ from pyspark.pandas.data_type_ops.base import _is_extension_dtypes
if is_remote():
from pyspark.sql.connect.column import Column as ConnectColumn
Column = ConnectColumn
else:
Column = PySparkColumn # type: ignore[assignment]
- return column_op(getattr(Column, func_name))
+ result = column_op(getattr(Column, func_name))(left, right)
+ # It works as expected on extension dtype, so we don't need to call
`fillna` for this case.
Review Comment:
I mean that it works the same as pandas, without any additional computation
such as `fillna`, for extension dtypes, as shown below:
```python
>>> pser = pd.Series([1.0, 2.0, np.nan], dtype="Float64")
>>> psser = ps.from_pandas(pser)
>>> pser.eq(pser)
0 True
1 True
2 <NA>
dtype: boolean
>>> psser.eq(psser)
0 True
1 True
2 <NA>
dtype: boolean
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]