This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d56fe6c7b202 [SPARK-46465][PYTHON][CONNECT] Add `Column.isNaN` in
PySpark
d56fe6c7b202 is described below
commit d56fe6c7b20244238a2d807a08bb646d8edcaf32
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Dec 21 08:59:45 2023 +0900
[SPARK-46465][PYTHON][CONNECT] Add `Column.isNaN` in PySpark
### What changes were proposed in this pull request?
add `Column.isNaN` in pyspark
### Why are the changes needed?
`Column.isNaN` was added to the Scala API in 1.5.0, but it is still missing in
Python.
This PR adds it for parity.
### Does this PR introduce _any_ user-facing change?
yes
```
In [1]: from pyspark.sql import Row
In [2]: df = spark.createDataFrame([Row(name='Tom', height=80.0),
Row(name='Alice', height=float('nan'))])
In [3]: df.show()
+-----+------+
| name|height|
+-----+------+
| Tom| 80.0|
|Alice| NaN|
+-----+------+
In [4]: df.filter(df.height.isNaN()).show()
+-----+------+
| name|height|
+-----+------+
|Alice| NaN|
+-----+------+
```
### How was this patch tested?
added doctest
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #44422 from zhengruifeng/py_isNaN.
Lead-authored-by: Ruifeng Zheng <[email protected]>
Co-authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/column.py | 14 ++++++++++++++
python/pyspark/sql/connect/column.py | 1 +
2 files changed, 15 insertions(+)
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 198dd9ff3e40..5fa7fb3d42b0 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -1153,9 +1153,23 @@ class Column:
>>> df.filter(df.height.isNotNull()).collect()
[Row(name='Tom', height=80)]
"""
+ _isNaN_doc = """
+ True if the current expression is NaN.
+
+ .. versionadded:: 4.0.0
+
+ Examples
+ --------
+ >>> from pyspark.sql import Row
+ >>> df = spark.createDataFrame(
+ ... [Row(name='Tom', height=80.0), Row(name='Alice',
height=float('nan'))])
+ >>> df.filter(df.height.isNaN()).collect()
+ [Row(name='Alice', height=nan)]
+ """
isNull = _unary_op("isNull", _isNull_doc)
isNotNull = _unary_op("isNotNull", _isNotNull_doc)
+ isNaN = _unary_op("isNaN", _isNaN_doc)
def alias(self, *alias: str, **kwargs: Any) -> "Column":
"""
diff --git a/python/pyspark/sql/connect/column.py
b/python/pyspark/sql/connect/column.py
index 13b00fd83d8b..052151d5417e 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -160,6 +160,7 @@ class Column:
isNull = _unary_op("isnull", PySparkColumn.isNull.__doc__)
isNotNull = _unary_op("isnotnull", PySparkColumn.isNotNull.__doc__)
+ isNaN = _unary_op("isNaN", PySparkColumn.isNaN.__doc__)
def __ne__( # type: ignore[override]
self,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]