This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2ff4e3a252 GH-34154: [Python] Add `is_nan` method to Array and Expression (#34184)
2ff4e3a252 is described below
commit 2ff4e3a2523bd0c58168d6ca4bcb14f45393ff2b
Author: Fokko Driesprong <[email protected]>
AuthorDate: Thu Mar 9 11:05:54 2023 +0100
GH-34154: [Python] Add `is_nan` method to Array and Expression (#34184)
* Closes: GH-34154
Lead-authored-by: Fokko Driesprong <[email protected]>
Co-authored-by: Fokko Driesprong <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/_compute.pyx | 13 +++++++++++++
python/pyarrow/array.pxi | 10 ++++++++++
python/pyarrow/table.pxi | 24 ++++++++++++++++++++++++
python/pyarrow/tests/test_compute.py | 18 ++++++++++++++++++
4 files changed, 65 insertions(+)
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 0051286707..0eed056479 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -2440,6 +2440,19 @@ cdef class Expression(_Weakrefable):
options = NullOptions(nan_is_null=nan_is_null)
return Expression._call("is_null", [self], options)
+ def is_nan(self):
+ """
+ Check whether the expression is NaN.
+
+ This creates a new expression equivalent to calling the
+ `is_nan` compute function on this expression.
+
+ Returns
+ -------
+ is_nan : Expression
+ """
+ return Expression._call("is_nan", [self])
+
def cast(self, type=None, safe=None, options=None):
"""
Explicitly set or change the expression's data type.
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index dca99af812..9deccfa117 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1261,6 +1261,16 @@ cdef class Array(_PandasConvertible):
options = _pc().NullOptions(nan_is_null=nan_is_null)
return _pc().call_function('is_null', [self], options)
+ def is_nan(self):
+ """
+ Return BooleanArray indicating the NaN values.
+
+ Returns
+ -------
+ array : boolean Array
+ """
+ return _pc().call_function('is_nan', [self])
+
def is_valid(self):
"""
Return BooleanArray indicating the non-null values.
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index e400605e56..5af381e608 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -327,6 +327,30 @@ cdef class ChunkedArray(_PandasConvertible):
options = _pc().NullOptions(nan_is_null=nan_is_null)
return _pc().call_function('is_null', [self], options)
+ def is_nan(self):
+ """
+ Return boolean array indicating the NaN values.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> import numpy as np
+ >>> arr = pa.chunked_array([[2, np.nan, 4], [4, None, 100]])
+ >>> arr.is_nan()
+ <pyarrow.lib.ChunkedArray object at ...>
+ [
+ [
+ false,
+ true,
+ false,
+ false,
+ null,
+ false
+ ]
+ ]
+ """
+ return _pc().is_nan(self)
+
def is_valid(self):
"""
Return boolean array indicating the non-null values.
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 24e9950f21..c3213ec3f0 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -35,6 +35,7 @@ except ImportError:
import pyarrow as pa
import pyarrow.compute as pc
+from pyarrow.lib import ArrowNotImplementedError
all_array_types = [
('bool', [True, False, False, True, True]),
@@ -1635,6 +1636,23 @@ def test_is_null():
assert result.equals(expected)
+def test_is_nan():
+ arr = pa.array([1, 2, 3, None, np.nan])
+ result = arr.is_nan()
+ expected = pa.array([False, False, False, None, True])
+ assert result.equals(expected)
+
+ arr = pa.array(["1", "2", None], type=pa.string())
+ with pytest.raises(
+ ArrowNotImplementedError, match="has no kernel matching input types"):
+ _ = arr.is_nan()
+
+ with pytest.raises(
+ ArrowNotImplementedError, match="has no kernel matching input types"):
+ arr = pa.array([b'a', b'bb', None], type=pa.large_binary())
+ _ = arr.is_nan()
+
+
def test_fill_null():
arr = pa.array([1, 2, None, 4], type=pa.int8())
fill_value = pa.array([5], type=pa.int8())