llama90 commented on code in PR #40761:
URL: https://github.com/apache/arrow/pull/40761#discussion_r1562439775
##########
python/pyarrow/types.py:
##########
@@ -20,295 +20,54 @@
from pyarrow.lib import (is_boolean_value, # noqa
is_integer_value,
- is_float_value)
-
-import pyarrow.lib as lib
-from pyarrow.util import doc
-
-
-_SIGNED_INTEGER_TYPES = {lib.Type_INT8, lib.Type_INT16, lib.Type_INT32,
- lib.Type_INT64}
-_UNSIGNED_INTEGER_TYPES = {lib.Type_UINT8, lib.Type_UINT16, lib.Type_UINT32,
- lib.Type_UINT64}
-_INTEGER_TYPES = _SIGNED_INTEGER_TYPES | _UNSIGNED_INTEGER_TYPES
-_FLOATING_TYPES = {lib.Type_HALF_FLOAT, lib.Type_FLOAT, lib.Type_DOUBLE}
-_DECIMAL_TYPES = {lib.Type_DECIMAL128, lib.Type_DECIMAL256}
-_DATE_TYPES = {lib.Type_DATE32, lib.Type_DATE64}
-_TIME_TYPES = {lib.Type_TIME32, lib.Type_TIME64}
-_INTERVAL_TYPES = {lib.Type_INTERVAL_MONTH_DAY_NANO}
-_TEMPORAL_TYPES = ({lib.Type_TIMESTAMP,
- lib.Type_DURATION} | _TIME_TYPES | _DATE_TYPES |
- _INTERVAL_TYPES)
-_UNION_TYPES = {lib.Type_SPARSE_UNION, lib.Type_DENSE_UNION}
-_NESTED_TYPES = {lib.Type_LIST, lib.Type_FIXED_SIZE_LIST, lib.Type_LARGE_LIST,
- lib.Type_LIST_VIEW, lib.Type_LARGE_LIST_VIEW,
- lib.Type_STRUCT, lib.Type_MAP} | _UNION_TYPES
-
-
-@doc(datatype="null")
-def is_null(t):
- """
- Return True if value is an instance of type: {datatype}.
-
- Parameters
- ----------
- t : DataType
- """
- return t.id == lib.Type_NA
-
-
-@doc(is_null, datatype="boolean")
-def is_boolean(t):
- return t.id == lib.Type_BOOL
-
-
-@doc(is_null, datatype="any integer")
-def is_integer(t):
- return t.id in _INTEGER_TYPES
-
-
-@doc(is_null, datatype="signed integer")
-def is_signed_integer(t):
- return t.id in _SIGNED_INTEGER_TYPES
-
-
-@doc(is_null, datatype="unsigned integer")
-def is_unsigned_integer(t):
- return t.id in _UNSIGNED_INTEGER_TYPES
-
-
-@doc(is_null, datatype="int8")
-def is_int8(t):
- return t.id == lib.Type_INT8
-
-
-@doc(is_null, datatype="int16")
-def is_int16(t):
- return t.id == lib.Type_INT16
-
-
-@doc(is_null, datatype="int32")
-def is_int32(t):
- return t.id == lib.Type_INT32
-
-
-@doc(is_null, datatype="int64")
-def is_int64(t):
- return t.id == lib.Type_INT64
-
-
-@doc(is_null, datatype="uint8")
-def is_uint8(t):
- return t.id == lib.Type_UINT8
-
-
-@doc(is_null, datatype="uint16")
-def is_uint16(t):
- return t.id == lib.Type_UINT16
-
-
-@doc(is_null, datatype="uint32")
-def is_uint32(t):
- return t.id == lib.Type_UINT32
-
-
-@doc(is_null, datatype="uint64")
-def is_uint64(t):
- return t.id == lib.Type_UINT64
-
-
-@doc(is_null, datatype="floating point numeric")
-def is_floating(t):
- return t.id in _FLOATING_TYPES
-
-
-@doc(is_null, datatype="float16 (half-precision)")
-def is_float16(t):
- return t.id == lib.Type_HALF_FLOAT
-
-
-@doc(is_null, datatype="float32 (single precision)")
-def is_float32(t):
- return t.id == lib.Type_FLOAT
-
-
-@doc(is_null, datatype="float64 (double precision)")
-def is_float64(t):
- return t.id == lib.Type_DOUBLE
-
-
-@doc(is_null, datatype="list")
-def is_list(t):
- return t.id == lib.Type_LIST
-
-
-@doc(is_null, datatype="large list")
-def is_large_list(t):
- return t.id == lib.Type_LARGE_LIST
-
-
-@doc(is_null, datatype="fixed size list")
-def is_fixed_size_list(t):
- return t.id == lib.Type_FIXED_SIZE_LIST
-
-
-@doc(is_null, datatype="list view")
-def is_list_view(t):
- return t.id == lib.Type_LIST_VIEW
-
-
-@doc(is_null, datatype="large list view")
-def is_large_list_view(t):
- return t.id == lib.Type_LARGE_LIST_VIEW
-
-
-@doc(is_null, datatype="struct")
-def is_struct(t):
- return t.id == lib.Type_STRUCT
-
-
-@doc(is_null, datatype="union")
-def is_union(t):
- return t.id in _UNION_TYPES
-
-
-@doc(is_null, datatype="nested type")
-def is_nested(t):
- return t.id in _NESTED_TYPES
-
-
-@doc(is_null, datatype="run-end encoded")
-def is_run_end_encoded(t):
- return t.id == lib.Type_RUN_END_ENCODED
-
-
-@doc(is_null, datatype="date, time, timestamp or duration")
-def is_temporal(t):
- return t.id in _TEMPORAL_TYPES
-
-
-@doc(is_null, datatype="timestamp")
-def is_timestamp(t):
- return t.id == lib.Type_TIMESTAMP
-
-
-@doc(is_null, datatype="duration")
-def is_duration(t):
- return t.id == lib.Type_DURATION
-
-
-@doc(is_null, datatype="time")
-def is_time(t):
- return t.id in _TIME_TYPES
-
-
-@doc(is_null, datatype="time32")
-def is_time32(t):
- return t.id == lib.Type_TIME32
-
-
-@doc(is_null, datatype="time64")
-def is_time64(t):
- return t.id == lib.Type_TIME64
-
-
-@doc(is_null, datatype="variable-length binary")
-def is_binary(t):
- return t.id == lib.Type_BINARY
-
-
-@doc(is_null, datatype="large variable-length binary")
-def is_large_binary(t):
- return t.id == lib.Type_LARGE_BINARY
-
-
-@doc(method="is_string")
-def is_unicode(t):
- """
- Alias for {method}.
-
- Parameters
- ----------
- t : DataType
- """
- return is_string(t)
-
-
-@doc(is_null, datatype="string (utf8 unicode)")
-def is_string(t):
- return t.id == lib.Type_STRING
-
-
-@doc(is_unicode, method="is_large_string")
-def is_large_unicode(t):
- return is_large_string(t)
-
-
-@doc(is_null, datatype="large string (utf8 unicode)")
-def is_large_string(t):
- return t.id == lib.Type_LARGE_STRING
-
-
-@doc(is_null, datatype="fixed size binary")
-def is_fixed_size_binary(t):
- return t.id == lib.Type_FIXED_SIZE_BINARY
-
-
-@doc(is_null, datatype="variable-length binary view")
-def is_binary_view(t):
- return t.id == lib.Type_BINARY_VIEW
-
-
-@doc(is_null, datatype="variable-length string (utf-8) view")
-def is_string_view(t):
- return t.id == lib.Type_STRING_VIEW
-
-
-@doc(is_null, datatype="date")
-def is_date(t):
- return t.id in _DATE_TYPES
-
-
-@doc(is_null, datatype="date32 (days)")
-def is_date32(t):
- return t.id == lib.Type_DATE32
-
-
-@doc(is_null, datatype="date64 (milliseconds)")
-def is_date64(t):
- return t.id == lib.Type_DATE64
-
-
-@doc(is_null, datatype="map")
-def is_map(t):
- return t.id == lib.Type_MAP
-
-
-@doc(is_null, datatype="decimal")
-def is_decimal(t):
- return t.id in _DECIMAL_TYPES
-
-
-@doc(is_null, datatype="decimal128")
-def is_decimal128(t):
- return t.id == lib.Type_DECIMAL128
-
-
-@doc(is_null, datatype="decimal256")
-def is_decimal256(t):
- return t.id == lib.Type_DECIMAL256
-
-
-@doc(is_null, datatype="dictionary-encoded")
-def is_dictionary(t):
- return t.id == lib.Type_DICTIONARY
-
-
-@doc(is_null, datatype="interval")
-def is_interval(t):
- return t.id == lib.Type_INTERVAL_MONTH_DAY_NANO
-
-
-@doc(is_null, datatype="primitive type")
-def is_primitive(t):
- return lib._is_primitive(t.id)
+ is_float_value,
+ ensure_type,
+ _is_integer as is_integer,
Review Comment:
Can you explain this in more detail?
It cannot have the same name as a function imported via `cdef extern from
"arrow/type_traits.h" namespace "arrow":`.
Also, it appears that if I declare it under a different (prefixed) name to
avoid the conflict, I will eventually have to re-export it under its original
name using `as`.
If I expose only the prefixed name, without the `as` alias, compatibility
seems to be broken.
<details><summary>error</summary>
```
============================== 1 failed in 0.20s
===============================
FAILED [100%]
pyarrow/tests/test_pandas.py:4465 (test_chunked_array_to_pandas_types_mapper)
@pytest.mark.pandas
def test_chunked_array_to_pandas_types_mapper():
# https://issues.apache.org/jira/browse/ARROW-9664
if Version(pd.__version__) < Version("1.2.0"):
pytest.skip("Float64Dtype extension dtype missing")
data = pa.chunked_array([pa.array([1, 2, 3], pa.int64())])
assert isinstance(data, pa.ChunkedArray)
# Test with mapper function
types_mapper = {pa.int64(): pd.Int64Dtype()}.get
> result = data.to_pandas(types_mapper=types_mapper)
tests/test_pandas.py:4477:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
???
pyarrow/table.pxi:475: in pyarrow.lib.ChunkedArray._to_pandas
???
pyarrow/array.pxi:1917: in pyarrow.lib._array_like_to_pandas
???
/Users/lama/anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/numeric.py:100:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(array,
dtype=self.numpy_dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _
arr = <pyarrow.lib.Int64Array object at 0x14d521360>
[
1,
2,
3
]
dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
/Users/lama/anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]