llama90 commented on PR #40761:
URL: https://github.com/apache/arrow/pull/40761#issuecomment-2041032884
@pitrou Hello.
I've structured the functions in `types.pxi` to enable type checking. `types.py` imports the functions from `types.pxi` and exposes them for use with pyarrow.
- When a function with the same name already exists in `type_traits.h`
(e.g., `is_integer`), I prefix it with `_` in `types.pxi` and drop the `_` on
import using `as` (e.g., `_is_integer as is_integer`), so it is exposed as,
for instance, `pyarrow.types.is_integer`.
- For functions that have no counterpart in `type_traits.h`, I define them
without the `_` prefix (e.g., `is_boolean`) and still use the helpers in
`type_traits.h` wherever possible to check the types (see the sketch after
this list).
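A minimal sketch of the naming scheme described above, assuming the PR's layout (function bodies elided; only the calls at the bottom use the existing public API):
```python
# Sketch only -- illustrates the underscore/re-export pattern, not the exact
# contents of the PR.
#
# types.pxi (compiled into pyarrow.lib):
#
#     def _is_integer(t):   # name clashes with is_integer() in type_traits.h,
#         ...               # so it gets a leading underscore here
#
#     def is_boolean(t):    # no clash, so no prefix
#         ...
#
# pyarrow/types.py -- re-export under the public names using `as`:
#
#     from pyarrow.lib import (
#         _is_integer as is_integer,
#         is_boolean,
#     )
#
# Callers keep using the same public path either way:
import pyarrow as pa

print(pa.types.is_integer(pa.int64()))   # True
print(pa.types.is_boolean(pa.bool_()))   # True
```
The underscore is purely an internal disambiguation; nothing changes for users of `pyarrow.types`.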
I think I've grasped the intention behind the requested code adjustments; I
hope this matches what you had in mind. Please review it again when you have
time. Thank you as always.
P.S. I also tried dropping the `.types.` prefix from the calls, but that broke
compatibility with `pandas` (see the errors below), so I kept the `.types.`
path for invoking these functions.
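For context, the failures below come from pandas resolving these helpers through the `pyarrow.types` submodule at call time; a quick illustrative check (not code from the PR) of the lookups it performs:
```python
# Illustrative only: pandas expects these helpers to stay reachable under
# the pyarrow.types path, e.g. pyarrow.types.is_null / pa.types.is_string.
import pyarrow as pa

assert hasattr(pa.types, "is_null")
assert pa.types.is_null(pa.null())
assert not pa.types.is_null(pa.int64())
assert pa.types.is_string(pa.string())
```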
<details><summary>error message</summary>

```
==================================================================================================================
FAILURES
==================================================================================================================
_______________________________________________________________________________________
test_extension_to_pandas_storage_type[registered_period_type2]
_______________________________________________________________________________________
registered_period_type = (PeriodTypeWithToPandasDtype(DataType(int64)),
<class 'pyarrow.lib.ExtensionArray'>)
@pytest.mark.pandas
def test_extension_to_pandas_storage_type(registered_period_type):
period_type, _ = registered_period_type
np_arr = np.array([1, 2, 3, 4], dtype='i8')
storage = pa.array([1, 2, 3, 4], pa.int64())
arr = pa.ExtensionArray.from_storage(period_type, storage)
if isinstance(period_type, PeriodTypeWithToPandasDtype):
pandas_dtype = period_type.to_pandas_dtype()
else:
pandas_dtype = np_arr.dtype
# Test arrays
> result = arr.to_pandas()
pyarrow/tests/test_extension_type.py:1490:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/array.pxi:1544: in pyarrow.lib.Array._to_pandas
return _array_like_to_pandas(self, options, types_mapper=types_mapper)
pyarrow/array.pxi:1917: in pyarrow.lib._array_like_to_pandas
arr = dtype.__from_arrow__(obj)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:1146:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(arr,
dtype=np.dtype(np.int64))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
arr = <pyarrow.lib.ExtensionArray object at 0x16e060460>
[
1,
2,
3,
4
], dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
_________________________________________________________________________________________
test_convert_categories_to_array_with_string_pyarrow_dtype
_________________________________________________________________________________________
def test_convert_categories_to_array_with_string_pyarrow_dtype():
# gh-33727: categories should be converted to pa.Array
if Version(pd.__version__) < Version("1.3.0"):
pytest.skip("PyArrow backed string data type introduced in
pandas 1.3.0")
> df = pd.DataFrame({"x": ["foo", "bar", "foo"]},
dtype="string[pyarrow]")
pyarrow/tests/test_pandas.py:4173:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/frame.py:767:
in __init__
mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy,
typ=manager)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/internals/construction.py:503:
in dict_to_mgr
return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ,
consolidate=copy)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/internals/construction.py:119:
in arrays_to_mgr
arrays, refs = _homogenize(arrays, index, dtype)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/internals/construction.py:629:
in _homogenize
val = sanitize_array(val, index, dtype=dtype, copy=False)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/construction.py:596:
in sanitize_array
subarr = cls._from_sequence(data, dtype=dtype, copy=copy)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/string_arrow.py:199:
in _from_sequence
return cls(pa.array(result, type=pa.string(), from_pandas=True))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
self = <[TypeError("object of type 'NoneType' has no len()") raised in
repr()] ArrowStringArray object at 0x17ef27880>, values =
<pyarrow.lib.StringArray object at 0x17efb0fa0>
[
"foo",
"bar",
"foo"
]
def __init__(self, values) -> None:
_chk_pyarrow_available()
> if isinstance(values, (pa.Array, pa.ChunkedArray)) and
pa.types.is_string(
values.type
):
E AttributeError: module 'pyarrow.types' has no attribute 'is_string'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/string_arrow.py:131:
AttributeError
______________________________________________________________________________________________________
test_convert_to_extension_array
_______________________________________________________________________________________________________
monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x17fa37a90>
def test_convert_to_extension_array(monkeypatch):
# table converted from dataframe with extension types (so
pandas_metadata
# has this information)
df = pd.DataFrame(
{'a': [1, 2, 3], 'b': pd.array([2, 3, 4], dtype='Int64'),
'c': [4, 5, 6]})
table = pa.table(df)
# Int64Dtype is recognized -> convert to extension block by default
# for a proper roundtrip
> result = table.to_pandas()
pyarrow/tests/test_pandas.py:4298:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/table.pxi:4795: in pyarrow.lib.Table._to_pandas
df = table_to_dataframe(
pyarrow/pandas_compat.py:777: in table_to_dataframe
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
pyarrow/pandas_compat.py:1109: in _table_to_blocks
return [_reconstruct_block(item, columns, extension_columns)
pyarrow/pandas_compat.py:1109: in <listcomp>
return [_reconstruct_block(item, columns, extension_columns)
pyarrow/pandas_compat.py:737: in _reconstruct_block
pd_ext_arr = pandas_dtype.__from_arrow__(arr)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/numeric.py:100:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(array,
dtype=self.numpy_dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
arr = <pyarrow.lib.Int64Array object at 0x17fb04220>
[
2,
3,
4
], dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
______________________________________________________________________________________________
test_conversion_extensiontype_to_extensionarray
_______________________________________________________________________________________________
monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x17f95d3f0>
def test_conversion_extensiontype_to_extensionarray(monkeypatch):
# converting extension type to linked pandas ExtensionDtype/Array
storage = pa.array([1, 2, 3, 4], pa.int64())
arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage)
table = pa.table({'a': arr})
# extension type points to Int64Dtype, which knows how to create a
# pandas ExtensionArray
> result = arr.to_pandas()
pyarrow/tests/test_pandas.py:4344:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/array.pxi:1544: in pyarrow.lib.Array._to_pandas
return _array_like_to_pandas(self, options, types_mapper=types_mapper)
pyarrow/array.pxi:1917: in pyarrow.lib._array_like_to_pandas
arr = dtype.__from_arrow__(obj)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
self = Int64Dtype(), array = <pyarrow.lib.ExtensionArray object at
0x17e88c160>
[
1,
2,
3,
4
]
def __from_arrow__(
self, array: pyarrow.Array | pyarrow.ChunkedArray
) -> BaseMaskedArray:
"""
Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.
"""
import pyarrow
from pandas.core.arrays.arrow._arrow_utils import (
pyarrow_array_to_numpy_and_mask,
)
array_class = self.construct_array_type()
pyarrow_type = pyarrow.from_numpy_dtype(self.type)
> if not array.type.equals(pyarrow_type) and not pyarrow.types.is_null(
array.type
):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/numeric.py:77:
AttributeError
__________________________________________________________________________________________________
test_to_pandas_extension_dtypes_mapping
___________________________________________________________________________________________________
def test_to_pandas_extension_dtypes_mapping():
table = pa.table({'a': pa.array([1, 2, 3], pa.int64())})
# default use numpy dtype
result = table.to_pandas()
assert result['a'].dtype == np.dtype('int64')
# specify to override the default
> result = table.to_pandas(types_mapper={pa.int64():
pd.Int64Dtype()}.get)
pyarrow/tests/test_pandas.py:4380:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/table.pxi:4795: in pyarrow.lib.Table._to_pandas
df = table_to_dataframe(
pyarrow/pandas_compat.py:777: in table_to_dataframe
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
pyarrow/pandas_compat.py:1109: in _table_to_blocks
return [_reconstruct_block(item, columns, extension_columns)
pyarrow/pandas_compat.py:1109: in <listcomp>
return [_reconstruct_block(item, columns, extension_columns)
pyarrow/pandas_compat.py:737: in _reconstruct_block
pd_ext_arr = pandas_dtype.__from_arrow__(arr)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/numeric.py:100:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(array,
dtype=self.numpy_dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
arr = <pyarrow.lib.Int64Array object at 0x17e88cee0>
[
1,
2,
3
], dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
____________________________________________________________________________________________________________
test_array_to_pandas
____________________________________________________________________________________________________________
def test_array_to_pandas():
if Version(pd.__version__) < Version("1.1"):
pytest.skip("ExtensionDtype to_pandas method missing")
for arr in [pd.period_range("2012-01-01", periods=3, freq="D").array,
pd.interval_range(1, 4).array]:
> result = pa.array(arr).to_pandas()
pyarrow/tests/test_pandas.py:4403:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/array.pxi:1544: in pyarrow.lib.Array._to_pandas
return _array_like_to_pandas(self, options, types_mapper=types_mapper)
pyarrow/array.pxi:1917: in pyarrow.lib._array_like_to_pandas
arr = dtype.__from_arrow__(obj)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:1146:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(arr,
dtype=np.dtype(np.int64))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
arr = <pyarrow.lib.ExtensionArray object at 0x17f993ca0>
[
15340,
15341,
15342
], dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
____________________________________________________________________________________________________
test_to_pandas_types_mapper_index[a]
____________________________________________________________________________________________________
index = 'a'
@pytest.mark.parametrize("index", ["a", ["a", "b"]])
def test_to_pandas_types_mapper_index(index):
if Version(pd.__version__) < Version("1.5.0"):
pytest.skip("ArrowDtype missing")
> df = pd.DataFrame(
{
"a": [1, 2],
"b": [3, 4],
"c": [5, 6],
},
dtype=pd.ArrowDtype(pa.int64()),
).set_index(index)
pyarrow/tests/test_pandas.py:4429:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/frame.py:693:
in __init__
dtype = self._validate_dtype(dtype)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/generic.py:518:
in _validate_dtype
if dtype.kind == "V":
properties.pyx:36: in pandas._libs.properties.CachedProperty.__get__
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
self = int64[pyarrow]
@cache_readonly
def kind(self) -> str:
> if pa.types.is_timestamp(self.pyarrow_dtype):
E AttributeError: module 'pyarrow.types' has no attribute
'is_timestamp'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:2205:
AttributeError
_________________________________________________________________________________________________
test_to_pandas_types_mapper_index[index1]
__________________________________________________________________________________________________
index = ['a', 'b']
@pytest.mark.parametrize("index", ["a", ["a", "b"]])
def test_to_pandas_types_mapper_index(index):
if Version(pd.__version__) < Version("1.5.0"):
pytest.skip("ArrowDtype missing")
> df = pd.DataFrame(
{
"a": [1, 2],
"b": [3, 4],
"c": [5, 6],
},
dtype=pd.ArrowDtype(pa.int64()),
).set_index(index)
pyarrow/tests/test_pandas.py:4429:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/frame.py:693:
in __init__
dtype = self._validate_dtype(dtype)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/generic.py:518:
in _validate_dtype
if dtype.kind == "V":
properties.pyx:36: in pandas._libs.properties.CachedProperty.__get__
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
self = int64[pyarrow]
@cache_readonly
def kind(self) -> str:
> if pa.types.is_timestamp(self.pyarrow_dtype):
E AttributeError: module 'pyarrow.types' has no attribute
'is_timestamp'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/dtypes/dtypes.py:2205:
AttributeError
_____________________________________________________________________________________________________
test_array_to_pandas_types_mapper
______________________________________________________________________________________________________
def test_array_to_pandas_types_mapper():
# https://issues.apache.org/jira/browse/ARROW-9664
if Version(pd.__version__) < Version("1.2.0"):
pytest.skip("Float64Dtype extension dtype missing")
data = pa.array([1, 2, 3], pa.int64())
# Test with mapper function
types_mapper = {pa.int64(): pd.Int64Dtype()}.get
> result = data.to_pandas(types_mapper=types_mapper)
pyarrow/tests/test_pandas.py:4452:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/array.pxi:1544: in pyarrow.lib.Array._to_pandas
return _array_like_to_pandas(self, options, types_mapper=types_mapper)
pyarrow/array.pxi:1917: in pyarrow.lib._array_like_to_pandas
arr = dtype.__from_arrow__(obj)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/numeric.py:100:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(array,
dtype=self.numpy_dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
arr = <pyarrow.lib.Int64Array object at 0x17f6f41c0>
[
1,
2,
3
], dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
_________________________________________________________________________________________________
test_chunked_array_to_pandas_types_mapper
__________________________________________________________________________________________________
@pytest.mark.pandas
def test_chunked_array_to_pandas_types_mapper():
# https://issues.apache.org/jira/browse/ARROW-9664
if Version(pd.__version__) < Version("1.2.0"):
pytest.skip("Float64Dtype extension dtype missing")
data = pa.chunked_array([pa.array([1, 2, 3], pa.int64())])
assert isinstance(data, pa.ChunkedArray)
# Test with mapper function
types_mapper = {pa.int64(): pd.Int64Dtype()}.get
> result = data.to_pandas(types_mapper=types_mapper)
pyarrow/tests/test_pandas.py:4477:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/table.pxi:475: in pyarrow.lib.ChunkedArray._to_pandas
return _array_like_to_pandas(self, options, types_mapper=types_mapper)
pyarrow/array.pxi:1917: in pyarrow.lib._array_like_to_pandas
arr = dtype.__from_arrow__(obj)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/numeric.py:100:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(array,
dtype=self.numpy_dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
arr = <pyarrow.lib.Int64Array object at 0x17f6f5fc0>
[
1,
2,
3
], dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
____________________________________________________________________________________________________________
test_table_join_asof
____________________________________________________________________________________________________________
@pytest.mark.dataset
def test_table_join_asof():
t1 = pa.Table.from_pydict({
"colA": [1, 1, 5, 6, 7],
"col2": ["a", "b", "a", "b", "f"]
})
t2 = pa.Table.from_pydict({
"colB": [2, 9, 15],
"col3": ["a", "b", "g"],
"colC": [1., 3., 5.]
})
r = t1.join_asof(
t2, on="colA", by="col2", tolerance=1,
right_on="colB", right_by="col3",
)
> assert r.combine_chunks() == pa.table({
"colA": [1, 1, 5, 6, 7],
"col2": ["a", "b", "a", "b", "f"],
"colC": [1., None, None, None, None],
})
E assert pyarrow.Table...ll,null,null]] ==
pyarrow.Table...ll,null,null]]
E Use -v to get more diff
pyarrow/tests/test_table.py:2892: AssertionError
_________________________________________________________________________________________________
test_categories_with_string_pyarrow_dtype
__________________________________________________________________________________________________
tempdir =
PosixPath('/private/var/folders/cp/mk09m76d0ws97l5r7k7vdjh40000gn/T/pytest-of-lama/pytest-123/test_categories_with_string_py0')
@pytest.mark.pandas
def test_categories_with_string_pyarrow_dtype(tempdir):
# gh-33727: writing to parquet should not fail
if Version(pd.__version__) < Version("1.3.0"):
pytest.skip("PyArrow backed string data type introduced in
pandas 1.3.0")
> df1 = pd.DataFrame({"x": ["foo", "bar", "foo"]},
dtype="string[pyarrow]")
pyarrow/tests/parquet/test_pandas.py:504:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/frame.py:767:
in __init__
mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy,
typ=manager)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/internals/construction.py:503:
in dict_to_mgr
return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ,
consolidate=copy)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/internals/construction.py:119:
in arrays_to_mgr
arrays, refs = _homogenize(arrays, index, dtype)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/internals/construction.py:629:
in _homogenize
val = sanitize_array(val, index, dtype=dtype, copy=False)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/construction.py:596:
in sanitize_array
subarr = cls._from_sequence(data, dtype=dtype, copy=copy)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/string_arrow.py:199:
in _from_sequence
return cls(pa.array(result, type=pa.string(), from_pandas=True))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
self = <[TypeError("object of type 'NoneType' has no len()") raised in
repr()] ArrowStringArray object at 0x304d98580>, values =
<pyarrow.lib.StringArray object at 0x304d92980>
[
"foo",
"bar",
"foo"
]
def __init__(self, values) -> None:
_chk_pyarrow_available()
> if isinstance(values, (pa.Array, pa.ChunkedArray)) and
pa.types.is_string(
values.type
):
E AttributeError: module 'pyarrow.types' has no attribute 'is_string'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/string_arrow.py:131:
AttributeError
___________________________________________________________________________________________
test_write_to_dataset_pandas_preserve_extensiondtypes
____________________________________________________________________________________________
tempdir =
PosixPath('/private/var/folders/cp/mk09m76d0ws97l5r7k7vdjh40000gn/T/pytest-of-lama/pytest-123/test_write_to_dataset_pandas_p0')
@pytest.mark.pandas
def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir):
df = pd.DataFrame({'part': 'a', "col": [1, 2, 3]})
df['col'] = df['col'].astype("Int64")
table = pa.table(df)
pq.write_to_dataset(
table, str(tempdir / "case1"), partition_cols=['part'],
)
> result = pq.read_table(str(tempdir / "case1")).to_pandas()
pyarrow/tests/parquet/test_pandas.py:531:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
pyarrow/array.pxi:899: in pyarrow.lib._PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/table.pxi:4795: in pyarrow.lib.Table._to_pandas
df = table_to_dataframe(
pyarrow/pandas_compat.py:777: in table_to_dataframe
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
pyarrow/pandas_compat.py:1109: in _table_to_blocks
return [_reconstruct_block(item, columns, extension_columns)
pyarrow/pandas_compat.py:1109: in <listcomp>
return [_reconstruct_block(item, columns, extension_columns)
pyarrow/pandas_compat.py:737: in _reconstruct_block
pd_ext_arr = pandas_dtype.__from_arrow__(arr)
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/numeric.py:100:
in __from_arrow__
data, mask = pyarrow_array_to_numpy_and_mask(array,
dtype=self.numpy_dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_
arr = <pyarrow.lib.Int64Array object at 0x3058fa4a0>
[
1,
2,
3
], dtype = dtype('int64')
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
) -> tuple[np.ndarray, np.ndarray]:
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask
based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified
dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
> if pyarrow.types.is_null(arr.type):
E AttributeError: module 'pyarrow.types' has no attribute 'is_null'
../../../anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/arrays/arrow/_arrow_utils.py:45:
AttributeError
==============================================================================================================
warnings summary
==============================================================================================================
pyarrow/tests/test_pandas.py::TestConvertMetadata::test_empty_list_metadata
pyarrow/tests/test_pandas.py::TestConvertListTypes::test_empty_list_roundtrip
/Users/lama/anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/dtypes/missing.py:576:
DeprecationWarning: The truth value of an empty array is ambiguous. Returning
False, but in future this will result in an error. Use `array.size > 0` to
check that an array is not empty.
return lib.array_equivalent_object(left, right)
pyarrow/tests/test_pandas.py: 4 warnings
pyarrow/tests/parquet/test_data_types.py: 4 warnings
pyarrow/tests/parquet/test_pandas.py: 2 warnings
pyarrow/tests/parquet/test_parquet_file.py: 6 warnings
/Users/lama/anaconda3/envs/pyarrow-dev-310/lib/python3.10/site-packages/pandas/core/dtypes/missing.py:577:
DeprecationWarning: The truth value of an empty array is ambiguous. Returning
False, but in future this will result in an error. Use `array.size > 0` to
check that an array is not empty.
if not lib.array_equivalent_object(left[~mask], right[~mask]):
pyarrow/tests/test_pandas.py::TestConvertListTypes::test_list_view_to_pandas_with_null_values[ListViewArray]
pyarrow/tests/test_pandas.py::TestConvertListTypes::test_list_view_to_pandas_with_null_values[LargeListViewArray]
/Users/lama/workspace/arrow-new/python/pyarrow/tests/test_pandas.py:2576:
FutureWarning: Mismatched null-like values nan and None found. In a future
version, pandas equality-testing functions (e.g. assert_frame_equal) will
consider these not-matching and raise.
tm.assert_series_equal(actual, expected)
pyarrow/tests/test_pandas.py::TestConvertListTypes::test_list_view_to_pandas_multiple_chunks[ListViewArray]
pyarrow/tests/test_pandas.py::TestConvertListTypes::test_list_view_to_pandas_multiple_chunks[LargeListViewArray]
/Users/lama/workspace/arrow-new/python/pyarrow/tests/test_pandas.py:2598:
FutureWarning: Mismatched null-like values nan and None found. In a future
version, pandas equality-testing functions (e.g. assert_frame_equal) will
consider these not-matching and raise.
tm.assert_series_equal(actual, expected)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
==========================================================================================================
short test summary info
===========================================================================================================
FAILED
pyarrow/tests/test_extension_type.py::test_extension_to_pandas_storage_type[registered_period_type2]
- AttributeError: module 'pyarrow.types' has no attribute 'is_null'
FAILED
pyarrow/tests/test_pandas.py::test_convert_categories_to_array_with_string_pyarrow_dtype
- AttributeError: module 'pyarrow.types' has no attribute 'is_string'
FAILED pyarrow/tests/test_pandas.py::test_convert_to_extension_array -
AttributeError: module 'pyarrow.types' has no attribute 'is_null'
FAILED
pyarrow/tests/test_pandas.py::test_conversion_extensiontype_to_extensionarray -
AttributeError: module 'pyarrow.types' has no attribute 'is_null'
FAILED pyarrow/tests/test_pandas.py::test_to_pandas_extension_dtypes_mapping
- AttributeError: module 'pyarrow.types' has no attribute 'is_null'
FAILED pyarrow/tests/test_pandas.py::test_array_to_pandas - AttributeError:
module 'pyarrow.types' has no attribute 'is_null'
FAILED pyarrow/tests/test_pandas.py::test_to_pandas_types_mapper_index[a] -
AttributeError: module 'pyarrow.types' has no attribute 'is_timestamp'
FAILED
pyarrow/tests/test_pandas.py::test_to_pandas_types_mapper_index[index1] -
AttributeError: module 'pyarrow.types' has no attribute 'is_timestamp'
FAILED pyarrow/tests/test_pandas.py::test_array_to_pandas_types_mapper -
AttributeError: module 'pyarrow.types' has no attribute 'is_null'
FAILED
pyarrow/tests/test_pandas.py::test_chunked_array_to_pandas_types_mapper -
AttributeError: module 'pyarrow.types' has no attribute 'is_null'
FAILED pyarrow/tests/test_table.py::test_table_join_asof - assert
pyarrow.Table...ll,null,null]] == pyarrow.Table...ll,null,null]]
FAILED
pyarrow/tests/parquet/test_pandas.py::test_categories_with_string_pyarrow_dtype
- AttributeError: module 'pyarrow.types' has no attribute 'is_string'
FAILED
pyarrow/tests/parquet/test_pandas.py::test_write_to_dataset_pandas_preserve_extensiondtypes
- AttributeError: module 'pyarrow.types' has no attribute 'is_null'
======================================================================== 13
failed, 7116 passed, 515 skipped, 15 xfailed, 1 xpassed, 22 warnings in 106.98s
(0:01:46)
========================================================================
```
</details>