[ https://issues.apache.org/jira/browse/ARROW-10855?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zhenghui Jin updated ARROW-10855: --------------------------------- Description: After upgrading NumPy to version 1.20.0rc1, pandas' .to_parquet() raises an ArrowTypeError. NumPy 1.19.4, Python 3.7.9, macOS: {code:java} Python 3.7.9 (default, Nov 20 2020, 23:58:42) [Clang 12.0.0 (clang-1200.0.32.27)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import numpy as np >>> import pandas as pd >>> np.__version__ '1.19.4' >>> pd.DataFrame({'i': [1, 2, 3, np.nan]}, >>> dtype='Int64').to_parquet('nullint.parquet') >>> {code} NumPy 1.20.0rc1, Python 3.7.9, macOS: {code:java} Python 3.7.9 (default, Nov 20 2020, 23:58:42) [Clang 12.0.0 (clang-1200.0.32.27)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import numpy as np >>> import pandas as pd >>> np.__version__ '1.20.0rc1' >>> pd.DataFrame({'i': [1, 2, 3, np.nan]}, >>> dtype='Int64').to_parquet('nullint.parquet') Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/local/lib/python3.7/site-packages/pandas/util/_decorators.py", line 199, in wrapper return func(*args, **kwargs) File "/usr/local/lib/python3.7/site-packages/pandas/core/frame.py", line 2372, in to_parquet **kwargs, File "/usr/local/lib/python3.7/site-packages/pandas/io/parquet.py", line 276, in to_parquet **kwargs, File "/usr/local/lib/python3.7/site-packages/pandas/io/parquet.py", line 101, in write table = self.api.Table.from_pandas(df, **from_pandas_kwargs) File "pyarrow/table.pxi", line 1394, in pyarrow.lib.Table.from_pandas File "/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", line 588, in dataframe_to_arrays for c, f in zip(columns_to_convert, convert_fields)] File "/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", line 588, in <listcomp> for c, f in zip(columns_to_convert, convert_fields)] File "/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", line 574, in convert_column raise e File 
"/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", line 568, in convert_column result = pa.array(col, type=type_, from_pandas=True, safe=safe) File "pyarrow/array.pxi", line 242, in pyarrow.lib.array File "pyarrow/array.pxi", line 110, in pyarrow.lib._handle_arrow_array_protocol File "/usr/local/lib/python3.7/site-packages/pandas/core/arrays/masked.py", line 227, in __arrow_array__ return pa.array(self._data, mask=self._mask, type=type) File "pyarrow/array.pxi", line 292, in pyarrow.lib.array File "pyarrow/array.pxi", line 79, in pyarrow.lib._ndarray_to_array File "pyarrow/array.pxi", line 67, in pyarrow.lib._ndarray_to_type File "pyarrow/error.pxi", line 107, in pyarrow.lib.check_status pyarrow.lib.ArrowTypeError: ('Did not pass numpy.dtype object', 'Conversion failed for column i with type Int64') >>> {code} was: After upgrading numpy to 1.20.0rc1 version, pandas .to_parquet() will raise ArrowTypeError. NumPy 1.19.4, Python 3.7.9, macos: {code:java} Python 3.7.9 (default, Nov 20 2020, 23:58:42) [Clang 12.0.0 (clang-1200.0.32.27)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import numpy as np >>> import pandas as pd >>> np.__version__ '1.19.4' >>> pd.DataFrame({'i': [1, 2, 3, np.nan]}, >>> dtype='Int64').to_parquet('nullint.parquet') >>> {code} NumPy 1.20.0rc1, Python 3.7.9, macos: {code:java} Python 3.7.9 (default, Nov 20 2020, 23:58:42) [Clang 12.0.0 (clang-1200.0.32.27)] on darwin Type "help", "copyright", "credits" or "license" for more information. 
>>> import numpy as np >>> import pandas as pd >>> np.__version__ '1.19.4' >>> pd.DataFrame({'i': [1, 2, 3, np.nan]}, >>> dtype='Int64').to_parquet('nullint.parquet') >>> {code} > [Python][Numpy] ArrowTypeError after upgrading NumPy to 1.20.0rc1 > ----------------------------------------------------------------- > > Key: ARROW-10855 > URL: https://issues.apache.org/jira/browse/ARROW-10855 > Project: Apache Arrow > Issue Type: Bug > Components: Python > Affects Versions: 2.0.0 > Environment: macOS Big Sur 11.0.1 > Reporter: Zhenghui Jin > Priority: Major > > After upgrading numpy to 1.20.0rc1 version, pandas .to_parquet() will raise > ArrowTypeError. > NumPy 1.19.4, Python 3.7.9, macos: > > {code:java} > Python 3.7.9 (default, Nov 20 2020, 23:58:42) > [Clang 12.0.0 (clang-1200.0.32.27)] on darwin > Type "help", "copyright", "credits" or "license" for more information. > >>> import numpy as np > >>> import pandas as pd > >>> np.__version__ > '1.19.4' > >>> pd.DataFrame({'i': [1, 2, 3, np.nan]}, > >>> dtype='Int64').to_parquet('nullint.parquet') > >>> > {code} > NumPy 1.20.0rc1, Python 3.7.9, macos: > {code:java} > Python 3.7.9 (default, Nov 20 2020, 23:58:42) > [Clang 12.0.0 (clang-1200.0.32.27)] on darwin > Type "help", "copyright", "credits" or "license" for more information. 
> >>> import numpy as np > >>> import pandas as pd > >>> np.__version__ > '1.20.0rc1' > >>> pd.DataFrame({'i': [1, 2, 3, np.nan]}, > >>> dtype='Int64').to_parquet('nullint.parquet') > Traceback (most recent call last): > File "<stdin>", line 1, in <module> > File "/usr/local/lib/python3.7/site-packages/pandas/util/_decorators.py", > line 199, in wrapper > return func(*args, **kwargs) > File "/usr/local/lib/python3.7/site-packages/pandas/core/frame.py", line > 2372, in to_parquet > **kwargs, > File "/usr/local/lib/python3.7/site-packages/pandas/io/parquet.py", line > 276, in to_parquet > **kwargs, > File "/usr/local/lib/python3.7/site-packages/pandas/io/parquet.py", line > 101, in write > table = self.api.Table.from_pandas(df, **from_pandas_kwargs) > File "pyarrow/table.pxi", line 1394, in pyarrow.lib.Table.from_pandas > File "/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", > line 588, in dataframe_to_arrays > for c, f in zip(columns_to_convert, convert_fields)] > File "/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", > line 588, in <listcomp> > for c, f in zip(columns_to_convert, convert_fields)] > File "/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", > line 574, in convert_column > raise e > File "/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py", > line 568, in convert_column > result = pa.array(col, type=type_, from_pandas=True, safe=safe) > File "pyarrow/array.pxi", line 242, in pyarrow.lib.array > File "pyarrow/array.pxi", line 110, in > pyarrow.lib._handle_arrow_array_protocol > File "/usr/local/lib/python3.7/site-packages/pandas/core/arrays/masked.py", > line 227, in __arrow_array__ > return pa.array(self._data, mask=self._mask, type=type) > File "pyarrow/array.pxi", line 292, in pyarrow.lib.array > File "pyarrow/array.pxi", line 79, in pyarrow.lib._ndarray_to_array > File "pyarrow/array.pxi", line 67, in pyarrow.lib._ndarray_to_type > File "pyarrow/error.pxi", line 107, in 
pyarrow.lib.check_status > pyarrow.lib.ArrowTypeError: ('Did not pass numpy.dtype object', 'Conversion > failed for column i with type Int64') > >>> > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)