evangriffiths opened a new issue, #36392:
URL: https://github.com/apache/arrow/issues/36392
### Describe the bug, including details regarding any error messages, version, and platform.
```
import tempfile
import numpy as np
import pandas as pd
df = pd.DataFrame(
    {
        "timedelta": pd.to_timedelta(
            np.random.choice([8, 4, 1, 2, 3], size=10), unit="days"
        ),
    }
)
with tempfile.TemporaryDirectory() as tmpdir:
    path = f"{tmpdir}/test.parquet"
    df.to_parquet(path, engine="fastparquet")
    df = pd.read_parquet(path, engine="pyarrow")
```
Gives the trace:
```
Traceback (most recent call last):
File "/Users/evan/scratch/write_read_parquet_nrows.py", line 17, in
<module>
df = pd.read_parquet(path, engine="pyarrow")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pandas/io/parquet.py",
line 509, in read_parquet
return impl.read(
^^^^^^^^^^
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pandas/io/parquet.py",
line 230, in read
result = pa_table.to_pandas(**to_pandas_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pyarrow/array.pxi", line 830, in
pyarrow.lib._PandasConvertible.to_pandas
File "pyarrow/table.pxi", line 3990, in pyarrow.lib.Table._to_pandas
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pyarrow/pandas_compat.py",
line 820, in table_to_blockmanager
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pyarrow/pandas_compat.py",
line 1169, in _table_to_blocks
result = pa.lib.table_to_blocks(options, block_table, categories,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pyarrow/table.pxi", line 2646, in pyarrow.lib.table_to_blocks
ValueError: hour must be in 0..23
(synth) synth % python /Users/evan/scratch/write_read_parquet_nrows.py
Traceback (most recent call last):
File "/Users/evan/scratch/write_read_parquet_nrows.py", line 18, in
<module>
read_data_input(path, nrows=1)
File
"/Users/evan/hazy_dev/synth/configurator/lib/hazy_configurator/utils/data/__init__.py",
line 44, in read_data_input
return read_func(location, dtypes, nrows)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/evan/hazy_dev/synth/configurator/lib/hazy_configurator/utils/data/parquet.py",
line 13, in read_parquet
df = pa.Table.from_batches([first_nrows]).to_pandas()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pyarrow/array.pxi", line 830, in
pyarrow.lib._PandasConvertible.to_pandas
File "pyarrow/table.pxi", line 3990, in pyarrow.lib.Table._to_pandas
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pyarrow/pandas_compat.py",
line 820, in table_to_blockmanager
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pyarrow/pandas_compat.py",
line 1169, in _table_to_blocks
result = pa.lib.table_to_blocks(options, block_table, categories,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pyarrow/table.pxi", line 2646, in pyarrow.lib.table_to_blocks
ValueError: hour must be in 0..23
(synth) synth % python /Users/evan/scratch/write_read_parquet_nrows.py
Traceback (most recent call last):
File "/Users/evan/scratch/write_read_parquet_nrows.py", line 16, in
<module>
df = pd.read_parquet(path, engine="pyarrow")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pandas/io/parquet.py",
line 509, in read_parquet
return impl.read(
^^^^^^^^^^
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pandas/io/parquet.py",
line 230, in read
result = pa_table.to_pandas(**to_pandas_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pyarrow/array.pxi", line 830, in
pyarrow.lib._PandasConvertible.to_pandas
File "pyarrow/table.pxi", line 3990, in pyarrow.lib.Table._to_pandas
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pyarrow/pandas_compat.py",
line 820, in table_to_blockmanager
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/evan/miniconda3/envs/synth/lib/python3.11/site-packages/pyarrow/pandas_compat.py",
line 1169, in _table_to_blocks
result = pa.lib.table_to_blocks(options, block_table, categories,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pyarrow/table.pxi", line 2646, in pyarrow.lib.table_to_blocks
ValueError: hour must be in 0..23
```
### Component(s)
Python
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]