[
https://issues.apache.org/jira/browse/ARROW-7002?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Christian Roth updated ARROW-7002:
----------------------------------
Description:
Pandas has a nullable integer type, Int64, which does not yet seem to be
supported by Feather.
{code:python}
from pyarrow import feather
import pandas as pd
col1 = pd.Series([0, None, 1, 23]).astype('Int64')
col2 = pd.Series([1, 3, 2, 1]).astype('Int64')
df = pd.DataFrame({'a': col1, 'b': col2})
feather.write_feather(df, '/tmp/foo')
{code}
This gives the following error message:
{code:java}
---------------------------------------------------------------------------
ArrowTypeError Traceback (most recent call last)
<ipython-input-107-8cc611a30355> in <module>
----> 1 feather.write_feather(df, '/tmp/foo')
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/feather.py in
write_feather(df, dest)
181 writer = FeatherWriter(dest)
182 try:
--> 183 writer.write(df)
184 except Exception:
185 # Try to make sure the resource is closed
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/feather.py in
write(self, df)
92 # TODO(wesm): Remove this length check, see ARROW-1732
93 if len(df.columns) > 0:
---> 94 table = Table.from_pandas(df, preserve_index=False)
95 for i, name in enumerate(table.schema.names):
96 col = table[i]
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/table.pxi in
pyarrow.lib.Table.from_pandas()
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
551 if nthreads == 1:
552 arrays = [convert_column(c, f)
--> 553 for c, f in zip(columns_to_convert, convert_fields)]
554 else:
555 from concurrent import futures
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
<listcomp>(.0)
551 if nthreads == 1:
552 arrays = [convert_column(c, f)
--> 553 for c, f in zip(columns_to_convert, convert_fields)]
554 else:
555 from concurrent import futures
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
convert_column(col, field)
542 e.args += ("Conversion failed for column {0!s} with type
{1!s}"
543 .format(col.name, col.dtype),)
--> 544 raise e
545 if not field_nullable and result.null_count > 0:
546 raise ValueError("Field {} was non-nullable but pandas
column "
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
convert_column(col, field)
536
537 try:
--> 538 result = pa.array(col, type=type_, from_pandas=True,
safe=safe)
539 except (pa.ArrowInvalid,
540 pa.ArrowNotImplementedError,
ArrowTypeError: ('Did not pass numpy.dtype object', 'Conversion failed for
column a with type Int64')
{code}
xref:
[https://stackoverflow.com/questions/58571419/exporting-dataframe-with-null-able-int64-from-pandas-to-r]
was:
Pandas has a nullable integer type Int64 which does not seem to be supported by
feather yet.
{code:python}
from pyarrow import feather
import pandas as pd
col1 = pd.Series([0, None, 1, 23]).astype('Int64')
col2 = pd.Series([1, 3, 2, 1]).astype('Int64')
df = pd.DataFrame({'a': col1, 'b': col2})
feather.write_feather(df, '/tmp/foo')
{code}
Gives following error message:
{code}
---------------------------------------------------------------------------
ArrowTypeError Traceback (most recent call last)
<ipython-input-107-8cc611a30355> in <module>
----> 1 feather.write_feather(df, '/tmp/foo')
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/feather.py in
write_feather(df, dest)
181 writer = FeatherWriter(dest)
182 try:
--> 183 writer.write(df)
184 except Exception:
185 # Try to make sure the resource is closed
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/feather.py in
write(self, df)
92 # TODO(wesm): Remove this length check, see ARROW-1732
93 if len(df.columns) > 0:
---> 94 table = Table.from_pandas(df, preserve_index=False)
95 for i, name in enumerate(table.schema.names):
96 col = table[i]
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/table.pxi in
pyarrow.lib.Table.from_pandas()
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
551 if nthreads == 1:
552 arrays = [convert_column(c, f)
--> 553 for c, f in zip(columns_to_convert, convert_fields)]
554 else:
555 from concurrent import futures
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
<listcomp>(.0)
551 if nthreads == 1:
552 arrays = [convert_column(c, f)
--> 553 for c, f in zip(columns_to_convert, convert_fields)]
554 else:
555 from concurrent import futures
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
convert_column(col, field)
542 e.args += ("Conversion failed for column {0!s} with type
{1!s}"
543 .format(col.name, col.dtype),)
--> 544 raise e
545 if not field_nullable and result.null_count > 0:
546 raise ValueError("Field {} was non-nullable but pandas
column "
~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py in
convert_column(col, field)
536
537 try:
--> 538 result = pa.array(col, type=type_, from_pandas=True,
safe=safe)
539 except (pa.ArrowInvalid,
540 pa.ArrowNotImplementedError,
ArrowTypeError: ('Did not pass numpy.dtype object', 'Conversion failed for
column a with type Int64')
{code}
xref:
> Support pandas nullable integer type Int64
> ------------------------------------------
>
> Key: ARROW-7002
> URL: https://issues.apache.org/jira/browse/ARROW-7002
> Project: Apache Arrow
> Issue Type: New Feature
> Reporter: Christian Roth
> Priority: Major
>
> Pandas has a nullable integer type, Int64, which does not yet seem to be
> supported by Feather.
> {code:python}
> from pyarrow import feather
> import pandas as pd
> col1 = pd.Series([0, None, 1, 23]).astype('Int64')
> col2 = pd.Series([1, 3, 2, 1]).astype('Int64')
> df = pd.DataFrame({'a': col1, 'b': col2})
> feather.write_feather(df, '/tmp/foo')
> {code}
> This gives the following error message:
> {code:java}
> ---------------------------------------------------------------------------
> ArrowTypeError Traceback (most recent call last)
> <ipython-input-107-8cc611a30355> in <module>
> ----> 1 feather.write_feather(df, '/tmp/foo')
> ~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/feather.py in
> write_feather(df, dest)
> 181 writer = FeatherWriter(dest)
> 182 try:
> --> 183 writer.write(df)
> 184 except Exception:
> 185 # Try to make sure the resource is closed
> ~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/feather.py in
> write(self, df)
> 92 # TODO(wesm): Remove this length check, see ARROW-1732
> 93 if len(df.columns) > 0:
> ---> 94 table = Table.from_pandas(df, preserve_index=False)
> 95 for i, name in enumerate(table.schema.names):
> 96 col = table[i]
> ~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/table.pxi in
> pyarrow.lib.Table.from_pandas()
> ~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py
> in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
> 551 if nthreads == 1:
> 552 arrays = [convert_column(c, f)
> --> 553 for c, f in zip(columns_to_convert, convert_fields)]
> 554 else:
> 555 from concurrent import futures
> ~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py
> in <listcomp>(.0)
> 551 if nthreads == 1:
> 552 arrays = [convert_column(c, f)
> --> 553 for c, f in zip(columns_to_convert, convert_fields)]
> 554 else:
> 555 from concurrent import futures
> ~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py
> in convert_column(col, field)
> 542 e.args += ("Conversion failed for column {0!s} with type
> {1!s}"
> 543 .format(col.name, col.dtype),)
> --> 544 raise e
> 545 if not field_nullable and result.null_count > 0:
> 546 raise ValueError("Field {} was non-nullable but pandas
> column "
> ~/miniconda3/envs/sci36/lib/python3.6/site-packages/pyarrow/pandas_compat.py
> in convert_column(col, field)
> 536
> 537 try:
> --> 538 result = pa.array(col, type=type_, from_pandas=True,
> safe=safe)
> 539 except (pa.ArrowInvalid,
> 540 pa.ArrowNotImplementedError,
> ArrowTypeError: ('Did not pass numpy.dtype object', 'Conversion failed for
> column a with type Int64')
> {code}
> xref:
> [https://stackoverflow.com/questions/58571419/exporting-dataframe-with-null-able-int64-from-pandas-to-r]
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)