[
https://issues.apache.org/jira/browse/ARROW-16491?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Alenka Frim updated ARROW-16491:
--------------------------------
Description:
Hello,
The safe option works as expected for scalar columns but not for list columns. To reproduce:
{code:python}
import pandas as pd
import pyarrow as pa
int_dataframe = pd.DataFrame({"array": [[1, 2]]})
float_dataframe = pd.DataFrame({"array": [[1.5, 2.3]]})
int_table = pa.Table.from_pandas(int_dataframe)
{code}
{code:python}
>>> int_table
pyarrow.Table
array: list<item: int64>
child 0, item: int64
----
array: [[[1,2]]]
{code}
{code:python}
# This succeeds, silently truncating the floats, instead of raising
# `ArrowInvalid: ... Conversion failed for column array with type ...`
>>> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
>>> table
pyarrow.Table
array: list<item: int64>
child 0, item: int64
----
array: [[[1,2]]]
{code}
Behavior for scalar values is correct:
{code:python}
int_dataframe = pd.DataFrame({"array": [1]})
float_dataframe = pd.DataFrame({"array": [1.5]})
int_table = pa.Table.from_pandas(int_dataframe)
table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)  # raises:
---------------------------------------------------------------------------
ArrowInvalid Traceback (most recent call last)
Input In [6], in <module>
----> 1 table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
2 table
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/table.pxi:1782,
in pyarrow.lib.Table.from_pandas()
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
589 return (isinstance(arr, np.ndarray) and
590 arr.flags.contiguous and
591 issubclass(arr.dtype.type, np.integer))
593 if nthreads == 1:
--> 594 arrays = [convert_column(c, f)
595 for c, f in zip(columns_to_convert, convert_fields)]
596 else:
597 arrays = []
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
in <listcomp>(.0)
589 return (isinstance(arr, np.ndarray) and
590 arr.flags.contiguous and
591 issubclass(arr.dtype.type, np.integer))
593 if nthreads == 1:
--> 594 arrays = [convert_column(c, f)
595 for c, f in zip(columns_to_convert, convert_fields)]
596 else:
597 arrays = []
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:581,
in dataframe_to_arrays.<locals>.convert_column(col, field)
576 except (pa.ArrowInvalid,
577 pa.ArrowNotImplementedError,
578 pa.ArrowTypeError) as e:
579 e.args += ("Conversion failed for column {!s} with type {!s}"
580 .format(col.name, col.dtype),)
--> 581 raise e
582 if not field_nullable and result.null_count > 0:
583 raise ValueError("Field {} was non-nullable but pandas column "
584 "had {} null values".format(str(field),
585 result.null_count))
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:575,
in dataframe_to_arrays.<locals>.convert_column(col, field)
572 type_ = field.type
574 try:
--> 575 result = pa.array(col, type=type_, from_pandas=True, safe=safe)
576 except (pa.ArrowInvalid,
577 pa.ArrowNotImplementedError,
578 pa.ArrowTypeError) as e:
579 e.args += ("Conversion failed for column {!s} with type {!s}"
580 .format(col.name, col.dtype),)
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:312, in
pyarrow.lib.array()
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:83, in
pyarrow.lib._ndarray_to_array()
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/error.pxi:99, in
pyarrow.lib.check_status()
ArrowInvalid: ('Float value 1.5 was truncated converting to int64', 'Conversion
failed for column array with type float64')
{code}
was:
Hello,
safe option is working well for scalar but not for list. To reproduce :
{code:python}
import pandas as pd
import pyarrow as pa
int_dataframe = pd.DataFrame({"array": [[1, 2]]})
float_dataframe = pd.DataFrame({"array": [[1.5, 2.3]]})
int_table = pa.Table.from_pandas(int_dataframe)
{code}
{code:python}
>>> int_table
pyarrow.Table array: list<item: int64> child 0, item: int64 ---- array:
[[[1,2]]]
{code}
{code:python}
# this is working instead of throwing a `ArrowInvalid: ... Conversion failed
for column array with type`
>>> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
>>> table
pyarrow.Table array: list<item: int64> child 0, item: int64 ---- array:
[[[1,2]]]
{code}
Behavior for scalar is correct :
{code:python}
int_dataframe = pd.DataFrame({"array": [1]})
float_dataframe = pd.DataFrame({"array": [1.5]})
int_table = pa.Table.from_pandas(int_dataframe)
table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema) # raise:
---------------------------------------------------------------------------
ArrowInvalid Traceback (most recent call last)
Input In [6], in <module>
----> 1 table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
2 table
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/table.pxi:1782,
in pyarrow.lib.Table.from_pandas()
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
589 return (isinstance(arr, np.ndarray) and
590 arr.flags.contiguous and
591 issubclass(arr.dtype.type, np.integer))
593 if nthreads == 1:
--> 594 arrays = [convert_column(c, f)
595 for c, f in zip(columns_to_convert, convert_fields)]
596 else:
597 arrays = []
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
in <listcomp>(.0)
589 return (isinstance(arr, np.ndarray) and
590 arr.flags.contiguous and
591 issubclass(arr.dtype.type, np.integer))
593 if nthreads == 1:
--> 594 arrays = [convert_column(c, f)
595 for c, f in zip(columns_to_convert, convert_fields)]
596 else:
597 arrays = []
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:581,
in dataframe_to_arrays.<locals>.convert_column(col, field)
576 except (pa.ArrowInvalid,
577 pa.ArrowNotImplementedError,
578 pa.ArrowTypeError) as e:
579 e.args += ("Conversion failed for column {!s} with type {!s}"
580 .format(col.name, col.dtype),)
--> 581 raise e
582 if not field_nullable and result.null_count > 0:
583 raise ValueError("Field {} was non-nullable but pandas column "
584 "had {} null values".format(str(field),
585 result.null_count))
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:575,
in dataframe_to_arrays.<locals>.convert_column(col, field)
572 type_ = field.type
574 try:
--> 575 result = pa.array(col, type=type_, from_pandas=True, safe=safe)
576 except (pa.ArrowInvalid,
577 pa.ArrowNotImplementedError,
578 pa.ArrowTypeError) as e:
579 e.args += ("Conversion failed for column {!s} with type {!s}"
580 .format(col.name, col.dtype),)
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:312, in
pyarrow.lib.array()
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:83, in
pyarrow.lib._ndarray_to_array()
File
~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/error.pxi:99, in
pyarrow.lib.check_status()
ArrowInvalid: ('Float value 1.5 was truncated converting to int64', 'Conversion
failed for column array with type float64')
{code}
> Table.from_pandas is doing unsafe cast for float array to int array
> -------------------------------------------------------------------
>
> Key: ARROW-16491
> URL: https://issues.apache.org/jira/browse/ARROW-16491
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Affects Versions: 6.0.1, 7.0.0
> Reporter: LOUSSOUARN Brieuc
> Priority: Minor
>
> Hello,
> The safe option works as expected for scalar columns but not for list columns. To reproduce:
> {code:python}
> import pandas as pd
> import pyarrow as pa
> int_dataframe = pd.DataFrame({"array": [[1, 2]]})
> float_dataframe = pd.DataFrame({"array": [[1.5, 2.3]]})
> int_table = pa.Table.from_pandas(int_dataframe)
> {code}
> {code:python}
> >>> int_table
> pyarrow.Table
> array: list<item: int64>
> child 0, item: int64
> ----
> array: [[[1,2]]]
> {code}
> {code:python}
> # This succeeds, silently truncating the floats, instead of raising
> # `ArrowInvalid: ... Conversion failed for column array with type ...`
> >>> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
> >>> table
> pyarrow.Table
> array: list<item: int64>
> child 0, item: int64
> ----
> array: [[[1,2]]]
> {code}
> Behavior for scalar values is correct:
> {code:python}
> int_dataframe = pd.DataFrame({"array": [1]})
> float_dataframe = pd.DataFrame({"array": [1.5]})
> int_table = pa.Table.from_pandas(int_dataframe)
> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)  # raises:
> ---------------------------------------------------------------------------
> ArrowInvalid Traceback (most recent call last)
> Input In [6], in <module>
> ----> 1 table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
> 2 table
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/table.pxi:1782,
> in pyarrow.lib.Table.from_pandas()
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
> in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
> 589 return (isinstance(arr, np.ndarray) and
> 590 arr.flags.contiguous and
> 591 issubclass(arr.dtype.type, np.integer))
> 593 if nthreads == 1:
> --> 594 arrays = [convert_column(c, f)
> 595 for c, f in zip(columns_to_convert, convert_fields)]
> 596 else:
> 597 arrays = []
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
> in <listcomp>(.0)
> 589 return (isinstance(arr, np.ndarray) and
> 590 arr.flags.contiguous and
> 591 issubclass(arr.dtype.type, np.integer))
> 593 if nthreads == 1:
> --> 594 arrays = [convert_column(c, f)
> 595 for c, f in zip(columns_to_convert, convert_fields)]
> 596 else:
> 597 arrays = []
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:581,
> in dataframe_to_arrays.<locals>.convert_column(col, field)
> 576 except (pa.ArrowInvalid,
> 577 pa.ArrowNotImplementedError,
> 578 pa.ArrowTypeError) as e:
> 579 e.args += ("Conversion failed for column {!s} with type {!s}"
> 580 .format(col.name, col.dtype),)
> --> 581 raise e
> 582 if not field_nullable and result.null_count > 0:
> 583 raise ValueError("Field {} was non-nullable but pandas column "
> 584 "had {} null values".format(str(field),
> 585 result.null_count))
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:575,
> in dataframe_to_arrays.<locals>.convert_column(col, field)
> 572 type_ = field.type
> 574 try:
> --> 575 result = pa.array(col, type=type_, from_pandas=True, safe=safe)
> 576 except (pa.ArrowInvalid,
> 577 pa.ArrowNotImplementedError,
> 578 pa.ArrowTypeError) as e:
> 579 e.args += ("Conversion failed for column {!s} with type {!s}"
> 580 .format(col.name, col.dtype),)
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:312,
> in pyarrow.lib.array()
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:83,
> in pyarrow.lib._ndarray_to_array()
> File
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/error.pxi:99,
> in pyarrow.lib.check_status()
> ArrowInvalid: ('Float value 1.5 was truncated converting to int64',
> 'Conversion failed for column array with type float64')
> {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.7#820007)