[ 
https://issues.apache.org/jira/browse/ARROW-16491?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17532779#comment-17532779
 ] 

Alenka Frim commented on ARROW-16491:
-------------------------------------

I noticed that this works fine with a normal array:
{code:python}
>>> import pandas as pd
>>> import pyarrow as pa

# See here there are no double brackets
>>> int_dataframe = pd.DataFrame({"array": [1, 2]})
>>> float_dataframe = pd.DataFrame({"array": [1.5, 2.3]})

# Error is raised
>>> int_table = pa.Table.from_pandas(int_dataframe)
>>> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema) 
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pyarrow/table.pxi", line 3457, in pyarrow.lib.Table.from_pandas
    arrays, schema, n_rows = dataframe_to_arrays(
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
594, in dataframe_to_arrays
    arrays = [convert_column(c, f)
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
594, in <listcomp>
    arrays = [convert_column(c, f)
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
581, in convert_column
    raise e
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
575, in convert_column
    result = pa.array(col, type=type_, from_pandas=True, safe=safe)
  File "pyarrow/array.pxi", line 312, in pyarrow.lib.array
    return _ndarray_to_array(values, mask, type, c_from_pandas, safe,
  File "pyarrow/array.pxi", line 83, in pyarrow.lib._ndarray_to_array
    check_status(NdarrayToArrow(pool, values, mask, from_pandas,
  File "pyarrow/error.pxi", line 100, in pyarrow.lib.check_status
    raise ArrowInvalid(message)
pyarrow.lib.ArrowInvalid: ('Float value 1.5 was truncated converting to 
int64\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc:177
  CheckFloatToIntTruncation(batch[0], 
*out)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/compute/exec.cc:701  
kernel_->exec(kernel_ctx_, batch, 
&out)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/compute/exec.cc:642  
ExecuteBatch(batch, 
listener)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/compute/function.cc:255  
executor->Execute(implicitly_cast_args, 
&listener)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/compute/cast.cc:229  
Cast(Datum(value), to_type, options, 
ctx)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:351  
compute::Cast(*MakeArray(tmp_data), out_type, cast_options, 
&context)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:470
  CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, type_, 
cast_options_, pool_, 
data)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:281 
 
ConvertData<ArrowType>(&data)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:856
  converter.Convert()', 'Conversion failed for column array with type float64')
{code}
So I think the behaviour you see is due to the fact that, in your 
example, the converted table columns are ListArrays.

But I do get a different error in this next case:
{code:python}
>>> int_dataframe = pd.DataFrame({"array": [[1]]})
>>> float_dataframe = pd.DataFrame({"array": [[1.5]]})
>>> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pyarrow/table.pxi", line 3457, in pyarrow.lib.Table.from_pandas
    arrays, schema, n_rows = dataframe_to_arrays(
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
594, in dataframe_to_arrays
    arrays = [convert_column(c, f)
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
594, in <listcomp>
    arrays = [convert_column(c, f)
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
581, in convert_column
    raise e
  File "/Users/alenkafrim/repos/arrow/python/pyarrow/pandas_compat.py", line 
575, in convert_column
    result = pa.array(col, type=type_, from_pandas=True, safe=safe)
  File "pyarrow/array.pxi", line 312, in pyarrow.lib.array
    return _ndarray_to_array(values, mask, type, c_from_pandas, safe,
  File "pyarrow/array.pxi", line 83, in pyarrow.lib._ndarray_to_array
    check_status(NdarrayToArrow(pool, values, mask, from_pandas,
  File "pyarrow/error.pxi", line 100, in pyarrow.lib.check_status
    raise ArrowInvalid(message)
pyarrow.lib.ArrowInvalid: ('Could not convert [1.5] with type list: tried to 
convert to 
int64\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/python_to_arrow.cc:623
  PyValue::Convert(this->primitive_type_, this->options_, 
value)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/iterators.h:57  
func(objects[i], i, 
&keep_going)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/python_to_arrow.cc:1182
  converter->Extend(seq, 
size)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:325 
 ConvertPySequence(reinterpret_cast<PyObject*>(arr_), 
reinterpret_cast<PyObject*>(mask_), py_options, 
pool_)\n/Users/alenkafrim/repos/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:856
  converter.Convert()', 'Conversion failed for column array with type object')
{code}
 

> Table.from_pandas is doing unsafe cast for float array to int array
> -------------------------------------------------------------------
>
>                 Key: ARROW-16491
>                 URL: https://issues.apache.org/jira/browse/ARROW-16491
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>    Affects Versions: 6.0.1, 7.0.0
>            Reporter: LOUSSOUARN Brieuc
>            Priority: Minor
>
> Hello,
> The safe option works well for scalars but not for lists. To reproduce:
> {code:python}
> import pandas as pd
> import pyarrow as pa
> int_dataframe = pd.DataFrame({"array": [[1, 2]]})
> float_dataframe = pd.DataFrame({"array": [[1.5, 2.3]]})
> int_table = pa.Table.from_pandas(int_dataframe)
> {code}
> {code:python}
> >>> int_table
> pyarrow.Table
> array: list<item: int64>
>   child 0, item: int64
> ----
> array: [[[1,2]]]
> {code}
> {code:python}
> # this works instead of raising an `ArrowInvalid: ... Conversion failed 
> for column array with type`
> >>> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema) 
> >>> table
> pyarrow.Table
> array: list<item: int64>
>   child 0, item: int64
> ----
> array: [[[1,2]]]
> {code}
> Behavior for scalars is correct:
> {code:python}
> int_dataframe = pd.DataFrame({"array": [1]})
> float_dataframe = pd.DataFrame({"array": [1.5]})
> int_table = pa.Table.from_pandas(int_dataframe)
> table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema) # 
> raise:
> ---------------------------------------------------------------------------
> ArrowInvalid                              Traceback (most recent call last)
> Input In [6], in <module>
> ----> 1 table = pa.Table.from_pandas(float_dataframe, schema=int_table.schema)
>       2 table
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/table.pxi:1782, 
> in pyarrow.lib.Table.from_pandas()
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
>  in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
>     589     return (isinstance(arr, np.ndarray) and
>     590             arr.flags.contiguous and
>     591             issubclass(arr.dtype.type, np.integer))
>     593 if nthreads == 1:
> --> 594     arrays = [convert_column(c, f)
>     595               for c, f in zip(columns_to_convert, convert_fields)]
>     596 else:
>     597     arrays = []
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:594,
>  in <listcomp>(.0)
>     589     return (isinstance(arr, np.ndarray) and
>     590             arr.flags.contiguous and
>     591             issubclass(arr.dtype.type, np.integer))
>     593 if nthreads == 1:
> --> 594     arrays = [convert_column(c, f)
>     595               for c, f in zip(columns_to_convert, convert_fields)]
>     596 else:
>     597     arrays = []
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:581,
>  in dataframe_to_arrays.<locals>.convert_column(col, field)
>     576 except (pa.ArrowInvalid,
>     577         pa.ArrowNotImplementedError,
>     578         pa.ArrowTypeError) as e:
>     579     e.args += ("Conversion failed for column {!s} with type {!s}"
>     580                .format(col.name, col.dtype),)
> --> 581     raise e
>     582 if not field_nullable and result.null_count > 0:
>     583     raise ValueError("Field {} was non-nullable but pandas column "
>     584                      "had {} null values".format(str(field),
>     585                                                  result.null_count))
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py:575,
>  in dataframe_to_arrays.<locals>.convert_column(col, field)
>     572     type_ = field.type
>     574 try:
> --> 575     result = pa.array(col, type=type_, from_pandas=True, safe=safe)
>     576 except (pa.ArrowInvalid,
>     577         pa.ArrowNotImplementedError,
>     578         pa.ArrowTypeError) as e:
>     579     e.args += ("Conversion failed for column {!s} with type {!s}"
>     580                .format(col.name, col.dtype),)
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:312, 
> in pyarrow.lib.array()
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/array.pxi:83, 
> in pyarrow.lib._ndarray_to_array()
> File 
> ~/Documents/chouket/.venv/lib/python3.9/site-packages/pyarrow/error.pxi:99, 
> in pyarrow.lib.check_status()
> ArrowInvalid: ('Float value 1.5 was truncated converting to int64', 
> 'Conversion failed for column array with type float64')
> {code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to