[ https://issues.apache.org/jira/browse/ARROW-7663?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17182116#comment-17182116 ]
Andrew Wieteska commented on ARROW-7663:
----------------------------------------
On 1.0 master I get an ArrowTypeError:
{code:java}
In [10]: pa.Table.from_pandas(pd.DataFrame({"a": ['a', 1]}))
---------------------------------------------------------------------------
ArrowTypeError Traceback (most recent call last)
<ipython-input-10-36fa38ffb14b> in <module>
----> 1 pa.Table.from_pandas(pd.DataFrame({"a": ['a', 1]}))
~/git_repo/arrow/python/pyarrow/table.pxi in pyarrow.lib.Table.from_pandas()
1392 """
1393 from pyarrow.pandas_compat import dataframe_to_arrays
-> 1394 arrays, schema = dataframe_to_arrays(
1395 df,
1396 schema=schema,
~/git_repo/arrow/python/pyarrow/pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
577 if nthreads == 1:
578 arrays = [convert_column(c, f)
--> 579 for c, f in zip(columns_to_convert, convert_fields)]
580 else:
581 from concurrent import futures
~/git_repo/arrow/python/pyarrow/pandas_compat.py in <listcomp>(.0)
577 if nthreads == 1:
578 arrays = [convert_column(c, f)
--> 579 for c, f in zip(columns_to_convert, convert_fields)]
580 else:
581 from concurrent import futures
~/git_repo/arrow/python/pyarrow/pandas_compat.py in convert_column(col, field)
563 e.args += ("Conversion failed for column {!s} with type {!s}"
564 .format(col.name, col.dtype),)
--> 565 raise e
566 if not field_nullable and result.null_count > 0:
567 raise ValueError("Field {} was non-nullable but pandas column "
~/git_repo/arrow/python/pyarrow/pandas_compat.py in convert_column(col, field)
557
558 try:
--> 559 result = pa.array(col, type=type_, from_pandas=True, safe=safe)
560 except (pa.ArrowInvalid,
561 pa.ArrowNotImplementedError,
~/git_repo/arrow/python/pyarrow/array.pxi in pyarrow.lib.array()
265 values, type = pandas_api.compat.get_datetimetz_type(
266 values, obj.dtype, type)
--> 267 return _ndarray_to_array(values, mask, type, c_from_pandas, safe,
268 pool)
269 else:
~/git_repo/arrow/python/pyarrow/array.pxi in pyarrow.lib._ndarray_to_array()
80
81 with nogil:
---> 82 check_status(NdarrayToArrow(pool, values, mask, from_pandas,
83 c_type, cast_options, &chunked_out))
84
~/git_repo/arrow/python/pyarrow/error.pxi in pyarrow.lib.check_status()
105 raise ArrowNotImplementedError(message)
106 elif status.IsTypeError():
--> 107 raise ArrowTypeError(message)
108 elif status.IsCapacityError():
109 raise ArrowCapacityError(message)
ArrowTypeError: ("Expected a bytes object, got a 'int' object", 'Conversion failed for column a with type object')
{code}
> [Python] from_pandas gives TypeError instead of ArrowTypeError in some cases
> ----------------------------------------------------------------------------
>
> Key: ARROW-7663
> URL: https://issues.apache.org/jira/browse/ARROW-7663
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Affects Versions: 0.15.1
> Reporter: David Li
> Priority: Minor
> Fix For: 2.0.0
>
>
> from_pandas sometimes raises a TypeError with an uninformative error message
> rather than an ArrowTypeError with the full, informative type error for
> mixed-type array columns:
> {noformat}
> >>> pa.Table.from_pandas(pd.DataFrame({"a": ['a', 1]}))
> Traceback (most recent call last):
>   File "<stdin>", line 1, in <module>
>   File "pyarrow/table.pxi", line 1177, in pyarrow.lib.Table.from_pandas
>   File "/Users/lidavidm/Flight/arrow/build/python/lib.macosx-10.12-x86_64-3.7/pyarrow/pandas_compat.py", line 575, in dataframe_to_arrays
>     for c, f in zip(columns_to_convert, convert_fields)]
>   File "/Users/lidavidm/Flight/arrow/build/python/lib.macosx-10.12-x86_64-3.7/pyarrow/pandas_compat.py", line 575, in <listcomp>
>     for c, f in zip(columns_to_convert, convert_fields)]
>   File "/Users/lidavidm/Flight/arrow/build/python/lib.macosx-10.12-x86_64-3.7/pyarrow/pandas_compat.py", line 566, in convert_column
>     raise e
>   File "/Users/lidavidm/Flight/arrow/build/python/lib.macosx-10.12-x86_64-3.7/pyarrow/pandas_compat.py", line 560, in convert_column
>     result = pa.array(col, type=type_, from_pandas=True, safe=safe)
>   File "pyarrow/array.pxi", line 265, in pyarrow.lib.array
>   File "pyarrow/array.pxi", line 80, in pyarrow.lib._ndarray_to_array
>   File "pyarrow/error.pxi", line 107, in pyarrow.lib.check_status
> pyarrow.lib.ArrowTypeError: ("Expected a bytes object, got a 'int' object", 'Conversion failed for column a with type object')
> >>> pa.Table.from_pandas(pd.DataFrame({"a": [1, 'a']}))
> Traceback (most recent call last):
>   File "<stdin>", line 1, in <module>
>   File "pyarrow/table.pxi", line 1177, in pyarrow.lib.Table.from_pandas
>   File "/Users/lidavidm/Flight/arrow/build/python/lib.macosx-10.12-x86_64-3.7/pyarrow/pandas_compat.py", line 575, in dataframe_to_arrays
>     for c, f in zip(columns_to_convert, convert_fields)]
>   File "/Users/lidavidm/Flight/arrow/build/python/lib.macosx-10.12-x86_64-3.7/pyarrow/pandas_compat.py", line 575, in <listcomp>
>     for c, f in zip(columns_to_convert, convert_fields)]
>   File "/Users/lidavidm/Flight/arrow/build/python/lib.macosx-10.12-x86_64-3.7/pyarrow/pandas_compat.py", line 560, in convert_column
>     result = pa.array(col, type=type_, from_pandas=True, safe=safe)
>   File "pyarrow/array.pxi", line 265, in pyarrow.lib.array
>   File "pyarrow/array.pxi", line 80, in pyarrow.lib._ndarray_to_array
> TypeError: an integer is required (got type str)
> {noformat}
> Noticed on 0.15.1 and on master when we tried to upgrade. On 0.14.1, both
> cases gave ArrowTypeError.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)