Wes McKinney created ARROW-10147:
------------------------------------

             Summary: [Python] Constructing pandas metadata fails if an Index name is not JSON-serializable by default
                 Key: ARROW-10147
                 URL: https://issues.apache.org/jira/browse/ARROW-10147
             Project: Apache Arrow
          Issue Type: Bug
          Components: Python
            Reporter: Wes McKinney
             Fix For: 2.0.0


Originally reported in https://github.com/apache/arrow/issues/8270

Here's a minimal reproduction:

{code}
In [24]: idx = pd.RangeIndex(0, 4, name=np.int64(6))                            
                   

In [25]: df = pd.DataFrame(index=idx)                                           
                   

In [26]: pa.table(df)                                                           
                   
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-26-957dcd80b415> in <module>
----> 1 pa.table(df)

~/code/arrow/python/pyarrow/table.pxi in pyarrow.lib.table()

~/code/arrow/python/pyarrow/table.pxi in pyarrow.lib.Table.from_pandas()

~/code/arrow/python/pyarrow/pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
    604     pandas_metadata = construct_metadata(df, column_names, index_columns,
    605                                          index_descriptors, preserve_index,
--> 606                                          types)
    607     metadata = deepcopy(schema.metadata) if schema.metadata else dict()
    608     metadata.update(pandas_metadata)

~/code/arrow/python/pyarrow/pandas_compat.py in construct_metadata(df, column_names, index_levels, index_descriptors, preserve_index, types)
    243                 'version': pa.__version__
    244             },
--> 245             'pandas_version': _pandas_api.version
    246         }).encode('utf8')
    247     }

~/miniconda/envs/arrow-3.7/lib/python3.7/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    229         cls is None and indent is None and separators is None and
    230         default is None and not sort_keys and not kw):
--> 231         return _default_encoder.encode(obj)
    232     if cls is None:
    233         cls = JSONEncoder

~/miniconda/envs/arrow-3.7/lib/python3.7/json/encoder.py in encode(self, o)
    197         # exceptions aren't as detailed.  The list call should be roughly
    198         # equivalent to the PySequence_Fast that ''.join() would do.
--> 199         chunks = self.iterencode(o, _one_shot=True)
    200         if not isinstance(chunks, (list, tuple)):
    201             chunks = list(chunks)

~/miniconda/envs/arrow-3.7/lib/python3.7/json/encoder.py in iterencode(self, o, _one_shot)
    255                 self.key_separator, self.item_separator, self.sort_keys,
    256                 self.skipkeys, _one_shot)
--> 257         return _iterencode(o, 0)
    258 
    259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,

~/miniconda/envs/arrow-3.7/lib/python3.7/json/encoder.py in default(self, o)
    177 
    178         """
--> 179         raise TypeError(f'Object of type {o.__class__.__name__} '
    180                         f'is not JSON serializable')
    181 

TypeError: Object of type int64 is not JSON serializable
{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to