This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
     new d64e144  ARROW-2170: [Python] construct_metadata fails on reading files where no index was preserved
d64e144 is described below

commit d64e144cd4de03ad517f0d38ea72c2f308a8ea35
Author: Uwe L. Korn <uw...@xhochy.com>
AuthorDate: Mon Feb 19 11:14:15 2018 -0500

    ARROW-2170: [Python] construct_metadata fails on reading files where no index was preserved

    cc @cpcloud The result was that we only persisted empty Pandas metadata for these files.

    Author: Uwe L. Korn <uw...@xhochy.com>

    Closes #1623 from xhochy/ARROW-2170 and squashes the following commits:

    ba1ce3f3 [Uwe L. Korn] ARROW-2170: [Python] construct_metadata fails on reading files where no index was preserved
---
 python/pyarrow/pandas_compat.py      | 7 +++++--
 python/pyarrow/tests/test_parquet.py | 3 +++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index f5e56a9..e8fa83f 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -197,8 +197,11 @@ def construct_metadata(df, column_names, index_levels, index_column_names,
     -------
     dict
     """
-    df_types = types[:-len(index_levels)]
-    index_types = types[-len(index_levels):]
+    # Use ntypes instead of Python shorthand notation [:-len(x)] as [:-0]
+    # behaves differently to what we want.
+    ntypes = len(types)
+    df_types = types[:ntypes - len(index_levels)]
+    index_types = types[ntypes - len(index_levels):]
 
     column_metadata = [
         get_column_metadata(
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index c49f3d3..bd76feb 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -215,6 +215,9 @@ def test_pandas_parquet_2_0_rountrip_read_pandas_no_index_written(tmpdir):
     arrow_table = pa.Table.from_pandas(df, preserve_index=False)
     js = json.loads(arrow_table.schema.metadata[b'pandas'].decode('utf8'))
     assert not js['index_columns']
+    # ARROW-2170
+    # While index_columns should be empty, columns needs to be filled still.
+    assert js['columns']
 
     _write_table(arrow_table, filename.strpath, version="2.0",
                  coerce_timestamps='ms')

--
To stop receiving notification emails like this one, please contact
w...@apache.org.