Repository: arrow
Updated Branches:
  refs/heads/master 7f048a4b8 -> 48f9780a8

ARROW-375: Fix unicode Python 3 issue in columns argument of parquet.read_table

Author: Wes McKinney <[email protected]>

Closes #204 from wesm/ARROW-375 and squashes the following commits:

9e6f2a6 [Wes McKinney] BUG: convert unicode to utf8 bytes for column filtering

Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/48f9780a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/48f9780a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/48f9780a

Branch: refs/heads/master
Commit: 48f9780a8677546cb143a09b25b0b57c1946ba07
Parents: 7f048a4
Author: Wes McKinney <[email protected]>
Authored: Fri Nov 11 14:20:36 2016 -0500
Committer: Wes McKinney <[email protected]>
Committed: Fri Nov 11 14:20:36 2016 -0500

----------------------------------------------------------------------
 python/pyarrow/parquet.pyx | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/arrow/blob/48f9780a/python/pyarrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx
index 2152f89..a6e3ac3 100644
--- a/python/pyarrow/parquet.pyx
+++ b/python/pyarrow/parquet.pyx
@@ -93,15 +93,18 @@ cdef class ParquetReader:
             Integer index of the position of the column
         """
         cdef:
-            const FileMetaData* metadata = self.reader.get().parquet_reader().metadata()
+            const FileMetaData* metadata = (self.reader.get()
+                                            .parquet_reader().metadata())
             int i = 0
 
         if self.column_idx_map is None:
             self.column_idx_map = {}
             for i in range(0, metadata.num_columns()):
-                self.column_idx_map[str(metadata.schema().Column(i).path().get().ToDotString())] = i
+                col_bytes = tobytes(metadata.schema().Column(i)
+                                    .path().get().ToDotString())
+                self.column_idx_map[col_bytes] = i
 
-        return self.column_idx_map[column_name]
+        return self.column_idx_map[tobytes(column_name)]
 
     def read_column(self, int column_index):
         cdef:
@@ -109,7 +112,8 @@ cdef class ParquetReader:
             shared_ptr[CArray] carray
 
         with nogil:
-            check_status(self.reader.get().ReadFlatColumn(column_index, &carray))
+            check_status(self.reader.get()
+                         .ReadFlatColumn(column_index, &carray))
 
         array.init(carray)
         return array
