Hi,
Is it possible to read only selected columns from a DataFrame that has
hierarchical levels in its columns, by passing tuples to the 'columns' argument?
Example:
pd.read_parquet('file.parquet', engine='pyarrow', columns=[('level_0_key',
'level_1_key')])
When I try to do this with version '0.17.1' of the pyarrow package, it
raises the following error:
"
AttributeError Traceback (most recent call last)
in
----> 1 pd.read_parquet('test.parquet', columns=[('IBOV Index', 'PX_LAST')])
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pandas\io\parquet.py
in read_parquet(path, engine, columns, **kwargs)
308
309 impl = get_engine(engine)
--> 310 return impl.read(path, columns=columns, **kwargs)
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pandas\io\parquet.py
in read(self, path, columns, **kwargs)
122
123 kwargs["use_pandas_metadata"] = True
--> 124 result = self.api.parquet.read_table(
125 path, columns=columns, **kwargs
126 ).to_pandas()
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pyarrow\array.pxi
in pyarrow.lib._PandasConvertible.to_pandas()
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pyarrow\table.pxi
in pyarrow.lib.Table._to_pandas()
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pyarrow\pandas_compat.py
in table_to_blockmanager(options, table, categories, ignore_metadata,
types_mapper)
763
764 _check_data_column_metadata_consistency(all_columns)
--> 765 columns = _deserialize_column_index(table, all_columns,
column_indexes)
766 blocks = _table_to_blocks(options, table, categories,
ext_columns_dtypes)
767
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pyarrow\pandas_compat.py
in _deserialize_column_index(block_table, all_columns, column_indexes)
875 # if we're reconstructing the index
876 if len(column_indexes) > 0:
--> 877 columns = _reconstruct_columns_from_metadata(columns,
column_indexes)
878
879 # ARROW-1751: flatten a single level column MultiIndex for
pandas 0.21.0
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pyarrow\pandas_compat.py
in _reconstruct_columns_from_metadata(columns, column_indexes)
1068
1069 # Convert each level to the dtype provided in the metadata
-> 1070 levels_dtypes = [
1071 (level, col_index.get('pandas_type', str(level.dtype)))
1072 for level, col_index in zip_longest(
~\AppData\Local\Continuum\anaconda3\envs\vam_trading\lib\site-packages\pyarrow\pandas_compat.py
in <listcomp>(.0)
1069 # Convert each level to the dtype provided in the metadata
1070 levels_dtypes = [
-> 1071 (level, col_index.get('pandas_type', str(level.dtype)))
1072 for level, col_index in zip_longest(
1073 levels, column_indexes, fillvalue={}
AttributeError: 'dict' object has no attribute 'dtype'
"
Thanks in advance,
Rafael.