I have included some code below that shows the context in which the statistics
are being retrieved, but basically I am trying to do the following:
// Excerpt of the inner loop body from the full listing further below; the
// closing brace of this `if` is omitted in the excerpt.
// Fetch the statistics object attached to the current column chunk.
std::shared_ptr<parquet::RowGroupStatistics> statistics =
columnMetaData->statistics();
// Only read min/max when the statistics object reports that it has them.
if(statistics->HasMinMax()){
// Store the values returned by EncodeMin()/EncodeMax() for this
// (column, row group) pair.
// NOTE(review): presumably these are the raw encoded bytes of the column's
// physical type rather than printable text — confirm against the Parquet docs.
minStrings[blazingColumnIndex][rowGroupIndex] = statistics->EncodeMin();
maxStrings[blazingColumnIndex][rowGroupIndex] = statistics->EncodeMax();
I have looked at the values returned by statistics->EncodeMin(), but I am not
sure how to interpret them. What is the proper way of getting this value into
an Int or Long or whatever C type represents the underlying data? What is
the most concise way of retrieving the min and max values of every column
in every row group inside of a parquet file?
Any help is greatly appreciated.
Felipe Aramburu
// Collect the encoded min/max statistics of every mapped column in every row
// group of the file.
//
// For each (blazingColumnIndex, rowGroupIndex) pair, minStrings/maxStrings
// receive the values returned by EncodeMin()/EncodeMax() when usable statistics
// exist, and the placeholder strings "min"/"max" otherwise.
// NOTE(review): EncodeMin()/EncodeMax() presumably return the raw encoded bytes
// of the column's physical type, not printable text — confirm against the
// Parquet C++ statistics documentation before interpreting them.
for (int rowGroupIndex = 0; rowGroupIndex < num_row_groups; rowGroupIndex++) {
    std::shared_ptr<parquet::RowGroupReader> groupReader =
        parquet_reader->RowGroup(rowGroupIndex);
    const parquet::RowGroupMetaData* rowGroupMetadata = groupReader->metadata();

    // The column count is read once per row group and converted to int so the
    // loop condition does not mix signed and unsigned operands.
    for (int blazingColumnIndex = 0,
             numBlazingColumns = static_cast<int>(blazingColumnToParquetColumn.size());
         blazingColumnIndex < numBlazingColumns; blazingColumnIndex++) {
        std::unique_ptr<parquet::ColumnChunkMetaData> columnMetaData =
            rowGroupMetadata->ColumnChunk(blazingColumnToParquetColumn[blazingColumnIndex]);

        // Only ask for the statistics object when the metadata reports that
        // statistics are set; otherwise the pointer stays null.
        std::shared_ptr<parquet::RowGroupStatistics> statistics;
        if (columnMetaData->is_stats_set()) {
            statistics = columnMetaData->statistics();
        }

        // The null check guards against statistics() handing back an empty
        // pointer even though is_stats_set() returned true.
        if (statistics && statistics->HasMinMax()) {
            minStrings[blazingColumnIndex][rowGroupIndex] = statistics->EncodeMin();
            maxStrings[blazingColumnIndex][rowGroupIndex] = statistics->EncodeMax();
        } else {
            // No usable statistics for this column chunk: store the placeholders.
            minStrings[blazingColumnIndex][rowGroupIndex] = "min";
            maxStrings[blazingColumnIndex][rowGroupIndex] = "max";
        }
    }
}