Github user parthchandra commented on a diff in the pull request:
https://github.com/apache/drill/pull/949#discussion_r140033471
--- Diff:
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
---
@@ -819,63 +827,64 @@ private void init() throws IOException {
}
}
rowGroupInfo.setEndpointByteMap(endpointByteMap);
+ rowGroupInfo.setColumns(rg.getColumns());
rgIndex++;
rowGroupInfos.add(rowGroupInfo);
}
}
this.endpointAffinities =
AffinityCreator.getAffinityMap(rowGroupInfos);
+ updatePartitionColTypeMap();
+ }
+ private void updatePartitionColTypeMap() {
columnValueCounts = Maps.newHashMap();
this.rowCount = 0;
boolean first = true;
- for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
- for (RowGroupMetadata rowGroup : file.getRowGroups()) {
- long rowCount = rowGroup.getRowCount();
- for (ColumnMetadata column : rowGroup.getColumns()) {
- SchemaPath schemaPath =
SchemaPath.getCompoundPath(column.getName());
- Long previousCount = columnValueCounts.get(schemaPath);
- if (previousCount != null) {
- if (previousCount != GroupScan.NO_COLUMN_STATS) {
- if (column.getNulls() != null) {
- Long newCount = rowCount - column.getNulls();
- columnValueCounts.put(schemaPath,
columnValueCounts.get(schemaPath) + newCount);
- }
- }
- } else {
+ for (RowGroupInfo rowGroup : this.rowGroupInfos) {
--- End diff --
Isn't this doing the same thing as the original code? The rowGroupInfos list
is built from the RowGroupMetadata in the files, isn't it?
---