GitHub user sachouche commented on a diff in the pull request: https://github.com/apache/drill/pull/976#discussion_r143837353 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java --- @@ -156,18 +160,39 @@ public ScanBatch getBatch(FragmentContext context, ParquetRowGroupScan rowGroupS return new ScanBatch(rowGroupScan, context, oContext, readers, implicitColumns); } - private static boolean isComplex(ParquetMetadata footer) { - MessageType schema = footer.getFileMetaData().getSchema(); + private static boolean isComplex(ParquetMetadata footer, List<SchemaPath> columns) { + if (Utilities.isStarQuery(columns)) { + MessageType schema = footer.getFileMetaData().getSchema(); - for (Type type : schema.getFields()) { - if (!type.isPrimitive()) { - return true; + for (Type type : schema.getFields()) { + if (!type.isPrimitive()) { + return true; + } } - } - for (ColumnDescriptor col : schema.getColumns()) { - if (col.getMaxRepetitionLevel() > 0) { - return true; + for (ColumnDescriptor col : schema.getColumns()) { + if (col.getMaxRepetitionLevel() > 0) { + return true; + } + } + return false; + } else { + for (SchemaPath column : columns) { + if (isColumnComplex(footer.getFileMetaData().getSchema(), column)) { --- End diff -- Can you please use the already defined schema variable instead of invoking "footer.getFileMetaData().getSchema()" multiple times?
---