GitHub user sachouche commented on a diff in the pull request:
https://github.com/apache/drill/pull/976#discussion_r143837353
--- Diff:
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
---
@@ -156,18 +160,39 @@ public ScanBatch getBatch(FragmentContext context,
ParquetRowGroupScan rowGroupS
return new ScanBatch(rowGroupScan, context, oContext, readers,
implicitColumns);
}
- private static boolean isComplex(ParquetMetadata footer) {
- MessageType schema = footer.getFileMetaData().getSchema();
+ private static boolean isComplex(ParquetMetadata footer,
List<SchemaPath> columns) {
+ if (Utilities.isStarQuery(columns)) {
+ MessageType schema = footer.getFileMetaData().getSchema();
- for (Type type : schema.getFields()) {
- if (!type.isPrimitive()) {
- return true;
+ for (Type type : schema.getFields()) {
+ if (!type.isPrimitive()) {
+ return true;
+ }
}
- }
- for (ColumnDescriptor col : schema.getColumns()) {
- if (col.getMaxRepetitionLevel() > 0) {
- return true;
+ for (ColumnDescriptor col : schema.getColumns()) {
+ if (col.getMaxRepetitionLevel() > 0) {
+ return true;
+ }
+ }
+ return false;
+ } else {
+ for (SchemaPath column : columns) {
+ if (isColumnComplex(footer.getFileMetaData().getSchema(), column))
{
--- End diff --
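Could you please reuse the already-defined `schema` variable (declaring it before the `if` so that it is in scope in both branches) instead of invoking
`footer.getFileMetaData().getSchema()` multiple times, including once per iteration of the column loop?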
---