Github user arina-ielchiieva commented on a diff in the pull request:
https://github.com/apache/drill/pull/1214#discussion_r183981354
--- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java ---
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import com.google.common.base.Functions;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.Stopwatch;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.ops.ExecutorFragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.ScanBatch;
+import org.apache.drill.exec.store.ColumnExplorer;
+import org.apache.drill.exec.store.RecordReader;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader;
+import org.apache.drill.exec.store.parquet2.DrillParquetReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
+import org.apache.parquet.hadoop.CodecFactory;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Type;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+public abstract class AbstractParquetScanBatchCreator {
+
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractParquetScanBatchCreator.class);
+
+  private static final String ENABLE_BYTES_READ_COUNTER = "parquet.benchmark.bytes.read";
+  private static final String ENABLE_BYTES_TOTAL_COUNTER = "parquet.benchmark.bytes.total";
+  private static final String ENABLE_TIME_READ_COUNTER = "parquet.benchmark.time.read";
+
+  protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRowGroupScan rowGroupScan, OperatorContext oContext) throws ExecutionSetupException {
+    final ColumnExplorer columnExplorer = new ColumnExplorer(context.getOptions(), rowGroupScan.getColumns());
+
+    if (!columnExplorer.isStarQuery()) {
+      rowGroupScan = rowGroupScan.copy(columnExplorer.getTableColumns());
+      rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());
+    }
+
+    boolean useAsyncPageReader =
+        context.getOptions().getOption(ExecConstants.PARQUET_PAGEREADER_ASYNC).bool_val;
+
+    AbstractDrillFileSystemManager fsManager = getDrillFileSystemCreator(oContext, useAsyncPageReader);
+
+    // keep footers in a map to avoid re-reading them
+    Map<String, ParquetMetadata> footers = new HashMap<>();
+    List<RecordReader> readers = new LinkedList<>();
+    List<Map<String, String>> implicitColumns = new ArrayList<>();
+    Map<String, String> mapWithMaxColumns = new LinkedHashMap<>();
+    for (RowGroupReadEntry rowGroup : rowGroupScan.getRowGroupReadEntries()) {
--- End diff ---
Fixed.
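
For context, the `footers` map in this diff memoizes Parquet footers for the lifetime of one scan batch, so a file's metadata is read from disk at most once even when several row groups come from the same file. Below is a minimal standalone sketch of that caching pattern; the `FooterCacheSketch` class and `getFooter` method are illustrative names, not the actual Drill code, while `ParquetFileReader.readFooter` and `ParquetMetadataConverter.NO_FILTER` are real parquet-hadoop APIs:

```java
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

public class FooterCacheSketch {

  // One cache per scan batch: file path string -> parsed footer metadata.
  private final Map<String, ParquetMetadata> footers = new HashMap<>();

  /**
   * Returns the footer for the given file, reading it from the file system
   * only on the first request for that path.
   */
  public ParquetMetadata getFooter(Configuration conf, String path) throws IOException {
    ParquetMetadata footer = footers.get(path);
    if (footer == null) {
      // NO_FILTER keeps all row group metadata; a range filter could be
      // passed instead if only specific row groups were of interest.
      footer = ParquetFileReader.readFooter(conf, new Path(path), ParquetMetadataConverter.NO_FILTER);
      footers.put(path, footer);
    }
    return footer;
  }
}
```

A plain `HashMap` suffices here because the cache lives only for the duration of a single `getBatch` call on one thread; a cache shared across fragments would need different lifetime and concurrency handling.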
---