[09/10] incubator-drill git commit: DRILL-1704: Use complex reader for dictionary encoded files, as original reader seems to be broken

jacques Thu, 13 Nov 2014 09:19:24 -0800

DRILL-1704: Use complex reader for dictionary encoded files, as original reader seems to be broken



Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/b37dc08a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/b37dc08a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/b37dc08a

Branch: refs/heads/master
Commit: b37dc08a46dea1b1c0bfeba21ea73dd6ac0116bd
Parents: 116f6d1
Author: Jason Altekruse <altekruseja...@gmail.com>
Authored: Wed Nov 12 18:10:32 2014 -0800
Committer: Jacques Nadeau <jacq...@apache.org>
Committed: Thu Nov 13 09:17:36 2014 -0800

----------------------------------------------------------------------
 .../exec/store/parquet/ParquetScanBatchCreator.java     | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b37dc08a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
index 53a6ffc..4467825 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
@@ -41,7 +41,9 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
 import parquet.column.ColumnDescriptor;
+import parquet.column.Encoding;
 import parquet.hadoop.ParquetFileReader;
+import parquet.hadoop.metadata.ColumnChunkMetaData;
 import parquet.hadoop.metadata.ParquetMetadata;
 import parquet.schema.MessageType;
 import parquet.schema.Type;
@@ -107,7 +109,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
           footers.put(e.getPath(),
               ParquetFileReader.readFooter( fs.getConf(), new Path(e.getPath())));
         }
-        if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()))) {
+        if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()), e.getRowGroupIndex())) {
           readers.add(
               new ParquetRecordReader(
                   context, e.getPath(), e.getRowGroupIndex(), fs,
@@ -153,7 +155,12 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
     return s;
   }
 
-  private static boolean isComplex(ParquetMetadata footer) {
+  private static boolean isComplex(ParquetMetadata footer, int rowGroupIndex) {
+    for (ColumnChunkMetaData md : footer.getBlocks().get(rowGroupIndex).getColumns()) {
+      if (md.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
+        return true; // for now, use Complex reader for Dictionary encoded
+      }
+    }
     MessageType schema = footer.getFileMetaData().getSchema();
 
     for (Type type : schema.getFields()) {
@@ -161,6 +168,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
         return true;
       }
     }
+    ColumnDescriptor desc;
     for (ColumnDescriptor col : schema.getColumns()) {
       if (col.getMaxRepetitionLevel() > 0) {
         return true;

[09/10] incubator-drill git commit: DRILL-1704: Use complex reader for dictionary encoded files, as original reader seems to be broken

Reply via email to