Repository: incubator-drill
Updated Branches:
  refs/heads/master fd54e71e8 -> 108d29fce
DRILL-1701: Fix for nullable dictionary columns in optimized parquet reader. Re-enable the optimized reader for dictionary encoded files.


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/ade74b18
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/ade74b18
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/ade74b18

Branch: refs/heads/master
Commit: ade74b18dec5645cb8ce1fb4d0aa9ec40839895a
Parents: fd54e71
Author: Jason Altekruse <altekruseja...@gmail.com>
Authored: Thu Nov 13 14:02:51 2014 -0800
Committer: Jason Altekruse <altekruseja...@gmail.com>
Committed: Thu Nov 13 14:52:55 2014 -0800

----------------------------------------------------------------------
 .../exec/store/parquet/ParquetScanBatchCreator.java   | 12 ++----------
 .../columnreaders/NullableVarLengthValuesColumn.java  | 10 +++++-----
 2 files changed, 7 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/ade74b18/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
index 4467825..53a6ffc 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
@@ -41,9 +41,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
 import parquet.column.ColumnDescriptor;
-import parquet.column.Encoding;
 import parquet.hadoop.ParquetFileReader;
-import parquet.hadoop.metadata.ColumnChunkMetaData;
 import parquet.hadoop.metadata.ParquetMetadata;
 import parquet.schema.MessageType;
 import parquet.schema.Type;
@@ -109,7 +107,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
             footers.put(e.getPath(), ParquetFileReader.readFooter(
                 fs.getConf(), new Path(e.getPath())));
           }
-          if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()), e.getRowGroupIndex())) {
+          if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()))) {
             readers.add(
                 new ParquetRecordReader(
                     context, e.getPath(), e.getRowGroupIndex(), fs,
@@ -155,12 +153,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
     return s;
   }
 
-  private static boolean isComplex(ParquetMetadata footer, int rowGroupIndex) {
-    for (ColumnChunkMetaData md : footer.getBlocks().get(rowGroupIndex).getColumns()) {
-      if (md.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
-        return true; // for now, use Complex reader for Dictionary encoded
-      }
-    }
+  private static boolean isComplex(ParquetMetadata footer) {
     MessageType schema = footer.getFileMetaData().getSchema();
 
     for (Type type : schema.getFields()) {
@@ -168,7 +161,6 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
         return true;
       }
     }
-    ColumnDescriptor desc;
     for (ColumnDescriptor col : schema.getColumns()) {
       if (col.getMaxRepetitionLevel() > 0) {
         return true;
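In short, column encodings no longer factor into reader selection: isComplex() now inspects only the schema shape, so dictionary-encoded flat files go back to the optimized ParquetRecordReader and only nested or repeated schemas fall through to the complex reader. Below is a minimal standalone sketch of the resulting check, reconstructed from the hunks above; the loop body elided between the last two hunks is assumed here to be a !type.isPrimitive() test, and the wrapper class is only for illustration:

import parquet.column.ColumnDescriptor;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;
import parquet.schema.Type;

class ReaderSelectionSketch {
  // Encoding checks are gone: only schema structure can force the
  // complex reader now.
  static boolean isComplex(ParquetMetadata footer) {
    MessageType schema = footer.getFileMetaData().getSchema();
    for (Type type : schema.getFields()) {
      if (!type.isPrimitive()) {  // assumed loop body: any nested (group) field
        return true;
      }
    }
    for (ColumnDescriptor col : schema.getColumns()) {
      if (col.getMaxRepetitionLevel() > 0) {  // any repeated column
        return true;
      }
    }
    return false;
  }
}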
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/ade74b18/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
index 2e24674..aa3d9c5 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java
@@ -122,14 +122,14 @@ public abstract class NullableVarLengthValuesColumn<V extends ValueVector> exten
   protected void readField(long recordsToRead) {
     // TODO - unlike most implementations of this method, the recordsReadInThisIteration field is not set here
     //        should verify that this is not breaking anything
-    if (usingDictionary) {
-      currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
-      // re-purposing this field here for length in BYTES to prevent repetitive multiplication/division
-    }
-    dataTypeLengthInBits = variableWidthVector.getAccessor().getValueLength(valuesReadInCurrentPass);
     currentValNull = variableWidthVector.getAccessor().getObject(valuesReadInCurrentPass) == null;
     // again, I am re-purposing the unused field here, it is a length in BYTES, not bits
     if (! currentValNull) {
+      if (usingDictionary) {
+        currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
+      }
+      // re-purposing this field here for length in BYTES to prevent repetitive multiplication/division
+      dataTypeLengthInBits = variableWidthVector.getAccessor().getValueLength(valuesReadInCurrentPass);
      boolean success = setSafe(valuesReadInCurrentPass, pageReader.pageDataByteArray,
          (int) pageReader.readPosInBytes + 4, dataTypeLengthInBits);
      assert success;
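The root cause addressed by this hunk: for a nullable dictionary-encoded column, the dictionary value stream carries entries only for the non-null records, while the old code called dictionaryValueReader.readBytes() once per record. Every null row therefore consumed a dictionary entry belonging to a later non-null row, skewing all subsequent values. Moving the read (together with the length lookup) inside the !currentValNull branch keeps the stream aligned. A toy illustration of that invariant, independent of the Drill classes (all names below are hypothetical):

import java.util.Arrays;
import java.util.Iterator;

class NullableDictionaryDecodeSketch {
  // Dictionary-encoded, nullable column: the value stream holds one entry
  // per NON-null record. Reading it on null records drifts out of sync.
  static String[] decode(boolean[] isNull, Iterator<String> dictValues) {
    String[] out = new String[isNull.length];
    for (int i = 0; i < isNull.length; i++) {
      if (!isNull[i]) {
        out[i] = dictValues.next(); // consume only for non-null records
      }                             // nulls take nothing from the stream
    }
    return out;
  }

  public static void main(String[] args) {
    boolean[] isNull = { false, true, false };                  // row 1 is null
    Iterator<String> dict = Arrays.asList("a", "b").iterator(); // two entries
    System.out.println(Arrays.toString(decode(isNull, dict)));  // [a, null, b]
  }
}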