DRILL-1359 Fix reading and writing of repeated scalar types in Parquet

Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/8bf3c4e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/8bf3c4e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/8bf3c4e0

Branch: refs/heads/master
Commit: 8bf3c4e0d8f3b2d7dc5b798f31272afa3d7eaf15
Parents: 4dcac19
Author: Parth Chandra <pchan...@maprtech.com>
Authored: Thu Aug 28 22:23:56 2014 -0700
Committer: Jacques Nadeau <jacq...@apache.org>
Committed: Fri Aug 29 08:29:43 2014 -0700

----------------------------------------------------------------------
 .../templates/ParquetOutputRecordWriter.java    |  4 +-
 .../store/parquet/ParquetScanBatchCreator.java  |  8 +++
 .../physical/impl/writer/TestParquetWriter.java |  7 ++
 .../src/test/resources/testRepeatedWrite.json   | 74 ++++++++++++++++++++
 4 files changed, 91 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java b/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java
index d4b5af5..cb37a1b 100644
--- a/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java
+++ b/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java
@@ -215,9 +215,9 @@ public abstract class ParquetOutputRecordWriter extends AbstractRecordWriter imp
   <#elseif minor.class == "VarChar" || minor.class == "Var16Char" || minor.class == "VarBinary">
     <#if mode.prefix == "Repeated">
       reader.read(i, holder);
-      consumer.startField(fieldName, fieldId);
+      //consumer.startField(fieldName, fieldId);
       consumer.addBinary(Binary.fromByteBuffer(holder.buffer.nioBuffer(holder.start, holder.end - holder.start)));
-      consumer.endField(fieldName, fieldId);
+      //consumer.endField(fieldName, fieldId);
     <#else>
     reader.read(holder);
     ByteBuf buf = holder.buffer;

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
index f9b6d91..608c689 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
+import parquet.column.ColumnDescriptor;
 import parquet.hadoop.ParquetFileReader;
 import parquet.hadoop.metadata.ParquetMetadata;
 import parquet.schema.MessageType;
@@ -153,11 +154,18 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
 
   private static boolean isComplex(ParquetMetadata footer) {
     MessageType schema = footer.getFileMetaData().getSchema();
+
     for (Type type : schema.getFields()) {
       if (!type.isPrimitive()) {
         return true;
       }
     }
+    for (ColumnDescriptor col : schema.getColumns()) {
+      if (col.getMaxRepetitionLevel() > 0) {
+        return true;
+      }
+    }
     return false;
   }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index 0f0743a..877ffc2 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -72,6 +72,13 @@ public class TestParquetWriter extends BaseTestQuery {
   }
 
   @Test
+  public void testComplexRepeated() throws Exception {
+    String selection = "*";
+    String inputTable = "cp.`testRepeatedWrite.json`";
+    runTestAndValidate(selection, selection, inputTable, "repeated_json");
+  }
+
+  @Test
   public void testCastProjectBug_Drill_929() throws Exception {
     String selection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, " +
         "L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, cast(L_COMMITDATE as DATE) as COMMITDATE, cast(L_RECEIPTDATE as DATE) AS RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT";

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/test/resources/testRepeatedWrite.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/testRepeatedWrite.json b/exec/java-exec/src/test/resources/testRepeatedWrite.json
new file mode 100644
index 0000000..fa5da83
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testRepeatedWrite.json
@@ -0,0 +1,74 @@
+{
+               "id": "0001",
+               "type": "donut",
+               "name": "Cake",
+               "ppu": 0.55,
+               "sales": 35,
+               "topping":
+                       [
+                               "None",
+                               "Glazed",
+                               "Sugar",
+                               "Powdered Sugar",
+                               "Chocolate with Sprinkles",
+                               "Chocolate",
+                               "Maple" 
+                       ]
+       }
+       {
+               "id": "0002",
+               "type": "donut",
+               "name": "Raised",
+               "ppu": 0.69,
+               "sales": 145,
+               "topping":
+                       [
+                               "None",
+                               "Glazed",
+                               "Sugar",
+                               "Chocolate",
+                               "Maple"
+                       ]
+       }
+       {
+               "id": "0003",
+               "type": "donut",
+               "name": "Old Fashioned",
+               "ppu": 0.55,
+               "sales": 300,
+               "topping":
+                       [
+                               "None",
+                               "Glazed",
+                               "Chocolate",
+                               "Maple"
+                       ]
+       }
+               {
+               "id": "0004",
+               "type": "donut",
+               "name": "Filled",
+               "ppu": 0.69,
+               "sales": 14,
+               "topping":
+                       [
+                               "None",
+                               "Glazed",
+                               "Sugar",
+                               "Powdered Sugar",
+                               "Chocolate with Sprinkles",
+                               "Chocolate",
+                               "Maple"
+                       ]
+       }
+               {
+               "id": "0005",
+               "type": "donut",
+               "name": "Apple Fritter",
+               "ppu": 1.00,
+               "sales": 700,
+               "topping":
+                       [
+                               "Glazed" 
+                       ]
+       }

Reply via email to