DRILL-1359 Fix reading and writing of repeated scalar types in Parquet
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/8bf3c4e0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/8bf3c4e0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/8bf3c4e0 Branch: refs/heads/master Commit: 8bf3c4e0d8f3b2d7dc5b798f31272afa3d7eaf15 Parents: 4dcac19 Author: Parth Chandra <pchan...@maprtech.com> Authored: Thu Aug 28 22:23:56 2014 -0700 Committer: Jacques Nadeau <jacq...@apache.org> Committed: Fri Aug 29 08:29:43 2014 -0700 ---------------------------------------------------------------------- .../templates/ParquetOutputRecordWriter.java | 4 +- .../store/parquet/ParquetScanBatchCreator.java | 8 +++ .../physical/impl/writer/TestParquetWriter.java | 7 ++ .../src/test/resources/testRepeatedWrite.json | 74 ++++++++++++++++++++ 4 files changed, 91 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java b/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java index d4b5af5..cb37a1b 100644 --- a/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java +++ b/exec/java-exec/src/main/codegen/templates/ParquetOutputRecordWriter.java @@ -215,9 +215,9 @@ public abstract class ParquetOutputRecordWriter extends AbstractRecordWriter imp <#elseif minor.class == "VarChar" || minor.class == "Var16Char" || minor.class == "VarBinary"> <#if mode.prefix == "Repeated"> reader.read(i, holder); - consumer.startField(fieldName, fieldId); + //consumer.startField(fieldName, fieldId); consumer.addBinary(Binary.fromByteBuffer(holder.buffer.nioBuffer(holder.start, holder.end - holder.start))); - consumer.endField(fieldName, fieldId); + //consumer.endField(fieldName, fieldId); <#else> reader.read(holder); ByteBuf buf = holder.buffer; http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java index f9b6d91..608c689 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java @@ -44,6 +44,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import parquet.column.ColumnDescriptor; import parquet.hadoop.ParquetFileReader; import parquet.hadoop.metadata.ParquetMetadata; import parquet.schema.MessageType; @@ -153,11 +154,18 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan private static boolean isComplex(ParquetMetadata footer) { MessageType schema = footer.getFileMetaData().getSchema(); + for (Type type : schema.getFields()) { if (!type.isPrimitive()) { return true; } } + for (ColumnDescriptor col : schema.getColumns()) { + if (col.getMaxRepetitionLevel() > 0) { + return true; + } + } return false; } + } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java index 0f0743a..877ffc2 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java @@ -72,6 +72,13 @@ public class TestParquetWriter extends BaseTestQuery { } @Test + public void testComplexRepeated() throws Exception { + String selection = "*"; + String inputTable = "cp.`testRepeatedWrite.json`"; + runTestAndValidate(selection, selection, inputTable, "repeated_json"); + } + + @Test public void testCastProjectBug_Drill_929() throws Exception { String selection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, " + "L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, cast(L_COMMITDATE as DATE) as COMMITDATE, cast(L_RECEIPTDATE as DATE) AS RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT"; http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/8bf3c4e0/exec/java-exec/src/test/resources/testRepeatedWrite.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/testRepeatedWrite.json b/exec/java-exec/src/test/resources/testRepeatedWrite.json new file mode 100644 index 0000000..fa5da83 --- /dev/null +++ b/exec/java-exec/src/test/resources/testRepeatedWrite.json @@ -0,0 +1,74 @@ +{ + "id": "0001", + "type": "donut", + "name": "Cake", + "ppu": 0.55, + "sales": 35, + "topping": + [ + "None", + "Glazed", + "Sugar", + "Powdered Sugar", + "Chocolate with Sprinkles", + "Chocolate", + "Maple" + ] + } + { + "id": "0002", + "type": "donut", + "name": "Raised", + "ppu": 0.69, + "sales": 145, + "topping": + [ + "None", + "Glazed", + "Sugar", + "Chocolate", + "Maple" + ] + } + { + "id": "0003", + "type": "donut", + "name": "Old Fashioned", + "ppu": 0.55, + "sales": 300, + "topping": + [ + "None", + "Glazed", + "Chocolate", + "Maple" + ] + } + { + "id": "0004", + "type": "donut", + "name": "Filled", + "ppu": 0.69, + "sales": 14, + "topping": + [ + "None", + "Glazed", + "Sugar", + "Powdered Sugar", + "Chocolate with Sprinkles", + "Chocolate", + "Maple" + ] + } + { + "id": "0005", + "type": "donut", + "name": "Apple Fritter", + "ppu": 1.00, + "sales": 700, + "topping": + [ + "Glazed" + ] + }