PARQUET-660: Ignore extension fields in protobuf messages. Currently, converting protobuf messages with extension can result in an uninformative error or a data corruption. A more detailed explanation in the corresponding [jira](https://issues.apache.org/jira/browse/PARQUET-660).
This patch simply ignores extension fields in protobuf messages. In the longer run, I'd like to add a proper support for Protobuf extensions. This might take a little longer though, so I've decided to improve the current situation with this patch. Author: Jakub Kukul <jakub.ku...@gmail.com> Closes #351 from jkukul/master and squashes the following commits: 27580ab [Jakub Kukul] PARQUET-660: Throw Unsupported exception for messages with extensions. db6e08b [Jakub Kukul] PARQUET-660: Refactor: Don't use additional variable for indexing fieldWriters. e910a8a [Jakub Kukul] PARQUET-660: Refactor: Add missing indentation. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/4beb0609 Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/4beb0609 Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/4beb0609 Branch: refs/heads/parquet-1.8.x Commit: 4beb06094b6e14379c6bb16ae09b7b3a47c8c5fb Parents: 6e95d45 Author: Jakub Kukul <jakub.ku...@gmail.com> Authored: Thu Sep 8 14:48:42 2016 -0700 Committer: Ryan Blue <b...@apache.org> Committed: Mon Jan 9 16:54:54 2017 -0800 ---------------------------------------------------------------------- .../parquet/proto/ProtoSchemaConverter.java | 30 ++++++++++---------- .../apache/parquet/proto/ProtoWriteSupport.java | 13 ++++++--- .../parquet/proto/ProtoWriteSupportTest.java | 15 ++++++++++ .../src/test/resources/TestProtobuf.proto | 11 +++++++ 4 files changed, 50 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java index 8c9685a..3f6ed6b 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java @@ -86,21 +86,21 @@ public class ProtoSchemaConverter { Type.Repetition repetition = getRepetition(descriptor); JavaType javaType = descriptor.getJavaType(); switch (javaType) { - case BOOLEAN : return builder.primitive(BOOLEAN, repetition); - case INT : return builder.primitive(INT32, repetition); - case LONG : return builder.primitive(INT64, repetition); - case FLOAT : return builder.primitive(FLOAT, repetition); - case DOUBLE: return builder.primitive(DOUBLE, repetition); - case BYTE_STRING: return builder.primitive(BINARY, repetition); - case STRING: return builder.primitive(BINARY, repetition).as(UTF8); - case MESSAGE: { - GroupBuilder<GroupBuilder<T>> group = builder.group(repetition); - convertFields(group, descriptor.getMessageType().getFields()); - return group; - } - case ENUM: return builder.primitive(BINARY, repetition).as(ENUM); - default: - throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType); + case BOOLEAN: return builder.primitive(BOOLEAN, repetition); + case INT: return builder.primitive(INT32, repetition); + case LONG: return builder.primitive(INT64, repetition); + case FLOAT: return builder.primitive(FLOAT, repetition); + case DOUBLE: return builder.primitive(DOUBLE, repetition); + case BYTE_STRING: return builder.primitive(BINARY, repetition); + case STRING: return builder.primitive(BINARY, repetition).as(UTF8); + case MESSAGE: { + GroupBuilder<GroupBuilder<T>> group = builder.group(repetition); + convertFields(group, descriptor.getMessageType().getFields()); + return group; + } + case ENUM: return builder.primitive(BINARY, repetition).as(ENUM); + default: + throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType); } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java index d7f7a53..cef2b93 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java @@ -156,7 +156,6 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport< List<Descriptors.FieldDescriptor> fields = descriptor.getFields(); fieldWriters = (FieldWriter[]) Array.newInstance(FieldWriter.class, fields.size()); - int i = 0; for (Descriptors.FieldDescriptor fieldDescriptor: fields) { String name = fieldDescriptor.getName(); Type type = schema.getType(name); @@ -169,8 +168,7 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport< writer.setFieldName(name); writer.setIndex(schema.getFieldIndex(name)); - fieldWriters[i] = writer; - i++; + fieldWriters[fieldDescriptor.getIndex()] = writer; } } @@ -220,6 +218,13 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport< for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : changedPbFields.entrySet()) { Descriptors.FieldDescriptor fieldDescriptor = entry.getKey(); + + if(fieldDescriptor.isExtension()) { + // Field index of an extension field might overlap with a base field. + throw new UnsupportedOperationException( + "Cannot convert Protobuf message with extension field(s)"); + } + int fieldIndex = fieldDescriptor.getIndex(); fieldWriters[fieldIndex].writeField(entry.getValue()); } @@ -276,7 +281,7 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport< } class IntWriter extends FieldWriter { - @Override + @Override final void writeRawValue(Object value) { recordConsumer.addInteger((Integer) value); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index 73f7734..3a273c9 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -165,4 +165,19 @@ public class ProtoWriteSupportTest { inOrder.verify(readConsumerMock).endMessage(); Mockito.verifyNoMoreInteractions(readConsumerMock); } + + @Test(expected = UnsupportedOperationException.class) + public void testMessageWithExtensions() throws Exception { + RecordConsumer readConsumerMock = Mockito.mock(RecordConsumer.class); + ProtoWriteSupport instance = createReadConsumerInstance(TestProtobuf.Vehicle.class, readConsumerMock); + + TestProtobuf.Vehicle.Builder msg = TestProtobuf.Vehicle.newBuilder(); + msg.setHorsePower(300); + // Currently there's no support for extension fields. This test tests that the extension field + // will cause an exception. + msg.setExtension(TestProtobuf.Airplane.wingSpan, 50); + + instance.write(msg.build()); + } + } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/4beb0609/parquet-protobuf/src/test/resources/TestProtobuf.proto ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto b/parquet-protobuf/src/test/resources/TestProtobuf.proto index afa0f63..d7cdf03 100644 --- a/parquet-protobuf/src/test/resources/TestProtobuf.proto +++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto @@ -137,3 +137,14 @@ message SecondCustomClassMessage { } //please place your unit test Protocol Buffer definitions here. + +message Vehicle { + optional int32 horsePower = 1; + extensions 100 to 199; +} + +message Airplane { + extend Vehicle { + optional int32 wingSpan = 101; + } +}