Wael Nasreddine created PARQUET-665:
---------------------------------------

             Summary: Parquet-mr: Protobuf 3 support
                 Key: PARQUET-665
                 URL: https://issues.apache.org/jira/browse/PARQUET-665
             Project: Parquet
          Issue Type: Improvement
          Components: parquet-mr
            Reporter: Wael Nasreddine


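Does parquet-mr support Protobuf version 3? I've applied the following patch,
and the tests are failing, mostly because of the optional vs. required
distinction: the proto3 test schema no longer uses the `optional`/`required`
field labels, and the `hasXxx()` presence checks in the tests had to be replaced.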

{code}
diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml
index b3e4e50..aa67423 100644
--- a/parquet-protobuf/pom.xml
+++ b/parquet-protobuf/pom.xml
@@ -31,7 +31,7 @@
 
   <properties>
     <elephant-bird.version>4.4</elephant-bird.version>
-    <protobuf.version>2.5.0</protobuf.version>
+    <protobuf.version>3.0.0-beta-4</protobuf.version>
   </properties>
 
 
diff --git 
a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
 
b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
index 5c6ebca..7e2557f 100644
--- 
a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
+++ 
b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
@@ -88,7 +88,7 @@ public class ProtoInputOutputFormatTest {
 
 
     //test that only requested fields were deserialized
-    assertTrue(readDocument.hasDocId());
+    assertTrue(readDocument.getDocId() == 12345);
     assertTrue("Found data outside projection.", readDocument.getNameCount() 
== 0);
   }
 
diff --git 
a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
 
b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
index 5318bd2..1cbb972 100644
--- 
a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
+++ 
b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
@@ -183,16 +183,16 @@ public class ProtoRecordConverterTest {
     TestProtobuf.InnerMessage third = result.getInner(2);
 
     assertEquals("First inner", first.getOne());
-    assertFalse(first.hasTwo());
-    assertFalse(first.hasThree());
+    assertEquals(first.getTwo(), "");
+    assertEquals(first.getThree(), "");
 
     assertEquals("Second inner", second.getTwo());
-    assertFalse(second.hasOne());
-    assertFalse(second.hasThree());
+    assertEquals(second.getOne(), "");
+    assertEquals(second.getThree(), "");
 
     assertEquals("Third inner", third.getThree());
-    assertFalse(third.hasOne());
-    assertFalse(third.hasTwo());
+    assertEquals(third.getOne(), "");
+    assertEquals(third.getTwo(), "");
   }
 
 
diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto 
b/parquet-protobuf/src/test/resources/TestProtobuf.proto
index afa0f63..caf7926 100644
--- a/parquet-protobuf/src/test/resources/TestProtobuf.proto
+++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto
@@ -9,7 +9,7 @@
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
-// Unless required by applicable law or agreed to in writing,
+// Unless by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
@@ -17,6 +17,8 @@
 // under the License.
 //
 
+syntax = "proto3";
+
 package TestProtobuf;
 
 option java_package = "org.apache.parquet.proto.test";
@@ -25,17 +27,18 @@ option java_package = "org.apache.parquet.proto.test";
 // messages but groups were deprecated.
 
 message Document {
-    required int64 DocId = 1;
-    optional Links links = 32;
-    repeated group Name = 24 {
+    int64 DocId = 1;
+    Links links = 32;
+    message Name  {
         repeated Language name = 4;
-        optional string url = 5;
+        string url = 5;
     }
+    repeated Name name = 24;
 }
 
 message Language {
-    required string code = 12;
-    optional string Country = 14;
+    string code = 12;
+    string Country = 14;
 }
 
 message Links {
@@ -47,42 +50,43 @@ message Links {
 // begin - protocol buffers for ProtoSchemaConverterTest
 
  message SchemaConverterSimpleMessage {
-     optional int32 someId = 3;
+     int32 someId = 3;
  }
 
  message SchemaConverterAllDatatypes {
-     optional double optionalDouble = 1;
-     optional float optionalFloat = 2;
-     optional int32 optionalInt32 = 3;
-     optional int64 optionalInt64 = 4;
-     optional uint32 optionalUInt32 = 5;
-     optional uint64 optionalUInt64 = 6;
-     optional sint32 optionalSInt32 = 7;
-     optional sint64 optionalSInt64 = 8;
-     optional fixed32 optionalFixed32 = 9;
-     optional fixed64 optionalFixed64 = 10;
-     optional sfixed32 optionalSFixed32 = 11;
-     optional sfixed64 optionalSFixed64 = 12;
-     optional bool optionalBool = 13;
-     optional string optionalString = 14;
-     optional bytes optionalBytes = 15;
-     optional SchemaConverterSimpleMessage optionalMessage = 16;
-     optional group PbGroup  = 17 {
-       optional int32 groupInt = 2;
+     double optionalDouble = 1;
+     float optionalFloat = 2;
+     int32 optionalInt32 = 3;
+     int64 optionalInt64 = 4;
+     uint32 optionalUInt32 = 5;
+     uint64 optionalUInt64 = 6;
+     sint32 optionalSInt32 = 7;
+     sint64 optionalSInt64 = 8;
+     fixed32 optionalFixed32 = 9;
+     fixed64 optionalFixed64 = 10;
+     sfixed32 optionalSFixed32 = 11;
+     sfixed64 optionalSFixed64 = 12;
+     bool optionalBool = 13;
+     string optionalString = 14;
+     bytes optionalBytes = 15;
+     SchemaConverterSimpleMessage optionalMessage = 16;
+     message PbGroup {
+       int32 groupInt = 2;
      }
+     PbGroup pbGroup = 17;
     enum TestEnum {
         FIRST = 0;
         SECOND = 1;
     }
-    optional TestEnum optionalEnum = 18;
+    TestEnum optionalEnum = 18;
  }
 
  message SchemaConverterRepetition {
-     optional int32 optionalPrimitive = 1;
-     required int32 requiredPrimitive = 2;
+     int32 optionalPrimitive = 1;
+     int32 requiredPrimitive = 2;
      repeated int32 repeatedPrimitive = 3;
-     optional SchemaConverterSimpleMessage optionalMessage = 7;
-     required SchemaConverterSimpleMessage requiredMessage = 8;
+     SchemaConverterSimpleMessage optionalMessage = 7;
+     SchemaConverterSimpleMessage requiredMessage = 8;
      repeated SchemaConverterSimpleMessage repeatedMessage = 9;
  }
 
@@ -92,22 +96,22 @@ message Links {
 //begin protocol buffers for ProtoInputOutputFormatTest
 
 message InputOutputMsgFormat {
-    optional int32 someId = 3;
+    int32 someId = 3;
 }
 
 message IOFormatMessage {
-    optional double optionalDouble = 1;
+    double optionalDouble = 1;
     repeated string repeatedString = 2;
-    optional InputOutputMsgFormat msg = 3;
+    InputOutputMsgFormat msg = 3;
  }
 
 //end protocol buffers for ProtoInputOutputFormatTest
 
 
 message InnerMessage {
-    optional string one = 1;
-    optional string two = 2;
-    optional string three = 3;
+    string one = 1;
+    string two = 2;
+    string three = 3;
 }
 
 message TopMessage {
@@ -115,7 +119,7 @@ message TopMessage {
 }
 
 message MessageA {
-    optional InnerMessage inner = 123;
+    InnerMessage inner = 123;
 }
 
 message RepeatedIntMessage {
@@ -129,11 +133,11 @@ message HighIndexMessage {
 //custom proto class - ProtoInputOutputFormatTest
 
 message FirstCustomClassMessage {
-    optional string string = 11;
+    string string = 11;
 }
 
 message SecondCustomClassMessage {
-    optional string string = 11;
+    string string = 11;
 }
 
 //please place your unit test Protocol Buffer definitions here.
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to