[ 
https://issues.apache.org/jira/browse/PARQUET-1335?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16536654#comment-16536654
 ] 

ASF GitHub Bot commented on PARQUET-1335:
-----------------------------------------

gszadovszky closed pull request #503: PARQUET-1335: Logical type names in 
parquet-mr are not consistent with parquet-format
URL: https://github.com/apache/parquet-mr/pull/503
 
 
   

This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one
opened from a fork) once it is merged, the diff is reproduced below for
the sake of provenance:

diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
index 1abd56a26..d8536012b 100644
--- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
+++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
@@ -20,6 +20,7 @@
 
 import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.junit.Assert.assertEquals;
@@ -119,7 +120,7 @@ public void testEachPrimitiveType() {
   }
 
   @Test
-  public void testUTF8Annotation() {
+  public void testSTRINGAnnotation() {
     String message =
         "message StringMessage {\n" +
         "  required binary string (STRING);\n" +
@@ -127,7 +128,7 @@ public void testUTF8Annotation() {
 
     MessageType parsed = parseMessageType(message);
     MessageType expected = buildMessage()
-        .required(BINARY).as(UTF8).named("string")
+        .required(BINARY).as(stringType()).named("string")
         .named("StringMessage");
 
     assertEquals(expected, parsed);
@@ -135,11 +136,28 @@ public void testUTF8Annotation() {
     assertEquals(expected, reparsed);
   }
 
+  @Test
+  public void testUTF8Annotation() {
+    String message =
+      "message StringMessage {\n" +
+        "  required binary string (UTF8);\n" +
+        "}\n";
+
+    MessageType parsed = parseMessageType(message);
+    MessageType expected = buildMessage()
+      .required(BINARY).as(UTF8).named("string")
+      .named("StringMessage");
+
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
+  }
+
   @Test
   public void testIDs() {
     String message =
         "message Message {\n" +
-        "  required binary string (STRING) = 6;\n" +
+        "  required binary string (UTF8) = 6;\n" +
         "  required int32 i=1;\n" +
         "  required binary s2= 3;\n" +
         "  required binary s3 =4;\n" +
@@ -165,7 +183,7 @@ public void testMAPAnnotations() {
         "message Message {\n" +
         "  optional group aMap (MAP) {\n" +
         "    repeated group map (MAP_KEY_VALUE) {\n" +
-        "      required binary key (STRING);\n" +
+        "      required binary key (UTF8);\n" +
         "      required int32 value;\n" +
         "    }\n" +
         "  }\n" +
@@ -192,7 +210,7 @@ public void testLISTAnnotation() {
     String message =
         "message Message {\n" +
         "  required group aList (LIST) {\n" +
-        "    repeated binary string (STRING);\n" +
+        "    repeated binary string (UTF8);\n" +
         "  }\n" +
         "}\n";
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Logical type names in parquet-mr are not consistent with parquet-format
> -----------------------------------------------------------------------
>
>                 Key: PARQUET-1335
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1335
>             Project: Parquet
>          Issue Type: Improvement
>          Components: parquet-mr
>    Affects Versions: 1.11.0
>            Reporter: Nandor Kollar
>            Assignee: Nandor Kollar
>            Priority: Minor
>              Labels: pull-request-available
>
> UTF8 logical type should be called STRING, INT should be called INTEGER.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to