[ 
https://issues.apache.org/jira/browse/PARQUET-1297?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16473575#comment-16473575
 ] 

ASF GitHub Bot commented on PARQUET-1297:
-----------------------------------------

xhochy closed pull request #477: PARQUET-1297: SchemaConverter should not 
convert from Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) of 
Arrow
URL: https://github.com/apache/parquet-mr/pull/477
 
 
   

This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign (forked) pull request
once it has been merged, the diff is reproduced below for the sake of
provenance:

diff --git 
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
 
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index f298558ac..a7df48cee 100644
--- 
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ 
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -27,6 +27,7 @@
 import static org.apache.parquet.schema.OriginalType.INT_64;
 import static org.apache.parquet.schema.OriginalType.INT_8;
 import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
 import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
 import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
 import static org.apache.parquet.schema.OriginalType.UINT_16;
@@ -259,7 +260,13 @@ public TypeMapping visit(Time type) {
 
       @Override
       public TypeMapping visit(Timestamp type) {
-        return primitive(INT64, TIMESTAMP_MILLIS);
+        TimeUnit timeUnit = type.getUnit();
+        if (timeUnit == TimeUnit.MILLISECOND) {
+          return primitive(INT64, TIMESTAMP_MILLIS);
+        } else if (timeUnit == TimeUnit.MICROSECOND) {
+          return primitive(INT64, TIMESTAMP_MICROS);
+        }
+        throw new UnsupportedOperationException("Unsupported type " + type);
       }
 
       /**
@@ -415,14 +422,9 @@ public TypeMapping convertINT32(PrimitiveTypeName 
primitiveTypeName) throws Runt
             return decimal(type.getDecimalMetadata());
           case DATE:
             return field(new ArrowType.Date(DateUnit.DAY));
-          case TIMESTAMP_MICROS:
-            return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"));
-          case TIMESTAMP_MILLIS:
-            return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"));
           case TIME_MILLIS:
             return field(new ArrowType.Time(TimeUnit.MILLISECOND, 32));
           default:
-          case TIME_MICROS:
           case INT_64:
           case UINT_64:
           case UTF8:
@@ -433,6 +435,9 @@ public TypeMapping convertINT32(PrimitiveTypeName 
primitiveTypeName) throws Runt
           case LIST:
           case MAP:
           case MAP_KEY_VALUE:
+          case TIMESTAMP_MICROS:
+          case TIMESTAMP_MILLIS:
+          case TIME_MICROS:
             throw new IllegalArgumentException("illegal type " + type);
         }
       }
diff --git 
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
 
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index 4c3da35f1..2d1f028e2 100644
--- 
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ 
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -27,6 +27,7 @@
 import static org.apache.parquet.schema.OriginalType.INT_64;
 import static org.apache.parquet.schema.OriginalType.INT_8;
 import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
 import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
 import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
 import static org.apache.parquet.schema.OriginalType.UINT_16;
@@ -413,4 +414,66 @@ public void testParquetInt32TimeMicrosToArrow() {
     converter.fromParquet(Types.buildMessage()
       
.addField(Types.optional(INT32).as(TIME_MICROS).named("a")).named("root"));
   }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testArrowTimestampSecondToParquet() {
+    converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"))
+    ))).getParquetSchema();
+  }
+
+  @Test
+  public void testArrowTimestampMillisecondToParquet() {
+    MessageType expected = converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
+    ))).getParquetSchema();
+    Assert.assertEquals(expected, 
Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a")).named("root"));
+  }
+
+  @Test
+  public void testArrowTimestampMicrosecondToParquet() {
+    MessageType expected = converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"))
+    ))).getParquetSchema();
+    Assert.assertEquals(expected, 
Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root"));
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testArrowTimestampNanosecondToParquet() {
+    converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"))
+    ))).getParquetSchema();
+  }
+
+  @Test
+  public void testParquetInt64TimestampMillisToArrow() {
+    MessageType parquet = Types.buildMessage()
+      
.addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
+    ));
+    Assert.assertEquals(expected, 
converter.fromParquet(parquet).getArrowSchema());
+  }
+
+  @Test
+  public void testParquetInt64TimestampMicrosToArrow() {
+    MessageType parquet = Types.buildMessage()
+      
.addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"))
+    ));
+    Assert.assertEquals(expected, 
converter.fromParquet(parquet).getArrowSchema());
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testParquetInt32TimestampMillisToArrow() {
+    converter.fromParquet(Types.buildMessage()
+      
.addField(Types.optional(INT32).as(TIMESTAMP_MILLIS).named("a")).named("root"));
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testParquetInt32TimestampMicrosToArrow() {
+    converter.fromParquet(Types.buildMessage()
+      
.addField(Types.optional(INT32).as(TIMESTAMP_MICROS).named("a")).named("root"));
+  }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> [Java] SchemaConverter should not convert from Timestamp(TimeUnit.SECOND) and 
> Timestamp(TimeUnit.NANOSECOND) of Arrow
> ---------------------------------------------------------------------------------------------------------------------
>
>                 Key: PARQUET-1297
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1297
>             Project: Parquet
>          Issue Type: Bug
>          Components: parquet-mr
>            Reporter: Masayuki Takahashi
>            Assignee: Masayuki Takahashi
>            Priority: Minor
>             Fix For: 1.11.0
>
>
> Arrow's 'Timestamp' type is defined as follows:
> {code:java}
> {
>   "name" : "timestamp",
>   "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND"
> }
> {code}
> [http://arrow.apache.org/docs/metadata.html]
> But Parquet only supports 'TIMESTAMP_MILLIS' and 'TIMESTAMP_MICROS'.
>  [https://github.com/Apache/parquet-format/blob/master/LogicalTypes.md]
> Therefore, SchemaConverter should not convert Arrow's 
> Timestamp(TimeUnit.SECOND) or Timestamp(TimeUnit.NANOSECOND) to Parquet.
> Related:
> https://issues.apache.org/jira/browse/PARQUET-1285
>   



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to