[ 
https://issues.apache.org/jira/browse/PARQUET-1285?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16465612#comment-16465612
 ] 

ASF GitHub Bot commented on PARQUET-1285:
-----------------------------------------

xhochy closed pull request #469: PARQUET-1285: [Java] SchemaConverter should 
not convert from TimeUnit.SECOND and TimeUnit.NANOSECOND of Arrow
URL: https://github.com/apache/parquet-mr/pull/469
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
 
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index 1d69c4523..f298558ac 100644
--- 
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ 
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -28,6 +28,7 @@
 import static org.apache.parquet.schema.OriginalType.INT_8;
 import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
 import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
 import static org.apache.parquet.schema.OriginalType.UINT_16;
 import static org.apache.parquet.schema.OriginalType.UINT_32;
 import static org.apache.parquet.schema.OriginalType.UINT_64;
@@ -49,6 +50,7 @@
 
 import org.apache.arrow.vector.types.DateUnit;
 import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
 import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
@@ -245,7 +247,14 @@ public TypeMapping visit(Date type) {
 
       @Override
       public TypeMapping visit(Time type) {
-        return primitive(INT32, TIME_MILLIS);
+        int bitWidth = type.getBitWidth();
+        TimeUnit timeUnit = type.getUnit();
+        if (bitWidth == 32 && timeUnit == TimeUnit.MILLISECOND) {
+          return primitive(INT32, TIME_MILLIS);
+        } else if (bitWidth == 64 && timeUnit == TimeUnit.MICROSECOND) {
+          return primitive(INT64, TIME_MICROS);
+        }
+        throw new UnsupportedOperationException("Unsupported type " + type);
       }
 
       @Override
@@ -407,11 +416,11 @@ public TypeMapping convertINT32(PrimitiveTypeName 
primitiveTypeName) throws Runt
           case DATE:
             return field(new ArrowType.Date(DateUnit.DAY));
           case TIMESTAMP_MICROS:
-            return field(new 
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
+            return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"));
           case TIMESTAMP_MILLIS:
-            return field(new 
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
+            return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"));
           case TIME_MILLIS:
-            return field(new 
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32));
+            return field(new ArrowType.Time(TimeUnit.MILLISECOND, 32));
           default:
           case TIME_MICROS:
           case INT_64:
@@ -456,11 +465,12 @@ public TypeMapping convertINT64(PrimitiveTypeName 
primitiveTypeName) throws Runt
           case DATE:
             return field(new ArrowType.Date(DateUnit.DAY));
           case TIMESTAMP_MICROS:
-            return field(new 
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
+            return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"));
           case TIMESTAMP_MILLIS:
-            return field(new 
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
-          default:
+            return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"));
           case TIME_MICROS:
+            return field(new ArrowType.Time(TimeUnit.MICROSECOND, 64));
+          default:
           case UTF8:
           case ENUM:
           case BSON:
diff --git 
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
 
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index 654f773f9..4c3da35f1 100644
--- 
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ 
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -28,6 +28,7 @@
 import static org.apache.parquet.schema.OriginalType.INT_8;
 import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
 import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
 import static org.apache.parquet.schema.OriginalType.UINT_16;
 import static org.apache.parquet.schema.OriginalType.UINT_32;
 import static org.apache.parquet.schema.OriginalType.UINT_64;
@@ -43,11 +44,12 @@
 
 import java.io.IOException;
 import java.util.List;
-import org.apache.arrow.vector.types.IntervalUnit;
 
-import org.apache.arrow.vector.types.UnionMode;
 import org.apache.arrow.vector.types.DateUnit;
 import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
@@ -86,7 +88,7 @@ private static Field field(String name, ArrowType type, 
Field... children) {
     field("e", new ArrowType.List(), field(null, new 
ArrowType.Date(DateUnit.DAY))),
     field("f", new ArrowType.FixedSizeList(1), field(null, new 
ArrowType.Date(DateUnit.DAY))),
     field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
-    field("h", new 
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")),
+    field("h", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
     field("i", new ArrowType.Interval(IntervalUnit.DAY_TIME))
   ));
   private final MessageType complexParquetSchema = Types.buildMessage()
@@ -129,11 +131,12 @@ private static Field field(String name, ArrowType type, 
Field... children) {
     field("k1", new ArrowType.Decimal(15, 5)),
     field("k2", new ArrowType.Decimal(25, 5)),
     field("l", new ArrowType.Date(DateUnit.DAY)),
-    field("m", new 
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.SECOND, 32)),
-    field("n", new 
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")),
+    field("m", new ArrowType.Time(TimeUnit.MILLISECOND, 32)),
+    field("n", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
     field("o", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
     field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
   ));
+
   private final MessageType allTypesParquetSchema = Types.buildMessage()
     .addField(Types.optional(BINARY).named("a"))
     .addField(Types.optionalGroup()
@@ -191,8 +194,8 @@ private static Field field(String name, ArrowType type, 
Field... children) {
     field("j1", new ArrowType.Decimal(15, 5)),
     field("j2", new ArrowType.Decimal(25, 5)),
     field("k", new ArrowType.Date(DateUnit.DAY)),
-    field("l", new 
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32)),
-    field("m", new 
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"))
+    field("l", new ArrowType.Time(TimeUnit.MILLISECOND, 32)),
+    field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
   ));
 
   private final MessageType supportedTypesParquetSchema = Types.buildMessage()
@@ -348,4 +351,66 @@ public void testRepeatedMap() throws IOException {
     SchemaMapping map = converter.map(paperArrowSchema, Paper.schema);
     Assert.assertEquals("p, s<r<p>, r<p>>, r<s<r<s<p, p>>, p>>", 
toSummaryString(map));
   }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testArrowTimeSecondToParquet() {
+    converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Time(TimeUnit.SECOND, 32))
+    ))).getParquetSchema();
+  }
+
+  @Test
+  public void testArrowTimeMillisecondToParquet() {
+    MessageType expected = converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Time(TimeUnit.MILLISECOND, 32))
+    ))).getParquetSchema();
+    Assert.assertEquals(expected, 
Types.buildMessage().addField(Types.optional(INT32).as(TIME_MILLIS).named("a")).named("root"));
+  }
+
+  @Test
+  public void testArrowTimeMicrosecondToParquet() {
+    MessageType expected = converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Time(TimeUnit.MICROSECOND, 64))
+    ))).getParquetSchema();
+    Assert.assertEquals(expected, 
Types.buildMessage().addField(Types.optional(INT64).as(TIME_MICROS).named("a")).named("root"));
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testArrowTimeNanosecondToParquet() {
+    converter.fromArrow(new Schema(asList(
+      field("a", new ArrowType.Time(TimeUnit.NANOSECOND, 64))
+    ))).getParquetSchema();
+  }
+
+  @Test
+  public void testParquetInt32TimeMillisToArrow() {
+    MessageType parquet = Types.buildMessage()
+      
.addField(Types.optional(INT32).as(TIME_MILLIS).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Time(TimeUnit.MILLISECOND, 32))
+    ));
+    Assert.assertEquals(expected, 
converter.fromParquet(parquet).getArrowSchema());
+  }
+
+  @Test
+  public void testParquetInt64TimeMicrosToArrow() {
+    MessageType parquet = Types.buildMessage()
+      
.addField(Types.optional(INT64).as(TIME_MICROS).named("a")).named("root");
+    Schema expected = new Schema(asList(
+      field("a", new ArrowType.Time(TimeUnit.MICROSECOND, 64))
+    ));
+    Assert.assertEquals(expected, 
converter.fromParquet(parquet).getArrowSchema());
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testParquetInt64TimeMillisToArrow() {
+    converter.fromParquet(Types.buildMessage()
+      
.addField(Types.optional(INT64).as(TIME_MILLIS).named("a")).named("root"));
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testParquetInt32TimeMicrosToArrow() {
+    converter.fromParquet(Types.buildMessage()
+      
.addField(Types.optional(INT32).as(TIME_MICROS).named("a")).named("root"));
+  }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> [Java] SchemaConverter should not convert from TimeUnit.SECOND and 
> TimeUnit.NANOSECOND of Arrow
> -----------------------------------------------------------------------------------------------
>
>                 Key: PARQUET-1285
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1285
>             Project: Parquet
>          Issue Type: Bug
>          Components: parquet-mr
>            Reporter: Masayuki Takahashi
>            Priority: Minor
>             Fix For: 1.10.0
>
>
> Arrow's 'Time' type is defined as follows:
> {code:java}
> { "name" : "time", "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND", 
> "bitWidth": /* integer: 32 or 64 */ }{code}
> [http://arrow.apache.org/docs/metadata.html]
>  
> But Parquet only supports 'TIME_MILLIS' and 'TIME_MICROS'.
>  [https://github.com/Apache/parquet-format/blob/master/LogicalTypes.md]
> Therefore, SchemaConverter should not convert Arrow's TimeUnit.SECOND or 
> TimeUnit.NANOSECOND to Parquet.
>   



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to