This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new b635beb PARQUET-1297: SchemaConverter should not convert from
Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) of Arrow (#477)
b635beb is described below
commit b635beb6efc07a97c143775c78a32d42b3b73c8e
Author: Masayuki Takahashi <[email protected]>
AuthorDate: Mon May 14 02:31:02 2018 +0900
PARQUET-1297: SchemaConverter should not convert from
Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) of Arrow (#477)
Arrow's 'Timestamp' definition is below:
{
"name" : "timestamp",
"unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND"
}
http://arrow.apache.org/docs/metadata.html
But Parquet only supports 'TIMESTAMP_MILLIS' and 'TIMESTAMP_MICROS'.
https://github.com/Apache/parquet-format/blob/master/LogicalTypes.md
Therefore SchemaConverter should not convert from
Timestamp(TimeUnit.SECOND) and Timestamp(TimeUnit.NANOSECOND) of Arrow to
Parquet.
Related:
https://issues.apache.org/jira/browse/PARQUET-1285
Author: Masayuki Takahashi <[email protected]>
---
.../parquet/arrow/schema/SchemaConverter.java | 17 +++---
.../parquet/arrow/schema/TestSchemaConverter.java | 63 ++++++++++++++++++++++
2 files changed, 74 insertions(+), 6 deletions(-)
diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index f298558..a7df48c 100644
--- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -27,6 +27,7 @@ import static org.apache.parquet.schema.OriginalType.INT_32;
import static org.apache.parquet.schema.OriginalType.INT_64;
import static org.apache.parquet.schema.OriginalType.INT_8;
import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
import static org.apache.parquet.schema.OriginalType.UINT_16;
@@ -259,7 +260,13 @@ public class SchemaConverter {
@Override
public TypeMapping visit(Timestamp type) {
- return primitive(INT64, TIMESTAMP_MILLIS);
+ TimeUnit timeUnit = type.getUnit();
+ if (timeUnit == TimeUnit.MILLISECOND) {
+ return primitive(INT64, TIMESTAMP_MILLIS);
+ } else if (timeUnit == TimeUnit.MICROSECOND) {
+ return primitive(INT64, TIMESTAMP_MICROS);
+ }
+ throw new UnsupportedOperationException("Unsupported type " + type);
}
/**
@@ -415,14 +422,9 @@ public class SchemaConverter {
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date(DateUnit.DAY));
- case TIMESTAMP_MICROS:
- return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"));
- case TIMESTAMP_MILLIS:
- return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"));
case TIME_MILLIS:
return field(new ArrowType.Time(TimeUnit.MILLISECOND, 32));
default:
- case TIME_MICROS:
case INT_64:
case UINT_64:
case UTF8:
@@ -433,6 +435,9 @@ public class SchemaConverter {
case LIST:
case MAP:
case MAP_KEY_VALUE:
+ case TIMESTAMP_MICROS:
+ case TIMESTAMP_MILLIS:
+ case TIME_MICROS:
throw new IllegalArgumentException("illegal type " + type);
}
}
diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index 4c3da35..2d1f028 100644
--- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -27,6 +27,7 @@ import static org.apache.parquet.schema.OriginalType.INT_32;
import static org.apache.parquet.schema.OriginalType.INT_64;
import static org.apache.parquet.schema.OriginalType.INT_8;
import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
+import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
import static org.apache.parquet.schema.OriginalType.UINT_16;
@@ -413,4 +414,66 @@ public class TestSchemaConverter {
converter.fromParquet(Types.buildMessage()
.addField(Types.optional(INT32).as(TIME_MICROS).named("a")).named("root"));
}
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testArrowTimestampSecondToParquet() {
+ converter.fromArrow(new Schema(asList(
+ field("a", new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"))
+ ))).getParquetSchema();
+ }
+
+ @Test
+ public void testArrowTimestampMillisecondToParquet() {
+ MessageType expected = converter.fromArrow(new Schema(asList(
+ field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
+ ))).getParquetSchema();
+ Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a")).named("root"));
+ }
+
+ @Test
+ public void testArrowTimestampMicrosecondToParquet() {
+ MessageType expected = converter.fromArrow(new Schema(asList(
+ field("a", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"))
+ ))).getParquetSchema();
+ Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root"));
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testArrowTimestampNanosecondToParquet() {
+ converter.fromArrow(new Schema(asList(
+ field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"))
+ ))).getParquetSchema();
+ }
+
+ @Test
+ public void testParquetInt64TimestampMillisToArrow() {
+ MessageType parquet = Types.buildMessage()
+ .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a")).named("root");
+ Schema expected = new Schema(asList(
+ field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
+ ));
+ Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
+ }
+
+ @Test
+ public void testParquetInt64TimestampMicrosToArrow() {
+ MessageType parquet = Types.buildMessage()
+ .addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root");
+ Schema expected = new Schema(asList(
+ field("a", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"))
+ ));
+ Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testParquetInt32TimestampMillisToArrow() {
+ converter.fromParquet(Types.buildMessage()
+ .addField(Types.optional(INT32).as(TIMESTAMP_MILLIS).named("a")).named("root"));
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testParquetInt32TimestampMicrosToArrow() {
+ converter.fromParquet(Types.buildMessage()
+ .addField(Types.optional(INT32).as(TIMESTAMP_MICROS).named("a")).named("root"));
+ }
}
--
To stop receiving notification emails like this one, please contact
[email protected].