This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 00a7a47 PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp (#594)
00a7a47 is described below
commit 00a7a470dbf73d6ae3bdd0774706abcda353b178
Author: Yongyan Wang <[email protected]>
AuthorDate: Sun Jan 27 12:25:53 2019 -0800
PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp (#594)
PARQUET-1504: Add an option to convert Parquet Int96 to Arrow Timestamp
---
.../parquet/arrow/schema/SchemaConverter.java | 16 ++++++++++++++--
.../parquet/arrow/schema/TestSchemaConverter.java | 22 ++++++++++++++++++++++
2 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index 0bfb888..6275ca3 100644
--- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -86,10 +86,19 @@ import org.apache.parquet.schema.Types.GroupBuilder;
*/
public class SchemaConverter {
+ // Indicates if Int96 should be converted to Arrow Timestamp
+ private final boolean convertInt96ToArrowTimestamp;
+
/**
* For when we'll need this to be configurable
*/
public SchemaConverter() {
+ this(false);
+ }
+
+ // TODO(PARQUET-1511): pass the parameters in a configuration object
+ public SchemaConverter(final boolean convertInt96ToArrowTimestamp) {
+ this.convertInt96ToArrowTimestamp = convertInt96ToArrowTimestamp;
}
/**
@@ -492,8 +501,11 @@ public class SchemaConverter {
@Override
public TypeMapping convertINT96(PrimitiveTypeName primitiveTypeName)
throws RuntimeException {
- // Possibly timestamp
- return field(new ArrowType.Binary());
+ if (convertInt96ToArrowTimestamp) {
+ return field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null));
+ } else {
+ return field(new ArrowType.Binary());
+ }
}
@Override
diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index e21f36c..764621a 100644
--- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -47,6 +47,7 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LE
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
import java.io.IOException;
import java.util.List;
@@ -439,6 +440,27 @@ public class TestSchemaConverter {
Assert.assertEquals(expected,
converter.fromParquet(parquet).getArrowSchema());
}
+ @Test
+ public void testParquetInt96ToArrowBinary() {
+ MessageType parquet = Types.buildMessage()
+ .addField(Types.optional(INT96).named("a")).named("root");
+ Schema expected = new Schema(asList(
+ field("a", new ArrowType.Binary())
+ ));
+ Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
+ }
+
+ @Test
+ public void testParquetInt96ToArrowTimestamp() {
+ final SchemaConverter converterInt96ToTimestamp = new SchemaConverter(true);
+ MessageType parquet = Types.buildMessage()
+ .addField(Types.optional(INT96).named("a")).named("root");
+ Schema expected = new Schema(asList(
+ field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, null))
+ ));
+ Assert.assertEquals(expected, converterInt96ToTimestamp.fromParquet(parquet).getArrowSchema());
+ }
+
@Test(expected = IllegalStateException.class)
public void testParquetInt64TimeMillisToArrow() {
converter.fromParquet(Types.buildMessage()