This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 797e32a  PARQUET-1487: Do not write original type for timezone-agnostic timestamps (#585)
797e32a is described below

commit 797e32aca0eadd1d460e5f5cd477e37bc828b67d
Author: nandorKollar <[email protected]>
AuthorDate: Wed Jan 9 13:42:00 2019 +0100

    PARQUET-1487: Do not write original type for timezone-agnostic timestamps (#585)
---
 .../parquet/schema/LogicalTypeAnnotation.java      |  6 ++
 .../format/converter/ParquetMetadataConverter.java |  6 ++
 .../converter/TestParquetMetadataConverter.java    | 80 ++++++++++++++++++----
 3 files changed, 79 insertions(+), 13 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 5f61ed6..4472376 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -555,6 +555,9 @@ public abstract class LogicalTypeAnnotation {
     @Override
     @InterfaceAudience.Private
     public OriginalType toOriginalType() {
+      if (!isAdjustedToUTC) {
+        return null;
+      }
       switch (unit) {
         case MILLIS:
           return OriginalType.TIME_MILLIS;
@@ -634,6 +637,9 @@ public abstract class LogicalTypeAnnotation {
     @Override
     @InterfaceAudience.Private
     public OriginalType toOriginalType() {
+      if (!isAdjustedToUTC) {
+        return null;
+      }
       switch (unit) {
         case MILLIS:
           return OriginalType.TIMESTAMP_MILLIS;
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index efb0608..fb0ca7b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -309,6 +309,9 @@ public class ParquetMetadataConverter {
 
     @Override
     public Optional<ConvertedType> 
visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+      if (!timeLogicalType.isAdjustedToUTC()) {
+        return empty();
+      }
       switch (timeLogicalType.getUnit()) {
         case MILLIS:
           return of(ConvertedType.TIME_MILLIS);
@@ -323,6 +326,9 @@ public class ParquetMetadataConverter {
 
     @Override
     public Optional<ConvertedType> 
visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation 
timestampLogicalType) {
+      if (!timestampLogicalType.isAdjustedToUTC()) {
+        return empty();
+      }
       switch (timestampLogicalType.getUnit()) {
         case MICROS:
           return of(ConvertedType.TIMESTAMP_MICROS);
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 358a29a..65244f4 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -20,6 +20,18 @@ package org.apache.parquet.format.converter;
 
 import static java.util.Collections.emptyList;
 import static 
org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.listType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.mapType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
@@ -196,40 +208,40 @@ public class TestParquetMetadataConverter {
     ParquetMetadataConverter parquetMetadataConverter = new 
ParquetMetadataConverter();
     MessageType expected = Types.buildMessage()
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timestampType(false, MILLIS))
       .named("aTimestampNonUtcMillis")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timestampType(true, MILLIS))
       .named("aTimestampUtcMillis")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timestampType(false, MICROS))
       .named("aTimestampNonUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timestampType(true, MICROS))
       .named("aTimestampUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timestampType(false, NANOS))
       .named("aTimestampNonUtcNanos")
       .required(PrimitiveTypeName.INT64)
-      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timestampType(true, NANOS))
       .named("aTimestampUtcNanos")
       .required(PrimitiveTypeName.INT32)
-      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timeType(false, MILLIS))
       .named("aTimeNonUtcMillis")
       .required(PrimitiveTypeName.INT32)
-      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .as(timeType(true, MILLIS))
       .named("aTimeUtcMillis")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timeType(false, MICROS))
       .named("aTimeNonUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .as(timeType(true, MICROS))
       .named("aTimeUtcMicros")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timeType(false, NANOS))
       .named("aTimeNonUtcNanos")
       .required(PrimitiveTypeName.INT64)
-      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .as(timeType(true, NANOS))
       .named("aTimeUtcNanos")
       .named("Message");
     List<SchemaElement> parquetSchema = 
parquetMetadataConverter.toParquetSchema(expected);
@@ -238,6 +250,48 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
+  public void testLogicalToConvertedTypeConversion() {
+    ParquetMetadataConverter parquetMetadataConverter = new 
ParquetMetadataConverter();
+
+    assertEquals(ConvertedType.UTF8, 
parquetMetadataConverter.convertToConvertedType(stringType()));
+    assertEquals(ConvertedType.ENUM, 
parquetMetadataConverter.convertToConvertedType(enumType()));
+
+    assertEquals(ConvertedType.INT_8, 
parquetMetadataConverter.convertToConvertedType(intType(8, true)));
+    assertEquals(ConvertedType.INT_16, 
parquetMetadataConverter.convertToConvertedType(intType(16, true)));
+    assertEquals(ConvertedType.INT_32, 
parquetMetadataConverter.convertToConvertedType(intType(32, true)));
+    assertEquals(ConvertedType.INT_64, 
parquetMetadataConverter.convertToConvertedType(intType(64, true)));
+    assertEquals(ConvertedType.UINT_8, 
parquetMetadataConverter.convertToConvertedType(intType(8, false)));
+    assertEquals(ConvertedType.UINT_16, 
parquetMetadataConverter.convertToConvertedType(intType(16, false)));
+    assertEquals(ConvertedType.UINT_32, 
parquetMetadataConverter.convertToConvertedType(intType(32, false)));
+    assertEquals(ConvertedType.UINT_64, 
parquetMetadataConverter.convertToConvertedType(intType(64, false)));
+    assertEquals(ConvertedType.DECIMAL, 
parquetMetadataConverter.convertToConvertedType(decimalType(8, 16)));
+
+    assertEquals(ConvertedType.TIMESTAMP_MILLIS, 
parquetMetadataConverter.convertToConvertedType(timestampType(true, MILLIS)));
+    assertEquals(ConvertedType.TIMESTAMP_MICROS, 
parquetMetadataConverter.convertToConvertedType(timestampType(true, MICROS)));
+    
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(true, 
NANOS)));
+    
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, 
MILLIS)));
+    
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, 
MICROS)));
+    
assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, 
NANOS)));
+
+    assertEquals(ConvertedType.TIME_MILLIS, 
parquetMetadataConverter.convertToConvertedType(timeType(true, MILLIS)));
+    assertEquals(ConvertedType.TIME_MICROS, 
parquetMetadataConverter.convertToConvertedType(timeType(true, MICROS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(true, 
NANOS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, 
MILLIS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, 
MICROS)));
+    assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, 
NANOS)));
+
+    assertEquals(ConvertedType.DATE, 
parquetMetadataConverter.convertToConvertedType(dateType()));
+
+    assertEquals(ConvertedType.INTERVAL, 
parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()));
+    assertEquals(ConvertedType.JSON, 
parquetMetadataConverter.convertToConvertedType(jsonType()));
+    assertEquals(ConvertedType.BSON, 
parquetMetadataConverter.convertToConvertedType(bsonType()));
+
+    assertEquals(ConvertedType.LIST, 
parquetMetadataConverter.convertToConvertedType(listType()));
+    assertEquals(ConvertedType.MAP, 
parquetMetadataConverter.convertToConvertedType(mapType()));
+    assertEquals(ConvertedType.MAP_KEY_VALUE, 
parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance()));
+  }
+
+  @Test
   public void testEnumEquivalence() {
     ParquetMetadataConverter parquetMetadataConverter = new 
ParquetMetadataConverter();
     for (org.apache.parquet.column.Encoding encoding : 
org.apache.parquet.column.Encoding.values()) {
@@ -1024,7 +1078,7 @@ public class TestParquetMetadataConverter {
     stats.updateStats(200l);
     stats.updateStats(500l);
     builder.add(stats);
-    org.apache.parquet.format.ColumnIndex parquetColumnIndex = 
+    org.apache.parquet.format.ColumnIndex parquetColumnIndex =
         ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
     ColumnIndex columnIndex = 
ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);
     assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());

Reply via email to