This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 797e32a PARQUET-1487: Do not write original type for timezone-agnostic timestamps (#585)
797e32a is described below
commit 797e32aca0eadd1d460e5f5cd477e37bc828b67d
Author: nandorKollar <[email protected]>
AuthorDate: Wed Jan 9 13:42:00 2019 +0100
PARQUET-1487: Do not write original type for timezone-agnostic timestamps (#585)
---
.../parquet/schema/LogicalTypeAnnotation.java | 6 ++
.../format/converter/ParquetMetadataConverter.java | 6 ++
.../converter/TestParquetMetadataConverter.java | 80 ++++++++++++++++++----
3 files changed, 79 insertions(+), 13 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 5f61ed6..4472376 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -555,6 +555,9 @@ public abstract class LogicalTypeAnnotation {
@Override
@InterfaceAudience.Private
public OriginalType toOriginalType() {
+ if (!isAdjustedToUTC) {
+ return null;
+ }
switch (unit) {
case MILLIS:
return OriginalType.TIME_MILLIS;
@@ -634,6 +637,9 @@ public abstract class LogicalTypeAnnotation {
@Override
@InterfaceAudience.Private
public OriginalType toOriginalType() {
+ if (!isAdjustedToUTC) {
+ return null;
+ }
switch (unit) {
case MILLIS:
return OriginalType.TIMESTAMP_MILLIS;
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index efb0608..fb0ca7b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -309,6 +309,9 @@ public class ParquetMetadataConverter {
@Override
public Optional<ConvertedType>
visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+ if (!timeLogicalType.isAdjustedToUTC()) {
+ return empty();
+ }
switch (timeLogicalType.getUnit()) {
case MILLIS:
return of(ConvertedType.TIME_MILLIS);
@@ -323,6 +326,9 @@ public class ParquetMetadataConverter {
@Override
public Optional<ConvertedType>
visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation
timestampLogicalType) {
+ if (!timestampLogicalType.isAdjustedToUTC()) {
+ return empty();
+ }
switch (timestampLogicalType.getUnit()) {
case MICROS:
return of(ConvertedType.TIMESTAMP_MICROS);
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 358a29a..65244f4 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -20,6 +20,18 @@ package org.apache.parquet.format.converter;
import static java.util.Collections.emptyList;
import static
org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.listType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.mapType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
@@ -196,40 +208,40 @@ public class TestParquetMetadataConverter {
ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
MessageType expected = Types.buildMessage()
.required(PrimitiveTypeName.INT64)
- .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .as(timestampType(false, MILLIS))
.named("aTimestampNonUtcMillis")
.required(PrimitiveTypeName.INT64)
- .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .as(timestampType(true, MILLIS))
.named("aTimestampUtcMillis")
.required(PrimitiveTypeName.INT64)
- .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .as(timestampType(false, MICROS))
.named("aTimestampNonUtcMicros")
.required(PrimitiveTypeName.INT64)
- .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .as(timestampType(true, MICROS))
.named("aTimestampUtcMicros")
.required(PrimitiveTypeName.INT64)
- .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+ .as(timestampType(false, NANOS))
.named("aTimestampNonUtcNanos")
.required(PrimitiveTypeName.INT64)
- .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+ .as(timestampType(true, NANOS))
.named("aTimestampUtcNanos")
.required(PrimitiveTypeName.INT32)
- .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .as(timeType(false, MILLIS))
.named("aTimeNonUtcMillis")
.required(PrimitiveTypeName.INT32)
- .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .as(timeType(true, MILLIS))
.named("aTimeUtcMillis")
.required(PrimitiveTypeName.INT64)
- .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .as(timeType(false, MICROS))
.named("aTimeNonUtcMicros")
.required(PrimitiveTypeName.INT64)
- .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .as(timeType(true, MICROS))
.named("aTimeUtcMicros")
.required(PrimitiveTypeName.INT64)
- .as(timeType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+ .as(timeType(false, NANOS))
.named("aTimeNonUtcNanos")
.required(PrimitiveTypeName.INT64)
- .as(timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+ .as(timeType(true, NANOS))
.named("aTimeUtcNanos")
.named("Message");
List<SchemaElement> parquetSchema =
parquetMetadataConverter.toParquetSchema(expected);
@@ -238,6 +250,48 @@ public class TestParquetMetadataConverter {
}
@Test
+ public void testLogicalToConvertedTypeConversion() {
+ ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
+
+ assertEquals(ConvertedType.UTF8,
parquetMetadataConverter.convertToConvertedType(stringType()));
+ assertEquals(ConvertedType.ENUM,
parquetMetadataConverter.convertToConvertedType(enumType()));
+
+ assertEquals(ConvertedType.INT_8,
parquetMetadataConverter.convertToConvertedType(intType(8, true)));
+ assertEquals(ConvertedType.INT_16,
parquetMetadataConverter.convertToConvertedType(intType(16, true)));
+ assertEquals(ConvertedType.INT_32,
parquetMetadataConverter.convertToConvertedType(intType(32, true)));
+ assertEquals(ConvertedType.INT_64,
parquetMetadataConverter.convertToConvertedType(intType(64, true)));
+ assertEquals(ConvertedType.UINT_8,
parquetMetadataConverter.convertToConvertedType(intType(8, false)));
+ assertEquals(ConvertedType.UINT_16,
parquetMetadataConverter.convertToConvertedType(intType(16, false)));
+ assertEquals(ConvertedType.UINT_32,
parquetMetadataConverter.convertToConvertedType(intType(32, false)));
+ assertEquals(ConvertedType.UINT_64,
parquetMetadataConverter.convertToConvertedType(intType(64, false)));
+ assertEquals(ConvertedType.DECIMAL,
parquetMetadataConverter.convertToConvertedType(decimalType(8, 16)));
+
+ assertEquals(ConvertedType.TIMESTAMP_MILLIS,
parquetMetadataConverter.convertToConvertedType(timestampType(true, MILLIS)));
+ assertEquals(ConvertedType.TIMESTAMP_MICROS,
parquetMetadataConverter.convertToConvertedType(timestampType(true, MICROS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(true, NANOS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MILLIS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, MICROS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timestampType(false, NANOS)));
+
+ assertEquals(ConvertedType.TIME_MILLIS,
parquetMetadataConverter.convertToConvertedType(timeType(true, MILLIS)));
+ assertEquals(ConvertedType.TIME_MICROS,
parquetMetadataConverter.convertToConvertedType(timeType(true, MICROS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timeType(true, NANOS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MILLIS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, MICROS)));
+ assertNull(parquetMetadataConverter.convertToConvertedType(timeType(false, NANOS)));
+
+ assertEquals(ConvertedType.DATE,
parquetMetadataConverter.convertToConvertedType(dateType()));
+
+ assertEquals(ConvertedType.INTERVAL,
parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()));
+ assertEquals(ConvertedType.JSON,
parquetMetadataConverter.convertToConvertedType(jsonType()));
+ assertEquals(ConvertedType.BSON,
parquetMetadataConverter.convertToConvertedType(bsonType()));
+
+ assertEquals(ConvertedType.LIST,
parquetMetadataConverter.convertToConvertedType(listType()));
+ assertEquals(ConvertedType.MAP,
parquetMetadataConverter.convertToConvertedType(mapType()));
+ assertEquals(ConvertedType.MAP_KEY_VALUE,
parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance()));
+ }
+
+ @Test
public void testEnumEquivalence() {
ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
for (org.apache.parquet.column.Encoding encoding :
org.apache.parquet.column.Encoding.values()) {
@@ -1024,7 +1078,7 @@ public class TestParquetMetadataConverter {
stats.updateStats(200l);
stats.updateStats(500l);
builder.add(stats);
- org.apache.parquet.format.ColumnIndex parquetColumnIndex =
+ org.apache.parquet.format.ColumnIndex parquetColumnIndex =
ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
ColumnIndex columnIndex =
ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);
assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());