[
https://issues.apache.org/jira/browse/PARQUET-1371?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16571878#comment-16571878
]
ASF GitHub Bot commented on PARQUET-1371:
-----------------------------------------
zivanfi closed pull request #511: PARQUET-1371: Time/Timestamp UTC
normalization parameter doesn't work
URL: https://github.com/apache/parquet-mr/pull/511
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index d2225052d..1442910c8 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -799,7 +799,7 @@ Type getType(PrimitiveTypeName type) {
}
// Visible for testing
- LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement
schemaElement) {
+ LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type,
SchemaElement schemaElement) {
switch (type) {
case UTF8:
return LogicalTypeAnnotation.stringType();
@@ -852,7 +852,7 @@ LogicalTypeAnnotation getOriginalType(ConvertedType type,
SchemaElement schemaEl
}
}
- LogicalTypeAnnotation getOriginalType(LogicalType type) {
+ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
switch (type.getSetField()) {
case MAP:
return LogicalTypeAnnotation.mapType();
@@ -1194,12 +1194,15 @@ private void buildChildren(Types.GroupBuilder builder,
}
if (schemaElement.isSetLogicalType()) {
- childBuilder.as(getOriginalType(schemaElement.logicalType));
+ childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType));
}
if (schemaElement.isSetConverted_type()) {
- LogicalTypeAnnotation originalType =
getOriginalType(schemaElement.converted_type, schemaElement);
- LogicalTypeAnnotation newLogicalType =
schemaElement.isSetLogicalType() ? getOriginalType(schemaElement.logicalType) :
null;
- if (!originalType.equals(newLogicalType)) {
+ OriginalType originalType =
getLogicalTypeAnnotation(schemaElement.converted_type,
schemaElement).toOriginalType();
+ OriginalType newOriginalType = (schemaElement.isSetLogicalType() &&
getLogicalTypeAnnotation(schemaElement.logicalType) != null) ?
+
getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null;
+ if (!originalType.equals(newOriginalType)) {
+ LOG.warn("Converted type and logical type metadata mismatch
(convertedType: {}, logical type: {}). Using value in converted type.",
+ schemaElement.converted_type, schemaElement.logicalType);
childBuilder.as(originalType);
}
}
diff --git
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 1474525ba..d1a3a3c23 100644
---
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -20,6 +20,8 @@
import static java.util.Collections.emptyList;
import static
org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -162,6 +164,61 @@ public void
testLogicalTypesBackwardCompatibleWithConvertedTypes() {
assertEquals(expected, schema);
}
+ @Test
+ public void testIncompatibleLogicalAndConvertedTypes() {
+ ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
+ MessageType schema = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(OriginalType.DECIMAL).precision(9).scale(2)
+ .named("aBinary")
+ .named("Message");
+ MessageType expected = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.jsonType())
+ .named("aBinary")
+ .named("Message");
+
+ List<SchemaElement> parquetSchema =
parquetMetadataConverter.toParquetSchema(schema);
+ // Set converted type field to a different type to verify that in case of
mismatch, it overrides logical type
+ parquetSchema.get(1).setConverted_type(ConvertedType.JSON);
+ MessageType actual =
parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testTimeLogicalTypes() {
+ ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
+ MessageType expected = Types.buildMessage()
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimestampNonUtcMillis")
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimestampUtcMillis")
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimestampNonUtcMicros")
+ .required(PrimitiveTypeName.INT64)
+ .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimestampUtcMicros")
+ .required(PrimitiveTypeName.INT32)
+ .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimeNonUtcMillis")
+ .required(PrimitiveTypeName.INT32)
+ .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+ .named("aTimeUtcMillis")
+ .required(PrimitiveTypeName.INT64)
+ .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimeNonUtcMicros")
+ .required(PrimitiveTypeName.INT64)
+ .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+ .named("aTimeUtcMicros")
+ .named("Message");
+ List<SchemaElement> parquetSchema =
parquetMetadataConverter.toParquetSchema(expected);
+ MessageType schema =
parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+ assertEquals(expected, schema);
+ }
+
@Test
public void testEnumEquivalence() {
ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
@@ -184,11 +241,11 @@ public void testEnumEquivalence() {
assertEquals(type,
parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type)));
}
for (OriginalType original : OriginalType.values()) {
- assertEquals(original, parquetMetadataConverter.getOriginalType(
+ assertEquals(original, parquetMetadataConverter.getLogicalTypeAnnotation(
parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.fromOriginalType(original,
null)), null).toOriginalType());
}
for (ConvertedType converted : ConvertedType.values()) {
- assertEquals(converted,
parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getOriginalType(converted,
null)));
+ assertEquals(converted,
parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getLogicalTypeAnnotation(converted,
null)));
}
}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Time/Timestamp UTC normalization parameter doesn't work
> -------------------------------------------------------
>
> Key: PARQUET-1371
> URL: https://issues.apache.org/jira/browse/PARQUET-1371
> Project: Parquet
> Issue Type: Bug
> Reporter: Nandor Kollar
> Assignee: Nandor Kollar
> Priority: Major
> Labels: pull-request-available
>
> After creating a Parquet file with non-UTC normalized logical type, when
> reading back with the API, the result shows it is UTC normalized. Looks like
> the read path incorrectly reads the actual logical type (with the new API).
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)