[ 
https://issues.apache.org/jira/browse/PARQUET-1253?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16424019#comment-16424019
 ] 

ASF GitHub Bot commented on PARQUET-1253:
-----------------------------------------

gszadovszky commented on a change in pull request #463: PARQUET-1253: Support 
for new logical type representation
URL: https://github.com/apache/parquet-mr/pull/463#discussion_r178821039
 
 

 ##########
 File path: 
parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
 ##########
 @@ -586,108 +595,105 @@ Type getType(PrimitiveTypeName type) {
   }
 
   // Visible for testing
-  OriginalType getOriginalType(ConvertedType type) {
+  LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement 
schemaElement) {
     switch (type) {
       case UTF8:
-        return OriginalType.UTF8;
+        return LogicalTypeAnnotation.StringLogicalTypeAnnotation.create();
       case MAP:
-        return OriginalType.MAP;
+        return LogicalTypeAnnotation.MapLogicalTypeAnnotation.create();
       case MAP_KEY_VALUE:
-        return OriginalType.MAP_KEY_VALUE;
+        return LogicalTypeAnnotation.MapKeyValueTypeAnnotation.create();
       case LIST:
-        return OriginalType.LIST;
+        return LogicalTypeAnnotation.ListLogicalTypeAnnotation.create();
       case ENUM:
-        return OriginalType.ENUM;
+        return LogicalTypeAnnotation.EnumLogicalTypeAnnotation.create();
       case DECIMAL:
-        return OriginalType.DECIMAL;
+        if (schemaElement == null) {
+          return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create();
+        }
+        return 
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(schemaElement.scale, 
schemaElement.precision);
       case DATE:
-        return OriginalType.DATE;
+        return LogicalTypeAnnotation.DateLogicalTypeAnnotation.create();
       case TIME_MILLIS:
-        return OriginalType.TIME_MILLIS;
+        return LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(true, 
LogicalTypeAnnotation.TimeUnit.MILLIS);
       case TIME_MICROS:
-        return OriginalType.TIME_MICROS;
+        return LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(true, 
LogicalTypeAnnotation.TimeUnit.MICROS);
       case TIMESTAMP_MILLIS:
-        return OriginalType.TIMESTAMP_MILLIS;
+        return 
LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(true, 
LogicalTypeAnnotation.TimeUnit.MILLIS);
       case TIMESTAMP_MICROS:
-        return OriginalType.TIMESTAMP_MICROS;
+        return 
LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(true, 
LogicalTypeAnnotation.TimeUnit.MICROS);
       case INTERVAL:
-        return OriginalType.INTERVAL;
+        return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.create();
       case INT_8:
-        return OriginalType.INT_8;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 8, 
true);
       case INT_16:
-        return OriginalType.INT_16;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 
16, true);
       case INT_32:
-        return OriginalType.INT_32;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 
32, true);
       case INT_64:
-        return OriginalType.INT_64;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 
64, true);
       case UINT_8:
-        return OriginalType.UINT_8;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 8, 
false);
       case UINT_16:
-        return OriginalType.UINT_16;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 
16, false);
       case UINT_32:
-        return OriginalType.UINT_32;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 
32, false);
       case UINT_64:
-        return OriginalType.UINT_64;
+        return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 
64, false);
       case JSON:
-        return OriginalType.JSON;
+        return LogicalTypeAnnotation.JsonLogicalTypeAnnotation.create();
       case BSON:
-        return OriginalType.BSON;
+        return LogicalTypeAnnotation.BsonLogicalTypeAnnotation.create();
       default:
-        throw new RuntimeException("Unknown converted type " + type);
+        return LogicalTypeAnnotation.NullLogicalTypeAnnotation.create();
     }
   }
 
-  // Visible for testing
-  ConvertedType getConvertedType(OriginalType type) {
-    switch (type) {
-      case UTF8:
-        return ConvertedType.UTF8;
+  LogicalTypeAnnotation getOriginalType(LogicalType type) {
+    switch (type.getSetField()) {
       case MAP:
-        return ConvertedType.MAP;
-      case MAP_KEY_VALUE:
-        return ConvertedType.MAP_KEY_VALUE;
-      case LIST:
-        return ConvertedType.LIST;
-      case ENUM:
-        return ConvertedType.ENUM;
-      case DECIMAL:
-        return ConvertedType.DECIMAL;
+        return LogicalTypeAnnotation.MapLogicalTypeAnnotation.create();
+      case BSON:
+        return LogicalTypeAnnotation.BsonLogicalTypeAnnotation.create();
       case DATE:
-        return ConvertedType.DATE;
-      case TIME_MILLIS:
-        return ConvertedType.TIME_MILLIS;
-      case TIME_MICROS:
-        return ConvertedType.TIME_MICROS;
-      case TIMESTAMP_MILLIS:
-        return ConvertedType.TIMESTAMP_MILLIS;
-      case TIMESTAMP_MICROS:
-        return ConvertedType.TIMESTAMP_MICROS;
-      case INTERVAL:
-        return ConvertedType.INTERVAL;
-      case INT_8:
-        return ConvertedType.INT_8;
-      case INT_16:
-        return ConvertedType.INT_16;
-      case INT_32:
-        return ConvertedType.INT_32;
-      case INT_64:
-        return ConvertedType.INT_64;
-      case UINT_8:
-        return ConvertedType.UINT_8;
-      case UINT_16:
-        return ConvertedType.UINT_16;
-      case UINT_32:
-        return ConvertedType.UINT_32;
-      case UINT_64:
-        return ConvertedType.UINT_64;
+        return LogicalTypeAnnotation.DateLogicalTypeAnnotation.create();
+      case ENUM:
+        return LogicalTypeAnnotation.EnumLogicalTypeAnnotation.create();
       case JSON:
-        return ConvertedType.JSON;
-      case BSON:
-        return ConvertedType.BSON;
+        return LogicalTypeAnnotation.JsonLogicalTypeAnnotation.create();
+      case LIST:
+        return LogicalTypeAnnotation.ListLogicalTypeAnnotation.create();
+      case TIME:
+        TimeType time = type.getTIME();
+        return 
LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(time.isAdjustedToUTC, 
convertTimeUnit(time.unit));
+      case STRING:
+        return LogicalTypeAnnotation.StringLogicalTypeAnnotation.create();
+      case DECIMAL:
+        DecimalType decimal = type.getDECIMAL();
+        return 
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(decimal.scale, 
decimal.precision);
+      case INTEGER:
+        IntType integer = type.getINTEGER();
+        return 
LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(integer.bitWidth, 
integer.isSigned);
+      case UNKNOWN:
+        return null;
+      case TIMESTAMP:
+        TimestampType timestamp = type.getTIMESTAMP();
+        return 
LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(timestamp.isAdjustedToUTC,
 convertTimeUnit(timestamp.unit));
       default:
-        throw new RuntimeException("Unknown original type " + type);
-     }
-   }
+        throw new RuntimeException("Unknown logical type " + type);
+    }
+  }
+
+  LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) {
 
 Review comment:
   As far as I can see this method can be private. Please, use visibility as 
tight as possible and comment if something is visible for testing purposes only.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Support for new logical type representation
> -------------------------------------------
>
>                 Key: PARQUET-1253
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1253
>             Project: Parquet
>          Issue Type: Improvement
>          Components: parquet-mr
>            Reporter: Nandor Kollar
>            Assignee: Nandor Kollar
>            Priority: Major
>
> Latest parquet-format 
> [introduced|https://github.com/apache/parquet-format/commit/863875e0be3237c6aa4ed71733d54c91a51deabe#diff-0f9d1b5347959e15259da7ba8f4b6252]
>  a new representation for logical types. As of now this is not yet supported 
> in parquet-mr, thus there's no way to use parametrized UTC normalized 
> timestamp data types. When reading and writing Parquet files, besides 
> 'converted_type' parquet-mr should use the new 'logicalType' field in 
> SchemaElement to tell the current logical type annotation. To maintain 
> backward compatibility, the semantic of converted_type shouldn't change.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to