[
https://issues.apache.org/jira/browse/PARQUET-1253?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16462219#comment-16462219
]
ASF GitHub Bot commented on PARQUET-1253:
-----------------------------------------
gszadovszky commented on a change in pull request #463: PARQUET-1253: Support
for new logical type representation
URL: https://github.com/apache/parquet-mr/pull/463#discussion_r185746812
##########
File path:
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
##########
@@ -77,63 +77,116 @@ static LogicalTypeAnnotation
fromOriginalType(OriginalType originalType, Decimal
}
switch (originalType) {
case UTF8:
- return StringLogicalTypeAnnotation.create();
+ return stringType();
case MAP:
- return MapLogicalTypeAnnotation.create();
+ return mapType();
case DECIMAL:
int scale = (decimalMetadata == null ? 0 : decimalMetadata.getScale());
int precision = (decimalMetadata == null ? 0 :
decimalMetadata.getPrecision());
- return DecimalLogicalTypeAnnotation.create(scale, precision);
+ return decimalType(scale, precision);
case LIST:
- return ListLogicalTypeAnnotation.create();
+ return listType();
case DATE:
- return DateLogicalTypeAnnotation.create();
+ return dateType();
case INTERVAL:
- return IntervalLogicalTypeAnnotation.create();
+ return intervalType();
case TIMESTAMP_MILLIS:
- return TimestampLogicalTypeAnnotation.create(true,
LogicalTypeAnnotation.TimeUnit.MILLIS);
+ return timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS);
case TIMESTAMP_MICROS:
- return TimestampLogicalTypeAnnotation.create(true,
LogicalTypeAnnotation.TimeUnit.MICROS);
+ return timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS);
case TIME_MILLIS:
- return TimeLogicalTypeAnnotation.create(true,
LogicalTypeAnnotation.TimeUnit.MILLIS);
+ return timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS);
case TIME_MICROS:
- return TimeLogicalTypeAnnotation.create(true,
LogicalTypeAnnotation.TimeUnit.MICROS);
+ return timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS);
case UINT_8:
- return IntLogicalTypeAnnotation.create(8, false);
+ return intType(8, false);
case UINT_16:
- return IntLogicalTypeAnnotation.create(16, false);
+ return intType(16, false);
case UINT_32:
- return IntLogicalTypeAnnotation.create(32, false);
+ return intType(32, false);
case UINT_64:
- return IntLogicalTypeAnnotation.create(64, false);
+ return intType(64, false);
case INT_8:
- return IntLogicalTypeAnnotation.create(8, true);
+ return intType(8, true);
case INT_16:
- return IntLogicalTypeAnnotation.create(16, true);
+ return intType(16, true);
case INT_32:
- return IntLogicalTypeAnnotation.create(32, true);
+ return intType(32, true);
case INT_64:
- return IntLogicalTypeAnnotation.create(64, true);
+ return intType(64, true);
case ENUM:
- return EnumLogicalTypeAnnotation.create();
+ return enumType();
case JSON:
- return JsonLogicalTypeAnnotation.create();
+ return jsonType();
case BSON:
- return BsonLogicalTypeAnnotation.create();
+ return bsonType();
case MAP_KEY_VALUE:
- return MapKeyValueTypeAnnotation.create();
+ return mapKeyValueType();
default:
throw new RuntimeException("Can't convert original type to logical
type, unknown original type " + originalType);
}
}
+
+ static StringLogicalTypeAnnotation stringType() {
+ return StringLogicalTypeAnnotation.INSTANCE;
+ }
+
+ static MapLogicalTypeAnnotation mapType() {
+ return MapLogicalTypeAnnotation.INSTANCE;
+ }
+
+ static ListLogicalTypeAnnotation listType() {
+ return ListLogicalTypeAnnotation.INSTANCE;
+ }
+
+ static EnumLogicalTypeAnnotation enumType() {
+ return EnumLogicalTypeAnnotation.INSTANCE;
+ }
+
+ static DecimalLogicalTypeAnnotation decimalType(final int scale, final int
precision) {
+ return new DecimalLogicalTypeAnnotation(scale, precision);
+ }
+
+ static DateLogicalTypeAnnotation dateType() {
+ return DateLogicalTypeAnnotation.INSTANCE;
+ }
+
+ static TimeLogicalTypeAnnotation timeType(final boolean isAdjustedToUTC,
final TimeUnit unit) {
+ return new TimeLogicalTypeAnnotation(isAdjustedToUTC, unit);
+ }
+
+ static TimestampLogicalTypeAnnotation timestampType(final boolean
isAdjustedToUTC, final TimeUnit unit) {
+ return new TimestampLogicalTypeAnnotation(isAdjustedToUTC, unit);
+ }
+
+ static IntLogicalTypeAnnotation intType(final int bitWidth, final boolean
isSigned) {
+ Preconditions.checkArgument(
+ bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64,
+ "Invalid bit width for integer logical type, " + bitWidth + " is not
allowed, " +
+ "valid bit width values: 8, 16, 32, 64");
+ return new IntLogicalTypeAnnotation(bitWidth, isSigned);
+ }
+
+ static JsonLogicalTypeAnnotation jsonType() {
+ return JsonLogicalTypeAnnotation.INSTANCE;
+ }
+
+ static BsonLogicalTypeAnnotation bsonType() {
+ return BsonLogicalTypeAnnotation.INSTANCE;
+ }
+
+ static IntervalLogicalTypeAnnotation intervalType() {
Review comment:
Interval is not supported in LogicalType currently, so I would not expose it
yet. It is always possible to make a restricted access public but in the other
way it would break backward compatibility.
Abstract class sounds reasonable to me.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Support for new logical type representation
> -------------------------------------------
>
> Key: PARQUET-1253
> URL: https://issues.apache.org/jira/browse/PARQUET-1253
> Project: Parquet
> Issue Type: Improvement
> Components: parquet-mr
> Reporter: Nandor Kollar
> Assignee: Nandor Kollar
> Priority: Major
>
> Latest parquet-format
> [introduced|https://github.com/apache/parquet-format/commit/863875e0be3237c6aa4ed71733d54c91a51deabe#diff-0f9d1b5347959e15259da7ba8f4b6252]
> a new representation for logical types. As of now this is not yet supported
> in parquet-mr, thus there's no way to use parametrized UTC normalized
> timestamp data types. When reading and writing Parquet files, besides
> 'converted_type' parquet-mr should use the new 'logicalType' field in
> SchemaElement to tell the current logical type annotation. To maintain
> backward compatibility, the semantic of converted_type shouldn't change.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)