This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 37a29d98f Add logical type annotation for `UnknownType` (#3154)
37a29d98f is described below
commit 37a29d98f943a5ff1ae12875b5cc6deec854d1ef
Author: Fokko Driesprong <[email protected]>
AuthorDate: Thu Feb 27 20:44:20 2025 +0100
Add logical type annotation for `UnknownType` (#3154)
---
.../parquet/schema/LogicalTypeAnnotation.java | 41 ++++++++++++++++++++++
.../parquet/schema/PrimitiveStringifier.java | 7 ++++
.../main/java/org/apache/parquet/schema/Types.java | 6 ++++
.../format/converter/ParquetMetadataConverter.java | 16 +++++++--
4 files changed, 68 insertions(+), 2 deletions(-)
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 05629dd38..78b0f9a0c 100644
---
a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -146,6 +146,12 @@ public abstract class LogicalTypeAnnotation {
protected LogicalTypeAnnotation fromString(List<String> params) {
return float16Type();
}
+ },
+ UNKNOWN {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ return unknownType(); // params is not consulted for this token
+ }
};
protected abstract LogicalTypeAnnotation fromString(List<String> params);
@@ -316,6 +322,10 @@ public abstract class LogicalTypeAnnotation {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static UnknownLogicalTypeAnnotation unknownType() { // accessor for the shared INSTANCE
+ return UnknownLogicalTypeAnnotation.INSTANCE;
+ }
+
public static class StringLogicalTypeAnnotation extends
LogicalTypeAnnotation {
private static final StringLogicalTypeAnnotation INSTANCE = new
StringLogicalTypeAnnotation();
@@ -989,6 +999,33 @@ public abstract class LogicalTypeAnnotation {
}
}
+ public static class UnknownLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private static final UnknownLogicalTypeAnnotation INSTANCE = new
UnknownLogicalTypeAnnotation();
+
+ private UnknownLogicalTypeAnnotation() {} // private: INSTANCE above is the only construction site shown
+
+ @Override
+ public OriginalType toOriginalType() {
+ // UnknownType has no OriginalType counterpart, so null is returned
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this); // dispatches to the visitor's UnknownLogicalTypeAnnotation overload
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.UNKNOWN;
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.UNKNOWN_STRINGIFIER; // same stringifier whatever primitiveType is
+ }
+ }
+
// This logical type annotation is implemented to support backward
compatibility with ConvertedType.
// The new logical type representation in parquet-format doesn't have any
interval type,
// thus this annotation is mapped to UNKNOWN.
@@ -1162,5 +1199,9 @@ public abstract class LogicalTypeAnnotation {
default Optional<T> visit(Float16LogicalTypeAnnotation float16LogicalType)
{
return empty();
}
+
+ default Optional<T> visit(UnknownLogicalTypeAnnotation
unknownLogicalTypeAnnotation) {
+ return empty(); // default for visitors that do not override this overload
+ }
}
}
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
index c46e94367..7aface72a 100644
---
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
+++
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
@@ -449,4 +449,11 @@ public abstract class PrimitiveStringifier {
return Float16.toFloatString(value);
}
};
+
+ static final PrimitiveStringifier UNKNOWN_STRINGIFIER = new
PrimitiveStringifier("UNKNOWN_STRINGIFIER") {
+ @Override
+ public String stringify(Binary ignored) {
+ return "UNKNOWN"; // constant text; the Binary argument is never read
+ }
+ };
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 5bc2f89f4..399672022 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -472,6 +472,12 @@ public class Types {
LogicalTypeAnnotation.Float16LogicalTypeAnnotation.BYTES,
float16LogicalType);
}
+ @Override
+ public Optional<Boolean> visit(
+ LogicalTypeAnnotation.UnknownLogicalTypeAnnotation
unknownLogicalType) {
+ return Optional.of(true); // unconditional: the annotation argument is not inspected
+ }
+
@Override
public Optional<Boolean> visit(
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation
decimalLogicalType) {
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index e72f2c33a..3c38f04af 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -515,6 +515,11 @@ public class ParquetMetadataConverter {
return of(LogicalType.FLOAT16(new Float16Type()));
}
+ @Override
+ public Optional<LogicalType>
visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation unknownLogicalType) {
+ return of(LogicalType.UNKNOWN(new NullType())); // parameter unused; payload is always a fresh NullType
+ }
+
@Override
public Optional<LogicalType>
visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
return of(LogicalType.UNKNOWN(new NullType()));
@@ -894,7 +899,8 @@ public class ParquetMetadataConverter {
LogicalTypeAnnotation.StringLogicalTypeAnnotation.class,
LogicalTypeAnnotation.EnumLogicalTypeAnnotation.class,
LogicalTypeAnnotation.JsonLogicalTypeAnnotation.class,
- LogicalTypeAnnotation.Float16LogicalTypeAnnotation.class)));
+ LogicalTypeAnnotation.Float16LogicalTypeAnnotation.class,
+ LogicalTypeAnnotation.UnknownLogicalTypeAnnotation.class)));
/**
* Returns whether to use signed order min and max with a type. It is safe to
@@ -997,6 +1003,12 @@ public class ParquetMetadataConverter {
return of(SortOrder.SIGNED);
}
+ @Override
+ public Optional<SortOrder> visit(
+ LogicalTypeAnnotation.UnknownLogicalTypeAnnotation
unknownLogicalTypeAnnotation) {
+ return of(SortOrder.UNKNOWN); // reports SortOrder.UNKNOWN for this annotation (the method just above returns SIGNED)
+ }
+
@Override
public Optional<SortOrder> visit(
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation
decimalLogicalType) {
@@ -1167,7 +1179,7 @@ public class ParquetMetadataConverter {
IntType integer = type.getINTEGER();
return LogicalTypeAnnotation.intType(integer.bitWidth,
integer.isSigned);
case UNKNOWN:
- return null;
+ return LogicalTypeAnnotation.unknownType();
case TIMESTAMP:
TimestampType timestamp = type.getTIMESTAMP();
return LogicalTypeAnnotation.timestampType(timestamp.isAdjustedToUTC,
convertTimeUnit(timestamp.unit));