This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 66e0c4eec GH-3070: Add Variant logical type annotation to parquet-java (#3072)
66e0c4eec is described below
commit 66e0c4eec5a7da37cc182ee0ebf6f2dbdeaac036
Author: Aihua Xu <[email protected]>
AuthorDate: Thu Apr 17 12:07:14 2025 -0700
GH-3070: Add Variant logical type annotation to parquet-java (#3072)
---
.../parquet/schema/LogicalTypeAnnotation.java | 59 ++++++++++++++++++++++
.../apache/parquet/schema/MessageTypeParser.java | 34 +++++++++++--
.../apache/parquet/parser/TestParquetParser.java | 27 ++++++++++
.../apache/parquet/schema/TestTypeBuilders.java | 47 +++++++++++++++++
.../schema/TestTypeBuildersWithLogicalTypes.java | 55 ++++++++++++++++++++
.../org/apache/parquet/format/LogicalTypes.java | 6 +++
.../format/converter/ParquetMetadataConverter.java | 9 ++++
.../converter/TestParquetMetadataConverter.java | 23 +++++++++
8 files changed, 255 insertions(+), 5 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 78b0f9a0c..749beaa95 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -56,6 +56,14 @@ public abstract class LogicalTypeAnnotation {
return listType();
}
},
+ VARIANT {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ Preconditions.checkArgument(
+ params.size() == 1, "Expecting only spec version for variant annotation args: %s", params);
+ return variantType(Byte.parseByte(params.get(0)));
+ }
+ },
STRING {
@Override
protected LogicalTypeAnnotation fromString(List<String> params) {
@@ -269,6 +277,10 @@ public abstract class LogicalTypeAnnotation {
return ListLogicalTypeAnnotation.INSTANCE;
}
+ public static VariantLogicalTypeAnnotation variantType(byte specVersion) {
+ return new VariantLogicalTypeAnnotation(specVersion);
+ }
+
public static EnumLogicalTypeAnnotation enumType() {
return EnumLogicalTypeAnnotation.INSTANCE;
}
@@ -1128,6 +1140,49 @@ public abstract class LogicalTypeAnnotation {
}
}
+ public static class VariantLogicalTypeAnnotation extends LogicalTypeAnnotation {
+ private byte specVersion;
+
+ private VariantLogicalTypeAnnotation(byte specVersion) {
+ this.specVersion = specVersion;
+ }
+
+ @Override
+ public OriginalType toOriginalType() {
+ // No OriginalType for Variant
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.VARIANT;
+ }
+
+ public byte getSpecVersion() {
+ return this.specVersion;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ return "(" + specVersion + ")";
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof VariantLogicalTypeAnnotation)) {
+ return false;
+ }
+
+ VariantLogicalTypeAnnotation other = (VariantLogicalTypeAnnotation) obj;
+ return specVersion == other.specVersion;
+ }
+ }
+
/**
* Implement this interface to visit a logical type annotation in the schema.
* The default implementation for each logical type specific visitor method is empty.
@@ -1152,6 +1207,10 @@ public abstract class LogicalTypeAnnotation {
return empty();
}
+ default Optional<T> visit(VariantLogicalTypeAnnotation variantLogicalType) {
+ return empty();
+ }
+
default Optional<T> visit(EnumLogicalTypeAnnotation enumLogicalType) {
return empty();
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
index 2e6cb2096..c7f4b900b 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
@@ -118,12 +118,36 @@ public class MessageTypeParser {
String name = st.nextToken();
// Read annotation, if any.
+ String annotation = null;
t = st.nextToken();
- OriginalType originalType = null;
if (t.equalsIgnoreCase("(")) {
- originalType = OriginalType.valueOf(st.nextToken());
- childBuilder.as(originalType);
- check(st.nextToken(), ")", "original type ended by )", st);
+ t = st.nextToken();
+ if (isLogicalType(t)) {
+ LogicalTypeAnnotation.LogicalTypeToken logicalType = LogicalTypeAnnotation.LogicalTypeToken.valueOf(t);
+ t = st.nextToken();
+ List<String> tokens = new ArrayList<>();
+ if ("(".equals(t)) {
+ while (!")".equals(t)) {
+ if (!(",".equals(t) || "(".equals(t) || ")".equals(t))) {
+ tokens.add(t);
+ }
+ t = st.nextToken();
+ }
+ t = st.nextToken();
+ }
+
+ LogicalTypeAnnotation logicalTypeAnnotation = logicalType.fromString(tokens);
+ childBuilder.as(logicalTypeAnnotation);
+ annotation = logicalTypeAnnotation.toString();
+ } else {
+ // Try to parse as OriginalType
+ OriginalType originalType = OriginalType.valueOf(t);
+ childBuilder.as(originalType);
+ annotation = originalType.toString();
+ t = st.nextToken();
+ }
+
+ check(t, ")", "logical type ended by )", st);
t = st.nextToken();
}
if (t.equals("=")) {
@@ -134,7 +158,7 @@ public class MessageTypeParser {
addGroupTypeFields(t, st, childBuilder);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
- "problem reading type: type = group, name = " + name + ", original type = " + originalType, e);
+ "problem reading type: type = group, name = " + name + ", annotation = " + annotation, e);
}
childBuilder.named(name);
diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
index 04b4a9432..5172e788b 100644
--- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
+++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
@@ -55,6 +55,7 @@ import static org.apache.parquet.schema.Types.buildMessage;
import static org.junit.Assert.assertEquals;
import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.OriginalType;
@@ -447,4 +448,30 @@ public class TestParquetParser {
MessageType reparsed =
MessageTypeParser.parseMessageType(parsed.toString());
assertEquals(expected, reparsed);
}
+
+ @Test
+ public void testVARIANTAnnotation() {
+ String message = "message Message {\n"
+ + " required group aVariant (VARIANT(2)) {\n"
+ + " required binary metadata;\n"
+ + " required binary value;\n"
+ + " }\n"
+ + "}\n";
+
+ MessageType expected = buildMessage()
+ .requiredGroup()
+ .as(LogicalTypeAnnotation.variantType((byte) 2))
+ .required(BINARY)
+ .named("metadata")
+ .required(BINARY)
+ .named("value")
+ .named("aVariant")
+ .named("Message");
+
+ MessageType parsed = parseMessageType(message);
+
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
+ }
}
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
index 579077897..71886d120 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
@@ -50,6 +50,7 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
@@ -1414,6 +1415,52 @@ public class TestTypeBuilders {
Assert.assertEquals(nonUtcMicrosExpected, nonUtcMicrosActual);
}
+ @Test
+ public void testVariantLogicalType() {
+ byte specVersion = 1;
+ String name = "variant_field";
+ GroupType variantExpected = new GroupType(
+ REQUIRED,
+ name,
+ LogicalTypeAnnotation.variantType(specVersion),
+ new PrimitiveType(REQUIRED, BINARY, "metadata"),
+ new PrimitiveType(REQUIRED, BINARY, "value"));
+
+ GroupType variantActual = Types.buildGroup(REQUIRED)
+ .addFields(
+ Types.required(BINARY).named("metadata"),
+ Types.required(BINARY).named("value"))
+ .as(LogicalTypeAnnotation.variantType(specVersion))
+ .named(name);
+
+ assertEquals(variantExpected, variantActual);
+ }
+
+ @Test
+ public void testVariantLogicalTypeWithShredded() {
+ byte specVersion = 1;
+ String name = "variant_field";
+ GroupType variantExpected = new GroupType(
+ REQUIRED,
+ name,
+ LogicalTypeAnnotation.variantType(specVersion),
+ new PrimitiveType(REQUIRED, BINARY, "metadata"),
+ new PrimitiveType(OPTIONAL, BINARY, "value"),
+ new PrimitiveType(OPTIONAL, BINARY, "typed_value",
LogicalTypeAnnotation.stringType()));
+
+ GroupType variantActual = Types.buildGroup(REQUIRED)
+ .addFields(
+ Types.required(BINARY).named("metadata"),
+ Types.optional(BINARY).named("value"),
+ Types.optional(BINARY)
+ .as(LogicalTypeAnnotation.stringType())
+ .named("typed_value"))
+ .as(LogicalTypeAnnotation.variantType(specVersion))
+ .named(name);
+
+ assertEquals(variantExpected, variantActual);
+ }
+
@Test(expected = IllegalArgumentException.class)
public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() {
Types.required(BINARY)
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
index 54853e813..61fe3065e 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
@@ -41,6 +41,8 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
import java.util.concurrent.Callable;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -473,6 +475,59 @@ public class TestTypeBuildersWithLogicalTypes {
.toString());
}
+ @Test
+ public void testVariantLogicalType() {
+ byte specVersion = 1;
+ String name = "variant_field";
+ GroupType variant = new GroupType(
+ REQUIRED,
+ name,
+ LogicalTypeAnnotation.variantType(specVersion),
+ Types.required(BINARY).named("metadata"),
+ Types.required(BINARY).named("value"));
+
+ assertEquals(
+ "required group variant_field (VARIANT(1)) {\n"
+ + " required binary metadata;\n"
+ + " required binary value;\n"
+ + "}",
+ variant.toString());
+
+ LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation();
+ assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT,
annotation.getType());
+ assertNull(annotation.toOriginalType());
+ assertTrue(annotation instanceof
LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
+ assertEquals(specVersion,
((LogicalTypeAnnotation.VariantLogicalTypeAnnotation)
annotation).getSpecVersion());
+ }
+
+ @Test
+ public void testVariantLogicalTypeWithShredded() {
+ byte specVersion = 1;
+
+ String name = "variant_field";
+ GroupType variant = new GroupType(
+ REQUIRED,
+ name,
+ LogicalTypeAnnotation.variantType(specVersion),
+ Types.required(BINARY).named("metadata"),
+ Types.optional(BINARY).named("value"),
+ Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("typed_value"));
+
+ assertEquals(
+ "required group variant_field (VARIANT(1)) {\n"
+ + " required binary metadata;\n"
+ + " optional binary value;\n"
+ + " optional binary typed_value (STRING);\n"
+ + "}",
+ variant.toString());
+
+ LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation();
+ assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT,
annotation.getType());
+ assertNull(annotation.toOriginalType());
+ assertTrue(annotation instanceof
LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
+ assertEquals(specVersion,
((LogicalTypeAnnotation.VariantLogicalTypeAnnotation)
annotation).getSpecVersion());
+ }
+
/**
* A convenience method to avoid a large number of @Test(expected=...) tests
*
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
index b8c2ae8f7..8956d3944 100644
--- a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
@@ -32,6 +32,12 @@ public class LogicalTypes {
return LogicalType.DECIMAL(new DecimalType(scale, precision));
}
+ public static LogicalType VARIANT(byte specificationVersion) {
+ VariantType type = new VariantType();
+ type.setSpecification_version(specificationVersion);
+ return LogicalType.VARIANT(type);
+ }
+
public static final LogicalType UTF8 = LogicalType.STRING(new StringType());
public static final LogicalType MAP = LogicalType.MAP(new MapType());
public static final LogicalType LIST = LogicalType.LIST(new ListType());
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index c3d6ec8e0..5759be234 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -103,6 +103,7 @@ import org.apache.parquet.format.TimestampType;
import org.apache.parquet.format.Type;
import org.apache.parquet.format.TypeDefinedOrder;
import org.apache.parquet.format.Uncompressed;
+import org.apache.parquet.format.VariantType;
import org.apache.parquet.format.XxHash;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -514,6 +515,11 @@ public class ParquetMetadataConverter {
public Optional<LogicalType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
return of(LogicalTypes.UNKNOWN);
}
+
+ @Override
+ public Optional<LogicalType> visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) {
+ return of(LogicalTypes.VARIANT(variantLogicalType.getSpecVersion()));
+ }
}
private void addRowGroup(
@@ -1177,6 +1183,9 @@ public class ParquetMetadataConverter {
return LogicalTypeAnnotation.uuidType();
case FLOAT16:
return LogicalTypeAnnotation.float16Type();
+ case VARIANT:
+ VariantType variant = type.getVARIANT();
+ return LogicalTypeAnnotation.variantType(variant.getSpecification_version());
default:
throw new RuntimeException("Unknown logical type " + type);
}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 6b3259070..322d4c4ab 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -41,6 +41,7 @@ import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.variantType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -1589,6 +1590,28 @@ public class TestParquetMetadataConverter {
verifyMapMessageType(messageType, "map");
}
+ @Test
+ public void testVariantLogicalType() {
+ byte specVersion = 1;
+ MessageType expected = Types.buildMessage()
+ .requiredGroup()
+ .as(variantType(specVersion))
+ .required(PrimitiveTypeName.BINARY)
+ .named("metadata")
+ .required(PrimitiveTypeName.BINARY)
+ .named("value")
+ .named("v")
+ .named("example");
+
+ ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
+ List<SchemaElement> parquetSchema =
parquetMetadataConverter.toParquetSchema(expected);
+ MessageType schema =
parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+ assertEquals(expected, schema);
+ LogicalTypeAnnotation logicalType =
schema.getType("v").getLogicalTypeAnnotation();
+ assertEquals(LogicalTypeAnnotation.variantType(specVersion), logicalType);
+ assertEquals(specVersion,
((LogicalTypeAnnotation.VariantLogicalTypeAnnotation)
logicalType).getSpecVersion());
+ }
+
private void verifyMapMessageType(final MessageType messageType, final
String keyValueName) throws IOException {
Path file = new
Path(temporaryFolder.newFolder("verifyMapMessageType").getPath(), keyValueName
+ ".parquet");