This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 66e0c4eec GH-3070: Add Variant logical type annotation to parquet-java (#3072)
66e0c4eec is described below

commit 66e0c4eec5a7da37cc182ee0ebf6f2dbdeaac036
Author: Aihua Xu <[email protected]>
AuthorDate: Thu Apr 17 12:07:14 2025 -0700

    GH-3070: Add Variant logical type annotation to parquet-java (#3072)
---
 .../parquet/schema/LogicalTypeAnnotation.java      | 59 ++++++++++++++++++++++
 .../apache/parquet/schema/MessageTypeParser.java   | 34 +++++++++++--
 .../apache/parquet/parser/TestParquetParser.java   | 27 ++++++++++
 .../apache/parquet/schema/TestTypeBuilders.java    | 47 +++++++++++++++++
 .../schema/TestTypeBuildersWithLogicalTypes.java   | 55 ++++++++++++++++++++
 .../org/apache/parquet/format/LogicalTypes.java    |  6 +++
 .../format/converter/ParquetMetadataConverter.java |  9 ++++
 .../converter/TestParquetMetadataConverter.java    | 23 +++++++++
 8 files changed, 255 insertions(+), 5 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 78b0f9a0c..749beaa95 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -56,6 +56,14 @@ public abstract class LogicalTypeAnnotation {
         return listType();
       }
     },
+    VARIANT {
+      @Override
+      protected LogicalTypeAnnotation fromString(List<String> params) {
+        Preconditions.checkArgument(
+            params.size() == 1, "Expecting only spec version for variant 
annotation args: %s", params);
+        return variantType(Byte.parseByte(params.get(0)));
+      }
+    },
     STRING {
       @Override
       protected LogicalTypeAnnotation fromString(List<String> params) {
@@ -269,6 +277,10 @@ public abstract class LogicalTypeAnnotation {
     return ListLogicalTypeAnnotation.INSTANCE;
   }
 
+  public static VariantLogicalTypeAnnotation variantType(byte specVersion) {
+    return new VariantLogicalTypeAnnotation(specVersion);
+  }
+
   public static EnumLogicalTypeAnnotation enumType() {
     return EnumLogicalTypeAnnotation.INSTANCE;
   }
@@ -1128,6 +1140,49 @@ public abstract class LogicalTypeAnnotation {
     }
   }
 
+  public static class VariantLogicalTypeAnnotation extends 
LogicalTypeAnnotation {
+    private byte specVersion;
+
+    private VariantLogicalTypeAnnotation(byte specVersion) {
+      this.specVersion = specVersion;
+    }
+
+    @Override
+    public OriginalType toOriginalType() {
+      // No OriginalType for Variant
+      return null;
+    }
+
+    @Override
+    public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> 
logicalTypeAnnotationVisitor) {
+      return logicalTypeAnnotationVisitor.visit(this);
+    }
+
+    @Override
+    LogicalTypeToken getType() {
+      return LogicalTypeToken.VARIANT;
+    }
+
+    public byte getSpecVersion() {
+      return this.specVersion;
+    }
+
+    @Override
+    protected String typeParametersAsString() {
+      return "(" + specVersion + ")";
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (!(obj instanceof VariantLogicalTypeAnnotation)) {
+        return false;
+      }
+
+      VariantLogicalTypeAnnotation other = (VariantLogicalTypeAnnotation) obj;
+      return specVersion == other.specVersion;
+    }
+  }
+
   /**
    * Implement this interface to visit a logical type annotation in the schema.
    * The default implementation for each logical type specific visitor method 
is empty.
@@ -1152,6 +1207,10 @@ public abstract class LogicalTypeAnnotation {
       return empty();
     }
 
+    default Optional<T> visit(VariantLogicalTypeAnnotation variantLogicalType) 
{
+      return empty();
+    }
+
     default Optional<T> visit(EnumLogicalTypeAnnotation enumLogicalType) {
       return empty();
     }
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
index 2e6cb2096..c7f4b900b 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
@@ -118,12 +118,36 @@ public class MessageTypeParser {
     String name = st.nextToken();
 
     // Read annotation, if any.
+    String annotation = null;
     t = st.nextToken();
-    OriginalType originalType = null;
     if (t.equalsIgnoreCase("(")) {
-      originalType = OriginalType.valueOf(st.nextToken());
-      childBuilder.as(originalType);
-      check(st.nextToken(), ")", "original type ended by )", st);
+      t = st.nextToken();
+      if (isLogicalType(t)) {
+        LogicalTypeAnnotation.LogicalTypeToken logicalType = 
LogicalTypeAnnotation.LogicalTypeToken.valueOf(t);
+        t = st.nextToken();
+        List<String> tokens = new ArrayList<>();
+        if ("(".equals(t)) {
+          while (!")".equals(t)) {
+            if (!(",".equals(t) || "(".equals(t) || ")".equals(t))) {
+              tokens.add(t);
+            }
+            t = st.nextToken();
+          }
+          t = st.nextToken();
+        }
+
+        LogicalTypeAnnotation logicalTypeAnnotation = 
logicalType.fromString(tokens);
+        childBuilder.as(logicalTypeAnnotation);
+        annotation = logicalTypeAnnotation.toString();
+      } else {
+        // Try to parse as OriginalType
+        OriginalType originalType = OriginalType.valueOf(t);
+        childBuilder.as(originalType);
+        annotation = originalType.toString();
+        t = st.nextToken();
+      }
+
+      check(t, ")", "logical type ended by )", st);
       t = st.nextToken();
     }
     if (t.equals("=")) {
@@ -134,7 +158,7 @@ public class MessageTypeParser {
       addGroupTypeFields(t, st, childBuilder);
     } catch (IllegalArgumentException e) {
       throw new IllegalArgumentException(
-          "problem reading type: type = group, name = " + name + ", original 
type = " + originalType, e);
+          "problem reading type: type = group, name = " + name + ", annotation 
= " + annotation, e);
     }
 
     childBuilder.named(name);
diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
index 04b4a9432..5172e788b 100644
--- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
+++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
@@ -55,6 +55,7 @@ import static org.apache.parquet.schema.Types.buildMessage;
 import static org.junit.Assert.assertEquals;
 
 import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
 import org.apache.parquet.schema.OriginalType;
@@ -447,4 +448,30 @@ public class TestParquetParser {
     MessageType reparsed = 
MessageTypeParser.parseMessageType(parsed.toString());
     assertEquals(expected, reparsed);
   }
+
+  @Test
+  public void testVARIANTAnnotation() {
+    String message = "message Message {\n"
+        + "  required group aVariant (VARIANT(2)) {\n"
+        + "     required binary metadata;\n"
+        + "     required binary value;\n"
+        + "  }\n"
+        + "}\n";
+
+    MessageType expected = buildMessage()
+        .requiredGroup()
+        .as(LogicalTypeAnnotation.variantType((byte) 2))
+        .required(BINARY)
+        .named("metadata")
+        .required(BINARY)
+        .named("value")
+        .named("aVariant")
+        .named("Message");
+
+    MessageType parsed = parseMessageType(message);
+
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
+  }
 }
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
index 579077897..71886d120 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
@@ -50,6 +50,7 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
 import static org.apache.parquet.schema.Type.Repetition.REPEATED;
 import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+import static org.junit.Assert.assertEquals;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -1414,6 +1415,52 @@ public class TestTypeBuilders {
     Assert.assertEquals(nonUtcMicrosExpected, nonUtcMicrosActual);
   }
 
+  @Test
+  public void testVariantLogicalType() {
+    byte specVersion = 1;
+    String name = "variant_field";
+    GroupType variantExpected = new GroupType(
+        REQUIRED,
+        name,
+        LogicalTypeAnnotation.variantType(specVersion),
+        new PrimitiveType(REQUIRED, BINARY, "metadata"),
+        new PrimitiveType(REQUIRED, BINARY, "value"));
+
+    GroupType variantActual = Types.buildGroup(REQUIRED)
+        .addFields(
+            Types.required(BINARY).named("metadata"),
+            Types.required(BINARY).named("value"))
+        .as(LogicalTypeAnnotation.variantType(specVersion))
+        .named(name);
+
+    assertEquals(variantExpected, variantActual);
+  }
+
+  @Test
+  public void testVariantLogicalTypeWithShredded() {
+    byte specVersion = 1;
+    String name = "variant_field";
+    GroupType variantExpected = new GroupType(
+        REQUIRED,
+        name,
+        LogicalTypeAnnotation.variantType(specVersion),
+        new PrimitiveType(REQUIRED, BINARY, "metadata"),
+        new PrimitiveType(OPTIONAL, BINARY, "value"),
+        new PrimitiveType(OPTIONAL, BINARY, "typed_value", 
LogicalTypeAnnotation.stringType()));
+
+    GroupType variantActual = Types.buildGroup(REQUIRED)
+        .addFields(
+            Types.required(BINARY).named("metadata"),
+            Types.optional(BINARY).named("value"),
+            Types.optional(BINARY)
+                .as(LogicalTypeAnnotation.stringType())
+                .named("typed_value"))
+        .as(LogicalTypeAnnotation.variantType(specVersion))
+        .named(name);
+
+    assertEquals(variantExpected, variantActual);
+  }
+
   @Test(expected = IllegalArgumentException.class)
   public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() {
     Types.required(BINARY)
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
index 54853e813..61fe3065e 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
@@ -41,6 +41,8 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
 import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 
 import java.util.concurrent.Callable;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -473,6 +475,59 @@ public class TestTypeBuildersWithLogicalTypes {
         .toString());
   }
 
+  @Test
+  public void testVariantLogicalType() {
+    byte specVersion = 1;
+    String name = "variant_field";
+    GroupType variant = new GroupType(
+        REQUIRED,
+        name,
+        LogicalTypeAnnotation.variantType(specVersion),
+        Types.required(BINARY).named("metadata"),
+        Types.required(BINARY).named("value"));
+
+    assertEquals(
+        "required group variant_field (VARIANT(1)) {\n"
+            + "  required binary metadata;\n"
+            + "  required binary value;\n"
+            + "}",
+        variant.toString());
+
+    LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation();
+    assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT, 
annotation.getType());
+    assertNull(annotation.toOriginalType());
+    assertTrue(annotation instanceof 
LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
+    assertEquals(specVersion, 
((LogicalTypeAnnotation.VariantLogicalTypeAnnotation) 
annotation).getSpecVersion());
+  }
+
+  @Test
+  public void testVariantLogicalTypeWithShredded() {
+    byte specVersion = 1;
+
+    String name = "variant_field";
+    GroupType variant = new GroupType(
+        REQUIRED,
+        name,
+        LogicalTypeAnnotation.variantType(specVersion),
+        Types.required(BINARY).named("metadata"),
+        Types.optional(BINARY).named("value"),
+        
Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("typed_value"));
+
+    assertEquals(
+        "required group variant_field (VARIANT(1)) {\n"
+            + "  required binary metadata;\n"
+            + "  optional binary value;\n"
+            + "  optional binary typed_value (STRING);\n"
+            + "}",
+        variant.toString());
+
+    LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation();
+    assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT, 
annotation.getType());
+    assertNull(annotation.toOriginalType());
+    assertTrue(annotation instanceof 
LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
+    assertEquals(specVersion, 
((LogicalTypeAnnotation.VariantLogicalTypeAnnotation) 
annotation).getSpecVersion());
+  }
+
   /**
    * A convenience method to avoid a large number of @Test(expected=...) tests
    *
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
index b8c2ae8f7..8956d3944 100644
--- a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
@@ -32,6 +32,12 @@ public class LogicalTypes {
     return LogicalType.DECIMAL(new DecimalType(scale, precision));
   }
 
+  public static LogicalType VARIANT(byte specificationVersion) {
+    VariantType type = new VariantType();
+    type.setSpecification_version(specificationVersion);
+    return LogicalType.VARIANT(type);
+  }
+
   public static final LogicalType UTF8 = LogicalType.STRING(new StringType());
   public static final LogicalType MAP = LogicalType.MAP(new MapType());
   public static final LogicalType LIST = LogicalType.LIST(new ListType());
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index c3d6ec8e0..5759be234 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -103,6 +103,7 @@ import org.apache.parquet.format.TimestampType;
 import org.apache.parquet.format.Type;
 import org.apache.parquet.format.TypeDefinedOrder;
 import org.apache.parquet.format.Uncompressed;
+import org.apache.parquet.format.VariantType;
 import org.apache.parquet.format.XxHash;
 import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -514,6 +515,11 @@ public class ParquetMetadataConverter {
     public Optional<LogicalType> 
visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
       return of(LogicalTypes.UNKNOWN);
     }
+
+    @Override
+    public Optional<LogicalType> 
visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) {
+      return of(LogicalTypes.VARIANT(variantLogicalType.getSpecVersion()));
+    }
   }
 
   private void addRowGroup(
@@ -1177,6 +1183,9 @@ public class ParquetMetadataConverter {
         return LogicalTypeAnnotation.uuidType();
       case FLOAT16:
         return LogicalTypeAnnotation.float16Type();
+      case VARIANT:
+        VariantType variant = type.getVARIANT();
+        return 
LogicalTypeAnnotation.variantType(variant.getSpecification_version());
       default:
         throw new RuntimeException("Unknown logical type " + type);
     }
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 6b3259070..322d4c4ab 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -41,6 +41,7 @@ import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.variantType;
 import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -1589,6 +1590,28 @@ public class TestParquetMetadataConverter {
     verifyMapMessageType(messageType, "map");
   }
 
+  @Test
+  public void testVariantLogicalType() {
+    byte specVersion = 1;
+    MessageType expected = Types.buildMessage()
+        .requiredGroup()
+        .as(variantType(specVersion))
+        .required(PrimitiveTypeName.BINARY)
+        .named("metadata")
+        .required(PrimitiveTypeName.BINARY)
+        .named("value")
+        .named("v")
+        .named("example");
+
+    ParquetMetadataConverter parquetMetadataConverter = new 
ParquetMetadataConverter();
+    List<SchemaElement> parquetSchema = 
parquetMetadataConverter.toParquetSchema(expected);
+    MessageType schema = 
parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+    assertEquals(expected, schema);
+    LogicalTypeAnnotation logicalType = 
schema.getType("v").getLogicalTypeAnnotation();
+    assertEquals(LogicalTypeAnnotation.variantType(specVersion), logicalType);
+    assertEquals(specVersion, 
((LogicalTypeAnnotation.VariantLogicalTypeAnnotation) 
logicalType).getSpecVersion());
+  }
+
   private void verifyMapMessageType(final MessageType messageType, final 
String keyValueName) throws IOException {
     Path file = new 
Path(temporaryFolder.newFolder("verifyMapMessageType").getPath(), keyValueName 
+ ".parquet");
 

Reply via email to