This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new b6b89268de Add variant type support to ParquetTypeVisitor (#14588)
b6b89268de is described below

commit b6b89268de26b119817db9ce20b540baa349c541
Author: Tamas Mate <[email protected]>
AuthorDate: Tue Nov 18 19:39:14 2025 +0100

    Add variant type support to ParquetTypeVisitor (#14588)
    
    * Parquet: Add variant type support to ParquetTypeVisitor
    
    Implement variant(GroupType) method in ParquetTypeVisitor and all
    subclasses to enable proper handling of Parquet variant logical types
    during schema conversion and manipulation operations.
    
    * Address review comments
    
     - Replace variant spec version with constant
     - Add variant tests
    
    * Column id reordering
    
    * Address review comments
---
 .../apache/iceberg/parquet/ApplyNameMapping.java   |  6 ++
 .../apache/iceberg/parquet/MessageTypeToType.java  |  5 ++
 .../apache/iceberg/parquet/ParquetSchemaUtil.java  |  5 ++
 .../apache/iceberg/parquet/ParquetTypeVisitor.java | 12 +++
 .../java/org/apache/iceberg/parquet/RemoveIds.java | 10 +++
 .../iceberg/parquet/TestParquetSchemaUtil.java     | 88 ++++++++++++++++++++++
 6 files changed, 126 insertions(+)

diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java b/parquet/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java
index 3ad11ab6e5..b6569a6eb6 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java
@@ -103,6 +103,12 @@ class ApplyNameMapping extends ParquetTypeVisitor<Type> {
     return field == null ? primitive : primitive.withId(field.id());
   }
 
+  @Override
+  public Type variant(GroupType variant) {
+    MappedField field = nameMapping.find(currentPath());
+    return field == null ? variant : variant.withId(field.id());
+  }
+
   @Override
   public void beforeField(Type type) {
     fieldNames.push(type.getName());
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java b/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java
index 26ef6e468e..841777152e 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java
@@ -179,6 +179,11 @@ class MessageTypeToType extends ParquetTypeVisitor<Type> {
     throw new UnsupportedOperationException("Cannot convert unknown primitive type: " + primitive);
   }
 
+  @Override
+  public Type variant(GroupType variant) {
+    return Types.VariantType.get();
+  }
+
   private static class ParquetLogicalTypeVisitor
       implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Type> {
     private static final ParquetLogicalTypeVisitor INSTANCE = new ParquetLogicalTypeVisitor();
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java
index f4760738df..9a81626827 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java
@@ -217,6 +217,11 @@ public class ParquetSchemaUtil {
     public Boolean primitive(PrimitiveType primitive) {
       return primitive.getId() != null;
     }
+
+    @Override
+    public Boolean variant(GroupType variant) {
+      return variant.getId() != null;
+    }
   }
 
   public static Type determineListElementType(GroupType array) {
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java
index 43c62a8c77..271d9e8bf8 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java
@@ -22,6 +22,7 @@ import java.util.Deque;
 import java.util.List;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.variants.Variant;
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
@@ -46,6 +47,9 @@ public class ParquetTypeVisitor<T> {
         return visitList(group, visitor);
       } else if (LogicalTypeAnnotation.mapType().equals(annotation)) {
         return visitMap(group, visitor);
+      } else if (LogicalTypeAnnotation.variantType(Variant.VARIANT_SPEC_VERSION)
+          .equals(annotation)) {
+        return visitVariant(group, visitor);
       }
 
       return visitor.struct(group, visitFields(group, visitor));
@@ -168,6 +172,10 @@ public class ParquetTypeVisitor<T> {
     }
   }
 
+  private static <T> T visitVariant(GroupType variant, ParquetTypeVisitor<T> visitor) {
+    return visitor.variant(variant);
+  }
+
   private static <T> List<T> visitFields(GroupType group, ParquetTypeVisitor<T> visitor) {
     List<T> results = Lists.newArrayListWithExpectedSize(group.getFieldCount());
     for (Type field : group.getFields()) {
@@ -202,6 +210,10 @@ public class ParquetTypeVisitor<T> {
     return null;
   }
 
+  public T variant(GroupType variant) {
+    return null;
+  }
+
   public void beforeField(Type type) {
     fieldNames.push(type.getName());
   }
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/RemoveIds.java b/parquet/src/main/java/org/apache/iceberg/parquet/RemoveIds.java
index 37ead23a67..b1d1d91bd5 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/RemoveIds.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/RemoveIds.java
@@ -77,6 +77,16 @@ public class RemoveIds extends ParquetTypeVisitor<Type> {
         .named(primitive.getName());
   }
 
+  @Override
+  public Type variant(GroupType variant) {
+    Types.GroupBuilder<GroupType> builder =
+        Types.buildGroup(variant.getRepetition()).as(variant.getLogicalTypeAnnotation());
+    for (Type field : variant.getFields()) {
+      builder.addField(field);
+    }
+    return builder.named(variant.getName());
+  }
+
   public static MessageType removeIds(MessageType type) {
     return (MessageType) ParquetTypeVisitor.visit(type, new RemoveIds());
   }
diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java
index 84fbf2a7d9..1df904f13c 100644
--- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java
+++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java
@@ -28,7 +28,9 @@ import org.apache.iceberg.mapping.MappingUtil;
 import org.apache.iceberg.mapping.NameMapping;
 import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.variants.Variant;
 import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
 import org.apache.parquet.schema.PrimitiveType;
@@ -127,6 +129,37 @@ public class TestParquetSchemaUtil {
     assertThat(messageTypeWithIdsFromNameMapping).isEqualTo(messageTypeWithIds);
   }
 
+  @Test
+  public void testAssignIdsToVariantTypesByNameMapping() {
+    Types.StructType structType =
+        Types.StructType.of(
+            optional(30, "variant_col", Types.VariantType.get()),
+            optional(
+                31,
+                "struct_with_variant",
+                Types.StructType.of(
+                    Types.NestedField.required(32, "id", Types.IntegerType.get()),
+                    Types.NestedField.optional(33, "data", Types.VariantType.get()))),
+            required(
+                34, "list_of_variants", Types.ListType.ofOptional(35, Types.VariantType.get())),
+            required(
+                36,
+                "map_with_variant_value",
+                Types.MapType.ofOptional(37, 38, Types.StringType.get(), Types.VariantType.get())));
+
+    Schema schema =
+        new Schema(
+            TypeUtil.assignFreshIds(structType, new AtomicInteger(0)::incrementAndGet)
+                .asStructType()
+                .fields());
+    NameMapping nameMapping = MappingUtil.create(schema);
+    MessageType messageTypeWithIds = ParquetSchemaUtil.convert(schema, "parquet_type");
+    MessageType messageTypeWithIdsFromNameMapping =
+        ParquetSchemaUtil.applyNameMapping(RemoveIds.removeIds(messageTypeWithIds), nameMapping);
+
+    assertThat(messageTypeWithIdsFromNameMapping).isEqualTo(messageTypeWithIds);
+  }
+
   @Test
   public void testSchemaConversionWithoutAssigningIds() {
     MessageType messageType =
@@ -264,6 +297,47 @@ public class TestParquetSchemaUtil {
         .isEqualTo(expectedSchema.asStruct());
   }
 
+  @Test
+  public void testVariantTypesWithoutAssigningIds() {
+    MessageType messageType =
+        new MessageType(
+            "test",
+            variant(30, "variant_col_1", Repetition.OPTIONAL),
+            variant(null, "variant_col_2", Repetition.REQUIRED),
+            struct(
+                31,
+                "struct_col_3",
+                Repetition.REQUIRED,
+                primitive(32, "n1", PrimitiveTypeName.INT32, Repetition.REQUIRED),
+                variant(null, "variant_field", Repetition.OPTIONAL)),
+            list(33, "list_col_6", Repetition.OPTIONAL, variant(34, "v", Repetition.OPTIONAL)),
+            map(
+                35,
+                "map_col_6",
+                Repetition.REQUIRED,
+                primitive(36, "k", PrimitiveTypeName.INT32, Repetition.REQUIRED),
+                variant(37, "v", Repetition.OPTIONAL)));
+
+    Schema expectedSchema =
+        new Schema(
+            optional(30, "variant_col_1", Types.VariantType.get()),
+            required(
+                31,
+                "struct_col_3",
+                Types.StructType.of(required(32, "n1", Types.IntegerType.get()))),
+            optional(33, "list_col_6", Types.ListType.ofOptional(34, Types.VariantType.get())),
+            required(
+                35,
+                "map_col_6",
+                Types.MapType.ofOptional(
+                    36, 37, Types.IntegerType.get(), Types.VariantType.get())));
+
+    Schema actualSchema = ParquetSchemaUtil.convertAndPrune(messageType);
+    assertThat(actualSchema.asStruct())
+        .as("Schema must match")
+        .isEqualTo(expectedSchema.asStruct());
+  }
+
   @Test
   public void testSchemaConversionForHiveStyleLists() {
     String parquetSchemaString =
@@ -464,4 +538,18 @@ public class TestParquetSchemaUtil {
     }
     return builder.named(name);
   }
+
+  private Type variant(Integer id, String name, Repetition repetition) {
+    GroupBuilder<GroupType> builder =
+        org.apache.parquet.schema.Types.buildGroup(repetition)
+            .as(LogicalTypeAnnotation.variantType(Variant.VARIANT_SPEC_VERSION))
+            .primitive(PrimitiveTypeName.BINARY, Repetition.REQUIRED)
+            .named("metadata")
+            .primitive(PrimitiveTypeName.BINARY, Repetition.REQUIRED)
+            .named("value");
+    if (id != null) {
+      builder.id(id);
+    }
+    return builder.named(name);
+  }
 }

Reply via email to