This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch 1.10.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/1.10.x by this push:
new e5cd2f9746 Parquet: Handle NPE for VariantLogicalType in TypeWithSchemaVisitor (#14261) (#14494)
e5cd2f9746 is described below
commit e5cd2f97464cf8e0da11a9af645f97d8bfa6f520
Author: Huaxin Gao <[email protected]>
AuthorDate: Mon Nov 3 16:18:30 2025 -0800
Parquet: Handle NPE for VariantLogicalType in TypeWithSchemaVisitor (#14261) (#14494)
(cherry picked from commit 9be7c1820c5e27cc029590af1dfca470dbbeb8b7)
Co-authored-by: Aihua Xu <[email protected]>
---
.../iceberg/parquet/TypeWithSchemaVisitor.java | 2 +-
.../apache/iceberg/parquet/TestPruneColumns.java | 45 ++++++++++++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java
index 4ab4548297..c5268bf51a 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java
@@ -64,7 +64,7 @@ public class TypeWithSchemaVisitor<T> {
} else if (annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation
|| (iType != null && iType.isVariantType())) {
// when Parquet has a VARIANT logical type, use it here
- return visitVariant(iType.asVariantType(), group, visitor);
+ return visitVariant(iType != null ? iType.asVariantType() : null, group, visitor);
}
Types.StructType struct = iType != null ? iType.asStructType() : null;
diff --git
a/parquet/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java
b/parquet/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java
index 70345adf1b..619b2c5a34 100644
--- a/parquet/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java
+++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java
@@ -21,12 +21,16 @@ package org.apache.iceberg.parquet;
import static org.assertj.core.api.Assertions.assertThat;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.types.Types.DoubleType;
+import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.StructType;
+import org.apache.iceberg.types.Types.VariantType;
+import org.apache.iceberg.variants.Variant;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -270,4 +274,45 @@ public class TestPruneColumns {
MessageType actual = ParquetSchemaUtil.pruneColumns(fileSchema, projection);
assertThat(actual).as("Pruned schema should be matched").isEqualTo(expected);
}
+
+ @Test
+ public void testVariant() {
+ MessageType fileSchema =
+ Types.buildMessage()
+ .addField(
+ Types.primitive(PrimitiveTypeName.INT32, Type.Repetition.REQUIRED)
+ .id(1)
+ .named("id"))
+ .addField(buildVariantType(2, "variant_1"))
+ .addField(buildVariantType(3, "variant_2"))
+ .named("table");
+
+ Schema projection =
+ new Schema(
+ ImmutableList.of(
+ NestedField.required(1, "id", IntegerType.get()),
+ NestedField.required(2, "variant_1", VariantType.get())));
+ MessageType expected =
+ Types.buildMessage()
+ .addField(
+ Types.primitive(PrimitiveTypeName.INT32, Type.Repetition.REQUIRED)
+ .id(1)
+ .named("id"))
+ .addField(buildVariantType(2, "variant_1"))
+ .named("table");
+
+ MessageType actual = ParquetSchemaUtil.pruneColumns(fileSchema, projection);
+ assertThat(actual).as("Pruned schema should be matched").isEqualTo(expected);
+ }
+
+ private static Type buildVariantType(int id, String name) {
+ return Types.buildGroup(Type.Repetition.OPTIONAL)
+ .as(LogicalTypeAnnotation.variantType(Variant.VARIANT_SPEC_VERSION))
+ .addField(
+ Types.primitive(PrimitiveTypeName.BINARY, Type.Repetition.REQUIRED).named("metadata"))
+ .addField(
+ Types.primitive(PrimitiveTypeName.BINARY, Type.Repetition.REQUIRED).named("value"))
+ .id(id)
+ .named(name);
+ }
}