This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 0b47faaada Core: Add variant type support to utils and visitors (#11831)
0b47faaada is described below
commit 0b47faaada2aafa42c118be78445f6c40fc0ead6
Author: Aihua Xu <[email protected]>
AuthorDate: Tue Feb 18 08:27:38 2025 -0800
Core: Add variant type support to utils and visitors (#11831)
---
.../main/java/org/apache/iceberg/Accessors.java | 5 ++
.../org/apache/iceberg/types/AssignFreshIds.java | 5 ++
.../java/org/apache/iceberg/types/AssignIds.java | 5 ++
.../apache/iceberg/types/CheckCompatibility.java | 10 +++
.../org/apache/iceberg/types/FindTypeVisitor.java | 6 +-
.../org/apache/iceberg/types/GetProjectedIds.java | 9 +-
.../java/org/apache/iceberg/types/IndexById.java | 5 ++
.../java/org/apache/iceberg/types/IndexByName.java | 2 +-
.../org/apache/iceberg/types/IndexParents.java | 2 +-
.../org/apache/iceberg/types/PrimitiveHolder.java | 2 +-
.../org/apache/iceberg/types/PruneColumns.java | 5 ++
.../java/org/apache/iceberg/types/ReassignDoc.java | 5 ++
.../java/org/apache/iceberg/types/ReassignIds.java | 5 ++
.../main/java/org/apache/iceberg/types/Type.java | 8 ++
.../java/org/apache/iceberg/types/TypeUtil.java | 19 ++++-
.../main/java/org/apache/iceberg/types/Types.java | 26 +++++-
.../iceberg/types/TestReadabilityChecks.java | 39 +++++++++
.../iceberg/types/TestSerializableTypes.java | 10 +--
.../org/apache/iceberg/types/TestTypeUtil.java | 97 ++++++++++++++++++++++
.../java/org/apache/iceberg/types/TestTypes.java | 31 +++++++
.../main/java/org/apache/iceberg/SchemaParser.java | 6 +-
.../main/java/org/apache/iceberg/SchemaUpdate.java | 5 ++
.../org/apache/iceberg/mapping/MappingUtil.java | 5 ++
.../iceberg/schema/SchemaWithPartnerVisitor.java | 7 ++
.../apache/iceberg/schema/UnionByNameVisitor.java | 5 ++
.../java/org/apache/iceberg/types/FixupTypes.java | 6 ++
.../java/org/apache/iceberg/TestSchemaParser.java | 10 +++
.../apache/iceberg/TestSchemaUnionByFieldName.java | 25 ++++--
.../apache/iceberg/mapping/TestNameMapping.java | 12 +++
.../java/org/apache/iceberg/spark/Spark3Util.java | 5 ++
.../org/apache/iceberg/spark/TestSpark3Util.java | 5 +-
31 files changed, 352 insertions(+), 35 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java
b/api/src/main/java/org/apache/iceberg/Accessors.java
index 08233624f2..0b36730fbb 100644
--- a/api/src/main/java/org/apache/iceberg/Accessors.java
+++ b/api/src/main/java/org/apache/iceberg/Accessors.java
@@ -232,6 +232,11 @@ public class Accessors {
return accessors;
}
+ @Override
+ public Map<Integer, Accessor<StructLike>> variant(Types.VariantType
variant) {
+ return null;
+ }
+
@Override
public Map<Integer, Accessor<StructLike>> field(
Types.NestedField field, Map<Integer, Accessor<StructLike>>
fieldResult) {
diff --git a/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java
b/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java
index e58f76a8de..75055cddc1 100644
--- a/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java
+++ b/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java
@@ -124,6 +124,11 @@ class AssignFreshIds extends
TypeUtil.CustomOrderSchemaVisitor<Type> {
}
}
+ @Override
+ public Type variant(Types.VariantType variant) {
+ return variant;
+ }
+
@Override
public Type primitive(Type.PrimitiveType primitive) {
return primitive;
diff --git a/api/src/main/java/org/apache/iceberg/types/AssignIds.java
b/api/src/main/java/org/apache/iceberg/types/AssignIds.java
index 68588f581a..b2f72751eb 100644
--- a/api/src/main/java/org/apache/iceberg/types/AssignIds.java
+++ b/api/src/main/java/org/apache/iceberg/types/AssignIds.java
@@ -92,6 +92,11 @@ class AssignIds extends
TypeUtil.CustomOrderSchemaVisitor<Type> {
}
}
+ @Override
+ public Type variant(Types.VariantType variant) {
+ return variant;
+ }
+
@Override
public Type primitive(Type.PrimitiveType primitive) {
return primitive;
diff --git a/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java
b/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java
index 502e52c345..725f7f4256 100644
--- a/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java
+++ b/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java
@@ -250,6 +250,16 @@ public class CheckCompatibility extends
TypeUtil.CustomOrderSchemaVisitor<List<S
}
}
+ @Override
+ public List<String> variant(Types.VariantType readVariant) {
+ if (currentType.isVariantType()) {
+ return NO_ERRORS;
+ }
+
+ // Currently promotion is not allowed to variant type
+ return ImmutableList.of(String.format(": %s cannot be read as a %s",
currentType, readVariant));
+ }
+
@Override
public List<String> primitive(Type.PrimitiveType readPrimitive) {
if (currentType.equals(readPrimitive)) {
diff --git a/api/src/main/java/org/apache/iceberg/types/FindTypeVisitor.java
b/api/src/main/java/org/apache/iceberg/types/FindTypeVisitor.java
index f0750f337e..64faebb482 100644
--- a/api/src/main/java/org/apache/iceberg/types/FindTypeVisitor.java
+++ b/api/src/main/java/org/apache/iceberg/types/FindTypeVisitor.java
@@ -77,9 +77,9 @@ class FindTypeVisitor extends TypeUtil.SchemaVisitor<Type> {
}
@Override
- public Type variant() {
- if (predicate.test(Types.VariantType.get())) {
- return Types.VariantType.get();
+ public Type variant(Types.VariantType variant) {
+ if (predicate.test(variant)) {
+ return variant;
}
return null;
diff --git a/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java
b/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java
index a8a7de065e..1ec70b8578 100644
--- a/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java
+++ b/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java
@@ -47,7 +47,9 @@ class GetProjectedIds extends
TypeUtil.SchemaVisitor<Set<Integer>> {
@Override
public Set<Integer> field(Types.NestedField field, Set<Integer> fieldResult)
{
- if ((includeStructIds && field.type().isStructType()) ||
field.type().isPrimitiveType()) {
+ if ((includeStructIds && field.type().isStructType())
+ || field.type().isPrimitiveType()
+ || field.type().isVariantType()) {
fieldIds.add(field.fieldId());
}
return fieldIds;
@@ -72,4 +74,9 @@ class GetProjectedIds extends
TypeUtil.SchemaVisitor<Set<Integer>> {
}
return fieldIds;
}
+
+ @Override
+ public Set<Integer> variant(Types.VariantType variant) {
+ return null;
+ }
}
diff --git a/api/src/main/java/org/apache/iceberg/types/IndexById.java
b/api/src/main/java/org/apache/iceberg/types/IndexById.java
index 40280c5ed9..a7b96eb381 100644
--- a/api/src/main/java/org/apache/iceberg/types/IndexById.java
+++ b/api/src/main/java/org/apache/iceberg/types/IndexById.java
@@ -64,4 +64,9 @@ class IndexById extends TypeUtil.SchemaVisitor<Map<Integer,
Types.NestedField>>
}
return null;
}
+
+ @Override
+ public Map<Integer, Types.NestedField> variant(Types.VariantType variant) {
+ return null;
+ }
}
diff --git a/api/src/main/java/org/apache/iceberg/types/IndexByName.java
b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
index 131434c9a1..60258f5c5c 100644
--- a/api/src/main/java/org/apache/iceberg/types/IndexByName.java
+++ b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
@@ -177,7 +177,7 @@ public class IndexByName extends
TypeUtil.SchemaVisitor<Map<String, Integer>> {
}
@Override
- public Map<String, Integer> variant() {
+ public Map<String, Integer> variant(Types.VariantType variant) {
return nameToId;
}
diff --git a/api/src/main/java/org/apache/iceberg/types/IndexParents.java
b/api/src/main/java/org/apache/iceberg/types/IndexParents.java
index 952447ed27..6e611d47e9 100644
--- a/api/src/main/java/org/apache/iceberg/types/IndexParents.java
+++ b/api/src/main/java/org/apache/iceberg/types/IndexParents.java
@@ -77,7 +77,7 @@ public class IndexParents extends
TypeUtil.SchemaVisitor<Map<Integer, Integer>>
}
@Override
- public Map<Integer, Integer> variant() {
+ public Map<Integer, Integer> variant(Types.VariantType variant) {
return idToParent;
}
diff --git a/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java
b/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java
index 42f0da3816..928a65878d 100644
--- a/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java
+++ b/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java
@@ -33,6 +33,6 @@ class PrimitiveHolder implements Serializable {
}
Object readResolve() throws ObjectStreamException {
- return Types.fromPrimitiveString(typeAsString);
+ return Types.fromTypeName(typeAsString);
}
}
diff --git a/api/src/main/java/org/apache/iceberg/types/PruneColumns.java
b/api/src/main/java/org/apache/iceberg/types/PruneColumns.java
index daf2e6bbc0..56f01cf34b 100644
--- a/api/src/main/java/org/apache/iceberg/types/PruneColumns.java
+++ b/api/src/main/java/org/apache/iceberg/types/PruneColumns.java
@@ -159,6 +159,11 @@ class PruneColumns extends TypeUtil.SchemaVisitor<Type> {
return null;
}
+ @Override
+ public Type variant(Types.VariantType variant) {
+ return null;
+ }
+
@Override
public Type primitive(Type.PrimitiveType primitive) {
return null;
diff --git a/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java
b/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java
index 9ce04a7bd1..328d81c428 100644
--- a/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java
+++ b/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java
@@ -96,6 +96,11 @@ class ReassignDoc extends
TypeUtil.CustomOrderSchemaVisitor<Type> {
}
}
+ @Override
+ public Type variant(Types.VariantType variant) {
+ return variant;
+ }
+
@Override
public Type primitive(Type.PrimitiveType primitive) {
return primitive;
diff --git a/api/src/main/java/org/apache/iceberg/types/ReassignIds.java
b/api/src/main/java/org/apache/iceberg/types/ReassignIds.java
index 565ceee2a9..3d114f093f 100644
--- a/api/src/main/java/org/apache/iceberg/types/ReassignIds.java
+++ b/api/src/main/java/org/apache/iceberg/types/ReassignIds.java
@@ -157,6 +157,11 @@ class ReassignIds extends
TypeUtil.CustomOrderSchemaVisitor<Type> {
}
}
+ @Override
+ public Type variant(Types.VariantType variant) {
+ return variant;
+ }
+
@Override
public Type primitive(Type.PrimitiveType primitive) {
return primitive; // nothing to reassign
diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java
b/api/src/main/java/org/apache/iceberg/types/Type.java
index f4c6f22134..67e40df9e9 100644
--- a/api/src/main/java/org/apache/iceberg/types/Type.java
+++ b/api/src/main/java/org/apache/iceberg/types/Type.java
@@ -82,6 +82,10 @@ public interface Type extends Serializable {
throw new IllegalArgumentException("Not a map type: " + this);
}
+ default Types.VariantType asVariantType() {
+ throw new IllegalArgumentException("Not a variant type: " + this);
+ }
+
default boolean isNestedType() {
return false;
}
@@ -98,6 +102,10 @@ public interface Type extends Serializable {
return false;
}
+ default boolean isVariantType() {
+ return false;
+ }
+
default NestedType asNestedType() {
throw new IllegalArgumentException("Not a nested type: " + this);
}
diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
index 39f2898757..4892696ab4 100644
--- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
+++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
@@ -616,8 +616,16 @@ public class TypeUtil {
return null;
}
+ /**
+ * @deprecated will be removed in 2.0.0; use {@link #variant(Types.VariantType)} instead.
+ */
+ @Deprecated
public T variant() {
- return null;
+ return variant(Types.VariantType.get());
+ }
+
+ public T variant(Types.VariantType variant) {
+ throw new UnsupportedOperationException("Unsupported type: variant");
}
public T primitive(Type.PrimitiveType primitive) {
@@ -684,7 +692,7 @@ public class TypeUtil {
return visitor.map(map, keyResult, valueResult);
case VARIANT:
- return visitor.variant();
+ return visitor.variant(type.asVariantType());
default:
return visitor.primitive(type.asPrimitiveType());
@@ -712,6 +720,10 @@ public class TypeUtil {
return null;
}
+ public T variant(Types.VariantType variant) {
+ throw new UnsupportedOperationException("Unsupported type: variant");
+ }
+
public T primitive(Type.PrimitiveType primitive) {
return null;
}
@@ -788,6 +800,9 @@ public class TypeUtil {
new VisitFuture<>(map.keyType(), visitor),
new VisitFuture<>(map.valueType(), visitor));
+ case VARIANT:
+ return visitor.variant(type.asVariantType());
+
default:
return visitor.primitive(type.asPrimitiveType());
}
diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java
b/api/src/main/java/org/apache/iceberg/types/Types.java
index e724bc5cd0..a866a31ea0 100644
--- a/api/src/main/java/org/apache/iceberg/types/Types.java
+++ b/api/src/main/java/org/apache/iceberg/types/Types.java
@@ -40,8 +40,8 @@ public class Types {
private Types() {}
- private static final ImmutableMap<String, PrimitiveType> TYPES =
- ImmutableMap.<String, PrimitiveType>builder()
+ private static final ImmutableMap<String, Type> TYPES =
+ ImmutableMap.<String, Type>builder()
.put(BooleanType.get().toString(), BooleanType.get())
.put(IntegerType.get().toString(), IntegerType.get())
.put(LongType.get().toString(), LongType.get())
@@ -57,13 +57,14 @@ public class Types {
.put(UUIDType.get().toString(), UUIDType.get())
.put(BinaryType.get().toString(), BinaryType.get())
.put(UnknownType.get().toString(), UnknownType.get())
+ .put(VariantType.get().toString(), VariantType.get())
.buildOrThrow();
private static final Pattern FIXED =
Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]");
private static final Pattern DECIMAL =
Pattern.compile("decimal\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)");
- public static PrimitiveType fromPrimitiveString(String typeString) {
+ public static Type fromTypeName(String typeString) {
String lowerTypeString = typeString.toLowerCase(Locale.ROOT);
if (TYPES.containsKey(lowerTypeString)) {
return TYPES.get(lowerTypeString);
@@ -82,6 +83,15 @@ public class Types {
throw new IllegalArgumentException("Cannot parse type string to primitive: " + typeString);
}
+ public static PrimitiveType fromPrimitiveString(String typeString) {
+ Type type = fromTypeName(typeString);
+ if (type.isPrimitiveType()) {
+ return type.asPrimitiveType();
+ }
+
+ throw new IllegalArgumentException("Cannot parse type string: variant is not a primitive type");
+ }
+
public static class BooleanType extends PrimitiveType {
private static final BooleanType INSTANCE = new BooleanType();
@@ -431,6 +441,16 @@ public class Types {
return "variant";
}
+ @Override
+ public boolean isVariantType() {
+ return true;
+ }
+
+ @Override
+ public VariantType asVariantType() {
+ return this;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
diff --git
a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java
b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java
index 2d02da5346..debb9c9dc1 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java
@@ -22,10 +22,15 @@ import static
org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;
+import java.util.Arrays;
import java.util.List;
+import java.util.stream.Stream;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Type.PrimitiveType;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
public class TestReadabilityChecks {
private static final Type.PrimitiveType[] PRIMITIVES =
@@ -112,6 +117,40 @@ public class TestReadabilityChecks {
.contains("cannot be read as a struct");
}
+ @Test
+ public void testVariantToVariant() {
+ Schema fromSchema = new Schema(required(1, "from_field",
Types.VariantType.get()));
+ List<String> errors =
+ CheckCompatibility.writeCompatibilityErrors(
+ new Schema(required(1, "to_field", Types.VariantType.get())),
fromSchema);
+ assertThat(errors).as("Should produce 0 error messages").isEmpty();
+ }
+
+ private static Stream<Arguments> incompatibleTypesToVariant() {
+ return Stream.of(
+ Stream.of(
+ Arguments.of(Types.StructType.of(required(1, "from",
Types.IntegerType.get()))),
+ Arguments.of(
+ Types.MapType.ofRequired(
+ 1, 2, Types.StringType.get(),
Types.IntegerType.get())),
+ Arguments.of(Types.ListType.ofRequired(1,
Types.StringType.get()))),
+ Arrays.stream(PRIMITIVES).map(type -> Arguments.of(type)))
+ .flatMap(s -> s);
+ }
+
+ @ParameterizedTest
+ @MethodSource("incompatibleTypesToVariant")
+ public void testIncompatibleTypesToVariant(Type from) {
+ Schema fromSchema = new Schema(required(3, "from_field", from));
+ List<String> errors =
+ CheckCompatibility.writeCompatibilityErrors(
+ new Schema(required(3, "to_field", Types.VariantType.get())),
fromSchema);
+ assertThat(errors).hasSize(1);
+ assertThat(errors.get(0))
+ .as("Should complain that other type to variant is not allowed")
+ .contains("cannot be read as a variant");
+ }
+
@Test
public void testRequiredSchemaField() {
Schema write = new Schema(optional(1, "from_field",
Types.IntegerType.get()));
diff --git
a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java
b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java
index a222e8e66b..790f59587c 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java
@@ -46,6 +46,7 @@ public class TestSerializableTypes {
Types.StringType.get(),
Types.UUIDType.get(),
Types.BinaryType.get(),
+ Types.UnknownType.get()
};
for (Type type : identityPrimitives) {
@@ -136,15 +137,6 @@ public class TestSerializableTypes {
.isEqualTo(variant);
}
- @Test
- public void testUnknown() throws Exception {
- Types.UnknownType unknown = Types.UnknownType.get();
- Type copy = TestHelpers.roundTripSerialize(unknown);
- assertThat(copy)
- .as("Unknown serialization should be equal to starting type")
- .isEqualTo(unknown);
- }
-
@Test
public void testSchema() throws Exception {
Schema schema =
diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
index b7ecb5e7ec..137d7c6630 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
@@ -23,13 +23,19 @@ import static
org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Stream;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Types.IntegerType;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
public class TestTypeUtil {
@Test
@@ -646,4 +652,95 @@ public class TestTypeUtil {
required(2, "FIELD2", Types.IntegerType.get())));
assertThat(actualSchema.asStruct()).isEqualTo(expectedSchema.asStruct());
}
+
+ private static Stream<Arguments> testTypes() {
+ return Stream.of(
+ Arguments.of(Types.UnknownType.get()),
+ Arguments.of(Types.VariantType.get()),
+ Arguments.of(Types.TimestampNanoType.withoutZone()),
+ Arguments.of(Types.TimestampNanoType.withZone()));
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testAssignIdsWithType(Type testType) {
+ Types.StructType sourceType =
+ Types.StructType.of(required(0, "id", IntegerType.get()), required(1,
"data", testType));
+ Type expectedType =
+ Types.StructType.of(required(10, "id", IntegerType.get()),
required(11, "data", testType));
+
+ Type assignedType = TypeUtil.assignIds(sourceType, oldId -> oldId + 10);
+ assertThat(assignedType).isEqualTo(expectedType);
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testAssignFreshIdsWithType(Type testType) {
+ Schema schema = new Schema(required(0, "id", IntegerType.get()),
required(1, "data", testType));
+
+ Schema assignedSchema = TypeUtil.assignFreshIds(schema, new
AtomicInteger(10)::incrementAndGet);
+ Schema expectedSchema =
+ new Schema(required(11, "id", IntegerType.get()), required(12, "data",
testType));
+ assertThat(assignedSchema.asStruct()).isEqualTo(expectedSchema.asStruct());
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testReassignIdsWithType(Type testType) {
+ Schema schema = new Schema(required(0, "id", IntegerType.get()),
required(1, "data", testType));
+ Schema sourceSchema =
+ new Schema(required(1, "id", IntegerType.get()), required(2, "data",
testType));
+
+ Schema reassignedSchema = TypeUtil.reassignIds(schema, sourceSchema);
+ assertThat(reassignedSchema.asStruct()).isEqualTo(sourceSchema.asStruct());
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testIndexByIdWithType(Type testType) {
+ Schema schema = new Schema(required(0, "id", IntegerType.get()),
required(1, "data", testType));
+
+ Map<Integer, Types.NestedField> indexByIds =
TypeUtil.indexById(schema.asStruct());
+ assertThat(indexByIds.get(1).type()).isEqualTo(testType);
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testIndexNameByIdWithType(Type testType) {
+ Schema schema = new Schema(required(0, "id", IntegerType.get()),
required(1, "data", testType));
+
+ Map<Integer, String> indexNameByIds =
TypeUtil.indexNameById(schema.asStruct());
+ assertThat(indexNameByIds.get(1)).isEqualTo("data");
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testProjectWithType(Type testType) {
+ Schema schema = new Schema(required(0, "id", IntegerType.get()),
required(1, "data", testType));
+
+ Schema expectedSchema = new Schema(required(1, "data", testType));
+ Schema projectedSchema = TypeUtil.project(schema, Sets.newHashSet(1));
+ assertThat(projectedSchema.asStruct()).isEqualTo(expectedSchema.asStruct());
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testGetProjectedIdsWithType(Type testType) {
+ Schema schema = new Schema(required(0, "id", IntegerType.get()),
required(1, "data", testType));
+
+ Set<Integer> projectedIds = TypeUtil.getProjectedIds(schema);
+ assertThat(Set.of(0, 1)).isEqualTo(projectedIds);
+ }
+
+ @ParameterizedTest
+ @MethodSource("testTypes")
+ public void testReassignDocWithType(Type testType) {
+ Schema schema = new Schema(required(0, "id", IntegerType.get()),
required(1, "data", testType));
+ Schema docSourceSchema =
+ new Schema(
+ required(0, "id", IntegerType.get(), "id"), required(1, "data",
testType, "data"));
+
+ Schema reassignedSchema = TypeUtil.reassignDoc(schema, docSourceSchema);
+ assertThat(reassignedSchema.asStruct()).isEqualTo(docSourceSchema.asStruct());
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java
b/api/src/test/java/org/apache/iceberg/types/TestTypes.java
index b464317c2f..f8ee4e2ccb 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java
@@ -27,6 +27,30 @@ import org.junit.jupiter.api.Test;
public class TestTypes {
+ @Test
+ public void fromTypeName() {
+ assertThat(Types.fromTypeName("boolean")).isSameAs(Types.BooleanType.get());
+ assertThat(Types.fromTypeName("BooLean")).isSameAs(Types.BooleanType.get());
+
+ assertThat(Types.fromTypeName("timestamp")).isSameAs(Types.TimestampType.withoutZone());
+ assertThat(Types.fromTypeName("timestamptz")).isSameAs(Types.TimestampType.withZone());
+ assertThat(Types.fromTypeName("timestamp_ns")).isSameAs(Types.TimestampNanoType.withoutZone());
+ assertThat(Types.fromTypeName("timestamptz_ns")).isSameAs(Types.TimestampNanoType.withZone());
+
+ assertThat(Types.fromTypeName("Fixed[ 3 ]")).isEqualTo(Types.FixedType.ofLength(3));
+
+ assertThat(Types.fromTypeName("Decimal( 2 , 3 )")).isEqualTo(Types.DecimalType.of(2, 3));
+
+ assertThat(Types.fromTypeName("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3));
+
+ assertThat(Types.fromTypeName("variant")).isSameAs(Types.VariantType.get());
+ assertThat(Types.fromTypeName("Variant")).isSameAs(Types.VariantType.get());
+
+ assertThatExceptionOfType(IllegalArgumentException.class)
+ .isThrownBy(() -> Types.fromTypeName("abcdefghij"))
+ .withMessage("Cannot parse type string to primitive: abcdefghij");
+ }
+
@Test
public void fromPrimitiveString() {
assertThat(Types.fromPrimitiveString("boolean")).isSameAs(Types.BooleanType.get());
@@ -45,6 +69,13 @@ public class TestTypes {
assertThat(Types.fromPrimitiveString("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3));
+ assertThatExceptionOfType(IllegalArgumentException.class)
+ .isThrownBy(() -> Types.fromPrimitiveString("variant"))
+ .withMessage("Cannot parse type string: variant is not a primitive type");
+ assertThatExceptionOfType(IllegalArgumentException.class)
+ .isThrownBy(() -> Types.fromPrimitiveString("Variant"))
+ .withMessage("Cannot parse type string: variant is not a primitive type");
+
assertThatExceptionOfType(IllegalArgumentException.class)
.isThrownBy(() -> Types.fromPrimitiveString("abcdefghij"))
.withMessage("Cannot parse type string to primitive: abcdefghij");
diff --git a/core/src/main/java/org/apache/iceberg/SchemaParser.java
b/core/src/main/java/org/apache/iceberg/SchemaParser.java
index 64d3f8795d..d7c7567957 100644
--- a/core/src/main/java/org/apache/iceberg/SchemaParser.java
+++ b/core/src/main/java/org/apache/iceberg/SchemaParser.java
@@ -145,8 +145,8 @@ public class SchemaParser {
}
static void toJson(Type type, JsonGenerator generator) throws IOException {
- if (type.isPrimitiveType()) {
- toJson(type.asPrimitiveType(), generator);
+ if (type.isPrimitiveType() || type.isVariantType()) {
+ generator.writeString(type.toString());
} else {
Type.NestedType nested = type.asNestedType();
switch (type.typeId()) {
@@ -181,7 +181,7 @@ public class SchemaParser {
private static Type typeFromJson(JsonNode json) {
if (json.isTextual()) {
- return Types.fromPrimitiveString(json.asText());
+ return Types.fromTypeName(json.asText());
} else if (json.isObject()) {
JsonNode typeObj = json.get(TYPE);
if (typeObj != null) {
diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
index 4b6d7f6cdd..db02a0e96e 100644
--- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
+++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
@@ -742,6 +742,11 @@ class SchemaUpdate implements UpdateSchema {
}
}
+ @Override
+ public Type variant(Types.VariantType variant) {
+ return variant;
+ }
+
@Override
public Type primitive(Type.PrimitiveType primitive) {
return primitive;
diff --git a/core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java
b/core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java
index a3817b8ad9..72b2a6a783 100644
--- a/core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java
+++ b/core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java
@@ -304,6 +304,11 @@ public class MappingUtil {
MappedField.of(map.valueId(), "value", valueResult));
}
+ @Override
+ public MappedFields variant(Types.VariantType variant) {
+ return null; // no mapping because variant has no nested fields with IDs
+ }
+
@Override
public MappedFields primitive(Type.PrimitiveType primitive) {
return null; // no mapping because primitives have no nested fields
diff --git
a/core/src/main/java/org/apache/iceberg/schema/SchemaWithPartnerVisitor.java
b/core/src/main/java/org/apache/iceberg/schema/SchemaWithPartnerVisitor.java
index 9b2226f571..694bfb2f62 100644
--- a/core/src/main/java/org/apache/iceberg/schema/SchemaWithPartnerVisitor.java
+++ b/core/src/main/java/org/apache/iceberg/schema/SchemaWithPartnerVisitor.java
@@ -107,6 +107,9 @@ public abstract class SchemaWithPartnerVisitor<P, R> {
return visitor.map(map, partner, keyResult, valueResult);
+ case VARIANT:
+ return visitor.variant(type.asVariantType(), partner);
+
default:
return visitor.primitive(type.asPrimitiveType(), partner);
}
@@ -160,6 +163,10 @@ public abstract class SchemaWithPartnerVisitor<P, R> {
return null;
}
+ public R variant(Types.VariantType variant, P partner) {
+ throw new UnsupportedOperationException("Unsupported type: variant");
+ }
+
public R primitive(Type.PrimitiveType primitive, P partner) {
return null;
}
diff --git
a/core/src/main/java/org/apache/iceberg/schema/UnionByNameVisitor.java
b/core/src/main/java/org/apache/iceberg/schema/UnionByNameVisitor.java
index b7ac23816a..c3b9a50b20 100644
--- a/core/src/main/java/org/apache/iceberg/schema/UnionByNameVisitor.java
+++ b/core/src/main/java/org/apache/iceberg/schema/UnionByNameVisitor.java
@@ -142,6 +142,11 @@ public class UnionByNameVisitor extends
SchemaWithPartnerVisitor<Integer, Boolea
return false;
}
+ @Override
+ public Boolean variant(Types.VariantType variant, Integer partnerId) {
+ return partnerId == null;
+ }
+
@Override
public Boolean primitive(Type.PrimitiveType primitive, Integer partnerId) {
return partnerId == null;
diff --git a/core/src/main/java/org/apache/iceberg/types/FixupTypes.java
b/core/src/main/java/org/apache/iceberg/types/FixupTypes.java
index 23fccddda3..1e4c0b597a 100644
--- a/core/src/main/java/org/apache/iceberg/types/FixupTypes.java
+++ b/core/src/main/java/org/apache/iceberg/types/FixupTypes.java
@@ -147,6 +147,12 @@ public abstract class FixupTypes extends
TypeUtil.CustomOrderSchemaVisitor<Type>
}
}
+ @Override
+ public Type variant(Types.VariantType variant) {
+ // nothing to fix up
+ return variant;
+ }
+
@Override
public Type primitive(Type.PrimitiveType primitive) {
if (sourceType.equals(primitive)) {
diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaParser.java
b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java
index 108371416b..acdba85adf 100644
--- a/core/src/test/java/org/apache/iceberg/TestSchemaParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java
@@ -127,4 +127,14 @@ public class TestSchemaParser extends AvroDataTest {
assertThat(serialized.findField("col_with_default").writeDefault())
.isEqualTo(defaultValue.value());
}
+
+ @Test
+ public void testVariantType() throws IOException {
+ Schema schema =
+ new Schema(
+ Types.NestedField.required(1, "id", Types.IntegerType.get()),
+ Types.NestedField.optional(2, "data", Types.VariantType.get()));
+
+ writeAndValidate(schema);
+ }
}
diff --git
a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java
b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java
index 3a61ce8a15..c6f9cfdb24 100644
--- a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java
+++ b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java
@@ -27,7 +27,7 @@ import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.types.Type.PrimitiveType;
+import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.BooleanType;
@@ -43,13 +43,16 @@ import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.StructType;
import org.apache.iceberg.types.Types.TimeType;
+import org.apache.iceberg.types.Types.TimestampNanoType;
import org.apache.iceberg.types.Types.TimestampType;
import org.apache.iceberg.types.Types.UUIDType;
+import org.apache.iceberg.types.Types.UnknownType;
+import org.apache.iceberg.types.Types.VariantType;
import org.junit.jupiter.api.Test;
public class TestSchemaUnionByFieldName {
- private static List<? extends PrimitiveType> primitiveTypes() {
+ private static List<? extends Type> primitiveTypes() {
return Lists.newArrayList(
StringType.get(),
TimeType.get(),
@@ -64,11 +67,15 @@ public class TestSchemaUnionByFieldName {
FixedType.ofLength(10),
DecimalType.of(10, 2),
LongType.get(),
- FloatType.get());
+ FloatType.get(),
+ VariantType.get(),
+ UnknownType.get(),
+ TimestampNanoType.withoutZone(),
+ TimestampNanoType.withZone());
}
private static NestedField[] primitiveFields(
- Integer initialValue, List<? extends PrimitiveType> primitiveTypes) {
+ Integer initialValue, List<? extends Type> primitiveTypes) {
AtomicInteger atomicInteger = new AtomicInteger(initialValue);
return primitiveTypes.stream()
.map(
@@ -76,7 +83,7 @@ public class TestSchemaUnionByFieldName {
optional(
atomicInteger.incrementAndGet(),
type.toString(),
- Types.fromPrimitiveString(type.toString())))
+ Types.fromTypeName(type.toString())))
.toArray(NestedField[]::new);
}
@@ -104,7 +111,7 @@ public class TestSchemaUnionByFieldName {
@Test
public void testAddTopLevelListOfPrimitives() {
- for (PrimitiveType primitiveType : primitiveTypes()) {
+ for (Type primitiveType : primitiveTypes()) {
Schema newSchema =
new Schema(optional(1, "aList", Types.ListType.ofOptional(2, primitiveType)));
Schema applied = new SchemaUpdate(new Schema(), 0).unionByNameWith(newSchema).apply();
@@ -114,7 +121,7 @@ public class TestSchemaUnionByFieldName {
@Test
public void testAddTopLevelMapOfPrimitives() {
- for (PrimitiveType primitiveType : primitiveTypes()) {
+ for (Type primitiveType : primitiveTypes()) {
Schema newSchema =
new Schema(
optional(1, "aMap", Types.MapType.ofOptional(2, 3, primitiveType, primitiveType)));
@@ -125,7 +132,7 @@ public class TestSchemaUnionByFieldName {
@Test
public void testAddTopLevelStructOfPrimitives() {
- for (PrimitiveType primitiveType : primitiveTypes()) {
+ for (Type primitiveType : primitiveTypes()) {
Schema currentSchema =
new Schema(
optional(1, "aStruct", Types.StructType.of(optional(2, "primitive", primitiveType))));
@@ -136,7 +143,7 @@ public class TestSchemaUnionByFieldName {
@Test
public void testAddNestedPrimitive() {
- for (PrimitiveType primitiveType : primitiveTypes()) {
+ for (Type primitiveType : primitiveTypes()) {
Schema currentSchema = new Schema(optional(1, "aStruct", Types.StructType.of()));
Schema newSchema =
new Schema(
diff --git a/core/src/test/java/org/apache/iceberg/mapping/TestNameMapping.java b/core/src/test/java/org/apache/iceberg/mapping/TestNameMapping.java
index d30a93d50d..3ebf4d9242 100644
--- a/core/src/test/java/org/apache/iceberg/mapping/TestNameMapping.java
+++ b/core/src/test/java/org/apache/iceberg/mapping/TestNameMapping.java
@@ -289,4 +289,16 @@ public class TestNameMapping {
"location",
MappedFields.of(MappedField.of(11, "latitude"), MappedField.of(12, "longitude"))));
}
+
+ @Test
+ public void testMappingVariantType() {
+ Schema schema =
+ new Schema(
+ required(1, "id", Types.LongType.get()), required(2, "data", Types.VariantType.get()));
+
+ MappedFields expected = MappedFields.of(MappedField.of(1, "id"), MappedField.of(2, "data"));
+
+ NameMapping mapping = MappingUtil.create(schema);
+ assertThat(mapping.asMappedFields()).isEqualTo(expected);
+ }
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/Spark3Util.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/Spark3Util.java
index af0fa84f67..ad8a4beb55 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/Spark3Util.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/Spark3Util.java
@@ -565,6 +565,11 @@ public class Spark3Util {
return "map<" + keyResult + ", " + valueResult + ">";
}
+ @Override
+ public String variant(Types.VariantType variant) {
+ return "variant";
+ }
+
@Override
public String primitive(Type.PrimitiveType primitive) {
switch (primitive.typeId()) {
diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestSpark3Util.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestSpark3Util.java
index 6f900ffebb..e4e66abfef 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestSpark3Util.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestSpark3Util.java
@@ -105,12 +105,13 @@ public class TestSpark3Util extends TestBase {
3,
"pairs",
Types.MapType.ofOptional(4, 5, Types.StringType.get(), Types.LongType.get())),
- required(6, "time", Types.TimestampType.withoutZone()));
+ required(6, "time", Types.TimestampType.withoutZone()),
+ required(7, "v", Types.VariantType.get()));
assertThat(Spark3Util.describe(schema))
.as("Schema description isn't correct.")
.isEqualTo(
- "struct<data: list<string> not null,pairs: map<string, bigint>,time: timestamp not null>");
+ "struct<data: list<string> not null,pairs: map<string, bigint>,time: timestamp not null,v: variant not null>");
}
@Test