This is an automated email from the ASF dual-hosted git repository. blue pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push: new e1e0a74047 API, Core: Add geometry and geography types support (#12346) e1e0a74047 is described below commit e1e0a7404740b2bf9e6638afb5f0ff19f2536713 Author: Kristin Cowalcijk <b...@wherobots.com> AuthorDate: Wed Mar 26 07:09:02 2025 +0800 API, Core: Add geometry and geography types support (#12346) --- api/src/main/java/org/apache/iceberg/Schema.java | 4 +- .../org/apache/iceberg/transforms/Identity.java | 11 +- .../org/apache/iceberg/types/EdgeAlgorithm.java | 61 +++++++++ .../main/java/org/apache/iceberg/types/Type.java | 2 + .../java/org/apache/iceberg/types/TypeUtil.java | 6 + .../main/java/org/apache/iceberg/types/Types.java | 152 +++++++++++++++++++++ .../iceberg/TestPartitionSpecValidation.java | 31 +++-- .../test/java/org/apache/iceberg/TestSchema.java | 7 +- .../apache/iceberg/transforms/TestBucketing.java | 28 ++++ .../apache/iceberg/transforms/TestIdentity.java | 28 ++-- .../iceberg/types/TestReadabilityChecks.java | 7 +- .../iceberg/types/TestSerializableTypes.java | 7 +- .../org/apache/iceberg/types/TestTypeUtil.java | 6 +- .../java/org/apache/iceberg/types/TestTypes.java | 90 ++++++++++++ .../main/java/org/apache/iceberg/SchemaParser.java | 4 - .../org/apache/iceberg/TestGeospatialTable.java | 77 +++++++++++ .../java/org/apache/iceberg/TestSchemaParser.java | 5 + .../apache/iceberg/TestSchemaUnionByFieldName.java | 9 +- .../java/org/apache/iceberg/TestSchemaUpdate.java | 7 +- .../java/org/apache/iceberg/TestSortOrder.java | 16 +++ .../java/org/apache/iceberg/data/DataTest.java | 18 +++ 21 files changed, 541 insertions(+), 35 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 07ed44b65c..e497b8e69a 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -63,7 +63,9 @@ public class Schema implements Serializable { ImmutableMap.of( Type.TypeID.TIMESTAMP_NANO, 3, Type.TypeID.VARIANT, 3, - Type.TypeID.UNKNOWN, 3); + Type.TypeID.UNKNOWN, 3, + Type.TypeID.GEOMETRY, 3, + Type.TypeID.GEOGRAPHY, 3); private final StructType struct; private final int schemaId; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Identity.java b/api/src/main/java/org/apache/iceberg/transforms/Identity.java index 099a99cc3c..71b4fa165a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Identity.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Identity.java @@ -19,16 +19,21 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.util.Set; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.types.Type; import org.apache.iceberg.util.SerializableFunction; class Identity<T> implements Transform<T, T> { private static final Identity<?> INSTANCE = new Identity<>(); + private static final Set<Type.TypeID> UNSUPPORTED_TYPES = + ImmutableSet.of(Type.TypeID.VARIANT, Type.TypeID.GEOMETRY, Type.TypeID.GEOGRAPHY); + private final Type type; /** @@ -39,7 +44,7 @@ class Identity<T> implements Transform<T, T> { @Deprecated public static <I> Identity<I> get(Type type) { Preconditions.checkArgument( - type.typeId() != Type.TypeID.VARIANT, "Unsupported type for identity: %s", type); + !UNSUPPORTED_TYPES.contains(type.typeId()), "Unsupported type for identity: %s", type); return new Identity<>(type); } @@ -93,6 +98,10 @@ class Identity<T> implements Transform<T, T> { @Override public boolean canTransform(Type maybePrimitive) { + if (UNSUPPORTED_TYPES.contains(maybePrimitive.typeId())) { + return false; + } + return maybePrimitive.isPrimitiveType(); } diff --git a/api/src/main/java/org/apache/iceberg/types/EdgeAlgorithm.java b/api/src/main/java/org/apache/iceberg/types/EdgeAlgorithm.java new file mode 100644 index 0000000000..5ddc55c64a --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/types/EdgeAlgorithm.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.types; + +import java.util.Locale; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** The algorithm for interpolating edges. */ +public enum EdgeAlgorithm { + /** Edges are interpolated as geodesics on a sphere. */ + SPHERICAL, + /** See <a href="https://en.wikipedia.org/wiki/Vincenty%27s_formulae">Vincenty's formulae</a> */ + VINCENTY, + /** + * Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval + * Oceanographic Office, 1970. + */ + THOMAS, + /** + * Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, + * 1965. + */ + ANDOYER, + /** + * <a href="https://link.springer.com/content/pdf/10.1007/s00190-012-0578-z.pdf">Karney, Charles + * FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): 43-55 </a>, and <a + * href="https://geographiclib.sourceforge.io/">GeographicLib</a>. + */ + KARNEY; + + public static EdgeAlgorithm fromName(String algorithmName) { + Preconditions.checkNotNull(algorithmName, "Invalid edge interpolation algorithm: null"); + try { + return EdgeAlgorithm.valueOf(algorithmName.toUpperCase(Locale.ENGLISH)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException( + String.format("Invalid edge interpolation algorithm: %s", algorithmName), e); + } + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ENGLISH); + } +} diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 475a7f2b5d..bed478d938 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -44,6 +44,8 @@ public interface Type extends Serializable { FIXED(ByteBuffer.class), BINARY(ByteBuffer.class), DECIMAL(BigDecimal.class), + GEOMETRY(ByteBuffer.class), + GEOGRAPHY(ByteBuffer.class), STRUCT(StructLike.class), LIST(List.class), MAP(Map.class), diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java index 91a922063f..486b1d695b 100644 --- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java +++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java @@ -540,6 +540,12 @@ public class TypeUtil { case BINARY: case VARIANT: return 80; + case GEOMETRY: + case GEOGRAPHY: + // 80 bytes is an approximate size for a polygon or linestring with 4 to 5 coordinates. + // This is a reasonable estimate for the size of a geometry or geography object without + // additional details. + return 80; case UNKNOWN: // Consider Unknown as null return 0; diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index bdc4e466e0..1c16c444d4 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -59,9 +59,16 @@ public class Types { .put(BinaryType.get().toString(), BinaryType.get()) .put(UnknownType.get().toString(), UnknownType.get()) .put(VariantType.get().toString(), VariantType.get()) + .put(GeometryType.crs84().toString(), GeometryType.crs84()) + .put(GeographyType.crs84().toString(), GeographyType.crs84()) .buildOrThrow(); private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]"); + private static final Pattern GEOMETRY_PARAMETERS = + Pattern.compile("geometry\\s*(?:\\(\\s*([^)]*?)\\s*\\))?", Pattern.CASE_INSENSITIVE); + private static final Pattern GEOGRAPHY_PARAMETERS = + Pattern.compile( + "geography\\s*(?:\\(\\s*([^,]*?)\\s*(?:,\\s*(\\w*)\\s*)?\\))?", Pattern.CASE_INSENSITIVE); private static final Pattern DECIMAL = Pattern.compile("decimal\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)"); @@ -71,6 +78,22 @@ public class Types { return TYPES.get(lowerTypeString); } + Matcher geometry = GEOMETRY_PARAMETERS.matcher(typeString); + if (geometry.matches()) { + String crs = geometry.group(1); + Preconditions.checkArgument(!crs.contains(","), "Invalid CRS: %s", crs); + return GeometryType.of(crs); + } + + Matcher geography = GEOGRAPHY_PARAMETERS.matcher(typeString); + if (geography.matches()) { + String crs = geography.group(1); + String algorithmName = geography.group(2); + EdgeAlgorithm algorithm = + algorithmName == null ? null : EdgeAlgorithm.fromName(algorithmName); + return GeographyType.of(crs, algorithm); + } + Matcher fixed = FIXED.matcher(lowerTypeString); if (fixed.matches()) { return FixedType.ofLength(Integer.parseInt(fixed.group(1))); @@ -548,6 +571,135 @@ public class Types { } } + public static class GeometryType extends PrimitiveType { + public static final String DEFAULT_CRS = "OGC:CRS84"; + + public static GeometryType crs84() { + return new GeometryType(); + } + + public static GeometryType of(String crs) { + return new GeometryType(crs); + } + + private final String crs; + + private GeometryType() { + crs = null; + } + + private GeometryType(String crs) { + Preconditions.checkArgument(crs == null || !crs.isEmpty(), "Invalid CRS: (empty string)"); + this.crs = DEFAULT_CRS.equalsIgnoreCase(crs) ? null : crs; + } + + @Override + public TypeID typeId() { + return TypeID.GEOMETRY; + } + + public String crs() { + return crs; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof GeometryType)) { + return false; + } + + GeometryType that = (GeometryType) o; + return Objects.equals(crs, that.crs); + } + + @Override + public int hashCode() { + return Objects.hash(GeometryType.class, crs); + } + + @Override + public String toString() { + if (crs == null) { + return "geometry"; + } + + return String.format("geometry(%s)", crs); + } + } + + public static class GeographyType extends PrimitiveType { + public static final String DEFAULT_CRS = "OGC:CRS84"; + + public static GeographyType crs84() { + return new GeographyType(); + } + + public static GeographyType of(String crs) { + return new GeographyType(crs, null); + } + + public static GeographyType of(String crs, EdgeAlgorithm algorithm) { + return new GeographyType(crs, algorithm); + } + + private final String crs; + private final EdgeAlgorithm algorithm; + + private GeographyType() { + this.crs = null; + this.algorithm = null; + } + + private GeographyType(String crs, EdgeAlgorithm algorithm) { + Preconditions.checkArgument(crs == null || !crs.isEmpty(), "Invalid CRS: (empty string)"); + this.crs = DEFAULT_CRS.equalsIgnoreCase(crs) ? null : crs; + this.algorithm = algorithm; + } + + @Override + public TypeID typeId() { + return TypeID.GEOGRAPHY; + } + + public String crs() { + return crs; + } + + public EdgeAlgorithm algorithm() { + return algorithm; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof GeographyType)) { + return false; + } + + GeographyType that = (GeographyType) o; + return Objects.equals(crs, that.crs) && Objects.equals(algorithm, that.algorithm); + } + + @Override + public int hashCode() { + return Objects.hash(GeographyType.class, crs, algorithm); + } + + @Override + public String toString() { + if (algorithm != null) { + return String.format("geography(%s, %s)", crs != null ? crs : DEFAULT_CRS, algorithm); + } else if (crs != null) { + return String.format("geography(%s)", crs); + } else { + return "geography"; + } + } + } + public static class NestedField implements Serializable { public static NestedField optional(int id, String name, Type type) { return new NestedField(true, id, name, type, null, null, null); diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index 125b0b519f..ee71d39bb2 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -26,6 +26,8 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; public class TestPartitionSpecValidation { private static final Schema SCHEMA = @@ -37,7 +39,9 @@ public class TestPartitionSpecValidation { NestedField.required(5, "another_d", Types.TimestampType.withZone()), NestedField.required(6, "s", Types.StringType.get()), NestedField.required(7, "v", Types.VariantType.get()), - NestedField.optional(8, "u", Types.UnknownType.get())); + NestedField.required(8, "geom", Types.GeometryType.crs84()), + NestedField.required(9, "geog", Types.GeographyType.crs84()), + NestedField.optional(10, "u", Types.UnknownType.get())); @Test public void testMultipleTimestampPartitions() { @@ -316,25 +320,24 @@ public class TestPartitionSpecValidation { assertThat(spec.lastAssignedFieldId()).isEqualTo(1006); } - @Test - public void testVariantUnsupported() { + @ParameterizedTest + @MethodSource("unsupportedFieldsProvider") + public void testUnsupported(int fieldId, String partitionName, String expectedErrorMessage) { assertThatThrownBy( () -> PartitionSpec.builderFor(SCHEMA) - .add(7, 1005, "variant_partition1", Transforms.bucket(5)) + .add(fieldId, 1005, partitionName, Transforms.bucket(5)) .build()) .isInstanceOf(ValidationException.class) - .hasMessage("Cannot partition by non-primitive source field: variant"); + .hasMessage(expectedErrorMessage); } - @Test - public void testUnknownUnsupported() { - assertThatThrownBy( - () -> - PartitionSpec.builderFor(SCHEMA) - .add(8, 1005, "unknown_partition1", Transforms.bucket(5)) - .build()) - .isInstanceOf(ValidationException.class) - .hasMessage("Invalid source type unknown for transform: bucket[5]"); + private static Object[][] unsupportedFieldsProvider() { + return new Object[][] { + {7, "variant_partition1", "Cannot partition by non-primitive source field: variant"}, + {8, "geom_partition1", "Invalid source type geometry for transform: bucket[5]"}, + {9, "geog_partition1", "Invalid source type geography for transform: bucket[5]"}, + {10, "unknown_partition1", "Invalid source type unknown for transform: bucket[5]"} + }; } } diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index 6b41baa471..4b164f963d 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -29,6 +29,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; @@ -43,7 +44,11 @@ public class TestSchema { ImmutableList.of( Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), - Types.VariantType.get()); + Types.VariantType.get(), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); private static final Schema INITIAL_DEFAULT_SCHEMA = new Schema( diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index 3c8ff93a85..81f4fa6098 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -431,6 +431,34 @@ public class TestBucketing { assertThat(bucket.canTransform(Types.VariantType.get())).isFalse(); } + @Test + public void testGeometryUnsupported() { + assertThatThrownBy(() -> Transforms.bucket(Types.GeometryType.crs84(), 3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geometry"); + + Transform<Object, Integer> bucket = Transforms.bucket(3); + assertThatThrownBy(() -> bucket.bind(Types.GeometryType.crs84())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geometry"); + + assertThat(bucket.canTransform(Types.GeometryType.crs84())).isFalse(); + } + + @Test + public void testGeographyUnsupported() { + assertThatThrownBy(() -> Transforms.bucket(Types.GeographyType.crs84(), 3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geography"); + + Transform<Object, Integer> bucket = Transforms.bucket(3); + assertThatThrownBy(() -> bucket.bind(Types.GeographyType.crs84())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geography"); + + assertThat(bucket.canTransform(Types.GeographyType.crs84())).isFalse(); + } + @Test public void testUnknownUnsupported() { assertThatThrownBy(() -> Transforms.bucket(Types.UnknownType.get(), 3)) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index fc24be8d56..62e5418ee2 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -24,8 +24,11 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.math.BigDecimal; import java.nio.ByteBuffer; import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; public class TestIdentity { @Test @@ -167,20 +170,27 @@ public class TestIdentity { .isEqualTo("null"); } - @Test - public void testVariantUnsupported() { - assertThatThrownBy(() -> Transforms.identity().bind(Types.VariantType.get())) + @ParameterizedTest + @MethodSource("unsupportedTypesProvider") + public void testUnsupported(Type type) { + assertThatThrownBy(() -> Transforms.identity().bind(type)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot bind to unsupported type: variant"); + .hasMessage("Cannot bind to unsupported type: " + type); - assertThatThrownBy(() -> Transforms.fromString(Types.VariantType.get(), "identity")) + assertThatThrownBy(() -> Transforms.fromString(type, "identity")) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Unsupported type for identity: variant"); + .hasMessage("Unsupported type for identity: " + type); - assertThatThrownBy(() -> Transforms.identity(Types.VariantType.get())) + assertThatThrownBy(() -> Transforms.identity(type)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Unsupported type for identity: variant"); + .hasMessage("Unsupported type for identity: " + type); + + assertThat(Transforms.identity().canTransform(type)).isFalse(); + } - assertThat(Transforms.identity().canTransform(Types.VariantType.get())).isFalse(); + private static Type[] unsupportedTypesProvider() { + return new Type[] { + Types.VariantType.get(), Types.GeometryType.crs84(), Types.GeographyType.crs84() + }; } } diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java index debb9c9dc1..20299cdafc 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java +++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java @@ -53,7 +53,12 @@ public class TestReadabilityChecks { Types.BinaryType.get(), Types.DecimalType.of(9, 2), Types.DecimalType.of(11, 2), - Types.DecimalType.of(9, 3) + Types.DecimalType.of(9, 3), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269"), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY), }; @Test diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index 84d9dfbfa7..2363bd8dc6 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -64,7 +64,12 @@ public class TestSerializableTypes { Types.DecimalType.of(9, 3), Types.DecimalType.of(11, 0), Types.FixedType.ofLength(4), - Types.FixedType.ofLength(34) + Types.FixedType.ofLength(34), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269"), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY), }; for (Type type : equalityPrimitives) { diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java index 9501d4e250..63f2027ab0 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java @@ -760,7 +760,11 @@ public class TestTypeUtil { Arguments.of(Types.UnknownType.get()), Arguments.of(Types.VariantType.get()), Arguments.of(Types.TimestampNanoType.withoutZone()), - Arguments.of(Types.TimestampNanoType.withZone())); + Arguments.of(Types.TimestampNanoType.withZone()), + Arguments.of(Types.GeometryType.crs84()), + Arguments.of(Types.GeometryType.of("srid:3857")), + Arguments.of(Types.GeographyType.crs84()), + Arguments.of(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY))); } @ParameterizedTest diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index f8ee4e2ccb..cc8d3586b8 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -46,6 +46,22 @@ public class TestTypes { assertThat(Types.fromTypeName("variant")).isSameAs(Types.VariantType.get()); assertThat(Types.fromTypeName("Variant")).isSameAs(Types.VariantType.get()); + assertThat(Types.fromTypeName("geometry")).isEqualTo(Types.GeometryType.crs84()); + assertThat(Types.fromTypeName("Geometry")).isEqualTo(Types.GeometryType.crs84()); + assertThat(Types.fromTypeName("geometry(srid:3857)")) + .isEqualTo(Types.GeometryType.of("srid:3857")); + assertThat(Types.fromTypeName("geometry ( srid:3857 )")) + .isEqualTo(Types.GeometryType.of("srid:3857")); + + assertThat(Types.fromTypeName("geography")).isEqualTo(Types.GeographyType.crs84()); + assertThat(Types.fromTypeName("Geography")).isEqualTo(Types.GeographyType.crs84()); + assertThat(Types.fromTypeName("geography(srid:4269)")) + .isEqualTo(Types.GeographyType.of("srid:4269")); + assertThat(Types.fromTypeName("geography(srid:4269, karney)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); + assertThat(Types.fromTypeName("geography ( srid:4269 , karney )")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); + assertThatExceptionOfType(IllegalArgumentException.class) .isThrownBy(() -> Types.fromTypeName("abcdefghij")) .withMessage("Cannot parse type string to primitive: abcdefghij"); @@ -79,6 +95,80 @@ public class TestTypes { assertThatExceptionOfType(IllegalArgumentException.class) .isThrownBy(() -> Types.fromPrimitiveString("abcdefghij")) .withMessage("Cannot parse type string to primitive: abcdefghij"); + + assertThat(Types.fromPrimitiveString("geometry")).isEqualTo(Types.GeometryType.crs84()); + assertThat(Types.fromPrimitiveString("Geometry")).isEqualTo(Types.GeometryType.crs84()); + assertThat(Types.fromPrimitiveString("geometry(srid:3857)")) + .isEqualTo(Types.GeometryType.of("srid:3857")); + assertThat(Types.fromPrimitiveString("geometry( srid:3857 )")) + .isEqualTo(Types.GeometryType.of("srid:3857")); + assertThat(Types.fromPrimitiveString("geometry( srid: 3857 )")) + .isEqualTo(Types.GeometryType.of("srid: 3857")); + assertThat(Types.fromPrimitiveString("Geometry( projjson:TestIdentifier )")) + .isEqualTo(Types.GeometryType.of("projjson:TestIdentifier")); + + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geometry()")) + .withMessageContaining("Invalid CRS: (empty string)"); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geometry( )")) + .withMessageContaining("Invalid CRS: (empty string)"); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geometry(srid:123,456)")) + .withMessageContaining("Invalid CRS: srid:123,456"); + + assertThat(Types.fromPrimitiveString("geography")).isEqualTo(Types.GeographyType.crs84()); + assertThat(Types.fromPrimitiveString("Geography")).isEqualTo(Types.GeographyType.crs84()); + assertThat(Types.fromPrimitiveString("geography(srid:4269)")) + .isEqualTo(Types.GeographyType.of("srid:4269")); + assertThat(Types.fromPrimitiveString("geography(srid: 4269)")) + .isEqualTo(Types.GeographyType.of("srid: 4269")); + assertThat(Types.fromPrimitiveString("geography(srid:4269, spherical)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.SPHERICAL)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, vincenty)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.VINCENTY)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, thomas)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.THOMAS)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, andoyer)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.ANDOYER)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, karney)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); + assertThat(Types.fromPrimitiveString("geography(srid: 4269, karney)")) + .isEqualTo(Types.GeographyType.of("srid: 4269", EdgeAlgorithm.KARNEY)); + assertThat(Types.fromPrimitiveString("Geography(projjson: TestIdentifier, karney)")) + .isEqualTo(Types.GeographyType.of("projjson: TestIdentifier", EdgeAlgorithm.KARNEY)); + + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geography()")) + .withMessageContaining("Invalid CRS: (empty string)"); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geography( , spherical)")) + .withMessageContaining("Invalid CRS: (empty string)"); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geography(srid:4269, BadAlgorithm)")) + .withMessageContaining("Invalid edge interpolation algorithm") + .withMessageContaining("BadAlgorithm"); + + // Test geography type with various spacing + assertThat(Types.fromPrimitiveString("geography( srid:4269 )")) + .isEqualTo(Types.GeographyType.of("srid:4269")); + assertThat(Types.fromPrimitiveString("geography( srid:4269 , spherical )")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.SPHERICAL)); + assertThat(Types.fromPrimitiveString("geography(srid:4269,vincenty)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.VINCENTY)); + assertThat(Types.fromPrimitiveString("geography( srid:4269 , karney )")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); + } + + @Test + public void testGeospatialTypeToString() { + assertThat(Types.GeometryType.crs84().toString()).isEqualTo("geometry"); + assertThat(Types.GeometryType.of("srid:4326").toString()).isEqualTo("geometry(srid:4326)"); + assertThat(Types.GeographyType.crs84().toString()).isEqualTo("geography"); + assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.KARNEY).toString()) + .isEqualTo("geography(srid:4326, karney)"); + assertThat(Types.GeographyType.of(null, EdgeAlgorithm.KARNEY).toString()) + .isEqualTo("geography(OGC:CRS84, karney)"); } @Test diff --git a/core/src/main/java/org/apache/iceberg/SchemaParser.java b/core/src/main/java/org/apache/iceberg/SchemaParser.java index d7c7567957..492668ff01 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaParser.java +++ b/core/src/main/java/org/apache/iceberg/SchemaParser.java @@ -140,10 +140,6 @@ public class SchemaParser { generator.writeEndObject(); } - static void toJson(Type.PrimitiveType primitive, JsonGenerator generator) throws IOException { - generator.writeString(primitive.toString()); - } - static void toJson(Type type, JsonGenerator generator) throws IOException { if (type.isPrimitiveType() || type.isVariantType()) { generator.writeString(type.toString()); diff --git a/core/src/test/java/org/apache/iceberg/TestGeospatialTable.java b/core/src/test/java/org/apache/iceberg/TestGeospatialTable.java new file mode 100644 index 0000000000..cb60e83a09 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestGeospatialTable.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.IOException; +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.inmemory.InMemoryCatalog; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.types.EdgeAlgorithm; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestGeospatialTable { + + @Test + public void testCreateGeospatialTable() throws IOException { + Schema schema = + new Schema( + required(3, "id", Types.IntegerType.get(), "unique ID"), + required(4, "data", Types.StringType.get()), + required(5, "geom", Types.GeometryType.of("srid:3857"), "geometry column"), + required( + 6, + "geog", + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY), + "geography column")); + + try (InMemoryCatalog catalog = initInMemoryCatalog()) { + catalog.createNamespace(Namespace.of("ns")); + + TableIdentifier identifier = TableIdentifier.of("ns", "geos_t1"); + Map<String, String> properties = ImmutableMap.of(TableProperties.FORMAT_VERSION, "3"); + catalog.createTable(identifier, schema, PartitionSpec.unpartitioned(), properties); + Table table = catalog.loadTable(identifier); + + Types.NestedField geomField = table.schema().findField("geom"); + assertThat(geomField.type().typeId()).isEqualTo(Type.TypeID.GEOMETRY); + Types.GeometryType geomType = (Types.GeometryType) geomField.type(); + assertThat(geomType.crs()).isEqualTo("srid:3857"); + + Types.NestedField geogField = table.schema().findField("geog"); + assertThat(geogField.type().typeId()).isEqualTo(Type.TypeID.GEOGRAPHY); + Types.GeographyType geogType = (Types.GeographyType) geogField.type(); + assertThat(geogType.crs()).isEqualTo("srid:4269"); + assertThat(geogType.algorithm()).isEqualTo(EdgeAlgorithm.KARNEY); + assertThat(catalog.dropTable(identifier)).isTrue(); + } + } + + private InMemoryCatalog initInMemoryCatalog() { + InMemoryCatalog catalog = new InMemoryCatalog(); + catalog.initialize("in-memory-catalog", ImmutableMap.of()); + return catalog; + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaParser.java b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java index cf6b03ee14..a52e779d9a 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaParser.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java @@ -55,6 +55,11 @@ public class TestSchemaParser extends DataTest { return true; } + @Override + protected boolean supportsGeospatial() { + return true; + } + @Override protected void writeAndValidate(Schema schema) throws IOException { Schema serialized = SchemaParser.fromJson(SchemaParser.toJson(schema)); diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java index aa478f8526..a9255e4125 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.BinaryType; @@ -36,6 +37,8 @@ import org.apache.iceberg.types.Types.DecimalType; import org.apache.iceberg.types.Types.DoubleType; import org.apache.iceberg.types.Types.FixedType; import org.apache.iceberg.types.Types.FloatType; +import org.apache.iceberg.types.Types.GeographyType; +import org.apache.iceberg.types.Types.GeometryType; import org.apache.iceberg.types.Types.IntegerType; import org.apache.iceberg.types.Types.ListType; import org.apache.iceberg.types.Types.LongType; @@ -71,7 +74,11 @@ public class TestSchemaUnionByFieldName { VariantType.get(), UnknownType.get(), TimestampNanoType.withoutZone(), - TimestampNanoType.withZone()); + TimestampNanoType.withZone(), + GeometryType.crs84(), + GeometryType.of("srid:3857"), + GeographyType.crs84(), + GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); } private static NestedField[] primitiveFields( diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java index d1591f80d8..4bd3d89946 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java @@ -29,6 +29,7 @@ import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; @@ -364,7 +365,11 @@ public class TestSchemaUpdate { Types.FixedType.ofLength(4), Types.DecimalType.of(9, 2), Types.DecimalType.of(9, 3), - Types.DecimalType.of(18, 2)); + Types.DecimalType.of(18, 2), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); for (Type.PrimitiveType fromType : primitives) { for (Type.PrimitiveType toType : primitives) { diff --git a/core/src/test/java/org/apache/iceberg/TestSortOrder.java b/core/src/test/java/org/apache/iceberg/TestSortOrder.java index 7d0688e9da..521320ea23 100644 --- a/core/src/test/java/org/apache/iceberg/TestSortOrder.java +++ b/core/src/test/java/org/apache/iceberg/TestSortOrder.java @@ -343,6 +343,22 @@ public class TestSortOrder { .hasMessage("Unsupported type for identity: variant"); } + @TestTemplate + public void testGeospatialUnsupported() { + Schema v3Schema = + new Schema( + Types.NestedField.required(3, "id", Types.LongType.get()), + Types.NestedField.required(4, "geom", Types.GeometryType.crs84()), + Types.NestedField.required(5, "geog", Types.GeographyType.crs84())); + + assertThatThrownBy(() -> SortOrder.builderFor(v3Schema).withOrderId(10).asc("geom").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unsupported type for identity: geometry"); + assertThatThrownBy(() -> SortOrder.builderFor(v3Schema).withOrderId(10).asc("geog").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unsupported type for identity: geography"); + } + @Test public void testUnknownSupported() { int fieldId = 22; diff --git a/core/src/test/java/org/apache/iceberg/data/DataTest.java b/core/src/test/java/org/apache/iceberg/data/DataTest.java index d75659229c..cc788e2ec0 100644 --- a/core/src/test/java/org/apache/iceberg/data/DataTest.java +++ b/core/src/test/java/org/apache/iceberg/data/DataTest.java @@ -34,6 +34,7 @@ import java.util.stream.Stream; import org.apache.iceberg.Schema; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; @@ -115,6 +116,11 @@ public abstract class DataTest { Types.DecimalType.of(11, 2), Types.DecimalType.of(38, 10), Types.VariantType.get(), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269"), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY), }; protected boolean supportsUnknown() { @@ -129,6 +135,10 @@ public abstract class DataTest { return false; } + protected boolean supportsGeospatial() { + return false; + } + @ParameterizedTest @FieldSource("SIMPLE_TYPES") public void testTypeSchema(Type type) throws IOException { @@ -147,6 +157,14 @@ public abstract class DataTest { || TypeUtil.find(type, t -> t.typeId() == Type.TypeID.VARIANT) == null) .as("variant is not yet implemented") .isTrue(); + if (!supportsGeospatial()) { + Assumptions.assumeThat(TypeUtil.find(type, t -> t.typeId() == Type.TypeID.GEOMETRY) == null) + .as("geometry is not yet implemented") + .isTrue(); + Assumptions.assumeThat(TypeUtil.find(type, t -> t.typeId() == Type.TypeID.GEOGRAPHY) == null) + .as("geography is not yet implemented") + .isTrue(); + } writeAndValidate( new Schema(