This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 8d6afa952 PARQUET-2417: Add `geometry` and `geography` logical type
annotations (#3200)
8d6afa952 is described below
commit 8d6afa9522e6dd8da74f0cb2d671111c4a2153d6
Author: Feng Zhang <[email protected]>
AuthorDate: Mon Apr 28 19:06:15 2025 -0700
PARQUET-2417: Add `geometry` and `geography` logical type annotations
(#3200)
---
parquet-column/pom.xml | 6 +
.../column/schema/EdgeInterpolationAlgorithm.java | 64 ++++++++
.../parquet/schema/LogicalTypeAnnotation.java | 168 +++++++++++++++++++++
.../parquet/schema/PrimitiveStringifier.java | 18 +++
.../org/apache/parquet/schema/PrimitiveType.java | 12 ++
.../main/java/org/apache/parquet/schema/Types.java | 12 ++
.../apache/parquet/schema/TestTypeBuilders.java | 97 ++++++++++++
.../format/converter/ParquetMetadataConverter.java | 53 +++++++
.../converter/TestParquetMetadataConverter.java | 140 +++++++++++++++++
pom.xml | 1 +
10 files changed, 571 insertions(+)
diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml
index 654fad27c..01b5b8e8c 100644
--- a/parquet-column/pom.xml
+++ b/parquet-column/pom.xml
@@ -76,6 +76,12 @@
<version>${slf4j.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.locationtech.jts</groupId>
+ <artifactId>jts-core</artifactId>
+ <version>${jts.version}</version>
+ </dependency>
+
<dependency>
<groupId>com.carrotsearch</groupId>
<artifactId>junit-benchmarks</artifactId>
diff --git
a/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java
b/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java
new file mode 100644
index 000000000..5357073a8
--- /dev/null
+++
b/parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.schema;
+
+/**
+ * Edge interpolation algorithm for Geography logical type
+ */
+public enum EdgeInterpolationAlgorithm {
+ SPHERICAL(0),
+ VINCENTY(1),
+ THOMAS(2),
+ ANDOYER(3),
+ KARNEY(4);
+
+ private final int value;
+
+ private EdgeInterpolationAlgorithm(int value) {
+ this.value = value;
+ }
+
+ /**
+ * Get the integer value of this enum value, as defined in the Thrift IDL.
+ */
+ public int getValue() {
+ return value;
+ }
+
+ /**
+ * Find the enum type by its integer value, as defined in the Thrift IDL.
+ * @throws IllegalArgumentException if the value is not one of the known algorithm values.
+ */
+ public static EdgeInterpolationAlgorithm findByValue(int value) {
+ switch (value) {
+ case 0:
+ return SPHERICAL;
+ case 1:
+ return VINCENTY;
+ case 2:
+ return THOMAS;
+ case 3:
+ return ANDOYER;
+ case 4:
+ return KARNEY;
+ default:
+ throw new IllegalArgumentException("Unrecognized
EdgeInterpolationAlgorithm value: " + value);
+ }
+ }
+}
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 749beaa95..be98e071f 100644
---
a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -41,8 +41,13 @@ import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import org.apache.parquet.Preconditions;
+import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
public abstract class LogicalTypeAnnotation {
+
+ public static final String DEFAULT_CRS = "OGC:CRS84";
+ public static final EdgeInterpolationAlgorithm DEFAULT_ALGO =
EdgeInterpolationAlgorithm.SPHERICAL;
+
enum LogicalTypeToken {
MAP {
@Override
@@ -155,6 +160,31 @@ public abstract class LogicalTypeAnnotation {
return float16Type();
}
},
+ GEOMETRY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 1) {
+ throw new RuntimeException(
+ "Expecting at most 1 parameter for geometry logical type, got "
+ params.size());
+ }
+ String crs = params.isEmpty() ? null : params.get(0);
+ return geometryType(crs);
+ }
+ },
+ GEOGRAPHY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 2) {
+ throw new RuntimeException(
+ "Expecting at most 2 parameters for geography logical type (crs
and edge algorithm), got "
+ + params.size());
+ }
+ String crs = !params.isEmpty() ? params.get(0) : null;
+ EdgeInterpolationAlgorithm algo =
+ params.size() > 1 ?
EdgeInterpolationAlgorithm.valueOf(params.get(1)) : null;
+ return geographyType(crs, algo);
+ }
+ },
UNKNOWN {
@Override
protected LogicalTypeAnnotation fromString(List<String> params) {
@@ -334,6 +364,18 @@ public abstract class LogicalTypeAnnotation {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static GeometryLogicalTypeAnnotation geometryType(String crs) {
+ return new GeometryLogicalTypeAnnotation(crs);
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType(String crs,
EdgeInterpolationAlgorithm edgeAlgorithm) {
+ return new GeographyLogicalTypeAnnotation(crs, edgeAlgorithm);
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType() {
+ return new GeographyLogicalTypeAnnotation(null, null);
+ }
+
public static UnknownLogicalTypeAnnotation unknownType() {
return UnknownLogicalTypeAnnotation.INSTANCE;
}
@@ -1183,6 +1225,124 @@ public abstract class LogicalTypeAnnotation {
}
}
+ public static class GeometryLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+
+ private GeometryLogicalTypeAnnotation(String crs) {
+ this.crs = crs;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOMETRY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ if (crs == null || crs.isEmpty()) {
+ return "";
+ }
+ return String.format("(%s)", crs);
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof GeometryLogicalTypeAnnotation)) {
+ return false;
+ }
+ GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation)
obj;
+ return Objects.equals(crs, other.crs);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(crs);
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.WKB_STRINGIFIER;
+ }
+ }
+
+ public static class GeographyLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+ private final EdgeInterpolationAlgorithm algorithm;
+
+ private GeographyLogicalTypeAnnotation(String crs,
EdgeInterpolationAlgorithm algorithm) {
+ this.crs = crs;
+ this.algorithm = algorithm;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOGRAPHY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ boolean hasCrs = crs != null && !crs.isEmpty();
+ boolean hasAlgo = algorithm != null;
+ if (!hasCrs && !hasAlgo) {
+ return "";
+ }
+ return String.format("(%s,%s)", hasCrs ? crs : DEFAULT_CRS, hasAlgo ?
algorithm : DEFAULT_ALGO);
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ public EdgeInterpolationAlgorithm getAlgorithm() {
+ return algorithm;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof GeographyLogicalTypeAnnotation)) {
+ return false;
+ }
+ GeographyLogicalTypeAnnotation other = (GeographyLogicalTypeAnnotation)
obj;
+ return Objects.equals(crs, other.crs) && Objects.equals(algorithm,
other.algorithm);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(crs, algorithm);
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.WKB_STRINGIFIER;
+ }
+ }
+
/**
* Implement this interface to visit a logical type annotation in the schema.
* The default implementation for each logical type specific visitor method
is empty.
@@ -1259,6 +1419,14 @@ public abstract class LogicalTypeAnnotation {
return empty();
}
+ default Optional<T> visit(GeometryLogicalTypeAnnotation
geometryLogicalType) {
+ return empty();
+ }
+
+ default Optional<T> visit(GeographyLogicalTypeAnnotation
geographyLogicalType) {
+ return empty();
+ }
+
default Optional<T> visit(UnknownLogicalTypeAnnotation
unknownLogicalTypeAnnotation) {
return empty();
}
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
index 7aface72a..3bbcca981 100644
---
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
+++
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
@@ -35,6 +35,9 @@ import java.time.format.DateTimeFormatter;
import java.util.concurrent.TimeUnit;
import javax.naming.OperationNotSupportedException;
import org.apache.parquet.io.api.Binary;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.io.ParseException;
+import org.locationtech.jts.io.WKBReader;
/**
* Class that provides string representations for the primitive values. These
string values are to be used for
@@ -442,6 +445,21 @@ public abstract class PrimitiveStringifier {
}
};
+ static final PrimitiveStringifier WKB_STRINGIFIER = new
BinaryStringifierBase("WKB_STRINGIFIER") {
+
+ @Override
+ String stringifyNotNull(Binary value) {
+
+ try {
+ WKBReader reader = new WKBReader();
+ Geometry geometry = reader.read(value.getBytesUnsafe());
+ return geometry.toText();
+ } catch (ParseException e) {
+ return BINARY_INVALID;
+ }
+ }
+ };
+
static final PrimitiveStringifier FLOAT16_STRINGIFIER = new
BinaryStringifierBase("FLOAT16_STRINGIFIER") {
@Override
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index e74d7cde0..6beff4da9 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -271,6 +271,18 @@ public final class PrimitiveType extends Type {
LogicalTypeAnnotation.BsonLogicalTypeAnnotation
bsonLogicalType) {
return
of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
}
+
+ @Override
+ public Optional<PrimitiveComparator> visit(
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation
geometryLogicalType) {
+ return
of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+ }
+
+ @Override
+ public Optional<PrimitiveComparator> visit(
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation
geographyLogicalType) {
+ return
of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+ }
})
.orElseThrow(() -> new ShouldNeverHappenException(
"No comparator logic implemented for BINARY logical type: " +
logicalType));
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 399672022..fd82d3676 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -577,6 +577,18 @@ public class Types {
return checkBinaryPrimitiveType(enumLogicalType);
}
+ @Override
+ public Optional<Boolean> visit(
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation
geometryLogicalType) {
+ return checkBinaryPrimitiveType(geometryLogicalType);
+ }
+
+ @Override
+ public Optional<Boolean> visit(
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation
geographyLogicalType) {
+ return checkBinaryPrimitiveType(geographyLogicalType);
+ }
+
private Optional<Boolean> checkFixedPrimitiveType(
int l, LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(
diff --git
a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
index 71886d120..018ce5b27 100644
---
a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
+++
b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
@@ -55,6 +55,7 @@ import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
+import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type.Repetition;
import org.junit.Assert;
@@ -1477,6 +1478,102 @@ public class TestTypeBuilders {
.named("aDecimal");
}
+ @Test
+ public void testGeometryLogicalType() {
+ // Test with default CRS
+ PrimitiveType defaultCrsExpected =
+ new PrimitiveType(REQUIRED, BINARY, "aGeometry",
LogicalTypeAnnotation.geometryType("OGC:CRS84"));
+ PrimitiveType defaultCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geometryType("OGC:CRS84"))
+ .named("aGeometry");
+ Assert.assertEquals(defaultCrsExpected, defaultCrsActual);
+
+ // Test with custom CRS
+ PrimitiveType customCrsExpected =
+ new PrimitiveType(REQUIRED, BINARY, "aGeometry",
LogicalTypeAnnotation.geometryType("EPSG:4326"));
+ PrimitiveType customCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geometryType("EPSG:4326"))
+ .named("aGeometry");
+ Assert.assertEquals(customCrsExpected, customCrsActual);
+
+ // Test with optional repetition
+ PrimitiveType optionalGeometryExpected =
+ new PrimitiveType(OPTIONAL, BINARY, "aGeometry",
LogicalTypeAnnotation.geometryType("OGC:CRS84"));
+ PrimitiveType optionalGeometryActual = Types.optional(BINARY)
+ .as(LogicalTypeAnnotation.geometryType("OGC:CRS84"))
+ .named("aGeometry");
+ Assert.assertEquals(optionalGeometryExpected, optionalGeometryActual);
+ }
+
+ @Test
+ public void testGeographyLogicalType() {
+ // Test with default CRS and no edge algorithm
+ PrimitiveType defaultCrsExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType("OGC:CRS84", null));
+ PrimitiveType defaultCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("OGC:CRS84", null))
+ .named("aGeography");
+ Assert.assertEquals(defaultCrsExpected, defaultCrsActual);
+
+ // Test with custom CRS and no edge algorithm
+ PrimitiveType customCrsExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType("EPSG:4326", null));
+ PrimitiveType customCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null))
+ .named("aGeography");
+ Assert.assertEquals(customCrsExpected, customCrsActual);
+
+ // Test with custom CRS and edge algorithm
+ EdgeInterpolationAlgorithm greatCircle =
EdgeInterpolationAlgorithm.SPHERICAL;
+ PrimitiveType customCrsWithEdgeAlgorithmExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType("EPSG:4326", greatCircle));
+ PrimitiveType customCrsWithEdgeAlgorithmActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", greatCircle))
+ .named("aGeography");
+ Assert.assertEquals(customCrsWithEdgeAlgorithmExpected,
customCrsWithEdgeAlgorithmActual);
+
+ // Test with optional repetition
+ PrimitiveType optionalGeographyExpected = new PrimitiveType(
+ OPTIONAL, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType("OGC:CRS84", null));
+ PrimitiveType optionalGeographyActual = Types.optional(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("OGC:CRS84", null))
+ .named("aGeography");
+ Assert.assertEquals(optionalGeographyExpected, optionalGeographyActual);
+ }
+
+ @Test
+ public void testGeographyLogicalTypeWithoutEdgeInterpolationAlgorithm() {
+ // Test with unspecified CRS and no edge algorithm (no-argument factory)
+ PrimitiveType defaultCrsExpected =
+ new PrimitiveType(REQUIRED, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType());
+ PrimitiveType defaultCrsActual =
+
Types.required(BINARY).as(LogicalTypeAnnotation.geographyType()).named("aGeography");
+ Assert.assertEquals(defaultCrsExpected, defaultCrsActual);
+
+ // Test with custom CRS and no edge algorithm
+ PrimitiveType customCrsExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType("EPSG:4326", null));
+ PrimitiveType customCrsActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null))
+ .named("aGeography");
+ Assert.assertEquals(customCrsExpected, customCrsActual);
+
+ // Same custom CRS case again; the edge algorithm passed here is still null
+ PrimitiveType customCrsWithEdgeAlgorithmExpected = new PrimitiveType(
+ REQUIRED, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType("EPSG:4326", null));
+ PrimitiveType customCrsWithEdgeAlgorithmActual = Types.required(BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null))
+ .named("aGeography");
+ Assert.assertEquals(customCrsWithEdgeAlgorithmExpected,
customCrsWithEdgeAlgorithmActual);
+
+ // Test with optional repetition
+ PrimitiveType optionalGeographyExpected =
+ new PrimitiveType(OPTIONAL, BINARY, "aGeography",
LogicalTypeAnnotation.geographyType());
+ PrimitiveType optionalGeographyActual =
+
Types.optional(BINARY).as(LogicalTypeAnnotation.geographyType()).named("aGeography");
+ Assert.assertEquals(optionalGeographyExpected, optionalGeographyActual);
+ }
+
/**
* A convenience method to avoid a large number of @Test(expected=...) tests
*
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 5759be234..15fcd14a7 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -76,10 +76,13 @@ import org.apache.parquet.format.DataPageHeader;
import org.apache.parquet.format.DataPageHeaderV2;
import org.apache.parquet.format.DecimalType;
import org.apache.parquet.format.DictionaryPageHeader;
+import org.apache.parquet.format.EdgeInterpolationAlgorithm;
import org.apache.parquet.format.Encoding;
import org.apache.parquet.format.EncryptionWithColumnKey;
import org.apache.parquet.format.FieldRepetitionType;
import org.apache.parquet.format.FileMetaData;
+import org.apache.parquet.format.GeographyType;
+import org.apache.parquet.format.GeometryType;
import org.apache.parquet.format.IntType;
import org.apache.parquet.format.KeyValue;
import org.apache.parquet.format.LogicalType;
@@ -520,6 +523,27 @@ public class ParquetMetadataConverter {
public Optional<LogicalType>
visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) {
return of(LogicalTypes.VARIANT(variantLogicalType.getSpecVersion()));
}
+
+ @Override
+ public Optional<LogicalType>
visit(LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
+ GeometryType geometryType = new GeometryType();
+ if (geometryLogicalType.getCrs() != null
+ && !geometryLogicalType.getCrs().isEmpty()) {
+ geometryType.setCrs(geometryLogicalType.getCrs());
+ }
+ return of(LogicalType.GEOMETRY(geometryType));
+ }
+
+ @Override
+ public Optional<LogicalType>
visit(LogicalTypeAnnotation.GeographyLogicalTypeAnnotation
geographyLogicalType) {
+ GeographyType geographyType = new GeographyType();
+ if (geographyLogicalType.getCrs() != null
+ && !geographyLogicalType.getCrs().isEmpty()) {
+ geographyType.setCrs(geographyLogicalType.getCrs());
+ }
+
geographyType.setAlgorithm(fromParquetEdgeInterpolationAlgorithm(geographyLogicalType.getAlgorithm()));
+ return of(LogicalType.GEOGRAPHY(geographyType));
+ }
}
private void addRowGroup(
@@ -1183,6 +1207,13 @@ public class ParquetMetadataConverter {
return LogicalTypeAnnotation.uuidType();
case FLOAT16:
return LogicalTypeAnnotation.float16Type();
+ case GEOMETRY:
+ GeometryType geometry = type.getGEOMETRY();
+ return LogicalTypeAnnotation.geometryType(geometry.getCrs());
+ case GEOGRAPHY:
+ GeographyType geography = type.getGEOGRAPHY();
+ return LogicalTypeAnnotation.geographyType(
+ geography.getCrs(),
toParquetEdgeInterpolationAlgorithm(geography.getAlgorithm()));
case VARIANT:
VariantType variant = type.getVARIANT();
return
LogicalTypeAnnotation.variantType(variant.getSpecification_version());
@@ -2490,4 +2521,26 @@ public class ParquetMetadataConverter {
}
return formatStats;
}
+
+ /** Convert Parquet Algorithm enum to Thrift Algorithm enum; null maps to null, a value with no Thrift counterpart throws IllegalArgumentException */
+ public static EdgeInterpolationAlgorithm
fromParquetEdgeInterpolationAlgorithm(
+ org.apache.parquet.column.schema.EdgeInterpolationAlgorithm parquetAlgo)
{
+ if (parquetAlgo == null) {
+ return null;
+ }
+ EdgeInterpolationAlgorithm thriftAlgo =
EdgeInterpolationAlgorithm.findByValue(parquetAlgo.getValue());
+ if (thriftAlgo == null) {
+ throw new IllegalArgumentException("Unrecognized Parquet
EdgeInterpolationAlgorithm: " + parquetAlgo);
+ }
+ return thriftAlgo;
+ }
+
+ /** Convert Thrift Algorithm enum to Parquet Algorithm enum; a null input is returned as null */
+ public static org.apache.parquet.column.schema.EdgeInterpolationAlgorithm
toParquetEdgeInterpolationAlgorithm(
+ EdgeInterpolationAlgorithm thriftAlgo) {
+ if (thriftAlgo == null) {
+ return null;
+ }
+ return
org.apache.parquet.column.schema.EdgeInterpolationAlgorithm.findByValue(thriftAlgo.getValue());
+ }
}
diff --git
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 322d4c4ab..82c70bed9 100644
---
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -80,6 +80,7 @@ import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
import org.apache.parquet.column.statistics.BinaryStatistics;
import org.apache.parquet.column.statistics.BooleanStatistics;
import org.apache.parquet.column.statistics.DoubleStatistics;
@@ -101,6 +102,8 @@ import org.apache.parquet.format.ConvertedType;
import org.apache.parquet.format.DecimalType;
import org.apache.parquet.format.FieldRepetitionType;
import org.apache.parquet.format.FileMetaData;
+import org.apache.parquet.format.GeographyType;
+import org.apache.parquet.format.GeometryType;
import org.apache.parquet.format.LogicalType;
import org.apache.parquet.format.MapType;
import org.apache.parquet.format.PageHeader;
@@ -1661,4 +1664,141 @@ public class TestParquetMetadataConverter {
assertEquals(repLevelHistogram,
sizeStatistics.getRepetitionLevelHistogram());
assertEquals(defLevelHistogram,
sizeStatistics.getDefinitionLevelHistogram());
}
+
+ @Test
+ public void testGeometryLogicalType() {
+ ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
+
+ // Create schema with geometry type
+ MessageType schema = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.geometryType("EPSG:4326"))
+ .named("geomField")
+ .named("Message");
+
+ // Convert to parquet schema and back
+ List<SchemaElement> parquetSchema =
parquetMetadataConverter.toParquetSchema(schema);
+ MessageType actual =
parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+
+ // Verify the logical type is preserved
+ assertEquals(schema, actual);
+
+ PrimitiveType primitiveType =
actual.getType("geomField").asPrimitiveType();
+ LogicalTypeAnnotation logicalType =
primitiveType.getLogicalTypeAnnotation();
+ assertTrue(logicalType instanceof
LogicalTypeAnnotation.GeometryLogicalTypeAnnotation);
+ assertEquals("EPSG:4326",
((LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) logicalType).getCrs());
+ }
+
+ @Test
+ public void testGeographyLogicalType() {
+ ParquetMetadataConverter parquetMetadataConverter = new
ParquetMetadataConverter();
+
+ // Create schema with geography type
+ MessageType schema = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.geographyType("EPSG:4326",
EdgeInterpolationAlgorithm.SPHERICAL))
+ .named("geogField")
+ .named("Message");
+
+ // Convert to parquet schema and back
+ List<SchemaElement> parquetSchema =
parquetMetadataConverter.toParquetSchema(schema);
+ MessageType actual =
parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+
+ // Verify the logical type is preserved
+ assertEquals(schema, actual);
+
+ PrimitiveType primitiveType =
actual.getType("geogField").asPrimitiveType();
+ LogicalTypeAnnotation logicalType =
primitiveType.getLogicalTypeAnnotation();
+ assertTrue(logicalType instanceof
LogicalTypeAnnotation.GeographyLogicalTypeAnnotation);
+
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyType =
+ (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) logicalType;
+ assertEquals("EPSG:4326", geographyType.getCrs());
+ assertEquals(EdgeInterpolationAlgorithm.SPHERICAL,
geographyType.getAlgorithm());
+ }
+
+ @Test
+ public void testGeometryLogicalTypeWithMissingCrs() {
+ // Create a Geometry logical type without specifying CRS
+ GeometryType geometryType = new GeometryType();
+ LogicalType logicalType = new LogicalType();
+ logicalType.setGEOMETRY(geometryType);
+
+ // Convert to LogicalTypeAnnotation
+ ParquetMetadataConverter converter = new ParquetMetadataConverter();
+ LogicalTypeAnnotation annotation =
converter.getLogicalTypeAnnotation(logicalType);
+
+ // Verify the annotation is created correctly
+ assertNotNull("Geometry annotation should not be null", annotation);
+ assertTrue(
+ "Should be a GeometryLogicalTypeAnnotation",
+ annotation instanceof
LogicalTypeAnnotation.GeometryLogicalTypeAnnotation);
+
+ LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryAnnotation =
+ (LogicalTypeAnnotation.GeometryLogicalTypeAnnotation) annotation;
+
+ // Default behavior should use null or empty CRS
+ assertNull("CRS should be null or empty when not specified",
geometryAnnotation.getCrs());
+ }
+
+ @Test
+ public void testGeographyLogicalTypeWithMissingParameters() {
+ ParquetMetadataConverter converter = new ParquetMetadataConverter();
+
+ // Create a Geography logical type without CRS and algorithm
+ GeographyType geographyType = new GeographyType();
+ LogicalType logicalType = new LogicalType();
+ logicalType.setGEOGRAPHY(geographyType);
+
+ // Convert to LogicalTypeAnnotation
+ LogicalTypeAnnotation annotation =
converter.getLogicalTypeAnnotation(logicalType);
+
+ // Verify the annotation is created correctly
+ assertNotNull("Geography annotation should not be null", annotation);
+ assertTrue(
+ "Should be a GeographyLogicalTypeAnnotation",
+ annotation instanceof
LogicalTypeAnnotation.GeographyLogicalTypeAnnotation);
+
+ // Check that optional parameters are handled correctly
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyAnnotation =
+ (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) annotation;
+ assertNull("CRS should be null when not specified",
geographyAnnotation.getCrs());
+ // An unset algorithm is kept as null rather than replaced by a default such as SPHERICAL
+ assertNull("Algorithm should be null when not specified",
geographyAnnotation.getAlgorithm());
+
+ // Now test the round-trip conversion
+ LogicalType roundTripType = converter.convertToLogicalType(annotation);
+ assertEquals("setField should be GEOGRAPHY",
LogicalType._Fields.GEOGRAPHY, roundTripType.getSetField());
+ assertNull(
+ "Round trip CRS should still be null",
+ roundTripType.getGEOGRAPHY().getCrs());
+ assertNull(
+ "Round trip Algorithm should be null",
+ roundTripType.getGEOGRAPHY().getAlgorithm());
+ }
+
+ @Test
+ public void testGeographyLogicalTypeWithAlgorithmButNoCrs() {
+ // Create a Geography logical type with algorithm but no CRS
+ GeographyType geographyType = new GeographyType();
+
geographyType.setAlgorithm(org.apache.parquet.format.EdgeInterpolationAlgorithm.SPHERICAL);
+ LogicalType logicalType = new LogicalType();
+ logicalType.setGEOGRAPHY(geographyType);
+
+ // Convert to LogicalTypeAnnotation
+ ParquetMetadataConverter converter = new ParquetMetadataConverter();
+ LogicalTypeAnnotation annotation =
converter.getLogicalTypeAnnotation(logicalType);
+
+ // Verify the annotation is created correctly
+ Assert.assertNotNull("Geography annotation should not be null",
annotation);
+ LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyAnnotation =
+ (LogicalTypeAnnotation.GeographyLogicalTypeAnnotation) annotation;
+
+ // CRS should be null/empty but algorithm should be set
+ assertNull("CRS should be null or empty", geographyAnnotation.getCrs());
+ assertEquals(
+ "Algorithm should be SPHERICAL",
+ EdgeInterpolationAlgorithm.SPHERICAL,
+ geographyAnnotation.getAlgorithm());
+ }
}
diff --git a/pom.xml b/pom.xml
index 22436729f..16e5abfe4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -104,6 +104,7 @@
<powermock.version>2.0.9</powermock.version>
<net.openhft.version>0.27ea0</net.openhft.version>
<exec-maven-plugin.version>3.5.0</exec-maven-plugin.version>
+ <jts.version>1.20.0</jts.version>
<!-- parquet-cli dependencies -->
<opencsv.version>2.3</opencsv.version>