wgtmac commented on code in PR #3200:
URL: https://github.com/apache/parquet-java/pull/3200#discussion_r2061953783
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -155,6 +156,40 @@ protected LogicalTypeAnnotation fromString(List<String>
params) {
return float16Type();
}
},
+ GEOMETRY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 1) {
+ throw new RuntimeException("Expecting only crs for geometry logical
type, got " + params.size());
+ }
+ if (params.size() == 1) {
+ String crs = params.get(0);
+ return geometryType(crs);
+ } else {
+ return geometryType();
+ }
Review Comment:
```suggestion
String crs = params.isEmpty() ? null : params.get(0);
return geometryType(crs);
```
##########
parquet-column/src/main/java/org/apache/parquet/column/schema/EdgeInterpolationAlgorithm.java:
##########
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.schema;
+
+/**
+ * Edge interpolation algorithm for Geography logical type
+ */
+public enum EdgeInterpolationAlgorithm {
+ SPHERICAL(0),
+ VINCENTY(1),
+ THOMAS(2),
+ ANDOYER(3),
+ KARNEY(4);
+
+ private final int value;
+
+ private EdgeInterpolationAlgorithm(int value) {
+ this.value = value;
+ }
+
+ /**
+ * Get the integer value of this enum value, as defined in the Thrift IDL.
+ */
+ public int getValue() {
+ return value;
+ }
+
+ /**
+ * Find the enum type by its integer value, as defined in the Thrift IDL.
+ * @return null if the value is not found.
+ */
+ public static EdgeInterpolationAlgorithm findByValue(int value) {
+ switch (value) {
+ case 0:
+ return SPHERICAL;
+ case 1:
+ return VINCENTY;
+ case 2:
+ return THOMAS;
+ case 3:
+ return ANDOYER;
+ case 4:
+ return KARNEY;
+ default:
+ return null;
Review Comment:
Should we throw unsupported exception?
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -334,6 +369,22 @@ public static Float16LogicalTypeAnnotation float16Type() {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static GeometryLogicalTypeAnnotation geometryType(String crs) {
+ return new GeometryLogicalTypeAnnotation(crs);
+ }
+
+ public static GeometryLogicalTypeAnnotation geometryType() {
+ return new GeometryLogicalTypeAnnotation("OGC:CRS84");
+ }
Review Comment:
Maybe we don't have to provide `geometryType()` at all?
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -1183,6 +1234,136 @@ public boolean equals(Object obj) {
}
}
+ public static class GeometryLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+
+ private GeometryLogicalTypeAnnotation(String crs) {
+ this.crs = crs;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOMETRY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("(").append(crs != null && !crs.isEmpty() ? crs :
"").append(")");
+ return sb.toString();
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof GeometryLogicalTypeAnnotation)) {
+ return false;
+ }
+ GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation)
obj;
+ return Objects.equals(crs, other.crs);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(crs);
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.WKB_STRINGIFIER;
+ }
+ }
+
+ public static class GeographyLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+ private final EdgeInterpolationAlgorithm edgeAlgorithm;
+
+ private GeographyLogicalTypeAnnotation(String crs,
EdgeInterpolationAlgorithm edgeAlgorithm) {
+ this.crs = crs;
+ this.edgeAlgorithm = edgeAlgorithm;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOGRAPHY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ StringBuilder sb = new StringBuilder();
+
+ boolean hasCrs = crs != null && !crs.isEmpty();
+ boolean hasEdgeAlgorithm = edgeAlgorithm != null;
+
+ if (!hasCrs && !hasEdgeAlgorithm) {
+ return ""; // Return empty string when both are empty
+ }
+
+ sb.append("(");
+ if (hasCrs) {
+ sb.append(crs);
+ }
+ if (hasEdgeAlgorithm) {
+ if (hasCrs) sb.append(",");
+ sb.append(edgeAlgorithm);
+ }
+ sb.append(")");
+ return sb.toString();
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ public EdgeInterpolationAlgorithm getEdgeAlgorithm() {
Review Comment:
```suggestion
public EdgeInterpolationAlgorithm getAlgorithm() {
```
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -155,6 +156,40 @@ protected LogicalTypeAnnotation fromString(List<String>
params) {
return float16Type();
}
},
+ GEOMETRY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 1) {
+ throw new RuntimeException("Expecting only crs for geometry logical
type, got " + params.size());
+ }
+ if (params.size() == 1) {
+ String crs = params.get(0);
+ return geometryType(crs);
+ } else {
+ return geometryType();
+ }
+ }
+ },
+ GEOGRAPHY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 2) {
+ throw new RuntimeException(
+ "Expecting at most 2 parameters for geography logical type (crs
and edgeAlgorithm), got "
Review Comment:
```suggestion
"Expecting at most 2 parameters for geography logical type
(crs and edge algorithm), got "
```
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -334,6 +369,22 @@ public static Float16LogicalTypeAnnotation float16Type() {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static GeometryLogicalTypeAnnotation geometryType(String crs) {
+ return new GeometryLogicalTypeAnnotation(crs);
+ }
+
+ public static GeometryLogicalTypeAnnotation geometryType() {
+ return new GeometryLogicalTypeAnnotation("OGC:CRS84");
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType(String crs,
EdgeInterpolationAlgorithm edgeAlgorithm) {
+ return new GeographyLogicalTypeAnnotation(crs, edgeAlgorithm);
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType() {
+ return new GeographyLogicalTypeAnnotation("OGC:CRS84", null);
Review Comment:
```suggestion
return new GeographyLogicalTypeAnnotation(null, null);
```
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -1183,6 +1234,136 @@ public boolean equals(Object obj) {
}
}
+ public static class GeometryLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+
+ private GeometryLogicalTypeAnnotation(String crs) {
+ this.crs = crs;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOMETRY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("(").append(crs != null && !crs.isEmpty() ? crs :
"").append(")");
+ return sb.toString();
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof GeometryLogicalTypeAnnotation)) {
+ return false;
+ }
+ GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation)
obj;
+ return Objects.equals(crs, other.crs);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(crs);
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.WKB_STRINGIFIER;
+ }
+ }
+
+ public static class GeographyLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+ private final EdgeInterpolationAlgorithm edgeAlgorithm;
Review Comment:
```suggestion
private final EdgeInterpolationAlgorithm algorithm;
```
What about making it shorter and consistent with the spec? I think its
meaning is clear in this context.
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -334,6 +369,22 @@ public static Float16LogicalTypeAnnotation float16Type() {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static GeometryLogicalTypeAnnotation geometryType(String crs) {
+ return new GeometryLogicalTypeAnnotation(crs);
+ }
+
+ public static GeometryLogicalTypeAnnotation geometryType() {
+ return new GeometryLogicalTypeAnnotation("OGC:CRS84");
+ }
Review Comment:
```suggestion
public static GeometryLogicalTypeAnnotation geometryType() {
return new GeometryLogicalTypeAnnotation(null);
}
```
Setting the default to null may save some bytes in the thrift message
because we don't need to serialize crs. We have to write the string if the
default is `"OGC:CRS84"`.
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -1183,6 +1234,136 @@ public boolean equals(Object obj) {
}
}
+ public static class GeometryLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+
+ private GeometryLogicalTypeAnnotation(String crs) {
+ this.crs = crs;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOMETRY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("(").append(crs != null && !crs.isEmpty() ? crs :
"").append(")");
+ return sb.toString();
Review Comment:
```suggestion
if (crs == null || crs.isEmpty()) {
return "";
}
return String.format("(%s)", crs);
```
This is more readable.
##########
parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java:
##########
@@ -442,6 +445,22 @@ private void appendHex(byte[] array, int offset, int
length, StringBuilder build
}
};
+ static final PrimitiveStringifier WKB_STRINGIFIER = new
BinaryStringifierBase("WKB_STRINGIFIER") {
+
+ @Override
+ String stringifyNotNull(Binary value) {
+
+ Geometry geometry;
+ try {
+ WKBReader reader = new WKBReader();
+ geometry = reader.read(value.getBytesUnsafe());
Review Comment:
```suggestion
try {
WKBReader reader = new WKBReader();
Geometry geometry = reader.read(value.getBytesUnsafe());
```
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -155,6 +156,40 @@ protected LogicalTypeAnnotation fromString(List<String>
params) {
return float16Type();
}
},
+ GEOMETRY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 1) {
+ throw new RuntimeException("Expecting only crs for geometry logical
type, got " + params.size());
+ }
+ if (params.size() == 1) {
+ String crs = params.get(0);
+ return geometryType(crs);
+ } else {
+ return geometryType();
+ }
+ }
+ },
+ GEOGRAPHY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 2) {
+ throw new RuntimeException(
+ "Expecting at most 2 parameters for geography logical type (crs
and edgeAlgorithm), got "
+ + params.size());
+ }
+ if (params.size() == 1) {
+ String crs = params.get(0);
+ return geographyType(crs, null);
+ }
+ if (params.size() == 2) {
+ String crs = params.get(0);
+ String edgeAlgorithm = params.get(1);
+ return geographyType(crs,
EdgeInterpolationAlgorithm.valueOf(edgeAlgorithm));
+ }
+ return geographyType();
Review Comment:
```suggestion
String crs = params.size() > 0 ? params.get(0) : null;
EdgeInterpolationAlgorithm algo = params.size() > 1 ?
EdgeInterpolationAlgorithm.valueOf(params.get(1)) : null;
return geographyType(crs, algo);
```
##########
parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java:
##########
@@ -1183,6 +1211,12 @@ LogicalTypeAnnotation
getLogicalTypeAnnotation(LogicalType type) {
return LogicalTypeAnnotation.uuidType();
case FLOAT16:
return LogicalTypeAnnotation.float16Type();
+ case GEOMETRY:
+ GeometryType geometry = type.getGEOMETRY();
+ return LogicalTypeAnnotation.geometryType(geometry.getCrs());
+ case GEOGRAPHY:
+ GeographyType geography = type.getGEOGRAPHY();
+ return LogicalTypeAnnotation.geographyType(geography.getCrs(), null);
Review Comment:
Why not getting the edge algo?
##########
parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java:
##########
@@ -520,6 +523,31 @@ public Optional<LogicalType>
visit(LogicalTypeAnnotation.IntervalLogicalTypeAnno
public Optional<LogicalType>
visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) {
return of(LogicalTypes.VARIANT(variantLogicalType.getSpecVersion()));
}
+
+ @Override
+ public Optional<LogicalType>
visit(LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
+ GeometryType geometryType = new GeometryType();
+ if (geometryLogicalType.getCrs() != null) {
+ geometryType.setCrs(geometryLogicalType.getCrs());
+ }
+ return of(LogicalType.GEOMETRY(geometryType));
+ }
+
+ @Override
+ public Optional<LogicalType>
visit(LogicalTypeAnnotation.GeographyLogicalTypeAnnotation
geographyLogicalType) {
+ GeographyType geographyType = new GeographyType();
+ if (geographyLogicalType.getCrs() != null) {
+ geographyType.setCrs(geographyLogicalType.getCrs());
+ }
+ if (geographyLogicalType.getEdgeAlgorithm() != null) {
+ EdgeInterpolationAlgorithm algorithm =
+
EdgeInterpolationAlgorithm.valueOf(String.valueOf(geographyLogicalType.getEdgeAlgorithm()));
+ if (algorithm != null) {
Review Comment:
It would be good to throw for an unknown value. Otherwise we may silently
write an a future new algorithm to null which incorrectly defaults to the
spherical algorithm.
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -334,6 +369,22 @@ public static Float16LogicalTypeAnnotation float16Type() {
return Float16LogicalTypeAnnotation.INSTANCE;
}
+ public static GeometryLogicalTypeAnnotation geometryType(String crs) {
+ return new GeometryLogicalTypeAnnotation(crs);
+ }
+
+ public static GeometryLogicalTypeAnnotation geometryType() {
+ return new GeometryLogicalTypeAnnotation("OGC:CRS84");
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType(String crs,
EdgeInterpolationAlgorithm edgeAlgorithm) {
+ return new GeographyLogicalTypeAnnotation(crs, edgeAlgorithm);
+ }
+
+ public static GeographyLogicalTypeAnnotation geographyType() {
+ return new GeographyLogicalTypeAnnotation("OGC:CRS84", null);
Review Comment:
It would be good to set both to null or explicit values for consistency.
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -155,6 +156,40 @@ protected LogicalTypeAnnotation fromString(List<String>
params) {
return float16Type();
}
},
+ GEOMETRY {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ if (params.size() > 1) {
+ throw new RuntimeException("Expecting only crs for geometry logical
type, got " + params.size());
Review Comment:
```suggestion
throw new RuntimeException("Expecting at most 1 parameter for
geometry logical type (crs), got " + params.size());
```
##########
parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java:
##########
@@ -1183,6 +1234,136 @@ public boolean equals(Object obj) {
}
}
+ public static class GeometryLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+
+ private GeometryLogicalTypeAnnotation(String crs) {
+ this.crs = crs;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOMETRY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("(").append(crs != null && !crs.isEmpty() ? crs :
"").append(")");
+ return sb.toString();
+ }
+
+ public String getCrs() {
+ return crs;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof GeometryLogicalTypeAnnotation)) {
+ return false;
+ }
+ GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation)
obj;
+ return Objects.equals(crs, other.crs);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(crs);
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.WKB_STRINGIFIER;
+ }
+ }
+
+ public static class GeographyLogicalTypeAnnotation extends
LogicalTypeAnnotation {
+ private final String crs;
+ private final EdgeInterpolationAlgorithm edgeAlgorithm;
+
+ private GeographyLogicalTypeAnnotation(String crs,
EdgeInterpolationAlgorithm edgeAlgorithm) {
+ this.crs = crs;
+ this.edgeAlgorithm = edgeAlgorithm;
+ }
+
+ @Override
+ @Deprecated
+ public OriginalType toOriginalType() {
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.GEOGRAPHY;
+ }
+
+ @Override
+ protected String typeParametersAsString() {
+ StringBuilder sb = new StringBuilder();
+
+ boolean hasCrs = crs != null && !crs.isEmpty();
+ boolean hasEdgeAlgorithm = edgeAlgorithm != null;
+
+ if (!hasCrs && !hasEdgeAlgorithm) {
+ return ""; // Return empty string when both are empty
+ }
+
+ sb.append("(");
+ if (hasCrs) {
+ sb.append(crs);
+ }
+ if (hasEdgeAlgorithm) {
+ if (hasCrs) sb.append(",");
+ sb.append(edgeAlgorithm);
+ }
+ sb.append(")");
+ return sb.toString();
Review Comment:
```suggestion
boolean hasCrs = crs != null && !crs.isEmpty();
boolean hasAlgo = algorithm != null;
if (!hasCrs && !hasAlgo) {
return "";
}
return String.format("(%s,%s)", hasCrs ? crs : DEFAULT_CRS, hasAlgo ?
algorithm : DEFAULT_ALGO);
```
Let's be more readable. This requires us to define following variables
somewhere at the top:
```
public static final String DEFAULT_CRS = "OGC:CRS84";
public static final EdgeInterpolationAlgorithm DEFAULT_ALGO =
EdgeInterpolationAlgorithm.SPHERICAL;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]