paleolimbot commented on code in PR #2971:
URL: https://github.com/apache/parquet-java/pull/2971#discussion_r2040431308
##########
parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java:
##########
@@ -775,6 +811,36 @@ public static Statistics toParquetStatistics(
return formatStats;
}
+ private static BoundingBox
toParquetBoundingBox(org.apache.parquet.column.statistics.geometry.BoundingBox
bbox) {
+ // Check if any of the required bounding box is valid.
+ if (!bbox.isValid() || bbox.isEmpty()) {
+ // According to the Thrift-generated class, these fields are marked as
required and must be set explicitly.
+ // If any of them is NaN, it indicates the bounding box is invalid or
uninitialized,
+ // so we return null to avoid creating a malformed BoundingBox object
that would later fail serialization
+ // or validation.
+ return null;
+ }
+
+ // Now we can safely create the BoundingBox object
+ BoundingBox formatBbox = new BoundingBox();
+ formatBbox.setXmin(bbox.getXMin());
+ formatBbox.setXmax(bbox.getXMax());
+ formatBbox.setYmin(bbox.getYMin());
+ formatBbox.setYmax(bbox.getYMax());
+
+ if (!Double.isNaN(bbox.getZMin()) && !Double.isNaN(bbox.getZMax())) {
Review Comment:
```suggestion
if (Double.isInfinite(zMin) && Double.isInfinite(zMax)) {
```
##########
parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java:
##########
@@ -880,6 +946,63 @@ public org.apache.parquet.column.statistics.Statistics
fromParquetStatistics(
return fromParquetStatisticsInternal(createdBy, statistics, type,
expectedOrder);
}
+ private GeospatialStatistics toParquetGeospatialStatistics(
+ org.apache.parquet.column.statistics.geometry.GeospatialStatistics
geospatialStatistics) {
+ if (geospatialStatistics == null) {
+ return null;
+ }
+
+ GeospatialStatistics formatStats = new GeospatialStatistics();
+
+ if (geospatialStatistics.getBoundingBox() != null) {
+
formatStats.setBbox(toParquetBoundingBox(geospatialStatistics.getBoundingBox()));
Review Comment:
```suggestion
// If the bounding box is not valid, do not serialize any
GeospatialStatistics to Thrift
if (!geospatialStatistics.getBoundingBox().isValid()) {
return null;
}
formatStats.setBbox(toParquetBoundingBox(geospatialStatistics.getBoundingBox()));
```
##########
parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java:
##########
@@ -775,6 +811,36 @@ public static Statistics toParquetStatistics(
return formatStats;
}
+ private static BoundingBox
toParquetBoundingBox(org.apache.parquet.column.statistics.geometry.BoundingBox
bbox) {
+ // Check if any of the required bounding box is valid.
+ if (!bbox.isValid() || bbox.isEmpty()) {
+ // According to the Thrift-generated class, these fields are marked as
required and must be set explicitly.
+ // If any of them is NaN, it indicates the bounding box is invalid or
uninitialized,
+ // so we return null to avoid creating a malformed BoundingBox object
that would later fail serialization
+ // or validation.
+ return null;
+ }
+
+ // Now we can safely create the BoundingBox object
+ BoundingBox formatBbox = new BoundingBox();
+ formatBbox.setXmin(bbox.getXMin());
+ formatBbox.setXmax(bbox.getXMax());
+ formatBbox.setYmin(bbox.getYMin());
+ formatBbox.setYmax(bbox.getYMax());
+
+ if (!Double.isNaN(bbox.getZMin()) && !Double.isNaN(bbox.getZMax())) {
+ formatBbox.setZmin(bbox.getZMin());
+ formatBbox.setZmax(bbox.getZMax());
+ }
+
+ if (!Double.isNaN(bbox.getMMin()) && !Double.isNaN(bbox.getMMax())) {
Review Comment:
```suggestion
if (Double.isInfinite(mMin) && Double.isInfinite(mMax)) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]