kylebarron commented on code in PR #8225:
URL: https://github.com/apache/arrow-rs/pull/8225#discussion_r2299561358


##########
parquet/src/geospatial/statistics.rs:
##########
@@ -0,0 +1,290 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Geospatial statistics for Parquet files.
+//!
+//! This module provides functionality for working with geospatial statistics in Parquet files.
+//! It includes support for bounding boxes and geospatial statistics in column chunk metadata.
+
+use crate::format as parquet;
+use crate::format::GeospatialStatistics as TGeospatialStatistics;
+use crate::errors::Result;
+
+// ----------------------------------------------------------------------
+// Bounding Box
+
+/// Represents a 2D/3D bounding box with optional M-coordinate support.
+/// 
+/// A bounding box defines the spatial extent of geospatial data by specifying
+/// minimum and maximum coordinates along each axis. This struct supports:
+/// - 2D coordinates (x, y) which are always required
+/// - Optional 3D coordinates (z) for elevation/height data
+/// - Optional M-coordinates for measured values (e.g., distance along a route)
+/// 
+/// # Examples
+/// 
+/// ```
+/// use parquet::geospatial::statistics::BoundingBox;
+/// 
+/// // 2D bounding box
+/// let bbox_2d = BoundingBox::new(0.0, 0.0, 100.0, 100.0, None, None, None, None);
+///
+/// // 3D bounding box with elevation
+/// let bbox_3d = BoundingBox::new(0.0, 0.0, 100.0, 100.0, Some(0.0), Some(1000.0), None, None);
+/// ```
+// Only `PartialEq` is derived (not `Eq`): the coordinates are `f64`, which
+// does not implement `Eq`.
+#[derive(Clone, Debug, PartialEq)]
+pub struct BoundingBox {
+    // All fields are private. Field order is min corner (x, y) first, then
+    // max corner (x, y), followed by the optional Z and M ranges.
+    /// Minimum X coordinate (longitude or easting)
+    xmin: f64,
+    /// Minimum Y coordinate (latitude or northing)
+    ymin: f64,
+    /// Maximum X coordinate (longitude or easting)
+    xmax: f64,
+    /// Maximum Y coordinate (latitude or northing)
+    ymax: f64,
+    // Each Z/M bound is an independent `Option`; the type itself does not
+    // force the min and max of a dimension to be present together.
+    /// Minimum Z coordinate (elevation/height), if present
+    zmin: Option<f64>,
+    /// Maximum Z coordinate (elevation/height), if present
+    zmax: Option<f64>,
+    /// Minimum M coordinate (measured value), if present
+    mmin: Option<f64>,
+    /// Maximum M coordinate (measured value), if present
+    mmax: Option<f64>,
+}
+
+impl BoundingBox {
+    /// Creates a new bounding box from explicit per-axis minimum and maximum values.
+    ///
+    /// The values are stored exactly as given: no validation, normalisation or
+    /// reordering is performed (for example, `xmin <= xmax` is not checked, and
+    /// `zmin`/`zmax` may be supplied independently of each other).
+    ///
+    /// Note the argument order: the minimum corner (`xmin`, `ymin`) comes first,
+    /// followed by the maximum corner (`xmax`, `ymax`).
+    ///
+    /// # Arguments
+    ///
+    /// * `xmin` - Minimum X coordinate
+    /// * `ymin` - Minimum Y coordinate
+    /// * `xmax` - Maximum X coordinate
+    /// * `ymax` - Maximum Y coordinate
+    /// * `zmin` - Optional minimum Z coordinate
+    /// * `zmax` - Optional maximum Z coordinate
+    /// * `mmin` - Optional minimum M coordinate
+    /// * `mmax` - Optional maximum M coordinate
+    ///
+    /// # Returns
+    ///
+    /// A new `BoundingBox` instance holding the specified coordinates.
+    // The constructor mirrors the struct's eight fields one-to-one, so it
+    // exceeds clippy's default argument-count threshold by design.
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        xmin: f64,
+        ymin: f64,
+        xmax: f64,
+        ymax: f64,
+        zmin: Option<f64>,
+        zmax: Option<f64>,
+        mmin: Option<f64>,
+        mmax: Option<f64>,
+    ) -> Self {
+        Self {
+            xmin,
+            ymin,
+            xmax,
+            ymax,
+            zmin,
+            zmax,
+            mmin,
+            mmax,
+        }
+    }
+}
+
+// ----------------------------------------------------------------------
+// Geospatial Statistics
+
+/// Represents geospatial statistics for a Parquet column or dataset.
+/// 
+/// This struct contains metadata about the spatial characteristics of geospatial data,
+/// including bounding box information and the types of geospatial geometries present.
+/// It's used to optimize spatial queries and provide spatial context for data analysis.
+/// 
+/// # Examples
+/// 
+/// ```
+/// use parquet::geospatial::statistics::{GeospatialStatistics, BoundingBox};
+/// 
+/// // Empty statistics
+/// let empty_stats = GeospatialStatistics::new_empty();
+/// 
+/// // Statistics with bounding box
+/// let bbox = BoundingBox::new(0.0, 0.0, 100.0, 100.0, None, None, None, None);
+/// let stats = GeospatialStatistics::new(Some(bbox), Some(vec![1, 2, 3]));
+/// ```
+// Only `PartialEq` is derived (not `Eq`) because the nested `BoundingBox`
+// holds `f64` coordinates.
+#[derive(Clone, Debug, PartialEq)]
+pub struct GeospatialStatistics {
+    /// Optional bounding box encompassing all geospatial data.
+    ///
+    /// `None` means no bounding box is recorded.
+    bbox: Option<BoundingBox>,
+    /// Optional list of geospatial geometry type identifiers.
+    ///
+    /// `None` means no type information is recorded.
+    ///
+    /// Common values include:
+    /// - 1: Point
+    /// - 2: LineString
+    /// - 3: Polygon
+    /// - 4: MultiPoint
+    /// - 5: MultiLineString
+    /// - 6: MultiPolygon
+    /// - 7: GeometryCollection
+    // NOTE(review): these look like WKB geometry type codes; confirm against
+    // the Parquet geospatial specification, and document whether Z/M variants
+    // use offset codes, before relying on this list being exhaustive.
+    geospatial_types: Option<Vec<i32>>,
+}
+
+impl GeospatialStatistics {
+    /// Creates a new empty geospatial statistics instance.
+    /// 
+    /// This is useful when no spatial information is available or when
+    /// creating a placeholder for statistics that will be populated later.
+    /// 
+    /// # Returns
+    /// 
+    /// A `GeospatialStatistics` instance with no bounding box or type information.
+    pub fn new_empty() -> Self {

Review Comment:
   Maybe cleaner to remove this and just implement `Default`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to