kylebarron commented on code in PR #7365: URL: https://github.com/apache/arrow-rs/pull/7365#discussion_r2021571329
########## parquet/tests/arrow_reader/statistics.rs: ########## @@ -2683,3 +2695,70 @@ mod test { Arc::new(array) } } + +// To be removed before merging but a real-world use case +#[cfg(test)] +mod test_geoparquet { + use std::sync::Arc; + + use arrow::array::AsArray; + use arrow::datatypes::Float32Type; + use object_store::aws::AmazonS3Builder; + use parquet::arrow::arrow_reader::ArrowReaderMetadata; + use parquet::arrow::async_reader::ParquetObjectReader; + use parquet::schema::types::ColumnPath; + + use super::*; + + #[tokio::test] + async fn test_struct_geoparquet() { + let store = Arc::new( + AmazonS3Builder::new() + .with_bucket_name("overturemaps-us-west-2") + .with_skip_signature(true) + .with_region("us-west-2") + .build() + .unwrap(), + ); + let path = "release/2025-02-19.0/theme=addresses/type=address/part-00010-e084a2d7-fea9-41e5-a56f-e638a3307547-c000.zstd.parquet"; Review Comment: This is public data from the [Overture project](https://overturemaps.org/) conforming to the [GeoParquet specification](https://github.com/opengeospatial/geoparquet). That defines a struct column, canonically called `bbox`, which [contains the fields `xmin`, `ymin`, `xmax`, `ymax`](https://github.com/opengeospatial/geoparquet/blob/v1.1.0%2Bp1/format-specs/geoparquet.md#covering). This allows for row group filtering for spatially-sorted data by inferring the spatial bounds of each row group from the statistics. In order to use the Arrow statistics converter, which is way simpler than handling the statistics manually, we would need _some way_ to access these struct columns. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org