paleolimbot commented on code in PR #8524:
URL: https://github.com/apache/arrow-rs/pull/8524#discussion_r2411971614
##########
parquet/src/column/writer/encoder.rs:
##########
@@ -121,6 +123,10 @@ pub trait ColumnValueEncoder {
/// will *not* be tracked by the bloom filter as it is empty since. This
should be called once
/// near the end of encoding.
fn flush_bloom_filter(&mut self) -> Option<Sbbf>;
+
+ /// Computes [GeospatialStatistics], if any, and resets internal state
such that any internal
+ /// accumulator is prepared to accumulate statistics for the next column
chunk.
+ fn flush_geospatial_statistics(&mut self) ->
Option<Box<GeospatialStatistics>>;
Review Comment:
This is the lowest-impact place I could find to insert the
GeospatialStatistics calculation. The ColumnMetrics may be a better fit but
would require passing a reference to something through the various write
methods.
##########
parquet/tests/geospatial.rs:
##########
@@ -0,0 +1,291 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[cfg(all(feature = "arrow", feature = "geospatial"))]
+mod test {
+ use std::{iter::zip, sync::Arc};
+
+ use arrow_array::{create_array, ArrayRef, BinaryArray, RecordBatch};
+ use arrow_schema::{DataType, Field, Schema};
+ use bytes::Bytes;
+ use parquet::{
+ arrow::{arrow_writer::ArrowWriterOptions, ArrowWriter},
+ basic::LogicalType,
+ column::reader::ColumnReader,
+ data_type::{ByteArray, ByteArrayType},
+ file::{
+ properties::{EnabledStatistics, WriterProperties},
+ reader::{FileReader, SerializedFileReader},
+ writer::SerializedFileWriter,
+ },
+ geospatial::{bounding_box::BoundingBox,
statistics::GeospatialStatistics},
+ schema::types::{SchemaDescriptor, Type},
+ };
+ use parquet_geospatial::testing::wkb_point_xy;
+
+ fn read_geo_statistics(b: Bytes, column: usize) ->
Vec<Option<GeospatialStatistics>> {
+ let reader = SerializedFileReader::new(b).unwrap();
+ reader
+ .metadata()
+ .row_groups()
+ .iter()
+ .map(|row_group|
row_group.column(column).geo_statistics().cloned())
+ .collect()
+ }
+
+ #[test]
+ fn test_write_statistics_not_arrow() {
+ // Four row groups: one all non-null, one with a null, one with all
nulls,
+ // one with invalid WKB
+ let column_values = vec![
+ [wkb_point_xy(1.0, 2.0), wkb_point_xy(11.0,
12.0)].map(ByteArray::from),
+ ["this is not valid wkb".into(), wkb_point_xy(31.0,
32.0)].map(ByteArray::from),
+ [wkb_point_xy(21.0, 22.0), vec![]].map(ByteArray::from),
+ [ByteArray::new(), ByteArray::new()],
+ ];
+ let def_levels = [[1, 1], [1, 1], [1, 0], [0, 0]];
+
+ // Ensure that nulls are omitted, that completely empty stats are
omitted,
+ // and that invalid WKB results in empty stats
+ let expected_geometry_types = [Some(vec![1]), None, Some(vec![1]),
None];
+ let expected_bounding_box = [
+ Some(BoundingBox::new(1.0, 11.0, 2.0, 12.0)),
+ None,
+ Some(BoundingBox::new(21.0, 21.0, 22.0, 22.0)),
+ None,
+ ];
+
+ let root = parquet_schema_geometry();
+ let schema = SchemaDescriptor::new(root.into());
+ let props = WriterProperties::builder()
+ .set_statistics_enabled(EnabledStatistics::Chunk)
+ .build();
+
+ let mut buf = Vec::with_capacity(1024);
+ let mut writer =
+ SerializedFileWriter::new(&mut buf, schema.root_schema_ptr(),
Arc::new(props)).unwrap();
+
+ for (def_levels, values) in zip(&def_levels, &column_values) {
+ let mut rg = writer.next_row_group().unwrap();
+ let mut col = rg.next_column().unwrap().unwrap();
+ col.typed::<ByteArrayType>()
+ .write_batch(values, Some(def_levels), None)
+ .unwrap();
+ col.close().unwrap();
+ rg.close().unwrap();
+ }
+
+ writer.close().unwrap();
+
+ // Check statistics on file read
+ let all_geo_stats = read_geo_statistics(buf.into(), 0);
+ assert_eq!(all_geo_stats.len(), column_values.len());
+ assert_eq!(expected_geometry_types.len(), column_values.len());
+ assert_eq!(expected_bounding_box.len(), column_values.len());
+
+ for i in 0..column_values.len() {
+ if let Some(geo_stats) = all_geo_stats[i].as_ref() {
+ assert_eq!(
+ geo_stats.geospatial_types(),
+ expected_geometry_types[i].as_ref()
+ );
+ assert_eq!(geo_stats.bounding_box(),
expected_bounding_box[i].as_ref());
+ } else {
+ assert!(expected_geometry_types[i].is_none());
+ assert!(expected_bounding_box[i].is_none());
+ }
+ }
+ }
+
+ #[test]
+ fn test_write_statistics_arrow() {
+ let arrow_schema = Arc::new(Schema::new(vec![Field::new(
+ "geom",
+ DataType::Binary,
+ true,
+ )]));
+
+ // Check the same cases as for the non-arrow writer. These need
checking again because
+ // the arrow writer uses a different encoder where the code path for
skipping nulls
+ // is independent.
+ let column_values = [
+ wkb_array_xy([Some((1.0, 2.0)), Some((11.0, 12.0))]),
+ create_array!(
+ Binary,
+ [
+ "this is not valid wkb".as_bytes(),
+ &wkb_point_xy(31.0, 32.0)
+ ]
+ ),
+ wkb_array_xy([Some((21.0, 22.0)), None]),
+ wkb_array_xy([None, None]),
+ ];
+
+ let expected_geometry_types = [Some(vec![1]), None, Some(vec![1]),
None];
+ let expected_bounding_box = [
+ Some(BoundingBox::new(1.0, 11.0, 2.0, 12.0)),
+ None,
+ Some(BoundingBox::new(21.0, 21.0, 22.0, 22.0)),
+ None,
+ ];
+
+ let root = parquet_schema_geometry();
+ let schema = SchemaDescriptor::new(root.into());
+
+ let props = WriterProperties::builder()
+ .set_statistics_enabled(EnabledStatistics::Chunk)
+ .build();
+ let options = ArrowWriterOptions::new()
+ .with_parquet_schema(schema)
+ .with_properties(props);
+
+ let mut buf = Vec::with_capacity(1024);
+ let mut file_writer =
+ ArrowWriter::try_new_with_options(&mut buf, arrow_schema.clone(),
options).unwrap();
+
+ for values in &column_values {
+ let batch = RecordBatch::try_new(arrow_schema.clone(),
vec![values.clone()]).unwrap();
+ file_writer.write(&batch).unwrap();
+ file_writer.flush().unwrap();
+ }
+
+ file_writer.close().unwrap();
+
+ // Check statistics on file read
+ let all_geo_stats = read_geo_statistics(buf.into(), 0);
+ assert_eq!(all_geo_stats.len(), column_values.len());
+
+ for i in 0..column_values.len() {
+ if let Some(geo_stats) = all_geo_stats[i].as_ref() {
+ assert_eq!(
+ geo_stats.geospatial_types(),
+ expected_geometry_types[i].as_ref()
+ );
+ assert_eq!(geo_stats.bounding_box(),
expected_bounding_box[i].as_ref());
+ } else {
+ assert!(expected_geometry_types[i].is_none());
+ assert!(expected_bounding_box[i].is_none());
+ }
+ }
+ }
+
+ #[test]
+ fn test_roundtrip_statistics_geospatial() {
+ let path = format!(
+ "{}/geospatial/geospatial.parquet",
+ arrow::util::test_util::parquet_test_data(),
+ );
+
+ test_roundtrip_statistics(&path, 2);
+ }
+
+ #[test]
+ fn test_roundtrip_geospatial_with_nan() {
+ let path = format!(
+ "{}/geospatial/geospatial-with-nan.parquet",
+ arrow::util::test_util::parquet_test_data(),
+ );
+
+ test_roundtrip_statistics(&path, 0);
+ }
+
+ #[test]
+ fn test_roundtrip_statistics_crs() {
+ let path = format!(
+ "{}/geospatial/crs-default.parquet",
+ arrow::util::test_util::parquet_test_data(),
+ );
+
+ test_roundtrip_statistics(&path, 0);
+ }
Review Comment:
These are the main tests that I was personally using to ensure that this
implementation matches the one in Arrow C++: they rewrite the geometry columns
for the test files and ensure the statistics are identical.
##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -491,6 +499,18 @@ impl ArrowWriterOptions {
..self
}
}
+
+ /// Explicitly specify the Parquet schema to be used
+ ///
+ /// If omitted (the default), the [ArrowSchemaConverter] is used to
compute the
+ /// Parquet [SchemaDescriptor]. This may be used When the
[SchemaDescriptor] is
+ /// already known or must be calculated using custom logic.
+ pub fn with_parquet_schema(self, schema_descr: SchemaDescriptor) -> Self {
+ Self {
+ schema_descr: Some(schema_descr),
+ ..self
+ }
+ }
Review Comment:
I need this to test the Arrow ByteArrayEncoder, but it would also be what
somebody would need to write Geometry/Geography types generally.
##########
parquet/src/geospatial/accumulator.rs:
##########
@@ -0,0 +1,359 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module provides implementations and traits for building
[GeospatialStatistics]
+
+use std::sync::{Arc, OnceLock};
+
+use crate::{
+ errors::ParquetError, geospatial::statistics::GeospatialStatistics,
+ schema::types::ColumnDescPtr,
+};
+
+/// Create a new [GeoStatsAccumulator] instance
+pub fn new_geo_stats_accumulator(descr: &ColumnDescPtr) -> Box<dyn
GeoStatsAccumulator> {
+ ACCUMULATOR_FACTORY
+ .get_or_init(|| Arc::new(DefaultGeoStatsAccumulatorFactory::default()))
+ .new_accumulator(descr)
+}
+
+/// Initialize the global [GeoStatsAccumulatorFactory]
+///
+/// This may only be done once before any calls to [new_geo_stats_accumulator].
+/// Clients may use this to implement support for builds of the Parquet crate
without
+/// geospatial support or to implement support for Geography bounding using
external
+/// dependencies.
+pub fn init_geo_stats_accumulator_factory(
+ factory: Arc<dyn GeoStatsAccumulatorFactory>,
+) -> Result<(), ParquetError> {
+ if ACCUMULATOR_FACTORY.set(factory).is_err() {
+ Err(ParquetError::General(
+ "Global GeoStatsAccumulatorFactory already set".to_string(),
+ ))
+ } else {
+ Ok(())
+ }
+}
+
+/// Global accumulator factory instance
+static ACCUMULATOR_FACTORY: OnceLock<Arc<dyn GeoStatsAccumulatorFactory>> =
OnceLock::new();
+
+/// Factory for [GeospatialStatistics] accumulators
+///
+/// The GeoStatsAccumulatorFactory is a trait implemented by the global
factory that
+/// generates new instances of a [GeoStatsAccumulator] when constructing new
+/// encoders for a Geometry or Geography logical type.
+pub trait GeoStatsAccumulatorFactory: Send + Sync {
+ /// Create a new [GeoStatsAccumulator] appropriate for the logical type of
a given
+ /// [ColumnDescPtr]
+ fn new_accumulator(&self, descr: &ColumnDescPtr) -> Box<dyn
GeoStatsAccumulator>;
+}
+
+/// Dynamic [GeospatialStatistics] accumulator
+///
+/// The GeoStatsAccumulator is a trait whose implementors can ingest the
(non-null)
+/// elements of a column and return compliant [GeospatialStatistics] (or
`None`).
+/// When built with geospatial support this will usually be the
+/// [ParquetGeoStatsAccumulator]
+pub trait GeoStatsAccumulator: Send {
+ /// Returns true if this instance can return [GeospatialStatistics] from
+ /// [GeoStatsAccumulator::finish].
+ ///
+ /// This method returns false when this crate was built without geospatial
support
+ /// (i.e., from the [VoidGeoStatsAccumulator]) or if the accumulator
encountered
+ /// invalid or unsupported elements for which it cannot compute valid
statistics.
+ fn is_valid(&self) -> bool;
+
+ /// Update with a single slice of WKB-encoded values
+ ///
+ /// This method is infallible; however, in the event of improperly encoded
values,
+ /// implementations must ensure that [GeoStatsAccumulator::finish] returns
`None`.
+ fn update_wkb(&mut self, wkb: &[u8]);
+
+ /// Compute the final statistics and reset internal state
+ fn finish(&mut self) -> Option<Box<GeospatialStatistics>>;
+}
+
+/// Default accumulator for [GeospatialStatistics]
+///
+/// When this crate was built with geospatial support, this factory constructs
a
+/// [ParquetGeoStatsAccumulator] that ensures Geometry columns are written with
+/// statistics when statistics for that column are enabled. Otherwise, this
factory
+/// returns a [VoidGeoStatsAccumulator] that never adds any geospatial
statistics.
+///
+/// Bounding for Geography columns is not currently implemented by
parquet-geospatial
+/// and this factory will always return a [VoidGeoStatsAccumulator].
+#[derive(Debug, Default)]
+pub struct DefaultGeoStatsAccumulatorFactory {}
+
+impl GeoStatsAccumulatorFactory for DefaultGeoStatsAccumulatorFactory {
+ fn new_accumulator(&self, _descr: &ColumnDescPtr) -> Box<dyn
GeoStatsAccumulator> {
+ #[cfg(feature = "geospatial")]
+ if let Some(crate::basic::LogicalType::Geometry) =
_descr.logical_type() {
+ Box::new(ParquetGeoStatsAccumulator::default())
+ } else {
+ Box::new(VoidGeoStatsAccumulator::default())
+ }
+
+ #[cfg(not(feature = "geospatial"))]
+ return Box::new(VoidGeoStatsAccumulator::default());
+ }
+}
+
+/// A [GeoStatsAccumulator] that never computes any [GeospatialStatistics]
+#[derive(Debug, Default)]
+pub struct VoidGeoStatsAccumulator {}
+
+impl GeoStatsAccumulator for VoidGeoStatsAccumulator {
+ fn is_valid(&self) -> bool {
+ false
+ }
+
+ fn update_wkb(&mut self, _wkb: &[u8]) {}
+
+ fn finish(&mut self) -> Option<Box<GeospatialStatistics>> {
+ None
+ }
+}
+
+/// A [GeoStatsAccumulator] that uses the parquet-geospatial crate to compute
Geometry statistics
+///
+/// Note that this accumulator only supports Geometry types and will return
invalid statistics for
+/// non-point Geography input ([GeoStatsAccumulatorFactory::new_accumulator]
is responsible
+/// for ensuring an appropriate accumulator based on the logical type).
+#[cfg(feature = "geospatial")]
+#[derive(Debug)]
+pub struct ParquetGeoStatsAccumulator {
+ bounder: parquet_geospatial::bounding::GeometryBounder,
+ invalid: bool,
+}
+
+#[cfg(feature = "geospatial")]
+impl Default for ParquetGeoStatsAccumulator {
+ fn default() -> Self {
+ Self {
+ bounder: parquet_geospatial::bounding::GeometryBounder::empty(),
+ invalid: false,
+ }
+ }
+}
+
+#[cfg(feature = "geospatial")]
+impl GeoStatsAccumulator for ParquetGeoStatsAccumulator {
+ fn is_valid(&self) -> bool {
+ !self.invalid
+ }
+
+ fn update_wkb(&mut self, wkb: &[u8]) {
+ if self.bounder.update_wkb(wkb).is_err() {
+ self.invalid = true;
+ }
+ }
+
+ fn finish(&mut self) -> Option<Box<GeospatialStatistics>> {
+ use parquet_geospatial::interval::IntervalTrait;
+
+ use crate::geospatial::bounding_box::BoundingBox;
+
+ if self.invalid {
+ // Reset
+ self.invalid = false;
+ self.bounder =
parquet_geospatial::bounding::GeometryBounder::empty();
+ return None;
+ }
+
+ let bbox = if self.bounder.x().is_empty() ||
self.bounder.y().is_empty() {
+ None
+ } else {
+ let mut bbox = BoundingBox::new(
+ self.bounder.x().lo(),
+ self.bounder.x().hi(),
+ self.bounder.y().lo(),
+ self.bounder.y().hi(),
+ );
+
+ if !self.bounder.z().is_empty() {
+ bbox = bbox.with_zrange(self.bounder.z().lo(),
self.bounder.z().hi());
+ }
+
+ if !self.bounder.m().is_empty() {
+ bbox = bbox.with_mrange(self.bounder.m().lo(),
self.bounder.m().hi());
+ }
+
+ Some(bbox)
+ };
+
+ let bounder_geometry_types = self.bounder.geometry_types();
+ let geometry_types = if bounder_geometry_types.is_empty() {
+ None
+ } else {
+ Some(bounder_geometry_types)
+ };
+
+ // Reset
+ self.bounder = parquet_geospatial::bounding::GeometryBounder::empty();
+
+ Some(Box::new(GeospatialStatistics::new(bbox, geometry_types)))
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_void_accumulator() {
+ let mut accumulator = VoidGeoStatsAccumulator {};
+ assert!(!accumulator.is_valid());
+ accumulator.update_wkb(&[0x01, 0x02, 0x03]);
+ assert!(accumulator.finish().is_none());
+ }
+
+ #[cfg(feature = "geospatial")]
+ #[test]
+ fn test_default_accumulator_geospatial_factory() {
+ use std::sync::Arc;
+
+ use parquet_geospatial::testing::wkb_point_xy;
+
+ use crate::{
+ basic::LogicalType,
+ geospatial::bounding_box::BoundingBox,
+ schema::types::{ColumnDescriptor, ColumnPath, Type},
+ };
+
+ // Check that we have a working accumulator for Geometry
+ let parquet_type = Type::primitive_type_builder("geom",
crate::basic::Type::BYTE_ARRAY)
+ .with_logical_type(Some(LogicalType::Geometry))
+ .build()
+ .unwrap();
+ let column_descr =
+ ColumnDescriptor::new(Arc::new(parquet_type), 0, 0,
ColumnPath::new(vec![]));
+ let mut accumulator =
new_geo_stats_accumulator(&Arc::new(column_descr));
+
+ assert!(accumulator.is_valid());
+ accumulator.update_wkb(&wkb_point_xy(1.0, 2.0));
+ accumulator.update_wkb(&wkb_point_xy(11.0, 12.0));
+ let stats = accumulator.finish().unwrap();
+ assert_eq!(
+ stats.bounding_box().unwrap(),
+ &BoundingBox::new(1.0, 11.0, 2.0, 12.0)
+ );
+
+ // Check that we have a void accumulator for Geography
+ let parquet_type = Type::primitive_type_builder("geom",
crate::basic::Type::BYTE_ARRAY)
+ .with_logical_type(Some(LogicalType::Geography))
+ .build()
+ .unwrap();
+ let column_descr =
+ ColumnDescriptor::new(Arc::new(parquet_type), 0, 0,
ColumnPath::new(vec![]));
+ let mut accumulator =
new_geo_stats_accumulator(&Arc::new(column_descr));
+
+ assert!(!accumulator.is_valid());
+ assert!(accumulator.finish().is_none());
+
+ // We should not be able to initialize a global accumulator after
we've initialized at least
+ // one accumulator
+ assert!(init_geo_stats_accumulator_factory(Arc::new(
+ DefaultGeoStatsAccumulatorFactory::default()
+ ))
+ .is_err())
+ }
+
+ #[cfg(feature = "geospatial")]
+ #[test]
+ fn test_geometry_accumulator() {
+ use parquet_geospatial::testing::{wkb_point_xy, wkb_point_xyzm};
+
+ use crate::geospatial::bounding_box::BoundingBox;
+
+ let mut accumulator = ParquetGeoStatsAccumulator::default();
Review Comment:
These cases are also tested by way of ensuring our rewrite of the test files
matches, but this test is more explicit and easier to debug.
##########
parquet/src/geospatial/accumulator.rs:
##########
@@ -0,0 +1,359 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module provides implementations and traits for building
[GeospatialStatistics]
+
+use std::sync::{Arc, OnceLock};
+
+use crate::{
+ errors::ParquetError, geospatial::statistics::GeospatialStatistics,
+ schema::types::ColumnDescPtr,
+};
+
+/// Create a new [GeoStatsAccumulator] instance
+pub fn new_geo_stats_accumulator(descr: &ColumnDescPtr) -> Box<dyn
GeoStatsAccumulator> {
+ ACCUMULATOR_FACTORY
+ .get_or_init(|| Arc::new(DefaultGeoStatsAccumulatorFactory::default()))
+ .new_accumulator(descr)
+}
Review Comment:
This is the only part that is used outside this file. It is my attempt to
consolidate any implementation detail we have here with respect to whether this
crate was or wasn't built with geospatial support.
##########
parquet/src/geospatial/accumulator.rs:
##########
@@ -0,0 +1,359 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module provides implementations and traits for building
[GeospatialStatistics]
+
+use std::sync::{Arc, OnceLock};
+
+use crate::{
+ errors::ParquetError, geospatial::statistics::GeospatialStatistics,
+ schema::types::ColumnDescPtr,
+};
+
+/// Create a new [GeoStatsAccumulator] instance
+pub fn new_geo_stats_accumulator(descr: &ColumnDescPtr) -> Box<dyn
GeoStatsAccumulator> {
+ ACCUMULATOR_FACTORY
+ .get_or_init(|| Arc::new(DefaultGeoStatsAccumulatorFactory::default()))
+ .new_accumulator(descr)
+}
+
+/// Initialize the global [GeoStatsAccumulatorFactory]
+///
+/// This may only be done once before any calls to [new_geo_stats_accumulator].
+/// Clients may use this to implement support for builds of the Parquet crate
without
+/// geospatial support or to implement support for Geography bounding using
external
+/// dependencies.
+pub fn init_geo_stats_accumulator_factory(
+ factory: Arc<dyn GeoStatsAccumulatorFactory>,
+) -> Result<(), ParquetError> {
+ if ACCUMULATOR_FACTORY.set(factory).is_err() {
+ Err(ParquetError::General(
+ "Global GeoStatsAccumulatorFactory already set".to_string(),
+ ))
+ } else {
+ Ok(())
+ }
+}
Review Comment:
I can take this out if it's too much...this is what I would need in SedonaDB
to write files with geospatial stats for Geometry and Geography types (I am not
sure if we can enable the geospatial feature on Parquet two levels of
dependency deep). We also have the C++ dependencies there to write stats for
Geography...while I'd love to rewrite that in Rust and put it in
parquet-geospatial, I don't have time to do that today and the C++ dependency
to do that (s2geometry) is kind of insane to build inside of a Rust crate.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]