This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new aad788b9 fix(rust/sedona-functions): Ensure WkbView types can be 
aggregated using the groups accumulator for ST_Envelope_Agg (#656)
aad788b9 is described below

commit aad788b92bfb7808aec8e1db9f13ea37bf6642bb
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Feb 25 12:57:13 2026 -0600

    fix(rust/sedona-functions): Ensure WkbView types can be aggregated using 
the groups accumulator for ST_Envelope_Agg (#656)
---
 python/sedonadb/tests/functions/test_aggregate.py | 34 ++++++++++++++-
 rust/sedona-functions/src/st_envelope_agg.rs      | 52 ++++++++++++++++-------
 rust/sedona-schema/src/datatypes.rs               |  8 ++++
 3 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/python/sedonadb/tests/functions/test_aggregate.py 
b/python/sedonadb/tests/functions/test_aggregate.py
index be6aca9c..3c8cbd88 100644
--- a/python/sedonadb/tests/functions/test_aggregate.py
+++ b/python/sedonadb/tests/functions/test_aggregate.py
@@ -15,9 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import geopandas
+import geopandas.testing
 import pytest
 import shapely
-from sedonadb.testing import PostGIS, SedonaDB
+from sedonadb.testing import PostGIS, SedonaDB, skip_if_not_exists
 
 
 # Aggregate functions don't have a suffix in PostGIS
@@ -136,6 +138,36 @@ def test_st_envelope_agg_many_groups(eng, con):
     eng.assert_result(result, expected)
 
 
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_envelope_nontrivial_input(eng, geoarrow_data):
+    path = geoarrow_data / "ns-water" / "files" / 
"ns-water_water-point_geo.parquet"
+    eng = eng.create_or_skip()
+    skip_if_not_exists(path)
+
+    df_points_geopandas = geopandas.read_parquet(path)
+    expected = (
+        df_points_geopandas.groupby(df_points_geopandas.FEAT_CODE)["geometry"]
+        .apply(
+            lambda group: shapely.Point(*group.total_bounds[:2])
+            if len(group) == 1
+            else shapely.box(*group.total_bounds)
+        )
+        .reset_index()
+    ).set_crs(df_points_geopandas.crs)
+
+    eng.create_table_parquet("pts", path)
+    result = eng.execute_and_collect(f"""
+        SELECT  "FEAT_CODE", {call_st_envelope_agg(eng, "geometry")} AS 
geometry
+        FROM pts
+        GROUP BY "FEAT_CODE"
+        ORDER BY "FEAT_CODE"
+    """)
+
+    # This CRS is too complicated to check roundtripping through PostGIS
+    df = eng.result_to_pandas(result)
+    geopandas.testing.assert_geodataframe_equal(df, expected, check_crs=False)
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 def test_st_collect_points(eng):
     eng = eng.create_or_skip()
diff --git a/rust/sedona-functions/src/st_envelope_agg.rs 
b/rust/sedona-functions/src/st_envelope_agg.rs
index ad6efacf..753d6445 100644
--- a/rust/sedona-functions/src/st_envelope_agg.rs
+++ b/rust/sedona-functions/src/st_envelope_agg.rs
@@ -202,6 +202,7 @@ impl BoundsGroupsAccumulator2D {
         group_indices: &[usize],
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
+        input_type: SedonaType,
     ) -> Result<()> {
         // Check some of our assumptions about how this will be called
         debug_assert_eq!(self.offset, 0);
@@ -211,7 +212,7 @@ impl BoundsGroupsAccumulator2D {
             debug_assert_eq!(values[0].len(), filter.len());
         }
 
-        let arg_types = [self.input_type.clone()];
+        let arg_types = [input_type.clone()];
         let args = [ColumnarValue::Array(values[0].clone())];
         let executor = WkbExecutor::new(&arg_types, &args);
         self.xs.resize(total_num_groups, Interval::empty());
@@ -300,7 +301,13 @@ impl GroupsAccumulator for BoundsGroupsAccumulator2D {
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
-        self.execute_update(values, group_indices, opt_filter, 
total_num_groups)
+        self.execute_update(
+            values,
+            group_indices,
+            opt_filter,
+            total_num_groups,
+            self.input_type.clone(),
+        )
     }
 
     fn state(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> {
@@ -314,8 +321,15 @@ impl GroupsAccumulator for BoundsGroupsAccumulator2D {
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
-        // In this case, our state is identical to our input values
-        self.execute_update(values, group_indices, opt_filter, 
total_num_groups)
+        // In this case, our state is identical to our input values except our 
geometry
+        // representation is always WKB_GEOMETRY.
+        self.execute_update(
+            values,
+            group_indices,
+            opt_filter,
+            total_num_groups,
+            WKB_GEOMETRY,
+        )
     }
 
     fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
@@ -333,7 +347,9 @@ impl GroupsAccumulator for BoundsGroupsAccumulator2D {
 mod test {
     use datafusion_expr::AggregateUDF;
     use rstest::rstest;
-    use sedona_schema::datatypes::{WKB_GEOMETRY_ITEM_CRS, WKB_VIEW_GEOMETRY};
+    use sedona_schema::datatypes::{
+        WKB_GEOMETRY_ITEM_CRS, WKB_VIEW_GEOMETRY, WKB_VIEW_GEOMETRY_ITEM_CRS,
+    };
     use sedona_testing::{
         compare::{assert_array_equal, assert_scalar_equal, 
assert_scalar_equal_wkb_geometry},
         create::{create_array, create_scalar},
@@ -400,36 +416,42 @@ mod test {
         );
     }
 
-    #[test]
-    fn udf_invoke_item_crs() {
-        let sedona_type = WKB_GEOMETRY_ITEM_CRS.clone();
+    #[rstest]
+    fn udf_invoke_item_crs(
+        #[values(WKB_GEOMETRY_ITEM_CRS.clone(), 
WKB_VIEW_GEOMETRY_ITEM_CRS.clone())]
+        sedona_type: SedonaType,
+    ) {
         let tester =
             AggregateUdfTester::new(st_envelope_agg_udf().into(), 
vec![sedona_type.clone()]);
-        assert_eq!(tester.return_type().unwrap(), sedona_type.clone());
+        assert_eq!(tester.return_type().unwrap(), 
WKB_GEOMETRY_ITEM_CRS.clone());
 
         let batches = vec![
             vec![Some("POINT (0 1)"), None, Some("POINT (2 3)")],
             vec![Some("POINT (4 5)"), None, Some("POINT (6 7)")],
         ];
-        let expected = create_scalar(Some("POLYGON((0 1, 0 7, 6 7, 6 1, 0 
1))"), &sedona_type);
+        let expected = create_scalar(
+            Some("POLYGON((0 1, 0 7, 6 7, 6 1, 0 1))"),
+            &WKB_GEOMETRY_ITEM_CRS,
+        );
 
         assert_scalar_equal(&tester.aggregate_wkt(batches).unwrap(), 
&expected);
     }
 
-    #[test]
-    fn udf_grouped_accumulate() {
-        let tester = AggregateUdfTester::new(st_envelope_agg_udf().into(), 
vec![WKB_GEOMETRY]);
+    #[rstest]
+    fn udf_grouped_accumulate(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] 
sedona_type: SedonaType) {
+        let tester =
+            AggregateUdfTester::new(st_envelope_agg_udf().into(), 
vec![sedona_type.clone()]);
         assert_eq!(tester.return_type().unwrap(), WKB_GEOMETRY);
 
         // Six elements, four groups, with one all null group and one 
partially null group
         let group_indices = vec![0, 3, 1, 1, 0, 2];
         let array0 = create_array(
             &[Some("POINT (0 1)"), None, Some("POINT (2 3)")],
-            &WKB_GEOMETRY,
+            &sedona_type,
         );
         let array1 = create_array(
             &[Some("POINT (4 5)"), None, Some("POINT (6 7)")],
-            &WKB_GEOMETRY,
+            &sedona_type,
         );
         let batches = vec![array0, array1];
 
diff --git a/rust/sedona-schema/src/datatypes.rs 
b/rust/sedona-schema/src/datatypes.rs
index 9707302c..8284c248 100644
--- a/rust/sedona-schema/src/datatypes.rs
+++ b/rust/sedona-schema/src/datatypes.rs
@@ -82,10 +82,18 @@ pub const RASTER: SedonaType = SedonaType::Raster;
 pub static WKB_GEOMETRY_ITEM_CRS: LazyLock<SedonaType> =
     LazyLock::new(|| SedonaType::new_item_crs(&WKB_GEOMETRY).unwrap());
 
+/// Sentinel for [SedonaType::new_item_crs] containing [WKB_VIEW_GEOMETRY]
+pub static WKB_VIEW_GEOMETRY_ITEM_CRS: LazyLock<SedonaType> =
+    LazyLock::new(|| SedonaType::new_item_crs(&WKB_VIEW_GEOMETRY).unwrap());
+
 /// Sentinel for [SedonaType::new_item_crs] containing [WKB_GEOGRAPHY]
 pub static WKB_GEOGRAPHY_ITEM_CRS: LazyLock<SedonaType> =
     LazyLock::new(|| SedonaType::new_item_crs(&WKB_GEOGRAPHY).unwrap());
 
+/// Sentinel for [SedonaType::new_item_crs] containing [WKB_VIEW_GEOGRAPHY]
+pub static WKB_VIEW_GEOGRAPHY_ITEM_CRS: LazyLock<SedonaType> =
+    LazyLock::new(|| SedonaType::new_item_crs(&WKB_VIEW_GEOGRAPHY).unwrap());
+
 /// Create a static value for the [`SedonaType::Raster`] that's initialized 
exactly once,
 /// on first access
 static RASTER_DATATYPE: LazyLock<DataType> =

Reply via email to