This is an automated email from the ASF dual-hosted git repository.

petern pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 0f51162e perf: Implement (2x) faster ST_Buffer kernel using `geo` 
(#233)
0f51162e is described below

commit 0f51162e20cc925da430359730d46f222938b151
Author: Peter Nguyen <[email protected]>
AuthorDate: Tue Dec 16 07:28:30 2025 -0800

    perf: Implement (2x) faster ST_Buffer kernel using `geo` (#233)
---
 c/sedona-geos/Cargo.toml                          |   2 +-
 c/sedona-geos/benches/geos-functions.rs           |   2 +-
 c/sedona-geos/src/st_buffer.rs                    |  37 ++++
 python/sedonadb/tests/functions/test_functions.py |  25 ++-
 rust/sedona-geo/benches/geo-functions.rs          |  15 ++
 rust/sedona-geo/src/lib.rs                        |   1 +
 rust/sedona-geo/src/register.rs                   |  11 +-
 rust/sedona-geo/src/st_buffer.rs                  | 223 ++++++++++++++++++++++
 rust/sedona-geo/src/st_centroid.rs                |   6 +-
 9 files changed, 312 insertions(+), 10 deletions(-)

diff --git a/c/sedona-geos/Cargo.toml b/c/sedona-geos/Cargo.toml
index c6c64ef0..ce85b023 100644
--- a/c/sedona-geos/Cargo.toml
+++ b/c/sedona-geos/Cargo.toml
@@ -31,7 +31,7 @@ rust-version.workspace = true
 [dev-dependencies]
 criterion = { workspace = true }
 sedona = { workspace = true }
-sedona-testing = { workspace = true }
+sedona-testing = { workspace = true, features = ["criterion"] }
 rstest = { workspace = true }
 geo-types = { workspace = true }
 
diff --git a/c/sedona-geos/benches/geos-functions.rs 
b/c/sedona-geos/benches/geos-functions.rs
index f2b3c626..725f4de4 100644
--- a/c/sedona-geos/benches/geos-functions.rs
+++ b/c/sedona-geos/benches/geos-functions.rs
@@ -43,7 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) {
         &f,
         "geos",
         "st_buffer",
-        ArrayScalar(Polygon(500), Float64(1.0, 10.0)),
+        ArrayScalar(Polygon(50), Float64(1.0, 10.0)), // Reduced from 500 so 
that it can finish
     );
 
     benchmark::scalar(c, &f, "geos", "st_centroid", Polygon(10));
diff --git a/c/sedona-geos/src/st_buffer.rs b/c/sedona-geos/src/st_buffer.rs
index 2269faf8..e152a405 100644
--- a/c/sedona-geos/src/st_buffer.rs
+++ b/c/sedona-geos/src/st_buffer.rs
@@ -342,6 +342,43 @@ mod tests {
         assert_array_equal(&envelope_result, &expected_envelope);
     }
 
+    #[test]
+    fn test_empty_geometry() {
+        let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl());
+        let tester = ScalarUdfTester::new(
+            udf.into(),
+            vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)],
+        );
+
+        let input_wkt = vec![
+            Some("POINT EMPTY"),
+            Some("LINESTRING EMPTY"),
+            Some("POLYGON EMPTY"),
+            Some("MULTIPOINT EMPTY"),
+            Some("MULTILINESTRING EMPTY"),
+            Some("MULTIPOLYGON EMPTY"),
+            Some("GEOMETRYCOLLECTION EMPTY"),
+        ];
+        let input_dist = 2;
+
+        let buffer_result = tester
+            .invoke_wkb_array_scalar(input_wkt, input_dist)
+            .unwrap();
+        let expected: ArrayRef = create_array(
+            &[
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+            ],
+            &WKB_GEOMETRY,
+        );
+        assert_array_equal(&buffer_result, &expected);
+    }
+
     #[rstest]
     fn udf_with_buffer_params(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] 
sedona_type: SedonaType) {
         let udf = SedonaScalarUDF::from_kernel("st_buffer", 
st_buffer_style_impl());
diff --git a/python/sedonadb/tests/functions/test_functions.py 
b/python/sedonadb/tests/functions/test_functions.py
index 609b255f..40daaff6 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -262,7 +262,30 @@ def test_st_buffer(eng, geom, dist, expected_area):
     eng.assert_query_result(
         f"SELECT ST_Area(ST_Buffer({geom_or_null(geom)}, 
{val_or_null(dist)}))",
         expected_area,
-        numeric_epsilon=1e-9,
+        # geos passes with 1e-9, but geo needs it as high as 1e-3
+        numeric_epsilon=1e-3,
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+    ("geom", "expected"),
+    [
+        ("POINT EMPTY", "POLYGON EMPTY"),
+        ("LINESTRING EMPTY", "POLYGON EMPTY"),
+        ("POLYGON EMPTY", "POLYGON EMPTY"),
+        ("MULTIPOINT EMPTY", "POLYGON EMPTY"),
+        ("MULTILINESTRING EMPTY", "POLYGON EMPTY"),
+        ("MULTIPOLYGON EMPTY", "POLYGON EMPTY"),
+        ("GEOMETRYCOLLECTION EMPTY", "POLYGON EMPTY"),
+    ],
+)
+def test_st_buffer_empty(eng, geom, expected):
+    eng = SedonaDB.create_or_skip()
+
+    eng.assert_query_result(
+        f"SELECT ST_Buffer({geom_or_null(geom)}, {2.0})",
+        expected,
     )
 
 
diff --git a/rust/sedona-geo/benches/geo-functions.rs 
b/rust/sedona-geo/benches/geo-functions.rs
index 16259edf..522ebdda 100644
--- a/rust/sedona-geo/benches/geo-functions.rs
+++ b/rust/sedona-geo/benches/geo-functions.rs
@@ -27,6 +27,21 @@ fn criterion_benchmark(c: &mut Criterion) {
     benchmark::scalar(c, &f, "geo", "st_area", Polygon(10));
     benchmark::scalar(c, &f, "geo", "st_area", Polygon(500));
 
+    benchmark::scalar(
+        c,
+        &f,
+        "geo",
+        "st_buffer",
+        ArrayScalar(Polygon(10), Float64(1.0, 10.0)),
+    );
+    benchmark::scalar(
+        c,
+        &f,
+        "geo",
+        "st_buffer",
+        ArrayScalar(Polygon(50), Float64(1.0, 10.0)), // Reduced from 500 so 
that it can finish
+    );
+
     benchmark::scalar(c, &f, "geo", "st_perimeter", Polygon(10));
     benchmark::scalar(c, &f, "geo", "st_perimeter", Polygon(500));
 
diff --git a/rust/sedona-geo/src/lib.rs b/rust/sedona-geo/src/lib.rs
index f06a98b1..48a9430b 100644
--- a/rust/sedona-geo/src/lib.rs
+++ b/rust/sedona-geo/src/lib.rs
@@ -17,6 +17,7 @@
 pub mod centroid;
 pub mod register;
 mod st_area;
+mod st_buffer;
 mod st_centroid;
 mod st_distance;
 mod st_dwithin;
diff --git a/rust/sedona-geo/src/register.rs b/rust/sedona-geo/src/register.rs
index bfa45da1..10c371cc 100644
--- a/rust/sedona-geo/src/register.rs
+++ b/rust/sedona-geo/src/register.rs
@@ -18,17 +18,18 @@ use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
 use sedona_expr::scalar_udf::ScalarKernelRef;
 
 use crate::{
-    st_area::st_area_impl, st_centroid::st_centroid_impl, 
st_distance::st_distance_impl,
-    st_dwithin::st_dwithin_impl, st_intersection_agg::st_intersection_agg_impl,
-    st_intersects::st_intersects_impl, st_length::st_length_impl,
-    st_line_interpolate_point::st_line_interpolate_point_impl, 
st_perimeter::st_perimeter_impl,
-    st_union_agg::st_union_agg_impl,
+    st_area::st_area_impl, st_buffer::st_buffer_impl, 
st_centroid::st_centroid_impl,
+    st_distance::st_distance_impl, st_dwithin::st_dwithin_impl,
+    st_intersection_agg::st_intersection_agg_impl, 
st_intersects::st_intersects_impl,
+    st_length::st_length_impl, 
st_line_interpolate_point::st_line_interpolate_point_impl,
+    st_perimeter::st_perimeter_impl, st_union_agg::st_union_agg_impl,
 };
 
 pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
     vec![
         ("st_intersects", st_intersects_impl()),
         ("st_area", st_area_impl()),
+        ("st_buffer", st_buffer_impl()),
         ("st_centroid", st_centroid_impl()),
         ("st_distance", st_distance_impl()),
         ("st_dwithin", st_dwithin_impl()),
diff --git a/rust/sedona-geo/src/st_buffer.rs b/rust/sedona-geo/src/st_buffer.rs
new file mode 100644
index 00000000..7eebec65
--- /dev/null
+++ b/rust/sedona-geo/src/st_buffer.rs
@@ -0,0 +1,223 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::BinaryBuilder;
+use arrow_schema::DataType;
+use datafusion_common::{error::Result, exec_err, DataFusionError};
+use datafusion_expr::ColumnarValue;
+use geo::algorithm::buffer::{Buffer, BufferStyle};
+use geo_types::Polygon;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_functions::executor::WkbExecutor;
+use sedona_geometry::is_empty::is_geometry_empty;
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{
+    datatypes::{SedonaType, WKB_GEOMETRY},
+    matchers::ArgMatcher,
+};
+use wkb::{
+    reader::Wkb,
+    writer::{write_geometry, WriteOptions},
+    Endianness,
+};
+
+use crate::to_geo::item_to_geometry;
+
+/// ST_Buffer() implementation using buffer calculation
+pub fn st_buffer_impl() -> ScalarKernelRef {
+    Arc::new(STBuffer {})
+}
+
+#[derive(Debug)]
+struct STBuffer {}
+
+impl SedonaScalarKernel for STBuffer {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()],
+            WKB_GEOMETRY,
+        );
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        // Extract the constant scalar value before looping over the input 
geometries
+        let params: Option<BufferStyle<f64>>;
+        let arg1 = args[1].cast_to(&DataType::Float64, None)?;
+        if let ColumnarValue::Scalar(scalar_arg) = &arg1 {
+            if scalar_arg.is_null() {
+                params = None;
+            } else {
+                let distance = f64::try_from(scalar_arg.clone())?;
+                params = Some(BufferStyle::new(distance));
+            }
+        } else {
+            return exec_err!("Invalid distance: {:?}", args[1]);
+        }
+
+        let executor = WkbExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+        executor.execute_wkb_void(|maybe_wkb| {
+            match (maybe_wkb, params.clone()) {
+                (Some(wkb), Some(params)) => {
+                    invoke_scalar(&wkb, params, &mut builder)?;
+                    builder.append_value([]);
+                }
+                _ => builder.append_null(),
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+fn invoke_scalar(
+    wkb: &Wkb,
+    params: BufferStyle<f64>,
+    writer: &mut impl std::io::Write,
+) -> Result<()> {
+    // PostGIS returns POLYGON EMPTY for all empty geometries
+    let is_empty = is_geometry_empty(wkb).map_err(|e| 
DataFusionError::External(Box::new(e)))?;
+    if is_empty {
+        let empty_polygon = Polygon::<f64>::empty();
+        write_geometry(
+            writer,
+            &empty_polygon,
+            &WriteOptions {
+                endianness: Endianness::LittleEndian,
+            },
+        )
+        .map_err(|e| DataFusionError::External(Box::new(e)))?;
+        return Ok(());
+    }
+
+    let geom = item_to_geometry(wkb)?;
+
+    let buffer = geom.buffer_with_style(params);
+
+    // Convert type to geo::Geometry
+    let geometry = geo::Geometry::MultiPolygon(buffer);
+
+    write_geometry(
+        writer,
+        &geometry,
+        &WriteOptions {
+            endianness: Endianness::LittleEndian,
+        },
+    )
+    .map_err(|e| DataFusionError::External(Box::new(e)))?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow_array::ArrayRef;
+    use datafusion_common::ScalarValue;
+    use rstest::rstest;
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+    use sedona_testing::compare::assert_array_equal;
+    use sedona_testing::create::create_array;
+    use sedona_testing::testers::ScalarUdfTester;
+
+    use super::*;
+
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) 
{
+        let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl());
+        let tester = ScalarUdfTester::new(
+            udf.into(),
+            vec![sedona_type.clone(), SedonaType::Arrow(DataType::Float64)],
+        );
+        tester.assert_return_type(WKB_GEOMETRY);
+
+        // Check the envelope of the buffers
+        let envelope_udf = sedona_functions::st_envelope::st_envelope_udf();
+        let envelope_tester = ScalarUdfTester::new(envelope_udf.into(), 
vec![WKB_GEOMETRY]);
+
+        let buffer_result = tester.invoke_scalar_scalar("POINT (1 2)", 
2.0).unwrap();
+        let envelope_result = 
envelope_tester.invoke_scalar(buffer_result).unwrap();
+        let expected_envelope = "POLYGON((-1 0, -1 4, 3 4, 3 0, -1 0))";
+        tester.assert_scalar_result_equals(envelope_result, expected_envelope);
+
+        let result = tester
+            .invoke_scalar_scalar(ScalarValue::Null, ScalarValue::Null)
+            .unwrap();
+        assert!(result.is_null());
+
+        let input_wkt = vec![None, Some("POINT (0 0)")];
+        let input_dist = 1;
+        let expected_envelope: ArrayRef = create_array(
+            &[None, Some("POLYGON((-1 -1, -1 1, 1 1, 1 -1, -1 -1))")],
+            &WKB_GEOMETRY,
+        );
+        let buffer_result = tester
+            .invoke_wkb_array_scalar(input_wkt, input_dist)
+            .unwrap();
+        let envelope_result = 
envelope_tester.invoke_array(buffer_result).unwrap();
+        assert_array_equal(&envelope_result, &expected_envelope);
+    }
+
+    #[test]
+    fn test_empty_geometry() {
+        let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl());
+        let tester = ScalarUdfTester::new(
+            udf.into(),
+            vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)],
+        );
+
+        let input_wkt = vec![
+            Some("POINT EMPTY"),
+            Some("LINESTRING EMPTY"),
+            Some("POLYGON EMPTY"),
+            Some("MULTIPOINT EMPTY"),
+            Some("MULTILINESTRING EMPTY"),
+            Some("MULTIPOLYGON EMPTY"),
+            Some("GEOMETRYCOLLECTION EMPTY"),
+        ];
+        let input_dist = 2;
+
+        let buffer_result = tester
+            .invoke_wkb_array_scalar(input_wkt, input_dist)
+            .unwrap();
+        let expected: ArrayRef = create_array(
+            &[
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+            ],
+            &WKB_GEOMETRY,
+        );
+        assert_array_equal(&buffer_result, &expected);
+    }
+}
diff --git a/rust/sedona-geo/src/st_centroid.rs 
b/rust/sedona-geo/src/st_centroid.rs
index bc6b8188..9b027695 100644
--- a/rust/sedona-geo/src/st_centroid.rs
+++ b/rust/sedona-geo/src/st_centroid.rs
@@ -54,8 +54,10 @@ impl SedonaScalarKernel for STCentroid {
         args: &[ColumnarValue],
     ) -> Result<ColumnarValue> {
         let executor = WkbExecutor::new(arg_types, args);
-        let mut builder =
-            BinaryBuilder::with_capacity(executor.num_iterations(), 
WKB_MIN_PROBABLE_BYTES);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
         executor.execute_wkb_void(|maybe_wkb| {
             match maybe_wkb {
                 Some(wkb) => {

Reply via email to