This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 1892e1a  feat(rust/sedona-functions): Implement ST_Points and 
ST_NPoints (#257)
1892e1a is described below

commit 1892e1a1ef2a4877b9a29ed7da29dc8c5e41dda2
Author: Hiroaki Yutani <[email protected]>
AuthorDate: Fri Oct 31 00:47:03 2025 +0900

    feat(rust/sedona-functions): Implement ST_Points and ST_NPoints (#257)
    
    Co-authored-by: Peter Nguyen <[email protected]>
    Co-authored-by: Dewey Dunnington <[email protected]>
---
 benchmarks/test_functions.py                      |  18 +
 python/sedonadb/tests/functions/test_functions.py |  61 ++++
 rust/sedona-functions/src/lib.rs                  |   1 +
 rust/sedona-functions/src/register.rs             |   2 +
 rust/sedona-functions/src/st_points.rs            | 411 ++++++++++++++++++++++
 5 files changed, 493 insertions(+)

diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index 52537e4..41839ab 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -240,6 +240,24 @@ class TestBenchFunctions(TestBenchBase):
 
         benchmark(queries)
 
+    @pytest.mark.parametrize(
+        "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+    )
+    @pytest.mark.parametrize(
+        "table",
+        [
+            "collections_simple",
+            "collections_complex",
+        ],
+    )
+    def test_st_points(self, benchmark, eng, table):
+        """Benchmark ``ST_Points(geom1)`` for each parametrized engine and table."""
+        eng = self._get_eng(eng)
+
+        # Callable handed to the ``benchmark`` fixture below.
+        def queries():
+            eng.execute_and_collect(f"SELECT ST_Points(geom1) from {table}")
+
+        benchmark(queries)
+
     @pytest.mark.parametrize(
         "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
     )
diff --git a/python/sedonadb/tests/functions/test_functions.py 
b/python/sedonadb/tests/functions/test_functions.py
index c9a31b7..2ef992d 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1157,6 +1157,67 @@ def test_st_pointm(eng, x, y, m, expected):
     )
 
 
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geometry", "expected", "expected_n"),
+    [
+        ("POINT (1 2)", "MULTIPOINT (1 2)", 1),
+        ("LINESTRING (1 2, 3 4, 5 6)", "MULTIPOINT (1 2, 3 4, 5 6)", 3),
+        (
+            "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))",
+            "MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)",
+            5,
+        ),
+        (
+            "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 3 1, 1 3, 1 1))",
+            "MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 1 1, 3 1, 1 3, 1 1)",
+            9,
+        ),
+        ("MULTIPOINT (1 2, 3 4, 5 6, 7 8)", "MULTIPOINT (1 2, 3 4, 5 6, 7 8)", 
4),
+        (
+            "MULTILINESTRING ((1 2, 3 4), EMPTY, (5 6, 7 8))",
+            "MULTIPOINT (1 2, 3 4, 5 6, 7 8)",
+            4,
+        ),
+        (
+            "MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)), EMPTY, ((0 0, 5 0, 
0 5, 0 0), (1 1, 3 1, 1 3, 1 1)))",
+            "MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 0 0, 5 0, 0 5, 0 0, 1 1, 
3 1, 1 3, 1 1)",
+            13,
+        ),
+        (
+            "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING EMPTY, LINESTRING (3 
4, 5 6))",
+            "MULTIPOINT (1 2, 3 4, 5 6)",
+            3,
+        ),
+        ("LINESTRING Z (1 2 3, 4 5 6, 7 8 9)", "MULTIPOINT Z (1 2 3, 4 5 6, 7 
8 9)", 3),
+        ("LINESTRING M (1 2 3, 4 5 6, 7 8 9)", "MULTIPOINT M (1 2 3, 4 5 6, 7 
8 9)", 3),
+        (
+            "LINESTRING ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)",
+            "MULTIPOINT ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)",
+            3,
+        ),
+        ("POINT EMPTY", "MULTIPOINT EMPTY", 0),
+        ("LINESTRING EMPTY", "MULTIPOINT EMPTY", 0),
+        ("POLYGON EMPTY", "MULTIPOINT EMPTY", 0),
+        ("MULTIPOINT EMPTY", "MULTIPOINT EMPTY", 0),
+        ("MULTILINESTRING EMPTY", "MULTIPOINT EMPTY", 0),
+        ("MULTIPOLYGON EMPTY", "MULTIPOINT EMPTY", 0),
+        ("GEOMETRYCOLLECTION EMPTY", "MULTIPOINT EMPTY", 0),
+        (None, None, None),
+    ],
+)
+def test_st_points(eng, geometry, expected, expected_n):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_Points({geom_or_null(geometry)})",
+        expected,
+    )
+    eng.assert_query_result(
+        f"SELECT ST_NPoints({geom_or_null(geometry)})",
+        expected_n,
+    )
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geometry", "n", "expected"),
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 8c53a99..28ad6f3 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -47,6 +47,7 @@ mod st_makeline;
 mod st_perimeter;
 mod st_point;
 mod st_pointn;
+mod st_points;
 mod st_pointzm;
 mod st_setsrid;
 mod st_srid;
diff --git a/rust/sedona-functions/src/register.rs 
b/rust/sedona-functions/src/register.rs
index e90f941..5b32efa 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -85,6 +85,8 @@ pub fn default_function_set() -> FunctionSet {
         crate::st_point::st_geogpoint_udf,
         crate::st_point::st_point_udf,
         crate::st_pointn::st_pointn_udf,
+        crate::st_points::st_points_udf,
+        crate::st_points::st_npoints_udf,
         crate::st_pointzm::st_pointz_udf,
         crate::st_pointzm::st_pointm_udf,
         crate::st_pointzm::st_pointzm_udf,
diff --git a/rust/sedona-functions/src/st_points.rs 
b/rust/sedona-functions/src/st_points.rs
new file mode 100644
index 0000000..4f9b463
--- /dev/null
+++ b/rust/sedona-functions/src/st_points.rs
@@ -0,0 +1,411 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_array::builder::{BinaryBuilder, UInt64Builder};
+use arrow_schema::DataType;
+use datafusion_common::error::Result;
+use datafusion_expr::{
+    scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, 
Volatility,
+};
+use geo_traits::{
+    CoordTrait, GeometryCollectionTrait, GeometryTrait, LineStringTrait, 
MultiLineStringTrait,
+    MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait,
+};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::{
+    error::SedonaGeometryError,
+    wkb_factory::{
+        write_wkb_coord_trait, write_wkb_multipoint_header, 
write_wkb_point_header,
+        WKB_MIN_PROBABLE_BYTES,
+    },
+};
+use sedona_schema::{
+    datatypes::{SedonaType, WKB_GEOMETRY},
+    matchers::ArgMatcher,
+};
+use std::{io::Write, sync::Arc};
+
+use crate::executor::WkbExecutor;
+
+/// Builds the `st_points` scalar UDF.
+///
+/// The UDF is backed by the native `STPoints` kernel, which returns all the
+/// points of a geometry as a MULTIPOINT.
+pub fn st_points_udf() -> SedonaScalarUDF {
+    let documentation = st_points_doc();
+
+    SedonaScalarUDF::new(
+        "st_points",
+        vec![Arc::new(STPoints)],
+        Volatility::Immutable,
+        Some(documentation),
+    )
+}
+
+/// Assembles the user-facing documentation attached to `ST_Points`.
+fn st_points_doc() -> Documentation {
+    let description = "Returns all the points of a geometry as MULTIPOINT.";
+    let syntax = "ST_Points (geom: Geometry)";
+    let sql_example = "SELECT ST_Points(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))";
+
+    Documentation::builder(DOC_SECTION_OTHER, description, syntax)
+        .with_argument("geom", "geometry: Input geometry")
+        .with_sql_example(sql_example)
+        .build()
+}
+
+/// Kernel behind `ST_Points`: rewrites each input geometry as a MULTIPOINT
+/// holding every point of the input.
+#[derive(Debug)]
+struct STPoints;
+
+impl SedonaScalarKernel for STPoints {
+    /// Accepts exactly one geometry argument and reports a WKB geometry result.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY);
+
+        matcher.match_args(args)
+    }
+
+    /// Produces one MULTIPOINT WKB value per input row; null rows stay null.
+    ///
+    /// # Errors
+    ///
+    /// Returns an internal error when writing the MULTIPOINT header or any of
+    /// the points into the output builder fails.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            if let Some(wkb) = maybe_wkb {
+                // The header is given the point count, so the points have to be
+                // counted before any of them is written.
+                let n_points = count_wkb_points_recursively(&wkb);
+
+                if write_wkb_multipoint_header(&mut builder, wkb.dim(), n_points).is_err() {
+                    return sedona_internal_err!("Failed to write WKB multipoint header");
+                };
+
+                if write_wkb_points_recursively(&mut builder, &wkb).is_err() {
+                    return sedona_internal_err!("Failed to write WKB points");
+                };
+
+                // NOTE(review): presumably this closes the value whose bytes were
+                // streamed into the builder through `Write` above - confirm
+                // against the arrow builder documentation.
+                builder.append_value([]);
+            } else {
+                builder.append_null();
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+/// Builds the `st_npoints` scalar UDF.
+///
+/// The UDF is backed by the native `STNPoints` kernel, which counts all the
+/// points of a geometry.
+pub fn st_npoints_udf() -> SedonaScalarUDF {
+    let documentation = st_npoints_doc();
+
+    SedonaScalarUDF::new(
+        "st_npoints",
+        vec![Arc::new(STNPoints)],
+        Volatility::Immutable,
+        Some(documentation),
+    )
+}
+
+/// Assembles the user-facing documentation attached to `ST_NPoints`.
+fn st_npoints_doc() -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Returns the count of the points of a geometry.",
+        // The syntax line must name this function, not its sibling ST_Points.
+        "ST_NPoints (geom: Geometry)",
+    )
+    .with_argument("geom", "geometry: Input geometry")
+    .with_sql_example("SELECT ST_NPoints(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))")
+    .build()
+}
+
+/// Kernel behind `ST_NPoints`: reports how many points each input geometry has.
+#[derive(Debug)]
+struct STNPoints;
+
+impl SedonaScalarKernel for STNPoints {
+    /// Accepts exactly one geometry argument and reports a `UInt64` result.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        ArgMatcher::new(
+            vec![ArgMatcher::is_geometry()],
+            SedonaType::Arrow(DataType::UInt64),
+        )
+        .match_args(args)
+    }
+
+    /// Produces one point count per input row; null rows stay null.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+        let mut counts = UInt64Builder::with_capacity(executor.num_iterations());
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => counts.append_value(count_wkb_points_recursively(&wkb) as u64),
+                None => counts.append_null(),
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(counts.finish()))
+    }
+}
+
+/// Counts the points of `wkb`, descending into nested geometry collections.
+///
+/// Empty points (standalone or inside a MULTIPOINT) are not counted, polygons
+/// contribute the coordinates of their exterior and interior rings, and any
+/// geometry type without an arm below counts as zero.
+fn count_wkb_points_recursively<'a>(wkb: &'a wkb::reader::Wkb<'a>) -> usize {
+    match wkb.as_type() {
+        geo_traits::GeometryType::Point(point) => usize::from(!point.is_empty()),
+        geo_traits::GeometryType::LineString(line_string) => line_string.num_coords(),
+        geo_traits::GeometryType::Polygon(polygon) => {
+            let exterior_coords = polygon.exterior().map_or(0, |ring| ring.num_coords());
+            let interior_coords: usize =
+                polygon.interiors().map(|ring| ring.num_coords()).sum();
+
+            exterior_coords + interior_coords
+        }
+        geo_traits::GeometryType::MultiPoint(multi_point) => multi_point
+            .points()
+            .filter(|point| !point.is_empty())
+            .count(),
+        geo_traits::GeometryType::MultiLineString(multi_line_string) => multi_line_string
+            .line_strings()
+            .map(|line_string| line_string.num_coords())
+            .sum(),
+        geo_traits::GeometryType::MultiPolygon(multi_polygon) => multi_polygon
+            .polygons()
+            .map(|polygon| {
+                let exterior_coords = polygon.exterior().map_or(0, |ring| ring.num_coords());
+                let interior_coords: usize =
+                    polygon.interiors().map(|ring| ring.num_coords()).sum();
+
+                exterior_coords + interior_coords
+            })
+            .sum(),
+        geo_traits::GeometryType::GeometryCollection(geometry_collection) => geometry_collection
+            .geometries()
+            .fold(0, |total, geometry| {
+                total + count_wkb_points_recursively(geometry)
+            }),
+        _ => 0,
+    }
+}
+
+/// Writes `coord` to `buf` as a WKB point: a point header for the
+/// coordinate's dimension, followed by the coordinate itself.
+fn write_wkb_point_from_coord(
+    buf: &mut impl Write,
+    coord: impl CoordTrait<T = f64>,
+) -> Result<(), SedonaGeometryError> {
+    let dimension = coord.dim();
+
+    write_wkb_point_header(buf, dimension)?;
+    write_wkb_coord_trait(buf, &coord)?;
+
+    Ok(())
+}
+
+/// Writes every coordinate yielded by `coords` to `buf` as its own WKB point,
+/// stopping at the first write error.
+fn write_wkb_points_from_coords(
+    buf: &mut impl Write,
+    coords: impl Iterator<Item = impl CoordTrait<T = f64>>,
+) -> Result<(), SedonaGeometryError> {
+    let mut remaining = coords;
+
+    remaining.try_for_each(|coord| write_wkb_point_from_coord(&mut *buf, coord))
+}
+
+/// Writes every point of `wkb` to `buf` as an individual WKB point,
+/// descending into nested geometry collections.
+///
+/// Points without a coordinate are skipped, polygons emit their exterior ring
+/// before their interior rings, and geometry types without an arm below write
+/// nothing. The first write error aborts the traversal.
+fn write_wkb_points_recursively<'a>(
+    buf: &mut impl Write,
+    wkb: &'a wkb::reader::Wkb<'a>,
+) -> Result<(), SedonaGeometryError> {
+    match wkb.as_type() {
+        geo_traits::GeometryType::Point(point) => match point.coord() {
+            Some(coord) => write_wkb_point_from_coord(buf, coord),
+            None => Ok(()),
+        },
+        geo_traits::GeometryType::LineString(line_string) => {
+            write_wkb_points_from_coords(buf, line_string.coords())
+        }
+        geo_traits::GeometryType::Polygon(polygon) => {
+            if let Some(exterior) = polygon.exterior() {
+                write_wkb_points_from_coords(buf, exterior.coords())?;
+            }
+            for interior in polygon.interiors() {
+                write_wkb_points_from_coords(buf, interior.coords())?;
+            }
+            Ok(())
+        }
+        geo_traits::GeometryType::MultiPoint(multi_point) => {
+            for member in multi_point.points() {
+                if let Some(coord) = member.coord() {
+                    write_wkb_point_from_coord(buf, coord)?;
+                }
+            }
+            Ok(())
+        }
+        geo_traits::GeometryType::MultiLineString(multi_line_string) => {
+            for member in multi_line_string.line_strings() {
+                write_wkb_points_from_coords(buf, member.coords())?;
+            }
+            Ok(())
+        }
+        geo_traits::GeometryType::MultiPolygon(multi_polygon) => {
+            for member in multi_polygon.polygons() {
+                if let Some(exterior) = member.exterior() {
+                    write_wkb_points_from_coords(buf, exterior.coords())?;
+                }
+                for interior in member.interiors() {
+                    write_wkb_points_from_coords(buf, interior.coords())?;
+                }
+            }
+            Ok(())
+        }
+        geo_traits::GeometryType::GeometryCollection(geometry_collection) => {
+            for member in geometry_collection.geometries() {
+                write_wkb_points_recursively(buf, member)?;
+            }
+            Ok(())
+        }
+        _ => Ok(()),
+    }
+}
+
+// Unit tests for the ST_Points and ST_NPoints UDFs.
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use datafusion_expr::ScalarUDF;
+    use rstest::rstest;
+    use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+    use sedona_testing::{
+        compare::assert_array_equal, create::create_array, 
testers::ScalarUdfTester,
+    };
+
+    use super::*;
+
+    // Both UDFs must report the expected name and carry documentation.
+    #[test]
+    fn udf_metadata() {
+        let st_points_udf: ScalarUDF = st_points_udf().into();
+        assert_eq!(st_points_udf.name(), "st_points");
+        assert!(st_points_udf.documentation().is_some());
+
+        let st_npoints_udf: ScalarUDF = st_npoints_udf().into();
+        assert_eq!(st_npoints_udf.name(), "st_npoints");
+        assert!(st_npoints_udf.documentation().is_some());
+    }
+
+    // Runs both UDFs over the same input rows, once per parametrized input type.
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) 
{
+        use arrow_array::UInt64Array;
+
+        let tester_points = ScalarUdfTester::new(st_points_udf().into(), 
vec![sedona_type.clone()]);
+        let tester_npoints =
+            ScalarUdfTester::new(st_npoints_udf().into(), 
vec![sedona_type.clone()]);
+
+        let input = create_array(
+            &[
+                // 2d
+                Some("POINT (1 2)"),
+                Some("LINESTRING (1 2, 3 4, 5 6)"),
+                Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"),
+                Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 3 1, 1 3, 
1 1))"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6, 7 8)"),
+                Some("MULTILINESTRING ((1 2, 3 4), EMPTY, (5 6, 7 8))"),
+                Some("MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)), EMPTY, 
((0 0, 5 0, 0 5, 0 0), (1 1, 3 1, 1 3, 1 1)))"),
+                Some("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING EMPTY, 
LINESTRING (3 4, 5 6))"),
+                // 3d and 4d
+                Some("LINESTRING Z (1 2 3, 4 5 6, 7 8 9)"),
+                Some("LINESTRING M (1 2 3, 4 5 6, 7 8 9)"),
+                Some("LINESTRING ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)"),
+                // empty
+                Some("POINT EMPTY"),
+                Some("LINESTRING EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("MULTIPOINT EMPTY"),
+                Some("MULTILINESTRING EMPTY"),
+                Some("MULTIPOLYGON EMPTY"),
+                Some("GEOMETRYCOLLECTION EMPTY"),
+                // null
+                None,
+            ],
+            &sedona_type,
+        );
+
+        // Expected ST_Points output, one entry per input row in the same order.
+        let expected_points = create_array(
+            &[
+                Some("MULTIPOINT (1 2)"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6)"),
+                Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)"),
+                Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 1 1, 3 1, 1 3, 
1 1)"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6, 7 8)"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6, 7 8)"),
+                Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 0 0, 5 0, 0 5, 
0 0, 1 1, 3 1, 1 3, 1 1)"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6)"),
+                // 3d and 4d
+                Some("MULTIPOINT Z (1 2 3, 4 5 6, 7 8 9)"),
+                Some("MULTIPOINT M (1 2 3, 4 5 6, 7 8 9)"),
+                Some("MULTIPOINT ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)"),
+                // empty returns empty
+                Some("MULTIPOINT EMPTY"),
+                Some("MULTIPOINT EMPTY"),
+                Some("MULTIPOINT EMPTY"),
+                Some("MULTIPOINT EMPTY"),
+                Some("MULTIPOINT EMPTY"),
+                Some("MULTIPOINT EMPTY"),
+                Some("MULTIPOINT EMPTY"),
+                // null
+                None,
+            ],
+            &WKB_GEOMETRY,
+        );
+
+        let result_points = tester_points.invoke_array(input.clone()).unwrap();
+        assert_array_equal(&result_points, &expected_points);
+
+        // Expected ST_NPoints output, one entry per input row in the same order.
+        let expected_npoints: Arc<dyn arrow_array::Array> = 
Arc::new(UInt64Array::from(vec![
+            Some(1),
+            Some(3),
+            Some(5),
+            Some(9),
+            Some(4),
+            Some(4),
+            Some(13),
+            Some(3),
+            // 3d and 4d
+            Some(3),
+            Some(3),
+            Some(3),
+            // empty returns 0
+            Some(0),
+            Some(0),
+            Some(0),
+            Some(0),
+            Some(0),
+            Some(0),
+            Some(0),
+            // null
+            None,
+        ]));
+
+        // NOTE(review): despite its name, `result_points` below holds the ST_NPoints result.
+        let result_points = 
tester_npoints.invoke_array(input.clone()).unwrap();
+        assert_array_equal(&result_points, &expected_npoints);
+    }
+}

Reply via email to