This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 1892e1a feat(rust/sedona-functions): Implement ST_Points and
ST_NPoints (#257)
1892e1a is described below
commit 1892e1a1ef2a4877b9a29ed7da29dc8c5e41dda2
Author: Hiroaki Yutani <[email protected]>
AuthorDate: Fri Oct 31 00:47:03 2025 +0900
feat(rust/sedona-functions): Implement ST_Points and ST_NPoints (#257)
Co-authored-by: Peter Nguyen <[email protected]>
Co-authored-by: Dewey Dunnington <[email protected]>
---
benchmarks/test_functions.py | 18 +
python/sedonadb/tests/functions/test_functions.py | 61 ++++
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 2 +
rust/sedona-functions/src/st_points.rs | 411 ++++++++++++++++++++++
5 files changed, 493 insertions(+)
diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index 52537e4..41839ab 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -240,6 +240,24 @@ class TestBenchFunctions(TestBenchBase):
benchmark(queries)
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_points(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Points(geom1) from {table}")
+
+ benchmark(queries)
+
@pytest.mark.parametrize(
"eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
diff --git a/python/sedonadb/tests/functions/test_functions.py
b/python/sedonadb/tests/functions/test_functions.py
index c9a31b7..2ef992d 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1157,6 +1157,67 @@ def test_st_pointm(eng, x, y, m, expected):
)
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+ ("geometry", "expected", "expected_n"),
+ [
+ ("POINT (1 2)", "MULTIPOINT (1 2)", 1),
+ ("LINESTRING (1 2, 3 4, 5 6)", "MULTIPOINT (1 2, 3 4, 5 6)", 3),
+ (
+ "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))",
+ "MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)",
+ 5,
+ ),
+ (
+ "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 3 1, 1 3, 1 1))",
+ "MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 1 1, 3 1, 1 3, 1 1)",
+ 9,
+ ),
+ ("MULTIPOINT (1 2, 3 4, 5 6, 7 8)", "MULTIPOINT (1 2, 3 4, 5 6, 7 8)",
4),
+ (
+ "MULTILINESTRING ((1 2, 3 4), EMPTY, (5 6, 7 8))",
+ "MULTIPOINT (1 2, 3 4, 5 6, 7 8)",
+ 4,
+ ),
+ (
+ "MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)), EMPTY, ((0 0, 5 0,
0 5, 0 0), (1 1, 3 1, 1 3, 1 1)))",
+ "MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 0 0, 5 0, 0 5, 0 0, 1 1,
3 1, 1 3, 1 1)",
+ 13,
+ ),
+ (
+ "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING EMPTY, LINESTRING (3
4, 5 6))",
+ "MULTIPOINT (1 2, 3 4, 5 6)",
+ 3,
+ ),
+ ("LINESTRING Z (1 2 3, 4 5 6, 7 8 9)", "MULTIPOINT Z (1 2 3, 4 5 6, 7
8 9)", 3),
+ ("LINESTRING M (1 2 3, 4 5 6, 7 8 9)", "MULTIPOINT M (1 2 3, 4 5 6, 7
8 9)", 3),
+ (
+ "LINESTRING ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)",
+ "MULTIPOINT ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)",
+ 3,
+ ),
+ ("POINT EMPTY", "MULTIPOINT EMPTY", 0),
+ ("LINESTRING EMPTY", "MULTIPOINT EMPTY", 0),
+ ("POLYGON EMPTY", "MULTIPOINT EMPTY", 0),
+ ("MULTIPOINT EMPTY", "MULTIPOINT EMPTY", 0),
+ ("MULTILINESTRING EMPTY", "MULTIPOINT EMPTY", 0),
+ ("MULTIPOLYGON EMPTY", "MULTIPOINT EMPTY", 0),
+ ("GEOMETRYCOLLECTION EMPTY", "MULTIPOINT EMPTY", 0),
+ (None, None, None),
+ ],
+)
+def test_st_points(eng, geometry, expected, expected_n):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(
+ f"SELECT ST_Points({geom_or_null(geometry)})",
+ expected,
+ )
+ eng.assert_query_result(
+ f"SELECT ST_NPoints({geom_or_null(geometry)})",
+ expected_n,
+ )
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geometry", "n", "expected"),
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 8c53a99..28ad6f3 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -47,6 +47,7 @@ mod st_makeline;
mod st_perimeter;
mod st_point;
mod st_pointn;
+mod st_points;
mod st_pointzm;
mod st_setsrid;
mod st_srid;
diff --git a/rust/sedona-functions/src/register.rs
b/rust/sedona-functions/src/register.rs
index e90f941..5b32efa 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -85,6 +85,8 @@ pub fn default_function_set() -> FunctionSet {
crate::st_point::st_geogpoint_udf,
crate::st_point::st_point_udf,
crate::st_pointn::st_pointn_udf,
+ crate::st_points::st_points_udf,
+ crate::st_points::st_npoints_udf,
crate::st_pointzm::st_pointz_udf,
crate::st_pointzm::st_pointm_udf,
crate::st_pointzm::st_pointzm_udf,
diff --git a/rust/sedona-functions/src/st_points.rs
b/rust/sedona-functions/src/st_points.rs
new file mode 100644
index 0000000..4f9b463
--- /dev/null
+++ b/rust/sedona-functions/src/st_points.rs
@@ -0,0 +1,411 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_array::builder::{BinaryBuilder, UInt64Builder};
+use arrow_schema::DataType;
+use datafusion_common::error::Result;
+use datafusion_expr::{
+ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
+};
+use geo_traits::{
+ CoordTrait, GeometryCollectionTrait, GeometryTrait, LineStringTrait,
MultiLineStringTrait,
+ MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait,
+};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::{
+ error::SedonaGeometryError,
+ wkb_factory::{
+ write_wkb_coord_trait, write_wkb_multipoint_header,
write_wkb_point_header,
+ WKB_MIN_PROBABLE_BYTES,
+ },
+};
+use sedona_schema::{
+ datatypes::{SedonaType, WKB_GEOMETRY},
+ matchers::ArgMatcher,
+};
+use std::{io::Write, sync::Arc};
+
+use crate::executor::WkbExecutor;
+
+/// Builds the `st_points` scalar UDF.
+///
+/// The UDF is immutable, is served by the native [`STPoints`] kernel and
+/// returns every point of the input geometry as a MULTIPOINT.
+pub fn st_points_udf() -> SedonaScalarUDF {
+    let documentation = Some(st_points_doc());
+
+    SedonaScalarUDF::new(
+        "st_points",
+        vec![Arc::new(STPoints)],
+        Volatility::Immutable,
+        documentation,
+    )
+}
+
+/// User-facing documentation attached to the `st_points` UDF.
+fn st_points_doc() -> Documentation {
+    let description = "Returns all the points of a geometry as MULTIPOINT.";
+    let syntax = "ST_Points (geom: Geometry)";
+    let example = "SELECT ST_Points(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))";
+
+    Documentation::builder(DOC_SECTION_OTHER, description, syntax)
+        .with_argument("geom", "geometry: Input geometry")
+        .with_sql_example(example)
+        .build()
+}
+
+/// Kernel behind `ST_Points`: rewrites each input geometry as a WKB MULTIPOINT
+/// holding all of its points.
+#[derive(Debug)]
+struct STPoints;
+
+impl SedonaScalarKernel for STPoints {
+    /// Accepts exactly one geometry argument and produces WKB geometry.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY);
+
+        matcher.match_args(args)
+    }
+
+    /// Emits one MULTIPOINT per non-null input row and a null per null row.
+    ///
+    /// # Errors
+    ///
+    /// Returns an internal error if either the multipoint header or the
+    /// individual points cannot be written to the output builder.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            if let Some(wkb) = maybe_wkb {
+                // The multipoint header carries the point count, so the points
+                // have to be counted before any of them is written.
+                let n_points = count_wkb_points_recursively(&wkb);
+
+                if write_wkb_multipoint_header(&mut builder, wkb.dim(), n_points).is_err() {
+                    return sedona_internal_err!("Failed to write WKB multipoint header");
+                };
+
+                if write_wkb_points_recursively(&mut builder, &wkb).is_err() {
+                    return sedona_internal_err!("Failed to write WKB points");
+                };
+
+                // The bytes above were streamed into the builder through its
+                // `Write` impl; appending an empty slice closes them off as
+                // the value for this row.
+                builder.append_value([]);
+            } else {
+                builder.append_null();
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+/// Builds the `st_npoints` scalar UDF.
+///
+/// The UDF is immutable, is served by the native [`STNPoints`] kernel and
+/// returns the number of points in the input geometry.
+pub fn st_npoints_udf() -> SedonaScalarUDF {
+    let documentation = Some(st_npoints_doc());
+
+    SedonaScalarUDF::new(
+        "st_npoints",
+        vec![Arc::new(STNPoints)],
+        Volatility::Immutable,
+        documentation,
+    )
+}
+
+/// User-facing documentation attached to the `st_npoints` UDF.
+fn st_npoints_doc() -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Returns the count of the points of a geometry.",
+        // The syntax line must name this function, not its sibling ST_Points.
+        "ST_NPoints (geom: Geometry)",
+    )
+    .with_argument("geom", "geometry: Input geometry")
+    .with_sql_example("SELECT ST_NPoints(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))")
+    .build()
+}
+
+/// Kernel behind `ST_NPoints`: reports how many points each geometry holds.
+#[derive(Debug)]
+struct STNPoints;
+
+impl SedonaScalarKernel for STNPoints {
+    /// Accepts exactly one geometry argument and produces an Arrow `UInt64`.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let output_type = SedonaType::Arrow(DataType::UInt64);
+
+        ArgMatcher::new(vec![ArgMatcher::is_geometry()], output_type).match_args(args)
+    }
+
+    /// Emits one count per non-null input row and a null per null row.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+        let mut counts = UInt64Builder::with_capacity(executor.num_iterations());
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => counts.append_value(count_wkb_points_recursively(&wkb) as u64),
+                None => counts.append_null(),
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(counts.finish()))
+    }
+}
+
+/// Counts the points contained in `wkb`.
+///
+/// Empty points contribute nothing, polygons contribute the coordinates of
+/// their exterior and interior rings, and geometry collections are summed
+/// member by member through recursion. Geometry types not listed below
+/// count as zero.
+fn count_wkb_points_recursively<'a>(wkb: &'a wkb::reader::Wkb<'a>) -> usize {
+    use geo_traits::GeometryType;
+
+    match wkb.as_type() {
+        GeometryType::Point(point) => usize::from(!point.is_empty()),
+        GeometryType::LineString(line_string) => line_string.num_coords(),
+        GeometryType::Polygon(polygon) => {
+            let exterior = polygon.exterior().map_or(0, |ring| ring.num_coords());
+            let interiors = polygon
+                .interiors()
+                .map(|ring| ring.num_coords())
+                .sum::<usize>();
+
+            exterior + interiors
+        }
+        GeometryType::MultiPoint(multi_point) => multi_point
+            .points()
+            .map(|point| usize::from(!point.is_empty()))
+            .sum::<usize>(),
+        GeometryType::MultiLineString(multi_line_string) => multi_line_string
+            .line_strings()
+            .map(|line_string| line_string.num_coords())
+            .sum::<usize>(),
+        GeometryType::MultiPolygon(multi_polygon) => multi_polygon
+            .polygons()
+            .map(|polygon| {
+                let exterior = polygon.exterior().map_or(0, |ring| ring.num_coords());
+                let interiors = polygon
+                    .interiors()
+                    .map(|ring| ring.num_coords())
+                    .sum::<usize>();
+
+                exterior + interiors
+            })
+            .sum::<usize>(),
+        GeometryType::GeometryCollection(collection) => collection
+            .geometries()
+            .map(|member| count_wkb_points_recursively(member))
+            .sum::<usize>(),
+        _ => 0,
+    }
+}
+
+/// Writes `coord` to `buf` as a WKB point: a point header for the
+/// coordinate's dimension followed by the coordinate values.
+fn write_wkb_point_from_coord(
+    buf: &mut impl Write,
+    coord: impl CoordTrait<T = f64>,
+) -> Result<(), SedonaGeometryError> {
+    let dimensions = coord.dim();
+
+    write_wkb_point_header(buf, dimensions)?;
+    write_wkb_coord_trait(buf, &coord)?;
+
+    Ok(())
+}
+
+/// Writes every coordinate yielded by `coords` to `buf` as its own WKB point,
+/// stopping at the first write error.
+fn write_wkb_points_from_coords(
+    buf: &mut impl Write,
+    mut coords: impl Iterator<Item = impl CoordTrait<T = f64>>,
+) -> Result<(), SedonaGeometryError> {
+    coords.try_for_each(|coord| write_wkb_point_from_coord(buf, coord))
+}
+
+/// Writes every point of `wkb` to `buf` as an individual WKB point.
+///
+/// Points are visited in the same order and under the same rules as
+/// `count_wkb_points_recursively`: empty points are skipped, polygon rings
+/// are written exterior first, and geometry collections are handled by
+/// recursion. Geometry types not listed below write nothing.
+fn write_wkb_points_recursively<'a>(
+    buf: &mut impl Write,
+    wkb: &'a wkb::reader::Wkb<'a>,
+) -> Result<(), SedonaGeometryError> {
+    use geo_traits::GeometryType;
+
+    match wkb.as_type() {
+        GeometryType::Point(point) => {
+            if let Some(coord) = point.coord() {
+                write_wkb_point_from_coord(buf, coord)?;
+            }
+        }
+        GeometryType::LineString(line_string) => {
+            write_wkb_points_from_coords(buf, line_string.coords())?;
+        }
+        GeometryType::Polygon(polygon) => {
+            // Exterior ring (if any) first, then each interior ring.
+            let rings = polygon.exterior().into_iter().chain(polygon.interiors());
+            for ring in rings {
+                write_wkb_points_from_coords(buf, ring.coords())?;
+            }
+        }
+        GeometryType::MultiPoint(multi_point) => {
+            for point in multi_point.points() {
+                let Some(coord) = point.coord() else {
+                    continue;
+                };
+                write_wkb_point_from_coord(buf, coord)?;
+            }
+        }
+        GeometryType::MultiLineString(multi_line_string) => {
+            multi_line_string
+                .line_strings()
+                .try_for_each(|line_string| {
+                    write_wkb_points_from_coords(buf, line_string.coords())
+                })?;
+        }
+        GeometryType::MultiPolygon(multi_polygon) => {
+            for polygon in multi_polygon.polygons() {
+                let rings = polygon.exterior().into_iter().chain(polygon.interiors());
+                for ring in rings {
+                    write_wkb_points_from_coords(buf, ring.coords())?;
+                }
+            }
+        }
+        GeometryType::GeometryCollection(collection) => {
+            for member in collection.geometries() {
+                write_wkb_points_recursively(buf, member)?;
+            }
+        }
+        _ => {}
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use arrow_array::UInt64Array;
+    use datafusion_expr::ScalarUDF;
+    use rstest::rstest;
+    use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+    use sedona_testing::{
+        compare::assert_array_equal, create::create_array, testers::ScalarUdfTester,
+    };
+
+    use super::*;
+
+    /// Both UDFs expose the expected SQL name and carry documentation.
+    #[test]
+    fn udf_metadata() {
+        let expectations = [
+            (st_points_udf(), "st_points"),
+            (st_npoints_udf(), "st_npoints"),
+        ];
+
+        for (sedona_udf, expected_name) in expectations {
+            let udf: ScalarUDF = sedona_udf.into();
+            assert_eq!(udf.name(), expected_name);
+            assert!(udf.documentation().is_some());
+        }
+    }
+
+    /// Runs ST_Points and ST_NPoints over the same inputs for both WKB storage types.
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+        // Each case is (input WKT, expected ST_Points WKT, expected ST_NPoints).
+        let cases: &[(Option<&str>, Option<&str>, Option<u64>)] = &[
+            // 2d
+            (Some("POINT (1 2)"), Some("MULTIPOINT (1 2)"), Some(1)),
+            (
+                Some("LINESTRING (1 2, 3 4, 5 6)"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6)"),
+                Some(3),
+            ),
+            (
+                Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"),
+                Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)"),
+                Some(5),
+            ),
+            (
+                Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 3 1, 1 3, 1 1))"),
+                Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 1 1, 3 1, 1 3, 1 1)"),
+                Some(9),
+            ),
+            (
+                Some("MULTIPOINT (1 2, 3 4, 5 6, 7 8)"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6, 7 8)"),
+                Some(4),
+            ),
+            (
+                Some("MULTILINESTRING ((1 2, 3 4), EMPTY, (5 6, 7 8))"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6, 7 8)"),
+                Some(4),
+            ),
+            (
+                Some("MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)), EMPTY, ((0 0, 5 0, 0 5, 0 0), (1 1, 3 1, 1 3, 1 1)))"),
+                Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0, 0 0, 5 0, 0 5, 0 0, 1 1, 3 1, 1 3, 1 1)"),
+                Some(13),
+            ),
+            (
+                Some("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING EMPTY, LINESTRING (3 4, 5 6))"),
+                Some("MULTIPOINT (1 2, 3 4, 5 6)"),
+                Some(3),
+            ),
+            // 3d and 4d
+            (
+                Some("LINESTRING Z (1 2 3, 4 5 6, 7 8 9)"),
+                Some("MULTIPOINT Z (1 2 3, 4 5 6, 7 8 9)"),
+                Some(3),
+            ),
+            (
+                Some("LINESTRING M (1 2 3, 4 5 6, 7 8 9)"),
+                Some("MULTIPOINT M (1 2 3, 4 5 6, 7 8 9)"),
+                Some(3),
+            ),
+            (
+                Some("LINESTRING ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)"),
+                Some("MULTIPOINT ZM (1 2 3 4, 5 6 7 8, 9 0 1 2)"),
+                Some(3),
+            ),
+            // empty returns an empty multipoint and a count of 0
+            (Some("POINT EMPTY"), Some("MULTIPOINT EMPTY"), Some(0)),
+            (Some("LINESTRING EMPTY"), Some("MULTIPOINT EMPTY"), Some(0)),
+            (Some("POLYGON EMPTY"), Some("MULTIPOINT EMPTY"), Some(0)),
+            (Some("MULTIPOINT EMPTY"), Some("MULTIPOINT EMPTY"), Some(0)),
+            (Some("MULTILINESTRING EMPTY"), Some("MULTIPOINT EMPTY"), Some(0)),
+            (Some("MULTIPOLYGON EMPTY"), Some("MULTIPOINT EMPTY"), Some(0)),
+            (Some("GEOMETRYCOLLECTION EMPTY"), Some("MULTIPOINT EMPTY"), Some(0)),
+            // null
+            (None, None, None),
+        ];
+
+        let input_wkt: Vec<Option<&str>> = cases.iter().map(|case| case.0).collect();
+        let points_wkt: Vec<Option<&str>> = cases.iter().map(|case| case.1).collect();
+        let point_counts: Vec<Option<u64>> = cases.iter().map(|case| case.2).collect();
+
+        let input = create_array(&input_wkt[..], &sedona_type);
+
+        // ST_Points
+        let tester_points =
+            ScalarUdfTester::new(st_points_udf().into(), vec![sedona_type.clone()]);
+        let expected_points = create_array(&points_wkt[..], &WKB_GEOMETRY);
+        let result_points = tester_points.invoke_array(input.clone()).unwrap();
+        assert_array_equal(&result_points, &expected_points);
+
+        // ST_NPoints
+        let tester_npoints =
+            ScalarUdfTester::new(st_npoints_udf().into(), vec![sedona_type.clone()]);
+        let expected_npoints: Arc<dyn arrow_array::Array> =
+            Arc::new(UInt64Array::from(point_counts));
+        let result_npoints = tester_npoints.invoke_array(input).unwrap();
+        assert_array_equal(&result_npoints, &expected_npoints);
+    }
+}