This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new a6ae54e feat(sql): Implement ST_PointN (#255)
a6ae54e is described below
commit a6ae54e3da85969df9ea1a1d9aef67fa42001948
Author: Hiroaki Yutani <[email protected]>
AuthorDate: Thu Oct 30 23:03:35 2025 +0900
feat(sql): Implement ST_PointN (#255)
Co-authored-by: Dewey Dunnington <[email protected]>
---
benchmarks/test_functions.py | 18 ++
python/sedonadb/tests/functions/test_functions.py | 43 ++++
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 1 +
rust/sedona-functions/src/st_pointn.rs | 280 ++++++++++++++++++++++
rust/sedona-schema/src/matchers.rs | 25 ++
6 files changed, 368 insertions(+)
diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index 4c30cd4..270d631 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -240,6 +240,24 @@ class TestBenchFunctions(TestBenchBase):
benchmark(queries)
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "segments_large",
+ ],
+ )
+ def test_st_pointn(self, benchmark, eng, table):  # Benchmarks ST_PointN(geom1, 3) on each parametrized table, once per engine.
+ eng = self._get_eng(eng)
+
+ def queries():  # The callable handed to `benchmark`; runs the single query below.
+ eng.execute_and_collect(f"SELECT ST_PointN(geom1, 3) from {table}")
+
+ benchmark(queries)
+
@pytest.mark.parametrize(
"eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
diff --git a/python/sedonadb/tests/functions/test_functions.py
b/python/sedonadb/tests/functions/test_functions.py
index 5d2b250..a43c701 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1157,6 +1157,49 @@ def test_st_pointm(eng, x, y, m, expected):
)
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+ ("geometry", "n", "expected"),
+ [
+ ("LINESTRING (1 2, 3 4, 5 6)", 1, "POINT (1 2)"),
+ ("LINESTRING (1 2, 3 4, 5 6)", 2, "POINT (3 4)"),
+ ("LINESTRING (1 2, 3 4, 5 6)", -1, "POINT (5 6)"),
+ ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", 1, "POINT Z (1 2 3)"),
+ ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", 2, "POINT Z (3 4 5)"),
+ ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", -1, "POINT Z (5 6 7)"),
+ ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", 1, "POINT ZM (1 2 3 4)"),
+ ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", 2, "POINT ZM (3 4 5 6)"),
+ ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", -1, "POINT ZM (5 6 7 8)"),
+ # invalid n
+ ("LINESTRING (1 2, 3 4, 5 6)", 0, None),
+ ("LINESTRING (1 2, 3 4, 5 6)", 4, None),
+ ("LINESTRING (1 2, 3 4, 5 6)", -4, None),
+ # other geometries
+ ("POINT (1 2)", 1, None),
+ ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", 1, None),
+ ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", 1, None),
+ ("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", 1, None),
+ # empty geometries
+ ("POINT EMPTY", 1, None),
+ ("LINESTRING EMPTY", 1, None),
+ ("POLYGON EMPTY", 1, None),
+ ("MULTIPOINT EMPTY", 1, None),
+ ("MULTILINESTRING EMPTY", 1, None),
+ ("MULTIPOLYGON EMPTY", 1, None),
+ ("GEOMETRYCOLLECTION EMPTY", 1, None),
+ # null
+ (None, None, None),
+ (None, 1, None),
+ ],
+)
+def test_st_pointn(eng, geometry, n, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(
+ f"SELECT ST_PointN({geom_or_null(geometry)}, {val_or_null(n)})",
+ expected,
+ )
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geometry", "expected"),
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 42ee3dc..da35789 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -46,6 +46,7 @@ mod st_length;
mod st_makeline;
mod st_perimeter;
mod st_point;
+mod st_pointn;
mod st_pointzm;
mod st_setsrid;
mod st_srid;
diff --git a/rust/sedona-functions/src/register.rs
b/rust/sedona-functions/src/register.rs
index 30a9007..a5d0f69 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -84,6 +84,7 @@ pub fn default_function_set() -> FunctionSet {
crate::st_perimeter::st_perimeter_udf,
crate::st_point::st_geogpoint_udf,
crate::st_point::st_point_udf,
+ crate::st_pointn::st_pointn_udf,
crate::st_pointzm::st_pointz_udf,
crate::st_pointzm::st_pointm_udf,
crate::st_pointzm::st_pointzm_udf,
diff --git a/rust/sedona-functions/src/st_pointn.rs
b/rust/sedona-functions/src/st_pointn.rs
new file mode 100644
index 0000000..c11cc55
--- /dev/null
+++ b/rust/sedona-functions/src/st_pointn.rs
@@ -0,0 +1,280 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_array::builder::BinaryBuilder;
+use arrow_schema::DataType;
+use datafusion_common::{error::Result, ScalarValue};
+use datafusion_expr::{
+ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
+};
+use geo_traits::{CoordTrait, GeometryTrait, LineStringTrait};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::{
+ error::SedonaGeometryError,
+ wkb_factory::{write_wkb_coord_trait, write_wkb_point_header,
WKB_MIN_PROBABLE_BYTES},
+};
+use sedona_schema::{
+ datatypes::{SedonaType, WKB_GEOMETRY},
+ matchers::ArgMatcher,
+};
+use std::{io::Write, sync::Arc};
+
+use crate::executor::WkbExecutor;
+
+/// Builds the `st_pointn` scalar UDF (SQL name: ST_PointN).
+///
+/// Wraps the native `STPointN` kernel, which returns the nth point of a LINESTRING geometry (1-based; negative n counts from the end).
+pub fn st_pointn_udf() -> SedonaScalarUDF {
+ SedonaScalarUDF::new(
+ "st_pointn",
+ vec![Arc::new(STPointN)],
+ Volatility::Immutable,
+ Some(st_pointn_doc()),
+ )
+}
+
+fn st_pointn_doc() -> Documentation { // Builds the user-facing documentation entry that st_pointn_udf() attaches to the UDF.
+ Documentation::builder(
+ DOC_SECTION_OTHER,
+ "Returns the nth point of a geometry. Returns NULL if the geometry is
empty or not a LINESTRING. Negative values are counted backwards from the end.",
+ "ST_PointN (geom: Geometry, n: integer)",
+ )
+ .with_argument("geom", "geometry: Input geometry")
+ .with_argument("n", "n: Index") // The kernel treats n as 1-based; 0 and out-of-range values yield NULL.
+ .with_sql_example("SELECT ST_PointN(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4
5)'), 2)")
+ .build()
+}
+
+#[derive(Debug)]
+struct STPointN; // Stateless kernel implementing ST_PointN(geometry, integer).
+
+impl SedonaScalarKernel for STPointN {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_geometry(), ArgMatcher::is_integer()],
+ WKB_GEOMETRY, // Output type passed to the matcher: WKB geometry.
+ );
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = WkbExecutor::new(arg_types, args);
+ let mut builder = BinaryBuilder::with_capacity(
+ executor.num_iterations(),
+ WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+ );
+
+ let maybe_n: Option<i64> = match &args[1].cast_to(&DataType::Int64,
None)? {
+ ColumnarValue::Scalar(ScalarValue::Int64(maybe_n)) => *maybe_n,
+ _ => None, // Only a scalar n is supported: a non-scalar (array) n becomes None, so every row yields NULL. NOTE(review): this is silent; consider an error or per-row support.
+ };
+
+ executor.execute_wkb_void(|maybe_wkb| {
+ let n = match maybe_n {
+ // n is 1-based, so 0 is not a valid index; a NULL n is also rejected here
+ Some(n) if n != 0 => n,
+ _ => {
+ builder.append_null();
+ return Ok(());
+ }
+ };
+
+ if let Some(wkb) = maybe_wkb {
+ if let geo_traits::GeometryType::LineString(line_string) =
wkb.as_type() {
+ let num_coords = line_string.num_coords() as i64;
+
+ // |n| larger than the coordinate count is out of range: return NULL (this also covers an empty LINESTRING)
+ if n.abs() > num_coords { // NOTE(review): n.abs() overflows for i64::MIN (panics in debug builds); unsigned_abs() would avoid it.
+ builder.append_null();
+ return Ok(());
+ }
+
+ // Convert to a 0-based index: positive n counts from the start, negative n from the end (-1 is the last point)
+ let n = if n > 0 { n - 1 } else { num_coords + n } as
usize;
+
+ if let Some(coord) = line_string.coord(n) {
+ if write_wkb_point_from_coord(&mut builder,
coord).is_err() {
+ return sedona_internal_err!("Failed to write WKB
point");
+ };
+ builder.append_value([]); // Presumably finishes the value whose bytes were just written through the builder's Write impl; confirm against the arrow BinaryBuilder docs.
+ return Ok(());
+ }
+ }
+ }
+
+ builder.append_null(); // NULL input, a non-LINESTRING geometry, or no coordinate at the computed index.
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+fn write_wkb_point_from_coord( // Writes one coordinate to `buf` as a WKB point: point header first, then the coordinate values.
+ buf: &mut impl Write,
+ coord: impl CoordTrait<T = f64>,
+) -> Result<(), SedonaGeometryError> {
+ write_wkb_point_header(buf, coord.dim())?; // The header is built from the coordinate's own dimension.
+ write_wkb_coord_trait(buf, &coord)
+}
+
+#[cfg(test)]
+mod tests { // Unit tests for st_pointn: UDF metadata, plus array-of-geometries invocations with a scalar n.
+ use datafusion_expr::ScalarUDF;
+ use rstest::rstest;
+ use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+ use sedona_testing::{
+ compare::assert_array_equal, create::create_array,
testers::ScalarUdfTester,
+ };
+
+ use super::*;
+
+ #[test]
+ fn udf_metadata() {
+ let st_pointn_udf: ScalarUDF = st_pointn_udf().into();
+ assert_eq!(st_pointn_udf.name(), "st_pointn");
+ assert!(st_pointn_udf.documentation().is_some());
+ }
+
+ #[rstest]
+ fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType)
{
+ let tester_pointn = ScalarUdfTester::new(
+ st_pointn_udf().into(),
+ vec![sedona_type.clone(), SedonaType::Arrow(DataType::Int64)],
+ );
+
+ // LINESTRING inputs in every dimension (XY, Z, M, ZM), four points each
+ let input_linestrings = create_array(
+ &[
+ Some("LINESTRING (11 12, 21 22, 31 32, 41 42)"),
+ Some("LINESTRING Z (11 12 13, 21 22 23, 31 32 33, 41 42 43)"),
+ Some("LINESTRING M (11 12 13, 21 22 23, 31 32 33, 41 42 43)"),
+ Some("LINESTRING ZM (11 12 13 14, 21 22 23 24, 31 32 33 34, 41
42 43 44)"),
+ ],
+ &sedona_type,
+ );
+
+ // n = 1: first point of each linestring, dimension preserved
+ let expected1 = create_array(
+ &[
+ Some("POINT (11 12)"),
+ Some("POINT Z (11 12 13)"),
+ Some("POINT M (11 12 13)"),
+ Some("POINT ZM (11 12 13 14)"),
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ let result1 = tester_pointn
+ .invoke_array_scalar(input_linestrings.clone(),
ScalarValue::Int64(Some(1)))
+ .unwrap();
+ assert_array_equal(&result1, &expected1);
+
+ // n = 2: second point of each linestring
+ let expected2 = create_array(
+ &[
+ Some("POINT (21 22)"),
+ Some("POINT Z (21 22 23)"),
+ Some("POINT M (21 22 23)"),
+ Some("POINT ZM (21 22 23 24)"),
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ let result2 = tester_pointn
+ .invoke_array_scalar(input_linestrings.clone(),
ScalarValue::Int64(Some(2)))
+ .unwrap();
+ assert_array_equal(&result2, &expected2);
+
+ // n = -2: second-to-last point (negative n counts from the end)
+ let expected2_tail = create_array(
+ &[
+ Some("POINT (31 32)"),
+ Some("POINT Z (31 32 33)"),
+ Some("POINT M (31 32 33)"),
+ Some("POINT ZM (31 32 33 34)"),
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ let result2_tail = tester_pointn
+ .invoke_array_scalar(input_linestrings.clone(),
ScalarValue::Int64(Some(-2)))
+ .unwrap();
+ assert_array_equal(&result2_tail, &expected2_tail);
+
+ // n = 0, |n| greater than the point count, and NULL n must all produce NULL
+ let expected_null = create_array(&[None, None, None, None],
&WKB_GEOMETRY);
+
+ let result_zero = tester_pointn
+ .invoke_array_scalar(input_linestrings.clone(),
ScalarValue::Int64(Some(0)))
+ .unwrap();
+ assert_array_equal(&result_zero, &expected_null);
+
+ let result_too_big = tester_pointn
+ .invoke_array_scalar(input_linestrings.clone(),
ScalarValue::Int64(Some(5)))
+ .unwrap();
+ assert_array_equal(&result_too_big, &expected_null);
+
+ let result_too_big_neg = tester_pointn
+ .invoke_array_scalar(input_linestrings.clone(),
ScalarValue::Int64(Some(-5)))
+ .unwrap();
+ assert_array_equal(&result_too_big_neg, &expected_null);
+
+ let result_null = tester_pointn
+ .invoke_array_scalar(input_linestrings.clone(),
ScalarValue::Int64(None))
+ .unwrap();
+ assert_array_equal(&result_null, &expected_null);
+
+ // inputs that are not a non-empty LINESTRING: other geometry types, EMPTY geometries, and NULL
+ let input_others = create_array(
+ &[
+ Some("POINT (1 2)"),
+ Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"),
+ Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)"),
+ Some("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))"),
+ Some("MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)))"),
+ Some("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5
6))"),
+ Some("POINT EMPTY"),
+ Some("LINESTRING EMPTY"),
+ Some("POLYGON EMPTY"),
+ Some("MULTIPOINT EMPTY"),
+ Some("MULTILINESTRING EMPTY"),
+ Some("MULTIPOLYGON EMPTY"),
+ Some("GEOMETRYCOLLECTION EMPTY"),
+ None,
+ ],
+ &sedona_type,
+ );
+
+ // every one of the 14 rows above is expected to be NULL
+ let expected_others = create_array(
+ &[
+ None, None, None, None, None, None, None, None, None, None,
None, None, None, None,
+ ],
+ &WKB_GEOMETRY,
+ );
+ let result_others = tester_pointn
+ .invoke_array_scalar(input_others.clone(),
ScalarValue::Int64(Some(2)))
+ .unwrap();
+ assert_array_equal(&result_others, &expected_others);
+ }
+}
diff --git a/rust/sedona-schema/src/matchers.rs
b/rust/sedona-schema/src/matchers.rs
index 2992b05..4935f43 100644
--- a/rust/sedona-schema/src/matchers.rs
+++ b/rust/sedona-schema/src/matchers.rs
@@ -183,6 +183,11 @@ impl ArgMatcher {
Arc::new(IsNumeric {})
}
+ /// Matches any Arrow integer argument, signed or unsigned (e.g. Int32, UInt32)
+ pub fn is_integer() -> Arc<dyn TypeMatcher + Send + Sync> {
+ Arc::new(IsInteger {})
+ }
+
/// Matches any string argument
pub fn is_string() -> Arc<dyn TypeMatcher + Send + Sync> {
Arc::new(IsString {})
@@ -356,6 +361,22 @@ impl TypeMatcher for IsNumeric {
}
}
+#[derive(Debug)]
+struct IsInteger {} // Matcher behind ArgMatcher::is_integer(): accepts Arrow integer types only.
+
+impl TypeMatcher for IsInteger {
+ fn match_type(&self, arg: &SedonaType) -> bool {
+ match arg {
+ SedonaType::Arrow(data_type) => data_type.is_integer(),
+ _ => false, // Any non-Arrow SedonaType never matches.
+ }
+ }
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(SedonaType::Arrow(DataType::Int64)) // Presumably the type assumed when the argument is an untyped NULL; confirm against the TypeMatcher trait docs.
+ }
+}
+
#[derive(Debug)]
struct IsString {}
@@ -455,6 +476,10 @@ mod tests {
Some(SedonaType::Arrow(DataType::Float64))
);
+
assert!(ArgMatcher::is_integer().match_type(&SedonaType::Arrow(DataType::UInt32)));
+
assert!(ArgMatcher::is_integer().match_type(&SedonaType::Arrow(DataType::Int32)));
+
assert!(!ArgMatcher::is_integer().match_type(&SedonaType::Arrow(DataType::Float64)));
+
assert!(ArgMatcher::is_string().match_type(&SedonaType::Arrow(DataType::Utf8)));
assert!(ArgMatcher::is_string().match_type(&SedonaType::Arrow(DataType::Utf8View)));
assert!(ArgMatcher::is_string().match_type(&SedonaType::Arrow(DataType::LargeUtf8)));