This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 43a6e9d6 feat(rust/sedona-functions): Implement ST_GeometryN (#317)
43a6e9d6 is described below
commit 43a6e9d645118f25f6eb60b49e6a9ed1feab9036
Author: Abeeujah <[email protected]>
AuthorDate: Wed Nov 19 16:20:26 2025 +0100
feat(rust/sedona-functions): Implement ST_GeometryN (#317)
---
python/sedonadb/tests/functions/test_functions.py | 82 +++++++
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 1 +
rust/sedona-functions/src/st_geometryn.rs | 280 ++++++++++++++++++++++
4 files changed, 364 insertions(+)
diff --git a/python/sedonadb/tests/functions/test_functions.py
b/python/sedonadb/tests/functions/test_functions.py
index 07f9d47a..b700c37a 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1064,6 +1064,88 @@ def test_st_geomfromwkb(eng, geom):
eng.assert_query_result(f"SELECT ST_GeomFromWKB({wkb})", expected)
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+ ("geom", "index", "expected"),
+ [
+ # 1. POINT
+ ("POINT(1 1)", 1, "POINT (1 1)"), # n=1 (Valid)
+ ("POINT(1 1)", 2, None), # n=2 (OOB)
+ ("POINT(1 1)", 99, None), # n=99 (Large OOB)
+ # 2. LINESTRING
+ ("LINESTRING(2 2, 3 3, 4 4)", 1, "LINESTRING (2 2, 3 3, 4 4)"), # n=1
(Valid)
+ (None, 2, None), # Null input (n=2)
+ ("LINESTRING(2 2, 3 3, 4 4)", 0, None), # n=0 (OOB)
+ # 3. POLYGON
+ (
+ "POLYGON((0 0, 1 0, 1 1, 0 0))",
+ 1,
+ "POLYGON ((0 0, 1 0, 1 1, 0 0))",
+ ), # n=1 (Valid)
+ ("POLYGON((0 0, 1 0, 1 1, 0 0))", 3, None), # n=3 (OOB)
+ # 4. MULTIPOINT
+ ("MULTIPOINT((1 1), (2 2), (3 3))", 2, "POINT (2 2)"), # n=2 (Valid)
+ ("MULTIPOINT((1 1), (2 2), (3 3))", 3, "POINT (3 3)"), # n=3 (Valid)
+ (None, 0, None), # Null Input (n=0)
+ ("MULTIPOINT((1 1), (2 2), (3 3))", 1, "POINT (1 1)"), # n=1 (Valid)
+ ("MULTIPOINT((1 1), (2 2), (3 3))", 0, None), # n=0 (OOB)
+ # 5. MULTILINESTRING
+ (
+ "MULTILINESTRING((1 1, 2 2), (3 3, 4 4))",
+ 1,
+ "LINESTRING (1 1, 2 2)",
+ ), # n=1 (Valid)
+ ("MULTILINESTRING((1 1, 2 2), (3 3, 4 4))", 3, None), # n=3 (OOB)
+ (
+ "MULTILINESTRING((1 1, 2 2), (3 3, 4 4))",
+ 2,
+ "LINESTRING (3 3, 4 4)",
+ ), # n=2 (Valid)
+ # 6. MULTIPOLYGON
+ (
+ "MULTIPOLYGON(((0 0, 1 1, 0 1, 0 0)), ((5 5, 6 6, 5 6, 5 5)))",
+ 2,
+ "POLYGON ((5 5, 6 6, 5 6, 5 5))",
+ ), # n=2 (Valid)
+ ("MULTIPOLYGON(((0 0, 1 1, 0 1, 0 0)))", 2, None), # n=2 (OOB)
+ (
+ "MULTIPOLYGON(((0 0, 1 1, 0 1, 0 0)), ((5 5, 6 6, 5 6, 5 5)))",
+ 1,
+ "POLYGON ((0 0, 1 1, 0 1, 0 0))",
+ ), # n=1 (Valid)
+ ("MULTIPOLYGON EMPTY", 1, None), # Empty Multi (n=1)
+ # 7. GEOMETRYCOLLECTION
+ (
+ "GEOMETRYCOLLECTION(POINT(10 10), LINESTRING(20 20, 30 30),
POLYGON((1 1, 2 2, 1 2, 1 1)))",
+ 1,
+ "POINT (10 10)",
+ ), # n=1 (Point)
+ (
+ "GEOMETRYCOLLECTION(POINT(10 10), LINESTRING(20 20, 30 30),
POLYGON((1 1, 2 2, 1 2, 1 1)))",
+ 2,
+ "LINESTRING (20 20, 30 30)",
+ ), # n=2 (LineString)
+ ("GEOMETRYCOLLECTION(POINT(10 10))", 2, None), # n=2 (OOB)
+ (
+ "GEOMETRYCOLLECTION(POINT(1 1), GEOMETRYCOLLECTION(LINESTRING(2 2,
3 3)))",
+ 1,
+ "POINT (1 1)",
+ ), # n=1 (Nested: Point)
+ (
+ "GEOMETRYCOLLECTION(POINT(1 1), GEOMETRYCOLLECTION(LINESTRING(2 2,
3 3)))",
+ 2,
+ "GEOMETRYCOLLECTION (LINESTRING (2 2, 3 3))",
+ ), # n=2 (Nested: GC)
+ ("GEOMETRYCOLLECTION(POINT(1 1))", 0, None), # n=0 (OOB)
+ ],
+)
+def test_st_geometryn(eng, geom, index, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(
+ f"SELECT ST_GeometryN({geom_or_null(geom)}, {val_or_null(index)})",
expected
+ )
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 25417e06..7a7a9585 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -36,6 +36,7 @@ mod st_dwithin;
pub mod st_envelope;
pub mod st_envelope_aggr;
pub mod st_flipcoordinates;
+mod st_geometryn;
mod st_geometrytype;
mod st_geomfromwkb;
mod st_geomfromwkt;
diff --git a/rust/sedona-functions/src/register.rs
b/rust/sedona-functions/src/register.rs
index 9a06f258..68f4ed54 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -72,6 +72,7 @@ pub fn default_function_set() -> FunctionSet {
crate::st_dwithin::st_dwithin_udf,
crate::st_envelope::st_envelope_udf,
crate::st_flipcoordinates::st_flipcoordinates_udf,
+ crate::st_geometryn::st_geometryn_udf,
crate::st_geometrytype::st_geometry_type_udf,
crate::st_geomfromwkb::st_geogfromwkb_udf,
crate::st_geomfromwkb::st_geomfromwkb_udf,
diff --git a/rust/sedona-functions/src/st_geometryn.rs
b/rust/sedona-functions/src/st_geometryn.rs
new file mode 100644
index 00000000..74ed3953
--- /dev/null
+++ b/rust/sedona-functions/src/st_geometryn.rs
@@ -0,0 +1,280 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use crate::executor::WkbExecutor;
+use arrow_array::builder::BinaryBuilder;
+use datafusion_common::{cast::as_int64_array, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::Documentation;
+use datafusion_expr::Volatility;
+use geo_traits::{
+ GeometryCollectionTrait, GeometryTrait, MultiLineStringTrait,
MultiPointTrait,
+ MultiPolygonTrait,
+};
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{
+ datatypes::{SedonaType, WKB_GEOMETRY},
+ matchers::ArgMatcher,
+};
+use wkb::reader::Wkb;
+
+/// ST_GeometryN() scalar UDF
+///
+/// Native implementation to get the nth Geometry in a Collection
+pub fn st_geometryn_udf() -> SedonaScalarUDF {
+ SedonaScalarUDF::new(
+ "st_geometryn",
+ vec![Arc::new(STGeometryN)],
+ Volatility::Immutable,
+ Some(st_geometryn_doc()),
+ )
+}
+
+fn st_geometryn_doc() -> Documentation {
+ Documentation::builder(
+ DOC_SECTION_OTHER,
+ "Return the 1-based Nth element geometry of an input geometry which is
a GEOMETRYCOLLECTION, MULTIPOINT, MULTILINESTRING, MULTICURVE, MULTI)POLYGON,
or POLYHEDRALSURFACE. Otherwise, returns NULL.",
+ "ST_GeometryN (geom: Geometry, n: integer)",
+ )
+ .with_argument("geom", "geometry: Input geometry")
+ .with_argument("n", "n: Index")
+ .with_sql_example("SELECT ST_GeometryN('GEOMETRYCOLLECTION(POINT(10 10),
LINESTRING(20 20, 30 30), POLYGON((1 1, 2 2, 1 2, 1 1)))', 1)")
+ .build()
+}
+
+#[derive(Debug)]
+struct STGeometryN;
+
+impl SedonaScalarKernel for STGeometryN {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_geometry(), ArgMatcher::is_integer()],
+ WKB_GEOMETRY,
+ );
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = WkbExecutor::new(arg_types, args);
+ let mut builder = BinaryBuilder::with_capacity(
+ executor.num_iterations(),
+ WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+ );
+
+ let integer_value = args[1]
+ .cast_to(&arrow_schema::DataType::Int64, None)?
+ .to_array(executor.num_iterations())?;
+ let index_array = as_int64_array(&integer_value)?;
+ let mut index_iter = index_array.iter();
+
+ executor.execute_wkb_void(|maybe_wkb| {
+ match (maybe_wkb, index_iter.next().unwrap()) {
+ (Some(wkb), Some(index)) => {
+ if invoke_scalar(&wkb, (index - 1) as usize, &mut
builder)? {
+ builder.append_value([]);
+ } else {
+ // Unsupported Geometry Type, Invalid index encountered
+ builder.append_null();
+ }
+ }
+ _ => builder.append_null(),
+ }
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+fn invoke_scalar(geom: &Wkb, index: usize, writer: &mut impl std::io::Write)
-> Result<bool> {
+ let geometry = match geom.as_type() {
+ geo_traits::GeometryType::GeometryCollection(collection) => {
+ collection.geometry(index).map(|item| item.buf())
+ }
+ geo_traits::GeometryType::MultiLineString(mul_ls) => {
+ mul_ls.line_string(index).map(|ls| ls.buf())
+ }
+ geo_traits::GeometryType::MultiPolygon(mul_pgn) => {
+ mul_pgn.polygon(index).map(|pgn| pgn.buf())
+ }
+ geo_traits::GeometryType::MultiPoint(mul_pt) =>
mul_pt.point(index).map(|pt| pt.buf()),
+ // PostGIS returns `Self` for Simple Geometries
+ _ if index == 0 => Some(geom.buf()),
+ _ => None,
+ };
+
+ if let Some(buf) = geometry {
+ writer.write_all(buf)?;
+ Ok(true)
+ } else {
+ Ok(false)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use rstest::rstest;
+ use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+ use sedona_testing::testers::ScalarUdfTester;
+
+ use super::*;
+
+ #[rstest]
+ fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType)
{
+ use sedona_testing::{compare::assert_array_equal,
create::create_array};
+
+ let tester = ScalarUdfTester::new(
+ st_geometryn_udf().into(),
+ vec![
+ sedona_type.clone(),
+ SedonaType::Arrow(arrow_schema::DataType::Int64),
+ ],
+ );
+ tester.assert_return_type(WKB_GEOMETRY);
+
+ let input_wkt = create_array(
+ &[
+ // 1. POINT
+ Some("POINT(1 1)"), // n=1 (Valid)
+ Some("POINT(1 1)"), // n=2 (OOB)
+ Some("POINT(1 1)"), // n=99 (Large OOB)
+ // 2. LINESTRING
+ Some("LINESTRING(2 2, 3 3, 4 4)"), // n=1 (Valid)
+ None, // Null input (n=2)
+ Some("LINESTRING(2 2, 3 3, 4 4)"), // n=0 (OOB)
+ // 3. POLYGON
+ Some("POLYGON((0 0, 1 0, 1 1, 0 0))"), // n=1 (Valid)
+ Some("POLYGON((0 0, 1 0, 1 1, 0 0))"), // n=3 (OOB)
+ // 4. MULTIPOINT
+ Some("MULTIPOINT((1 1), (2 2), (3 3))"), // n=2 (Valid) -
Original
+ Some("MULTIPOINT((1 1), (2 2), (3 3))"), // n=3 (Valid) -
Original
+ None, // Null Input (n=0)
- Original
+ Some("MULTIPOINT((1 1), (2 2), (3 3))"), // n=1 (Valid)
+ Some("MULTIPOINT((1 1), (2 2), (3 3))"), // n=0 (OOB)
+ // 5. MULTILINESTRING
+ Some("MULTILINESTRING((1 1, 2 2), (3 3, 4 4))"), // n=1
(Valid) - Original
+ Some("MULTILINESTRING((1 1, 2 2), (3 3, 4 4))"), // n=3 (OOB)
- Original
+ Some("MULTILINESTRING((1 1, 2 2), (3 3, 4 4))"), // n=2
(Valid)
+ // 6. MULTIPOLYGON
+ Some("MULTIPOLYGON(((0 0, 1 1, 0 1, 0 0)), ((5 5, 6 6, 5 6, 5
5)))"), // n=2 (Valid) - Original
+ Some("MULTIPOLYGON(((0 0, 1 1, 0 1, 0 0)))"),
// n=2 (OOB) - Original
+ Some("MULTIPOLYGON(((0 0, 1 1, 0 1, 0 0)), ((5 5, 6 6, 5 6, 5
5)))"), // n=1 (Valid)
+ Some("MULTIPOLYGON EMPTY"),
// Empty Multi (n=1)
+ // 7. GEOMETRYCOLLECTION (7 cases)
+ Some("GEOMETRYCOLLECTION(POINT(10 10), LINESTRING(20 20, 30
30), POLYGON((1 1, 2 2, 1 2, 1 1)))"), // n=1 (Point) - Original
+ Some("GEOMETRYCOLLECTION(POINT(10 10), LINESTRING(20 20, 30
30), POLYGON((1 1, 2 2, 1 2, 1 1)))"), // n=2 (LineString) - Original
+ Some("GEOMETRYCOLLECTION(POINT(10 10))"), // n=2 (OOB) -
Original
+ Some("GEOMETRYCOLLECTION(POINT(1 1),
GEOMETRYCOLLECTION(LINESTRING(2 2, 3 3)))"), // n=1 (Nested: Point)
+ Some("GEOMETRYCOLLECTION(POINT(1 1),
GEOMETRYCOLLECTION(LINESTRING(2 2, 3 3)))"), // n=2 (Nested: GC)
+ Some("GEOMETRYCOLLECTION(POINT(1 1))"), // n=0 (OOB)
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ let integers = arrow_array::create_array!(
+ Int64,
+ [
+ // 1. POINT
+ Some(1), // n=1
+ Some(2), // n=2 (OOB)
+ Some(99), // n=99 (OOB)
+ // 2. LINESTRING
+ Some(1), // n=1
+ Some(2), // Null input
+ Some(0), // n=0 (OOB)
+ // 3. POLYGON
+ Some(1), // n=1
+ Some(3), // n=3 (OOB)
+ // 4. MULTIPOINT
+ Some(2), // n=2
+ Some(3), // n=3
+ Some(0), // n=0 (Null input)
+ Some(1), // n=1
+ Some(0), // n=0 (OOB)
+ // 5. MULTILINESTRING
+ Some(1), // n=1
+ Some(3), // n=3 (OOB)
+ Some(2), // n=2
+ // 6. MULTIPOLYGON
+ Some(2), // n=2
+ Some(2), // n=2 (OOB)
+ Some(1), // n=1
+ Some(1), // n=1 (Empty)
+ // 7. GEOMETRYCOLLECTION
+ Some(1), // n=1 (Point)
+ Some(2), // n=2 (LineString)
+ Some(2), // n=2 (OOB)
+ Some(1), // n=1 (Nested: Point)
+ Some(2), // n=2 (Nested: GC)
+ Some(0) // n=0 (OOB)
+ ]
+ );
+
+ let expected = create_array(
+ &[
+ // 1. POINT
+ Some("POINT(1 1)"),
+ None,
+ None,
+ // 2. LINESTRING
+ Some("LINESTRING(2 2, 3 3, 4 4)"),
+ None,
+ None,
+ // 3. POLYGON
+ Some("POLYGON((0 0, 1 0, 1 1, 0 0))"),
+ None,
+ // 4. MULTIPOINT
+ Some("POINT(2 2)"),
+ Some("POINT(3 3)"),
+ None,
+ Some("POINT(1 1)"),
+ None,
+ // 5. MULTILINESTRING
+ Some("LINESTRING(1 1, 2 2)"),
+ None,
+ Some("LINESTRING(3 3, 4 4)"),
+ // 6. MULTIPOLYGON
+ Some("POLYGON((5 5, 6 6, 5 6, 5 5))"),
+ None,
+ Some("POLYGON((0 0, 1 1, 0 1, 0 0))"),
+ None,
+ // 7. GEOMETRYCOLLECTION
+ Some("POINT(10 10)"),
+ Some("LINESTRING(20 20, 30 30)"),
+ None,
+ Some("POINT(1 1)"),
+ Some("GEOMETRYCOLLECTION(LINESTRING(2 2, 3 3))"), // The WKB
of the GC component itself
+ None,
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ assert_array_equal(
+ &tester.invoke_arrays(vec![input_wkt, integers]).unwrap(),
+ &expected,
+ );
+ }
+}