This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 958b447 feat(c/geoarrow-geos): Implement ST_IsSimple (#239)
958b447 is described below
commit 958b447b8292d15d09fe62188364217ded681af2
Author: Abeeujah <[email protected]>
AuthorDate: Fri Oct 24 18:16:15 2025 +0100
feat(c/geoarrow-geos): Implement ST_IsSimple (#239)
---
c/sedona-geos/benches/geos-functions.rs | 3 +
c/sedona-geos/src/lib.rs | 1 +
c/sedona-geos/src/register.rs | 3 +-
c/sedona-geos/src/st_issimple.rs | 148 ++++++++++++++++++++++
python/sedonadb/tests/functions/test_functions.py | 117 +++++++++++++++++
5 files changed, 271 insertions(+), 1 deletion(-)
diff --git a/c/sedona-geos/benches/geos-functions.rs b/c/sedona-geos/benches/geos-functions.rs
index 9bb70cd..56f3bb5 100644
--- a/c/sedona-geos/benches/geos-functions.rs
+++ b/c/sedona-geos/benches/geos-functions.rs
@@ -213,6 +213,9 @@ fn criterion_benchmark(c: &mut Criterion) {
ArrayScalar(Polygon(10), Polygon(500)),
);
+ benchmark::scalar(c, &f, "geos", "st_issimple", Polygon(10));
+ benchmark::scalar(c, &f, "geos", "st_issimple", Polygon(500));
+
benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(10));
benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(500));
diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index 22185c0..aed7b25 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -26,6 +26,7 @@ mod st_centroid;
mod st_convexhull;
mod st_dwithin;
mod st_isring;
+mod st_issimple;
mod st_isvalid;
mod st_isvalidreason;
mod st_length;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index 7229f18..6b69f0c 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -19,7 +19,7 @@ use sedona_expr::scalar_udf::ScalarKernelRef;
use crate::{
distance::st_distance_impl, st_area::st_area_impl, st_buffer::st_buffer_impl,
st_centroid::st_centroid_impl, st_convexhull::st_convex_hull_impl, st_dwithin::st_dwithin_impl,
- st_isring::st_is_ring_impl, st_isvalid::st_is_valid_impl,
+ st_isring::st_is_ring_impl, st_issimple::st_is_simple_impl, st_isvalid::st_is_valid_impl,
st_isvalidreason::st_is_valid_reason_impl, st_length::st_length_impl,
st_perimeter::st_perimeter_impl, st_unaryunion::st_unary_union_impl,
};
@@ -51,6 +51,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
("st_intersection", st_intersection_impl()),
("st_intersects", st_intersects_impl()),
("st_isring", st_is_ring_impl()),
+ ("st_issimple", st_is_simple_impl()),
("st_isvalid", st_is_valid_impl()),
("st_isvalidreason", st_is_valid_reason_impl()),
("st_length", st_length_impl()),
diff --git a/c/sedona-geos/src/st_issimple.rs b/c/sedona-geos/src/st_issimple.rs
new file mode 100644
index 0000000..266a5af
--- /dev/null
+++ b/c/sedona-geos/src/st_issimple.rs
@@ -0,0 +1,148 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::BooleanBuilder;
+use arrow_schema::DataType;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+use geos::Geom;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::executor::GeosExecutor;
+
+/// ST_IsSimple() implementation using the geos crate
+pub fn st_is_simple_impl() -> ScalarKernelRef {
+ Arc::new(STIsSimple {})
+}
+
+#[derive(Debug)]
+struct STIsSimple {}
+
+impl SedonaScalarKernel for STIsSimple {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_geometry()],
+ SedonaType::Arrow(DataType::Boolean),
+ );
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = GeosExecutor::new(arg_types, args);
+ let mut builder = BooleanBuilder::with_capacity(executor.num_iterations());
+ executor.execute_wkb_void(|maybe_wkb| {
+ match maybe_wkb {
+ Some(wkb) => {
+ builder.append_value(invoke_scalar(&wkb)?);
+ }
+ _ => builder.append_null(),
+ }
+
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+fn invoke_scalar(geos_geom: &geos::Geometry) -> Result<bool> {
+ geos_geom.is_simple().map_err(|e| {
+ DataFusionError::Execution(format!("Failed to check if geometry is simple: {e}"))
+ })
+}
+
+#[cfg(test)]
+mod tests {
+
+ use arrow_array::{ArrayRef, BooleanArray};
+ use datafusion_common::ScalarValue;
+ use rstest::rstest;
+ use sedona_expr::scalar_udf::SedonaScalarUDF;
+ use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+ use sedona_testing::testers::ScalarUdfTester;
+
+ use super::*;
+
+ #[rstest]
+ fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+ let udf = SedonaScalarUDF::from_kernel("st_issimple", st_is_simple_impl());
+ let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+ tester.assert_return_type(DataType::Boolean);
+
+ // Simple Polygon
+ let result = tester
+ .invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))")
+ .unwrap();
+ tester.assert_scalar_result_equals(result, true);
+
+ // Complex Polygon (self-intersecting)
+ let result = tester
+ .invoke_scalar("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))")
+ .unwrap();
+ tester.assert_scalar_result_equals(result, false);
+
+ let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
+ assert!(result.is_null());
+
+ let input_wkt = vec![
+ None, // Null
+ Some("POINT (1 1)"), // Points are always simple (T)
+ Some("MULTIPOINT (1 1, 2 2, 3 3)"), // Points are always simple (T)
+ Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"), // Simple Polygon (T)
+ Some("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))"), // Complex Polygon (F)
+ Some("POLYGON((1 2, 3 4, 5 6, 1 2))"), // POSTGIS Reference (F)
+ Some("LINESTRING (0 0, 1 1)"), // Simple LineString (T)
+ Some("LINESTRING (0 0, 1 1, 0 1, 1 0)"), // Complex LineString (F)
+ Some("LINESTRING(1 1,2 2,2 3.5,1 3,1 2,2 1)"), // POSTGIS Reference (F)
+ Some("MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))"), // Simple MultiLineString (T)
+ Some("MULTILINESTRING ((0 0, 2 2), (0 2, 2 0))"), // Complex MultiLineString (F)
+ Some("POINT (10 10)"), // Point (T)
+ Some("GEOMETRYCOLLECTION EMPTY"), // Empty (T)
+ Some("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))"), // Complex Polygon (F)
+ Some("MULTIPOLYGON (((0 0, 0 1, 1 1, 1 0, 0 0)), ((2 2, 2 3, 3 3, 3 2, 2 2)))"), // Holes are fine (T)
+ Some("POLYGON ((0 0, 3 0, 3 3, 0 3, 0 0), (1 1, 0 2, 2 2, 1 1))"), // Holes are fine (T)
+ ];
+
+ let expected: ArrayRef = Arc::new(BooleanArray::from(vec![
+ None,
+ Some(true),
+ Some(true),
+ Some(true),
+ Some(false),
+ Some(false),
+ Some(true),
+ Some(false),
+ Some(false),
+ Some(true),
+ Some(false),
+ Some(true),
+ Some(true),
+ Some(false),
+ Some(true),
+ Some(true),
+ ]));
+ assert_eq!(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected);
+ }
+}
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 0bbd097..041a0a8 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -211,6 +211,123 @@ def test_st_centroid(eng, geom, expected):
eng.assert_query_result(f"SELECT ST_Centroid({geom_or_null(geom)})", expected)
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+ ("geom", "expected"),
+ [
+ (None, None),
+ # POINTS - Always simple (single point has no self-intersections)
+ ("POINT (1 1)", True),
+ ("POINT EMPTY", True), # Empty geometry is simple
+ # MULTIPOINTS
+ ("MULTIPOINT (1 1, 2 2, 3 3)", True), # Distinct points
+ ("MULTIPOINT (1 1, 2 2, 1 1)", False), # Duplicate points make it non-simple
+ ("MULTIPOINT EMPTY", True), # Empty multipoint
+ ("MULTIPOINT (1 1, 2 2, 3 3)", True),
+ # LINESTRINGS
+ ("LINESTRING (0 0, 1 1)", True), # Simple straight line
+ ("LINESTRING (0 0, 1 1, 2 2)", True), # Simple line, collinear points
+ ("LINESTRING (0 0, 1 1, 0 1, 1 0)", False), # Self-intersecting (bowtie shape)
+ ("LINESTRING(1 1,2 2,2 3.5,1 3,1 2,2 1)", False), # Complex self-intersection
+ (
+ "LINESTRING (0 0, 1 1, 0 0)",
+ False,
+ ), # Closed loop with repeated start/end but intersects at interior
+ ("LINESTRING (0 0, 1 1, 1 0, 0 0)", True), # Simple closed ring (triangle)
+ ("LINESTRING EMPTY", True), # Empty linestring
+ # POLYGONS
+ ("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", True), # Simple rectangle
+ (
+ "POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))",
+ False,
+ ), # Bowtie polygon - self-intersecting
+ (
+ "POLYGON((1 2, 3 4, 5 6, 1 2))",
+ False,
+ ), # Degenerate polygon - zero-area Triangle
+ (
+ "Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))",
+ False,
+ ), # Star shape with self-intersection
+ (
+ "POLYGON ((0 0, 3 0, 3 3, 0 3, 0 0), (1 1, 2 1, 2 2, 1 2, 1 1))",
+ True,
+ ), # Polygon with hole, valid
+ (
+ "POLYGON ((0 0, 3 0, 3 3, 0 3, 0 0), (1 1, 0 2, 2 2, 1 1))",
+ True,
+ ), # Valid OGC Polygon (is also considered 'Simple' by OGC standard)
+ # MULTILINESTRINGS
+ (
+ "MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))",
+ True,
+ ), # Touching at endpoints only
+ ("MULTILINESTRING ((0 0, 2 2), (0 2, 2 0))", False), # Lines cross in middle
+ ("MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))", True), # Disjoint lines
+ (
+ "MULTILINESTRING ((0 0, 1 1, 2 2), (2 2, 3 3))",
+ True,
+ ), # Connected at endpoint
+ (
+ "MULTILINESTRING ((0 0, 2 0, 2 2, 0 2, 0 0), (1 1, 3 1, 3 3, 1 3, 1 1))",
+ False,
+ ), # Not simple: The two rings overlap and intersect (2 1), violating the MULTILINESTRING simplicity rule.
+ ("MULTILINESTRING ((0 0, 2 2), (1 0, 1 2))", False), # Lines intersect at (1,1)
+ ("MULTILINESTRING EMPTY", True), # Empty multilinestring
+ # MULTIPOLYGONS
+ ("MULTIPOLYGON (((0 0, 0 1, 1 1, 1 0, 0 0)))", True), # Single simple polygon
+ (
+ "MULTIPOLYGON (((0 0, 0 2, 2 2, 2 0, 0 0)), ((3 0, 3 2, 5 2, 5 0, 3 0)))",
+ True,
+ ), # Two disjoint polygons
+ (
+ "MULTIPOLYGON (((0 0, 0 2, 2 2, 2 0, 0 0)), ((1 1, 1 3, 3 3, 3 1, 1 1)))",
+ True,
+ ), # Touching at point
+ (
+ "MULTIPOLYGON (((0 0, 0 3, 3 3, 3 0, 0 0)), ((1 1, 1 2, 2 2, 2 1, 1 1)))",
+ True,
+ ), # One inside another (donut)
+ (
+ "MULTIPOLYGON (((0 0, 0 2, 2 2, 2 0, 0 0)), ((0 0, 0 1, 1 1, 1 0, 0 0)))",
+ True,
+ ), # Simple: The boundaries do not cross
+ ("MULTIPOLYGON EMPTY", True), # Empty multipolygon
+ # GEOMETRYCOLLECTIONS
+ (
+ "GEOMETRYCOLLECTION (POINT (1 1), LINESTRING (0 0, 1 1))",
+ True,
+ ), # Simple components
+ (
+ "GEOMETRYCOLLECTION (LINESTRING (0 0, 2 2), LINESTRING (0 2, 2 0))",
+ True,
+ ),
+ ("GEOMETRYCOLLECTION EMPTY", True), # Empty collection
+ # EDGE CASES
+ ("POINT (1 1)", True), # Repeated for completeness
+ (
+ "LINESTRING (1 1, 1 1)",
+ True,
+ ), # Simple: Start and end points are the only intersecting points.
+ (
+ "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0), (0.2 0.2, 0.2 0.8, 0.8 0.8, 0.8 0.2, 0.2 0.2))",
+ True,
+ ), # Proper hole
+ (
+ "POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0), (0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))",
+ True,
+ ), # Another valid hole
+ (
+ "LINESTRING (0 0, 1 0, 1 1, 0 1, 0.5 1, 0.5 0)",
+ False,
+ ), # Self-touching at non-endpoint
+ ],
+)
+def test_st_issimple(eng, geom, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(f"SELECT ST_IsSimple({geom_or_null(geom)})", expected)
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),