(sedona-db) branch main updated: Add more python integration tests + fix pre-commit ruff-format (#8)

jiayu Tue, 02 Sep 2025 14:22:06 -0700

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git



The following commit(s) were added to refs/heads/main by this push:
     new f3a0a81  Add more python integration tests + fix pre-commit ruff-format (#8)
f3a0a81 is described below

commit f3a0a81a0385f47660ebfdf0bf769a4383ecbf08
Author: Peter Nguyen <petern0...@gmail.com>
AuthorDate: Tue Sep 2 12:56:42 2025 -0700

    Add more python integration tests + fix pre-commit ruff-format (#8)
    
    * Move test_st_dwithin to test_predicates.py
    
    * Add test_st_buffer
    
    * Add test_st_asbinary
    
    * Add test_st_geomfromwkb
    
    * pre-commit
    
    * Remove duplicate pre-commit ruff-format entry causing conflicts
    
    * pre-commit testing.py
    
    * Simplify testing.py and use escaped byte strings
    
    * Add numeric_epsilon parameter for st_buffer to conditionally use math.isclose
    
    * Check exact strings in test_st_astext
    
    * Compare bytes for st_geomfromwkb/st_geogfromwkb using shapely
    
    * PR feedback
    
    * Decrease epsilon to 1e-9
---
 .pre-commit-config.yaml                            |   7 -
 python/sedonadb/python/sedonadb/testing.py         |  23 ++-
 python/sedonadb/tests/functions/test_functions.py  | 209 +++++++++++++++++----
 python/sedonadb/tests/functions/test_predicates.py |  36 +++-
 4 files changed, 229 insertions(+), 46 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8fdb0bc..fefea8b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -44,13 +44,6 @@ repos:
       name: rustfmt
       args: ["--all", "--"]
 
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.5
-    hooks:
-      - id: ruff
-        args: [ --fix ]
-      - id: ruff-format
-
   - repo: https://github.com/cheshirekow/cmake-format-precommit
     rev: v0.6.13
     hooks:
diff --git a/python/sedonadb/python/sedonadb/testing.py 
b/python/sedonadb/python/sedonadb/testing.py
index fe90910..8c2176d 100644
--- a/python/sedonadb/python/sedonadb/testing.py
+++ b/python/sedonadb/python/sedonadb/testing.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import os
+import math
 import warnings
 from pathlib import Path
 from typing import TYPE_CHECKING, List, Tuple
@@ -170,7 +171,9 @@ class DBEngine:
         else:
             return tab.to_pandas()
 
-    def result_to_tuples(self, result, *, wkt_precision=None) -> 
List[Tuple[str]]:
+    def result_to_tuples(
+        self, result, *, wkt_precision=None, **kwargs
+    ) -> List[Tuple[str]]:
         """Convert a query result into row tuples
 
         This option strips away fine-grained type information but is helpful 
for
@@ -202,7 +205,8 @@ class DBEngine:
         - A tuple of strings as the string output of a single row
         - A string as the string output of a single column of a single row
         - A bool for a single boolean value
-        - An int or float for single numeric values
+        - An int or float for single numeric values (optionally with a 
numeric_epsilon)
+        - bytes for single binary values
 
         Using Arrow table equality is the most strict (ensures exact type 
equality and
         byte-for-byte value equality); however, string output is most useful 
for checking
@@ -260,11 +264,22 @@ class DBEngine:
             self.assert_result(result, [(expected,)], **kwargs)
         elif isinstance(expected, bool):
             self.assert_result(result, [(str(expected).lower(),)], **kwargs)
-        elif isinstance(expected, (int, float)):
+        elif isinstance(expected, (int, float, bytes)):
             result_df = self.result_to_pandas(result)
             assert result_df.shape == (1, 1)
             result_value = result_df.iloc[0, 0]
-            assert result_value == expected, f"Expected {expected}, got 
{result_value}"
+            eps = kwargs.get("numeric_epsilon", None)
+            if eps is not None:
+                assert isinstance(expected, (int, float)), (
+                    f"numeric_epsilon is only supported for int or float, not 
{type(expected).__name__}"
+                )
+                assert math.isclose(result_value, expected, rel_tol=eps), (
+                    f"Expected {expected}, got {result_value}"
+                )
+            else:
+                assert result_value == expected, (
+                    f"Expected {expected}, got {result_value}"
+                )
         elif expected is None:
             self.assert_result(result, [(None,)], **kwargs)
         else:
diff --git a/python/sedonadb/tests/functions/test_functions.py 
b/python/sedonadb/tests/functions/test_functions.py
index f4dda6b..306e0c5 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import pytest
+import shapely
 from sedonadb.testing import geom_or_null, PostGIS, SedonaDB, val_or_null
 
 
@@ -50,6 +51,108 @@ def test_st_area(eng, geom, expected):
     eng.assert_query_result(f"SELECT ST_Area({geom_or_null(geom)})", expected)
 
 
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "expected"),
+    [
+        (
+            "POINT (1 1)",
+            
b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\x3f\x00\x00\x00\x00\x00\x00\xf0\x3f",
+        ),
+        (
+            "POINT EMPTY",
+            
b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x7f\x00\x00\x00\x00\x00\x00\xf8\x7f",
+        ),
+        (
+            "LINESTRING (0 0, 1 2, 3 4)",
+            
b"\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\x3f\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x08\x40\x00\x00\x00\x00\x00\x00\x10\x40",
+        ),
+        ("LINESTRING EMPTY", b"\x01\x02\x00\x00\x00\x00\x00\x00\x00"),
+        (
+            "POINT ZM (0 0 0 0)",
+            
b"\x01\xb9\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        ),
+        (
+            "GEOMETRYCOLLECTION (POINT (0 0), POLYGON ((0 0, 1 0, 1 1, 0 1, 0 
0)))",
+            
b"\x01\x07\x00\x00\x00\x02\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        ),
+    ],
+)
+def test_st_asbinary(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(f"SELECT ST_AsBinary({geom_or_null(geom)})", 
expected)
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom"),
+    [
+        None,
+        # geoarrow-c returns POINT (nan nan) instead of POINT EMPTY
+        "POINT EMPTY",
+        "LINESTRING EMPTY",
+        "POLYGON EMPTY",
+        "MULTIPOINT EMPTY",
+        "MULTILINESTRING EMPTY",
+        "MULTIPOLYGON EMPTY",
+        "GEOMETRYCOLLECTION EMPTY",
+        "POINT(1 1)",
+        "LINESTRING(0 0,1 1)",
+        "POLYGON((0 0,1 0,1 1,0 1,0 0))",
+        "MULTIPOINT((0 0),(1 1))",
+        "MULTILINESTRING((0 0,1 1),(1 1,2 2))",
+        "MULTIPOLYGON(((0 0,1 0,1 1,0 1,0 0)),((0 0,1 0,1 1,0 1,0 0)))",
+        "GEOMETRYCOLLECTION(POINT(0 0),POLYGON((0 0,1 0,1 1,0 1,0 
0)),LINESTRING(0 0,1 1),GEOMETRYCOLLECTION(POLYGON((0 0,-1 0,-1 -1,0 -1,0 
0))))",
+        "POINT Z(0 0 0)",
+        "POINT ZM(0 0 0 0)",
+        "LINESTRING M(0 0 0,1 1 1)",
+    ],
+)
+def test_st_astext(eng, geom):
+    eng = eng.create_or_skip()
+    expected = geom
+
+    if isinstance(eng, PostGIS) and expected is not None:
+        expected = expected.replace(r"M(", r"M (")
+        expected = expected.replace(r"Z(", r"Z (")
+
+    eng.assert_query_result(f"SELECT ST_AsText({geom_or_null(geom)})", 
expected)
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "dist", "expected_area"),
+    [
+        (None, None, None),
+        (None, 1.0, None),
+        ("POINT (1 1)", None, None),
+        ("POINT (1 1)", 0.0, 0),
+        ("POINT EMPTY", 1.0, 0),
+        ("LINESTRING EMPTY", 1.0, 0),
+        ("POLYGON EMPTY", 1.0, 0),
+        ("POINT (0 0)", 1.0, 3.121445152258052),
+        ("POINT (0 0)", 2.0, 12.485780609032208),
+        ("LINESTRING (0 0, 1 1)", 1.0, 5.949872277004242),
+        ("LINESTRING (0 0, 1 1)", 2.0, 18.14263485852459),
+        ("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 2.0, 21.48578060903221),
+        ("MULTIPOINT ((0 0), (1 1))", 1.0, 5.682167728387077),
+        (
+            "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON 
((0 0, 1 0, 1 1, 0 1, 0 0)))",
+            1.0,
+            8.121445152256216,
+        ),
+    ],
+)
+def test_st_buffer(eng, geom, dist, expected_area):
+    eng = eng.create_or_skip()
+
+    eng.assert_query_result(
+        f"SELECT ST_Area(ST_Buffer({geom_or_null(geom)}, 
{val_or_null(dist)}))",
+        expected_area,
+        numeric_epsilon=1e-9,
+    )
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom", "expected"),
@@ -123,40 +226,6 @@ def test_st_dimension(eng, geom, expected):
     eng.assert_query_result(f"SELECT ST_Dimension({geom_or_null(geom)})", 
expected)
 
 
-@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
-@pytest.mark.parametrize(
-    ("geom1", "geom2", "distance", "expected"),
-    [
-        (None, "POINT (0 0)", 1.0, None),
-        ("POINT (1 1)", None, 1.0, None),
-        ("POINT (0 0)", "POINT (0 0)", None, None),
-        (None, None, None, None),
-        ("POINT (0 0)", "POINT (0 0)", 1.0, True),
-        ("POINT (0 0)", "POINT (5 0)", 2.0, False),
-        ("LINESTRING (0 0, 1 1)", "LINESTRING (2 2, 3 3)", 1.0, False),
-        ("LINESTRING (0 0, 1 1)", "LINESTRING (10 0, 11 1)", 2.0, False),
-        (
-            "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
-            "POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))",
-            6.0,
-            True,
-        ),
-        (
-            "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))",
-            "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))",
-            1.0,
-            True,
-        ),
-    ],
-)
-def test_st_dwithin(eng, geom1, geom2, distance, expected):
-    eng = eng.create_or_skip()
-    eng.assert_query_result(
-        f"SELECT ST_DWithin({geom_or_null(geom1)}, {geom_or_null(geom2)}, 
{val_or_null(distance)})",
-        expected,
-    )
-
-
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom", "expected"),
@@ -261,6 +330,78 @@ def test_st_geomfromtext(eng, wkt, expected):
     eng.assert_query_result(f"SELECT ST_GeomFromText({val_or_null(wkt)})", 
expected)
 
 
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom"),
+    [
+        "POINT (1 1)",
+        "POINT EMPTY",
+        "LINESTRING EMPTY",
+        "POLYGON EMPTY",
+        "GEOMETRYCOLLECTION EMPTY",
+        "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+        "MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))",
+        "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON ((0 
0, 0 1, 1 1, 1 0, 0 0)))",
+    ],
+)
+def test_st_geogfromwkb(eng, geom):
+    eng = eng.create_or_skip()
+
+    expected = geom
+    if geom == "POINT EMPTY":
+        # arrow-c returns POINT (nan nan) instead of POINT EMPTY
+        expected = "POINT (nan nan)"
+
+    if geom is None:
+        wkb = val_or_null(None)
+    else:
+        wkb = shapely.from_wkt(geom).wkb
+        if isinstance(eng, SedonaDB):
+            wkb = "0x" + wkb.hex()
+        elif isinstance(eng, PostGIS):
+            wkb = r"\x" + wkb.hex()
+            wkb = f"'{wkb}'::bytea"
+        else:
+            raise
+    eng.assert_query_result(f"SELECT ST_GeogFromWKB({wkb})", expected)
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom"),
+    [
+        "POINT (1 1)",
+        "POINT EMPTY",
+        "LINESTRING EMPTY",
+        "POLYGON EMPTY",
+        "GEOMETRYCOLLECTION EMPTY",
+        "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+        "MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))",
+        "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON ((0 
0, 0 1, 1 1, 1 0, 0 0)))",
+    ],
+)
+def test_st_geomfromwkb(eng, geom):
+    eng = eng.create_or_skip()
+
+    expected = geom
+    if geom == "POINT EMPTY":
+        # arrow-c returns POINT (nan nan) instead of POINT EMPTY
+        expected = "POINT (nan nan)"
+
+    if geom is None:
+        wkb = val_or_null(None)
+    else:
+        wkb = shapely.from_wkt(geom).wkb
+        if isinstance(eng, SedonaDB):
+            wkb = "0x" + wkb.hex()
+        elif isinstance(eng, PostGIS):
+            wkb = r"\x" + wkb.hex()
+            wkb = f"'{wkb}'::bytea"
+        else:
+            raise
+    eng.assert_query_result(f"SELECT ST_GeomFromWKB({wkb})", expected)
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom", "expected"),
diff --git a/python/sedonadb/tests/functions/test_predicates.py 
b/python/sedonadb/tests/functions/test_predicates.py
index 2ad4976..fafe9f5 100644
--- a/python/sedonadb/tests/functions/test_predicates.py
+++ b/python/sedonadb/tests/functions/test_predicates.py
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import pytest
-from sedonadb.testing import geom_or_null, PostGIS, SedonaDB
+from sedonadb.testing import geom_or_null, PostGIS, SedonaDB, val_or_null
 
 
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@@ -150,6 +150,40 @@ def test_st_disjoint(eng, geom1, geom2, expected):
     )
 
 
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom1", "geom2", "distance", "expected"),
+    [
+        (None, "POINT (0 0)", 1.0, None),
+        ("POINT (1 1)", None, 1.0, None),
+        ("POINT (0 0)", "POINT (0 0)", None, None),
+        (None, None, None, None),
+        ("POINT (0 0)", "POINT (0 0)", 1.0, True),
+        ("POINT (0 0)", "POINT (5 0)", 2.0, False),
+        ("LINESTRING (0 0, 1 1)", "LINESTRING (2 2, 3 3)", 1.0, False),
+        ("LINESTRING (0 0, 1 1)", "LINESTRING (10 0, 11 1)", 2.0, False),
+        (
+            "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+            "POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))",
+            6.0,
+            True,
+        ),
+        (
+            "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))",
+            "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))",
+            1.0,
+            True,
+        ),
+    ],
+)
+def test_st_dwithin(eng, geom1, geom2, distance, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_DWithin({geom_or_null(geom1)}, {geom_or_null(geom2)}, 
{val_or_null(distance)})",
+        expected,
+    )
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom1", "geom2", "expected"),

(sedona-db) branch main updated: Add more python integration tests + fix pre-commit ruff-format (#8)

Reply via email to