This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push: new f3a0a81 Add more python integration tests + fix pre-commit ruff-format (#8) f3a0a81 is described below commit f3a0a81a0385f47660ebfdf0bf769a4383ecbf08 Author: Peter Nguyen <petern0...@gmail.com> AuthorDate: Tue Sep 2 12:56:42 2025 -0700 Add more python integration tests + fix pre-commit ruff-format (#8) * Move test_st_dwithin to test_predicates.py * Add test_st_buffer * Add test_st_asbinary * Add test_st_geomfromwkb * pre-commit * Remove duplicate pre-commit ruff-format entry causing conflicts * pre-commit testing.py * Simplify testing.py and use escaped byte strings * Add numeric_epsilon parameter for st_buffer to conditionally use math.isclose * Check exact strings in test_st_astext * Compare bytes for st_geom/gfromwkb using shapely * Pr feedback * Decrease to epsilon 1e-9 --- .pre-commit-config.yaml | 7 - python/sedonadb/python/sedonadb/testing.py | 23 ++- python/sedonadb/tests/functions/test_functions.py | 209 +++++++++++++++++---- python/sedonadb/tests/functions/test_predicates.py | 36 +++- 4 files changed, 229 insertions(+), 46 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8fdb0bc..fefea8b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,13 +44,6 @@ repos: name: rustfmt args: ["--all", "--"] - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.5 - hooks: - - id: ruff - args: [ --fix ] - - id: ruff-format - - repo: https://github.com/cheshirekow/cmake-format-precommit rev: v0.6.13 hooks: diff --git a/python/sedonadb/python/sedonadb/testing.py b/python/sedonadb/python/sedonadb/testing.py index fe90910..8c2176d 100644 --- a/python/sedonadb/python/sedonadb/testing.py +++ b/python/sedonadb/python/sedonadb/testing.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import os +import math import warnings from pathlib import Path from typing import TYPE_CHECKING, List, Tuple @@ -170,7 +171,9 @@ class DBEngine: else: return tab.to_pandas() - def result_to_tuples(self, result, *, wkt_precision=None) -> List[Tuple[str]]: + def result_to_tuples( + self, result, *, wkt_precision=None, **kwargs + ) -> List[Tuple[str]]: """Convert a query result into row tuples This option strips away fine-grained type information but is helpful for @@ -202,7 +205,8 @@ class DBEngine: - A tuple of strings as the string output of a single row - A string as the string output of a single column of a single row - A bool for a single boolean value - - An int or float for single numeric values + - An int or float for single numeric values (optionally with a numeric_epsilon) + - bytes for single binary values Using Arrow table equality is the most strict (ensures exact type equality and byte-for-byte value equality); however, string output is most useful for checking @@ -260,11 +264,22 @@ class DBEngine: self.assert_result(result, [(expected,)], **kwargs) elif isinstance(expected, bool): self.assert_result(result, [(str(expected).lower(),)], **kwargs) - elif isinstance(expected, (int, float)): + elif isinstance(expected, (int, float, bytes)): result_df = self.result_to_pandas(result) assert result_df.shape == (1, 1) result_value = result_df.iloc[0, 0] - assert result_value == expected, f"Expected {expected}, got {result_value}" + eps = kwargs.get("numeric_epsilon", None) + if eps is not None: + assert isinstance(expected, (int, float)), ( + f"numeric_epsilon is only supported for int or float, not {type(expected).__name__}" + ) + assert math.isclose(result_value, expected, rel_tol=eps), ( + f"Expected {expected}, got {result_value}" + ) + else: + assert result_value == expected, ( + f"Expected {expected}, got {result_value}" + ) elif expected is None: self.assert_result(result, [(None,)], **kwargs) else: diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index f4dda6b..306e0c5 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import pytest +import shapely from sedonadb.testing import geom_or_null, PostGIS, SedonaDB, val_or_null @@ -50,6 +51,108 @@ def test_st_area(eng, geom, expected): eng.assert_query_result(f"SELECT ST_Area({geom_or_null(geom)})", expected) +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom", "expected"), + [ + ( + "POINT (1 1)", + b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\x3f\x00\x00\x00\x00\x00\x00\xf0\x3f", + ), + ( + "POINT EMPTY", + b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x7f\x00\x00\x00\x00\x00\x00\xf8\x7f", + ), + ( + "LINESTRING (0 0, 1 2, 3 4)", + b"\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\x3f\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x08\x40\x00\x00\x00\x00\x00\x00\x10\x40", + ), + ("LINESTRING EMPTY", b"\x01\x02\x00\x00\x00\x00\x00\x00\x00"), + ( + "POINT ZM (0 0 0 0)", + b"\x01\xb9\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + ), + ( + "GEOMETRYCOLLECTION (POINT (0 0), POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0)))", + b"\x01\x07\x00\x00\x00\x02\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + ), + ], +) +def test_st_asbinary(eng, geom, expected): + eng = eng.create_or_skip() + eng.assert_query_result(f"SELECT ST_AsBinary({geom_or_null(geom)})", expected) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom"), + [ + None, + # geoarrow-c returns POINT (nan nan) instead of POINT EMPTY + "POINT EMPTY", + "LINESTRING EMPTY", + "POLYGON EMPTY", + "MULTIPOINT EMPTY", + "MULTILINESTRING EMPTY", + "MULTIPOLYGON EMPTY", + "GEOMETRYCOLLECTION EMPTY", + "POINT(1 1)", + "LINESTRING(0 0,1 1)", + "POLYGON((0 0,1 0,1 1,0 1,0 0))", + "MULTIPOINT((0 0),(1 1))", + "MULTILINESTRING((0 0,1 1),(1 1,2 2))", + "MULTIPOLYGON(((0 0,1 0,1 1,0 1,0 0)),((0 0,1 0,1 1,0 1,0 0)))", + "GEOMETRYCOLLECTION(POINT(0 0),POLYGON((0 0,1 0,1 1,0 1,0 0)),LINESTRING(0 0,1 1),GEOMETRYCOLLECTION(POLYGON((0 0,-1 0,-1 -1,0 -1,0 0))))", + "POINT Z(0 0 0)", + "POINT ZM(0 0 0 0)", + "LINESTRING M(0 0 0,1 1 1)", + ], +) +def test_st_astext(eng, geom): + eng = eng.create_or_skip() + expected = geom + + if isinstance(eng, PostGIS) and expected is not None: + expected = expected.replace(r"M(", r"M (") + expected = expected.replace(r"Z(", r"Z (") + + eng.assert_query_result(f"SELECT ST_AsText({geom_or_null(geom)})", expected) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom", "dist", "expected_area"), + [ + (None, None, None), + (None, 1.0, None), + ("POINT (1 1)", None, None), + ("POINT (1 1)", 0.0, 0), + ("POINT EMPTY", 1.0, 0), + ("LINESTRING EMPTY", 1.0, 0), + ("POLYGON EMPTY", 1.0, 0), + ("POINT (0 0)", 1.0, 3.121445152258052), + ("POINT (0 0)", 2.0, 12.485780609032208), + ("LINESTRING (0 0, 1 1)", 1.0, 5.949872277004242), + ("LINESTRING (0 0, 1 1)", 2.0, 18.14263485852459), + ("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 2.0, 21.48578060903221), + ("MULTIPOINT ((0 0), (1 1))", 1.0, 5.682167728387077), + ( + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0)))", + 1.0, + 8.121445152256216, + ), + ], +) +def test_st_buffer(eng, geom, dist, expected_area): + eng = eng.create_or_skip() + + eng.assert_query_result( + f"SELECT ST_Area(ST_Buffer({geom_or_null(geom)}, {val_or_null(dist)}))", + expected_area, + numeric_epsilon=1e-9, + ) + + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geom", "expected"), @@ -123,40 +226,6 @@ def test_st_dimension(eng, geom, expected): eng.assert_query_result(f"SELECT ST_Dimension({geom_or_null(geom)})", expected) -@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) -@pytest.mark.parametrize( - ("geom1", "geom2", "distance", "expected"), - [ - (None, "POINT (0 0)", 1.0, None), - ("POINT (1 1)", None, 1.0, None), - ("POINT (0 0)", "POINT (0 0)", None, None), - (None, None, None, None), - ("POINT (0 0)", "POINT (0 0)", 1.0, True), - ("POINT (0 0)", "POINT (5 0)", 2.0, False), - ("LINESTRING (0 0, 1 1)", "LINESTRING (2 2, 3 3)", 1.0, False), - ("LINESTRING (0 0, 1 1)", "LINESTRING (10 0, 11 1)", 2.0, False), - ( - "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", - "POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))", - 6.0, - True, - ), - ( - "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", - "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", - 1.0, - True, - ), - ], -) -def test_st_dwithin(eng, geom1, geom2, distance, expected): - eng = eng.create_or_skip() - eng.assert_query_result( - f"SELECT ST_DWithin({geom_or_null(geom1)}, {geom_or_null(geom2)}, {val_or_null(distance)})", - expected, - ) - - @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geom", "expected"), @@ -261,6 +330,78 @@ def test_st_geomfromtext(eng, wkt, expected): eng.assert_query_result(f"SELECT ST_GeomFromText({val_or_null(wkt)})", expected) +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom"), + [ + "POINT (1 1)", + "POINT EMPTY", + "LINESTRING EMPTY", + "POLYGON EMPTY", + "GEOMETRYCOLLECTION EMPTY", + "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", + "MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))", + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))", + ], +) +def test_st_geogfromwkb(eng, geom): + eng = eng.create_or_skip() + + expected = geom + if geom == "POINT EMPTY": + # arrow-c returns POINT (nan nan) instead of POINT EMPTY + expected = "POINT (nan nan)" + + if geom is None: + wkb = val_or_null(None) + else: + wkb = shapely.from_wkt(geom).wkb + if isinstance(eng, SedonaDB): + wkb = "0x" + wkb.hex() + elif isinstance(eng, PostGIS): + wkb = r"\x" + wkb.hex() + wkb = f"'{wkb}'::bytea" + else: + raise + eng.assert_query_result(f"SELECT ST_GeogFromWKB({wkb})", expected) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom"), + [ + "POINT (1 1)", + "POINT EMPTY", + "LINESTRING EMPTY", + "POLYGON EMPTY", + "GEOMETRYCOLLECTION EMPTY", + "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", + "MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))", + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))", + ], +) +def test_st_geomfromwkb(eng, geom): + eng = eng.create_or_skip() + + expected = geom + if geom == "POINT EMPTY": + # arrow-c returns POINT (nan nan) instead of POINT EMPTY + expected = "POINT (nan nan)" + + if geom is None: + wkb = val_or_null(None) + else: + wkb = shapely.from_wkt(geom).wkb + if isinstance(eng, SedonaDB): + wkb = "0x" + wkb.hex() + elif isinstance(eng, PostGIS): + wkb = r"\x" + wkb.hex() + wkb = f"'{wkb}'::bytea" + else: + raise + eng.assert_query_result(f"SELECT ST_GeomFromWKB({wkb})", expected) + + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geom", "expected"), diff --git a/python/sedonadb/tests/functions/test_predicates.py b/python/sedonadb/tests/functions/test_predicates.py index 2ad4976..fafe9f5 100644 --- a/python/sedonadb/tests/functions/test_predicates.py +++ b/python/sedonadb/tests/functions/test_predicates.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. import pytest -from sedonadb.testing import geom_or_null, PostGIS, SedonaDB +from sedonadb.testing import geom_or_null, PostGIS, SedonaDB, val_or_null @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @@ -150,6 +150,40 @@ def test_st_disjoint(eng, geom1, geom2, expected): ) +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom1", "geom2", "distance", "expected"), + [ + (None, "POINT (0 0)", 1.0, None), + ("POINT (1 1)", None, 1.0, None), + ("POINT (0 0)", "POINT (0 0)", None, None), + (None, None, None, None), + ("POINT (0 0)", "POINT (0 0)", 1.0, True), + ("POINT (0 0)", "POINT (5 0)", 2.0, False), + ("LINESTRING (0 0, 1 1)", "LINESTRING (2 2, 3 3)", 1.0, False), + ("LINESTRING (0 0, 1 1)", "LINESTRING (10 0, 11 1)", 2.0, False), + ( + "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", + "POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))", + 6.0, + True, + ), + ( + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", + 1.0, + True, + ), + ], +) +def test_st_dwithin(eng, geom1, geom2, distance, expected): + eng = eng.create_or_skip() + eng.assert_query_result( + f"SELECT ST_DWithin({geom_or_null(geom1)}, {geom_or_null(geom2)}, {val_or_null(distance)})", + expected, + ) + + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geom1", "geom2", "expected"),