Abeeujah commented on PR #219:
URL: https://github.com/apache/sedona-db/pull/219#issuecomment-3408086299
Hi @petern48 @paleolimbot, I'm getting this weird error on the Python end:
```sh
_________________________________________________________________
test_st_isclosed[POINT(0 0)-True-SedonaDB]
__________________________________________________________________
eng = <sedonadb.testing.SedonaDB object at 0x7fd86c71d7d0>, geom = 'POINT(0
0)', expected = True
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),
[
(None, None),
("LINESTRING(0 0, 1 1)", False),
("LINESTRING(0 0, 0 1, 1 1, 0 0)", True),
("MULTILINESTRING((0 0, 0 1, 1 1, 0 0),(0 0, 1 1))", False),
("POINT(0 0)", True),
("MULTIPOINT((0 0), (1 1))", True),
],
)
def test_st_isclosed(eng, geom, expected):
eng = eng.create_or_skip()
> eng.assert_query_result(f"SELECT ST_IsClosed({geom_or_null(geom)})",
expected)
tests/functions/test_functions.py:590:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _
python/sedonadb/testing.py:124: in assert_query_result
return self.assert_result(result, expected, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
python/sedonadb/testing.py:282: in assert_result
self.assert_result(result, [(str(expected).lower(),)], **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _
self = <sedonadb.testing.SedonaDB object at 0x7fd86c71d7d0>
result = pyarrow.Table
st_isclosed(st_geomfromwkt(Utf8("POINT(0 0)"))): bool not null
----
st_isclosed(st_geomfromwkt(Utf8("POINT(0 0)"))): [[false]], expected =
[('true',)]
kwargs = {}, geopandas = <module 'geopandas' from
'/home/abee/Desktop/Odhack/sedona-db/python/sedonadb/.venv/lib/python3.11/site-packages/geopandas/__init__.py'>
pandas = <module 'pandas' from
'/home/abee/Desktop/Odhack/sedona-db/python/sedonadb/.venv/lib/python3.11/site-packages/pandas/__init__.py'>
def assert_result(self, result, expected, **kwargs) -> "DBEngine":
"""Assert a result against an expected target
Supported expected targets include:
- A pyarrow.Table (compared using ==)
- A geopandas.GeoDataFrame (compared using geopandas.testing)
- A pandas.DataFrame (for non-spatial results; compared using
pandas.testing)
- A list of tuples where all values have been converted to strings.
For
geometry results, these strings are converted to WKT using
geoarrow.pyarrow
(which ensures a consistent WKT output format).
- A tuple of strings as the string output of a single row
- A string as the string output of a single column of a single row
- A bool for a single boolean value
- An int or float for single numeric values (optionally with a
numeric_epsilon)
- bytes for single binary values
Using Arrow table equality is the most strict (ensures exact type
equality and
byte-for-byte value equality); however, string output is most useful
for checking
logical value quality among engines. GeoPandas/Pandas expected
targets generate
the most useful assertion failures and are probably the best option
for general
usage.
"""
import geopandas.testing
import pandas
if isinstance(expected, pa.Table):
result_arrow = self.result_to_table(result)
if result_arrow.schema != expected.schema:
raise AssertionError(
f"Expected schema:\n {expected.schema}\nGot:\n
{result_arrow.schema}"
)
if result_arrow.columns != expected.columns:
raise AssertionError(f"Expected:\n {expected}\nGot:\n
{result_arrow}")
# It is probably a bug in geoarrow.types.type_parrow that CRS
mismatches
# are still considered "equal" by the == operator
geometry_cols = _geometry_columns(result_arrow.schema)
expected_geometry_cols = _geometry_columns(expected.schema)
assert len(geometry_cols) == len(expected_geometry_cols)
for item, expected_item in zip(
geometry_cols.items(), expected_geometry_cols.items()
):
assert item[0] == expected_item[0]
if item[1].edge_type != expected_item[1].edge_type:
raise AssertionError(
f"Edge type mismatch for column '{item[0]}': "
f"Expected {expected_item[1].edge_type}, got
{item[1].edge_type}"
)
if not _crs_equal(item[1].crs, expected_item[1].crs):
raise AssertionError(
f"CRS mismatch for column '{item[0]}': "
f"Expected {expected_item[1].crs}, got {item[1].crs}"
)
elif isinstance(expected, geopandas.GeoDataFrame):
result_pandas = self.result_to_pandas(result)
geopandas.testing.assert_geodataframe_equal(
result_pandas, expected, **kwargs
)
elif isinstance(expected, pandas.DataFrame):
result_pandas = self.result_to_pandas(result)
pandas.testing.assert_frame_equal(result_pandas, expected,
**kwargs)
elif isinstance(expected, list):
result_tuples = self.result_to_tuples(result, **kwargs)
if result_tuples != expected:
> raise AssertionError(
f"Expected:\n {expected}\nGot:\n {result_tuples}"
E AssertionError: Expected:
E [('true',)]
E Got:
E [('false',)]
python/sedonadb/testing.py:274: AssertionError
__________________________________________________________
test_st_isclosed[MULTIPOINT((0 0), (1 1))-True-SedonaDB]
___________________________________________________________
eng = <sedonadb.testing.SedonaDB object at 0x7fd86c7c0c10>, geom =
'MULTIPOINT((0 0), (1 1))', expected = True
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),
[
(None, None),
("LINESTRING(0 0, 1 1)", False),
("LINESTRING(0 0, 0 1, 1 1, 0 0)", True),
("MULTILINESTRING((0 0, 0 1, 1 1, 0 0),(0 0, 1 1))", False),
("POINT(0 0)", True),
("MULTIPOINT((0 0), (1 1))", True),
],
)
def test_st_isclosed(eng, geom, expected):
eng = eng.create_or_skip()
> eng.assert_query_result(f"SELECT ST_IsClosed({geom_or_null(geom)})",
expected)
tests/functions/test_functions.py:590:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _
python/sedonadb/testing.py:123: in assert_query_result
result = self.execute_and_collect(query)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
python/sedonadb/testing.py:344: in execute_and_collect
return self.con.sql(query).to_arrow_table()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
python/sedonadb/dataframe.py:252: in to_arrow_table
return pa.table(self)
^^^^^^^^^^^^^^
pyarrow/table.pxi:6224: in pyarrow.lib.table
???
pyarrow/ipc.pxi:794: in pyarrow.lib.RecordBatchReader.read_all
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _
> ???
E pyarrow.lib.ArrowInvalid: External error: Execution error: Failed to
check if geometry is closed: External error: SedonaDB internal error: Invalid
geometry type.
E This issue was likely caused by a bug in SedonaDB's code. Please help us
to resolve this by filing a bug report in our issue tracker:
https://github.com/apache/sedona-db/issues
pyarrow/error.pxi:92: ArrowInvalid
===========================================================================
short test summary info
===========================================================================
FAILED tests/functions/test_functions.py::test_st_isclosed[POINT(0
0)-True-SedonaDB] - AssertionError: Expected:
FAILED tests/functions/test_functions.py::test_st_isclosed[MULTIPOINT((0 0),
(1 1))-True-SedonaDB] - pyarrow.lib.ArrowInvalid: External error: Execution
error: Failed to check if geometry is closed: External error: SedonaDB internal
error: Invalid geometry type.
```
I understand it's quite a lot to read. This is only failing for `POINT` and
`MULTIPOINT`; I'd appreciate any form of guidance on resolving this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]