Copilot commented on code in PR #2710:
URL: https://github.com/apache/sedona/pull/2710#discussion_r2916581262
##########
python/sedona/spark/geopandas/base.py:
##########
@@ -1093,8 +1303,39 @@ def force_3d(self, z=0.0):
"""
return _delegate_to_geometry_column("force_3d", self, z)
- # def line_merge(self, directed=False):
- # raise NotImplementedError("This method is not implemented yet.")
+ def line_merge(self, directed=False):
+ """Return merged LineStrings.
+
+ Returns a ``GeoSeries`` of (Multi)LineStrings, where connected
+ LineStrings are merged together into single LineStrings.
+
+ Parameters
+ ----------
+ directed : bool, default False
+ Only ``directed=False`` is supported. Passing ``directed=True``
+ will raise ``NotImplementedError``.
+
+ Returns
+ -------
+ GeoSeries
+
Review Comment:
The `line_merge` docstring says `directed` is “Currently not supported by
Sedona”, but `GeoSeries.line_merge` raises `NotImplementedError` when
`directed=True`. Please document the actual behavior explicitly (e.g., only
`directed=False` is supported and `directed=True` raises) to avoid implying the
argument is silently ignored.
```suggestion
Whether to treat LineStrings as directed when merging.
Only ``directed=False`` is currently supported; passing
``directed=True`` raises :class:`NotImplementedError`.
Returns
-------
GeoSeries
Raises
------
NotImplementedError
If ``directed`` is True.
```
##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -1667,7 +1817,28 @@ def test_force_3d(self):
self.check_sgpd_equals_gpd(result, expected)
def test_line_merge(self):
- pass
+ s = GeoSeries(
+ [
+ MultiLineString([[(0, 0), (1, 1)], [(1, 1), (2, 2)]]),
+ MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
+ LineString([(0, 0), (1, 1)]),
+ None,
+ ]
+ )
+ expected = gpd.GeoSeries(
+ [
+ MultiLineString([[(0, 0), (1, 1)], [(1, 1), (2, 2)]]),
+ MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
+ LineString([(0, 0), (1, 1)]),
+ None,
+ ]
+ ).line_merge()
+ result = s.line_merge()
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().line_merge()
+ self.check_sgpd_equals_gpd(df_result, expected)
Review Comment:
`line_merge` has behavior specific to `directed=True` (raises
`NotImplementedError`), but the unit test only exercises the default
`directed=False` path. Add an assertion that
`GeoSeries(...).line_merge(directed=True)` raises, so the unsupported-argument
contract remains enforced.
```suggestion
# `directed=True` is not implemented and should raise
with pytest.raises(NotImplementedError):
s.line_merge(directed=True)
with pytest.raises(NotImplementedError):
s.to_geoframe().line_merge(directed=True)
```
##########
python/sedona/spark/geopandas/geoseries.py:
##########
@@ -792,30 +792,27 @@ def is_empty(self) -> pspd.Series:
return _to_bool(result)
def count_coordinates(self):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error(
- "count_coordinates",
- "Counts the number of coordinate tuples in each geometry.",
- )
+ spark_expr = stf.ST_NPoints(self.spark.column)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=False,
)
def count_geometries(self):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error(
- "count_geometries",
- "Counts the number of geometries in each multi-geometry or
collection.",
- )
+ spark_expr = stf.ST_NumGeometries(self.spark.column)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=False,
)
def count_interior_rings(self):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error(
- "count_interior_rings",
- "Counts the number of interior rings (holes) in each polygon.",
- )
+ # Sedona's ST_NumInteriorRings returns NULL for non-polygon geometries
+ # (including MultiPolygon). GeoPandas semantics require 0 for
+ # non-polygon and empty geometries, so we wrap with coalesce.
+ spark_expr = F.coalesce(stf.ST_NumInteriorRings(self.spark.column),
F.lit(0))
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=False,
)
Review Comment:
`count_interior_rings` uses `coalesce(ST_NumInteriorRings(...), lit(0))`,
which also turns *missing* geometries (NULL input) into 0. That makes
NULL-handling inconsistent with `count_coordinates`/`count_geometries` (which
propagate NULLs) and is likely incorrect for GeoSeries with `None` values.
Consider preserving NULL inputs explicitly (e.g.,
`when(self.spark.column.isNull(), lit(None)).otherwise(coalesce(..., lit(0)))`)
so only non-polygon/empty geometries become 0 while missing values remain
NULL/NA.
##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -748,13 +748,54 @@ def test_is_empty(self):
self.check_pd_series_equal(df_result, expected)
def test_count_coordinates(self):
- pass
+ s = GeoSeries(
+ [
+ Point(0, 0),
+ LineString([(0, 0), (1, 1), (2, 2)]),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ]
+ )
+ result = s.count_coordinates()
+ expected = pd.Series([1, 3, 5], dtype="int32")
+ self.check_pd_series_equal(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().count_coordinates()
+ self.check_pd_series_equal(df_result, expected)
def test_count_geometries(self):
- pass
+ s = GeoSeries(
+ [
+ Point(0, 0),
+ MultiPoint([(0, 0), (1, 1)]),
+ MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
+ ]
+ )
+ result = s.count_geometries()
+ expected = pd.Series([1, 2, 2], dtype="int32")
+ self.check_pd_series_equal(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().count_geometries()
+ self.check_pd_series_equal(df_result, expected)
def test_count_interior_rings(self):
- pass
+ s = GeoSeries(
+ [
+ Polygon(
+ [(0, 0), (10, 0), (10, 10), (0, 10)],
+ [[(1, 1), (2, 1), (2, 2), (1, 2)]],
+ ),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ]
+ )
+ result = s.count_interior_rings()
+ expected = pd.Series([1, 0], dtype="int32")
+ self.check_pd_series_equal(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().count_interior_rings()
+ self.check_pd_series_equal(df_result, expected)
Review Comment:
`test_count_interior_rings` doesn’t cover `None` geometries, which is
important here because the implementation currently applies `coalesce(..., 0)`
and can accidentally convert NULL inputs into 0. Add a `None` element (or a
dedicated test) and derive the expected result from geopandas
(`gpd.GeoSeries(...).count_interior_rings()`) so missing-value semantics stay
aligned.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]