Copilot commented on code in PR #2710:
URL: https://github.com/apache/sedona/pull/2710#discussion_r2916323350
##########
python/sedona/spark/geopandas/base.py:
##########
@@ -707,15 +820,81 @@ def envelope(self):
"""
return _delegate_to_geometry_column("envelope", self)
- # def minimum_rotated_rectangle(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ def minimum_rotated_rectangle(self):
+ """Return the minimum rotated rectangle (oriented envelope) that
+ encloses each geometry.
- # @property
- # def exterior(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ Unlike ``envelope``, the rectangle may be rotated to better fit the
+ geometry.
- # def extract_unique_points(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import MultiPoint
+ >>> s = GeoSeries(
+ ... [MultiPoint([(0, 0), (1, 0), (0.5, 1)])]
+ ... )
+ >>> s.minimum_rotated_rectangle()
+ 0 POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))
+ dtype: geometry
+
+ See Also
+ --------
+ GeoSeries.envelope : axis-aligned bounding rectangle
+ GeoSeries.convex_hull : convex hull geometry
+ """
+ return _delegate_to_geometry_column("minimum_rotated_rectangle", self)
+
+ @property
+ def exterior(self):
+ """Return the outer boundary of each polygon geometry.
+
+ Returns a ``GeoSeries`` of LinearRings representing the exterior ring
+ of each polygon. For non-polygon geometries, returns ``None``.
+
Review Comment:
The docstring says this returns LinearRings and the example output shows a
LINEARRING, but Sedona's ST_ExteriorRing returns a LINESTRING (see
python/sedona/spark/sql/st_functions.py docstring) and the added match test
explicitly notes this type difference. To avoid misleading API docs, update the
docstring/example to reflect the actual Sedona return type (and/or document the
known discrepancy vs geopandas).
##########
python/sedona/spark/geopandas/base.py:
##########
@@ -313,14 +313,94 @@ def is_empty(self):
"""
return _delegate_to_geometry_column("is_empty", self)
- # def count_coordinates(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ def count_coordinates(self):
+ """Return a ``Series`` of ``dtype('int64')`` with the number of
+ coordinate tuples in each geometry.
Review Comment:
The docstrings for the new count_* methods hardcode an ``int64`` dtype
(including in the doctest output), but the added unit tests assert an ``int32``
result dtype. This inconsistency is likely to confuse users and can make
doctest-style examples misleading; consider describing the return as an integer
dtype (without a fixed width) or updating the docs/examples to match the actual
returned dtype.
##########
python/tests/geopandas/test_match_geopandas_series.py:
##########
@@ -549,13 +549,24 @@ def test_is_empty(self):
self.check_pd_series_equal(sgpd_result, gpd_result)
def test_count_coordinates(self):
- pass
+ for geom in self.geoms:
+ sgpd_result = GeoSeries(geom).count_coordinates()
+ gpd_result = gpd.GeoSeries(geom).count_coordinates()
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_count_geometries(self):
- pass
+ for geom in self.geoms:
+ sgpd_result = GeoSeries(geom).count_geometries()
+ gpd_result = gpd.GeoSeries(geom).count_geometries()
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_count_interior_rings(self):
- pass
+ # Sedona returns null for empty geometries while geopandas returns 0,
+ # so we only test with non-empty polygons.
+ geom = [self.polygons[1], self.polygons[2]]
+ sgpd_result = GeoSeries(geom).count_interior_rings()
+ gpd_result = gpd.GeoSeries(geom).count_interior_rings()
+ self.check_pd_series_equal(sgpd_result, gpd_result)
Review Comment:
This match test only exercises ``count_interior_rings`` on a couple of
non-empty polygons, which avoids the current NULL-returning behavior for
non-polygons / MultiPolygon seen in Sedona's ``ST_NumInteriorRings``. If the
goal is geopandas-compat semantics (0 for non-polygons/empties), add coverage
for at least one non-polygon geometry (and/or MultiPolygon) once the
implementation is adjusted; otherwise, consider asserting the known divergence
explicitly so it doesn't get overlooked.
```suggestion
# so we first test with non-empty polygons where the behavior
matches.
geom = [self.polygons[1], self.polygons[2]]
sgpd_result = GeoSeries(geom).count_interior_rings()
gpd_result = gpd.GeoSeries(geom).count_interior_rings()
self.check_pd_series_equal(sgpd_result, gpd_result)
# Sedona's underlying ST_NumInteriorRings currently returns NULL for
# non-polygons and MultiPolygon, while geopandas returns 0.
Explicitly
# assert this divergence so it is documented and does not get
overlooked.
non_polygon_geom = [
Point(0, 0),
MultiPolygon([self.polygons[1], self.polygons[2]]),
]
sgpd_np_result = GeoSeries(non_polygon_geom).count_interior_rings()
gpd_np_result =
gpd.GeoSeries(non_polygon_geom).count_interior_rings()
sgpd_np = sgpd_np_result.to_pandas()
for s_val, g_val in zip(sgpd_np, gpd_np_result):
# geopandas semantics: 0 for non-polygons / MultiPolygon
assert g_val == 0
# current Sedona behavior: returns NULL/NaN for these cases
assert pd.isna(s_val)
```
##########
python/sedona/spark/geopandas/base.py:
##########
@@ -313,14 +313,94 @@ def is_empty(self):
"""
return _delegate_to_geometry_column("is_empty", self)
- # def count_coordinates(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ def count_coordinates(self):
+ """Return a ``Series`` of ``dtype('int64')`` with the number of
+ coordinate tuples in each geometry.
- # def count_geometries(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ Returns
+ -------
+ Series (int)
- # def count_interior_rings(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Point, LineString, Polygon
+ >>> s = GeoSeries(
+ ... [
+ ... Point(0, 0),
+ ... LineString([(0, 0), (1, 1), (2, 2)]),
+ ... Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ... ]
+ ... )
+ >>> s.count_coordinates()
+ 0 1
+ 1 3
+ 2 5
+ dtype: int64
+
+ """
+ return _delegate_to_geometry_column("count_coordinates", self)
+
+ def count_geometries(self):
+ """Return a ``Series`` of ``dtype('int64')`` with the number of
+ geometries in each multi-geometry or geometry collection.
+
+ For non-multi geometries, returns 1.
+
+ Returns
+ -------
+ Series (int)
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Point, MultiPoint, MultiLineString
+ >>> s = GeoSeries(
+ ... [
+ ... Point(0, 0),
+ ... MultiPoint([(0, 0), (1, 1)]),
+ ... MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
+ ... ]
+ ... )
+ >>> s.count_geometries()
+ 0 1
+ 1 2
+ 2 2
+ dtype: int64
+
+ """
+ return _delegate_to_geometry_column("count_geometries", self)
+
+ def count_interior_rings(self):
+ """Return a ``Series`` of ``dtype('int64')`` with the number of
+ interior rings (holes) in each polygon geometry.
+
+ Returns 0 for polygons without holes and for non-polygon geometries.
+
Review Comment:
This docstring states that non-polygon geometries return 0, but the current
implementation delegates to Sedona's ST_NumInteriorRings, which returns NULL
for non-polygons (and even for MultiPolygon in Sedona's own SQL tests, e.g.
python/tests/sql/test_function.py shows only polygon rows have non-null
results). Either update the implementation to match the documented/geopandas
behavior (e.g., return 0 for non-polygons/empties while preserving nulls for
missing values), or adjust the docstring to reflect the actual NULL-returning
behavior.
##########
python/sedona/spark/geopandas/geoseries.py:
##########
@@ -792,30 +792,24 @@ def is_empty(self) -> pspd.Series:
return _to_bool(result)
def count_coordinates(self):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error(
- "count_coordinates",
- "Counts the number of coordinate tuples in each geometry.",
- )
+ spark_expr = stf.ST_NPoints(self.spark.column)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=False,
)
def count_geometries(self):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error(
- "count_geometries",
- "Counts the number of geometries in each multi-geometry or
collection.",
- )
+ spark_expr = stf.ST_NumGeometries(self.spark.column)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=False,
)
def count_interior_rings(self):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error(
- "count_interior_rings",
- "Counts the number of interior rings (holes) in each polygon.",
- )
+ spark_expr = stf.ST_NumInteriorRings(self.spark.column)
Review Comment:
``ST_NumInteriorRings`` returns NULL for non-polygon geometries (and
MultiPolygon) in Sedona (see existing SQL tests filtering out NULLs in
python/tests/sql/test_function.py around the ST_NumInteriorRings test). This
GeoSeries method currently forwards that NULL straight through, but
geopandas/count_interior_rings semantics (and the new base.py docstring)
indicate non-polygons should yield 0. Consider wrapping the expression to
return 0 for non-polygons (and likely empty geometries) while preserving NULLs
for missing values.
```suggestion
# Sedona's ST_NumInteriorRings returns NULL for non-polygon
geometries
# (including MultiPolygon). GeoPandas semantics require 0 for
# non-polygon and empty geometries, while preserving NULL for
# missing values.
geom_type_col = stf.ST_GeometryType(self.spark.column)
spark_expr = (
F.when(
self.spark.column.isNull(),
F.lit(None).cast("int"),
)
.when(
geom_type_col.isin(["ST_Polygon"]),
stf.ST_NumInteriorRings(self.spark.column),
)
.otherwise(F.lit(0))
)
```
##########
python/sedona/spark/geopandas/geoseries.py:
##########
@@ -1114,8 +1124,11 @@ def force_3d(self, z=0.0) -> "GeoSeries":
)
def line_merge(self, directed=False):
Review Comment:
The ``directed`` parameter is accepted and forwarded from the public API,
but the implementation ignores it and always calls ``ST_LineMerge(col)``
(Sedona doesn't support a directed variant). To prevent silent behavior
differences vs geopandas, consider raising ``NotImplementedError`` when
``directed=True`` or emitting a warning that the argument is ignored.
```suggestion
def line_merge(self, directed=False):
if directed:
raise NotImplementedError(
"Sedona does not support directed line_merge; the 'directed'
argument must be False."
)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]