This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push: new 60436fead1 [GH-2025] Geopandas.GeoSeries: Implement is_valid, is_empty, is_simple (#2026) 60436fead1 is described below commit 60436fead1e4686dd476ffebbc45a8efda6af633 Author: Peter Nguyen <petern0...@gmail.com> AuthorDate: Wed Jul 2 15:01:45 2025 -0700 [GH-2025] Geopandas.GeoSeries: Implement is_valid, is_empty, is_simple (#2026) * Support elements of None type in GeoSeries * Implement test_is_valid * Implement is_empty * Implement is_simple --- python/sedona/geopandas/geoseries.py | 119 +++++++++++++++++++-- python/tests/geopandas/test_geoseries.py | 34 +++++- .../tests/geopandas/test_match_geopandas_series.py | 24 ++++- 3 files changed, 160 insertions(+), 17 deletions(-) diff --git a/python/sedona/geopandas/geoseries.py b/python/sedona/geopandas/geoseries.py index cf17eb31a2..ea75b3da80 100644 --- a/python/sedona/geopandas/geoseries.py +++ b/python/sedona/geopandas/geoseries.py @@ -154,7 +154,9 @@ class GeoSeries(GeoFrame, pspd.Series): fastpath=fastpath, ) gs = gpd.GeoSeries(s) - pdf = pd.Series(gs.apply(lambda geom: geom.wkb)) + pdf = pd.Series( + gs.apply(lambda geom: geom.wkb if geom is not None else None) + ) # initialize the parent class pyspark Series with the pandas Series super().__init__( data=pdf, @@ -591,18 +593,88 @@ class GeoSeries(GeoFrame, pspd.Series): ).to_spark_pandas() @property - def is_valid(self): - # Implementation of the abstract method - raise NotImplementedError("This method is not implemented yet.") + def is_valid(self) -> pspd.Series: + """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for + geometries that are valid. + + Examples + -------- + + An example with one invalid polygon (a bowtie geometry crossing itself) + and one missing geometry: + + >>> from shapely.geometry import Polygon + >>> s = geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 1), (0, 1)]), + ... Polygon([(0,0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry + ... 
Polygon([(0, 0), (2, 2), (2, 0)]), + ... None + ... ] + ... ) + >>> s + 0 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 1 POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0)) + 2 POLYGON ((0 0, 2 2, 2 0, 0 0)) + 3 None + dtype: geometry + + >>> s.is_valid + 0 True + 1 False + 2 True + 3 False + dtype: bool + + See also + -------- + GeoSeries.is_valid_reason : reason for invalidity + """ + return ( + self._process_geometry_column("ST_IsValid", rename="is_valid") + .to_spark_pandas() + .astype("bool") + ) def is_valid_reason(self): # Implementation of the abstract method raise NotImplementedError("This method is not implemented yet.") @property - def is_empty(self): - # Implementation of the abstract method - raise NotImplementedError("This method is not implemented yet.") + def is_empty(self) -> pspd.Series: + """ + Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for + empty geometries. + + Examples + -------- + An example of a GeoDataFrame with one empty point, one point and one missing + value: + + >>> from shapely.geometry import Point + >>> d = {'geometry': [Point(), Point(2, 1), None]} + >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326") + >>> gdf + geometry + 0 POINT EMPTY + 1 POINT (2 1) + 2 None + + >>> gdf.is_empty + 0 True + 1 False + 2 False + dtype: bool + + See Also + -------- + GeoSeries.isna : detect missing values + """ + return ( + self._process_geometry_column("ST_IsEmpty", rename="is_empty") + .to_spark_pandas() + .astype("bool") + ) def count_coordinates(self): # Implementation of the abstract method @@ -617,9 +689,36 @@ class GeoSeries(GeoFrame, pspd.Series): raise NotImplementedError("This method is not implemented yet.") @property - def is_simple(self): - # Implementation of the abstract method - raise NotImplementedError("This method is not implemented yet.") + def is_simple(self) -> pspd.Series: + """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for + geometries that do not cross themselves. 
+ + This is meaningful only for `LineStrings` and `LinearRings`. + + Examples + -------- + >>> from shapely.geometry import LineString + >>> s = geopandas.GeoSeries( + ... [ + ... LineString([(0, 0), (1, 1), (1, -1), (0, 1)]), + ... LineString([(0, 0), (1, 1), (1, -1)]), + ... ] + ... ) + >>> s + 0 LINESTRING (0 0, 1 1, 1 -1, 0 1) + 1 LINESTRING (0 0, 1 1, 1 -1) + dtype: geometry + + >>> s.is_simple + 0 False + 1 True + dtype: bool + """ + return ( + self._process_geometry_column("ST_IsSimple", rename="is_simple") + .to_spark_pandas() + .astype("bool") + ) @property def is_ring(self): diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py index 8c0805b5f8..99bc9aa654 100644 --- a/python/tests/geopandas/test_geoseries.py +++ b/python/tests/geopandas/test_geoseries.py @@ -23,7 +23,7 @@ import sedona.geopandas as sgpd from sedona.geopandas import GeoSeries from tests.test_base import TestBase from shapely import wkt -from shapely.geometry import Point, LineString, Polygon, GeometryCollection +from shapely.geometry import Point, LineString, Polygon, GeometryCollection, LinearRing from pandas.testing import assert_series_equal @@ -192,13 +192,29 @@ class TestGeoSeries(TestBase): assert_series_equal(result, expected) def test_is_valid(self): - pass + geoseries = sgpd.GeoSeries( + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry + Polygon([(0, 0), (2, 2), (2, 0)]), + None, + ] + ) + result = geoseries.is_valid + expected = pd.Series([True, False, True, False]) + assert_series_equal(result.to_pandas(), expected) def test_is_valid_reason(self): pass def test_is_empty(self): - pass + geoseries = sgpd.GeoSeries( + [Point(), Point(2, 1), Polygon([(0, 0), (1, 1), (0, 1)]), None], + ) + + result = geoseries.is_empty + expected = pd.Series([True, False, False, False]) + assert_series_equal(result.to_pandas(), expected) def test_count_coordinates(self): pass @@ -210,7 +226,17 @@ class 
TestGeoSeries(TestBase): pass def test_is_simple(self): - pass + s = sgpd.GeoSeries( + [ + LineString([(0, 0), (1, 1), (1, -1), (0, 1)]), + LineString([(0, 0), (1, 1), (1, -1)]), + LinearRing([(0, 0), (1, 1), (1, -1), (0, 1)]), + LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]), + ] + ) + result = s.is_simple + expected = pd.Series([False, True, False, True]) + assert_series_equal(result.to_pandas(), expected) def test_is_ring(self): pass diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py index ec89ba23bd..c4d6eb9da0 100644 --- a/python/tests/geopandas/test_match_geopandas_series.py +++ b/python/tests/geopandas/test_match_geopandas_series.py @@ -32,6 +32,7 @@ from shapely.geometry import ( LineString, MultiPolygon, GeometryCollection, + LinearRing, ) from sedona.geopandas import GeoSeries @@ -310,13 +311,21 @@ class TestMatchGeopandasSeries(TestBase): self.check_pd_series_equal(sgpd_result, gpd_result) def test_is_valid(self): - pass + for _, geom in self.geoms: + sgpd_result = GeoSeries(geom).is_valid + assert isinstance(sgpd_result, ps.Series) + gpd_result = gpd.GeoSeries(geom).is_valid + self.check_pd_series_equal(sgpd_result, gpd_result) def test_is_valid_reason(self): pass def test_is_empty(self): - pass + for _, geom in self.geoms: + sgpd_result = GeoSeries(geom).is_empty + assert isinstance(sgpd_result, ps.Series) + gpd_result = gpd.GeoSeries(geom).is_empty + self.check_pd_series_equal(sgpd_result, gpd_result) def test_count_coordinates(self): pass @@ -328,7 +337,16 @@ class TestMatchGeopandasSeries(TestBase): pass def test_is_simple(self): - pass + data = [ + LineString([(0, 0), (0, 0)]), + LineString([(0, 0), (1, 1), (1, -1), (0, 1)]), + LineString([(0, 0), (1, 1), (0, 0)]), + LinearRing([(0, 0), (1, 1), (1, 0), (0, 1), (0, 0)]), + LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]), + ] + sgpd_result = GeoSeries(data).is_simple + gpd_result = gpd.GeoSeries(data).is_simple + 
self.check_pd_series_equal(sgpd_result, gpd_result) def test_is_ring(self): pass