This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 60436fead1 [GH-2025] Geopandas.GeoSeries: Implement is_valid, is_empty, is_simple (#2026)
60436fead1 is described below

commit 60436fead1e4686dd476ffebbc45a8efda6af633
Author: Peter Nguyen <petern0...@gmail.com>
AuthorDate: Wed Jul 2 15:01:45 2025 -0700

    [GH-2025] Geopandas.GeoSeries: Implement is_valid, is_empty, is_simple (#2026)
    
    * Support elements of None type in GeoSeries
    
    * Implement test_is_valid
    
    * Implement is_empty
    
    * Implement is_simple
---
 python/sedona/geopandas/geoseries.py               | 119 +++++++++++++++++++--
 python/tests/geopandas/test_geoseries.py           |  34 +++++-
 .../tests/geopandas/test_match_geopandas_series.py |  24 ++++-
 3 files changed, 160 insertions(+), 17 deletions(-)

diff --git a/python/sedona/geopandas/geoseries.py b/python/sedona/geopandas/geoseries.py
index cf17eb31a2..ea75b3da80 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -154,7 +154,9 @@ class GeoSeries(GeoFrame, pspd.Series):
                     fastpath=fastpath,
                 )
             gs = gpd.GeoSeries(s)
-            pdf = pd.Series(gs.apply(lambda geom: geom.wkb))
+            pdf = pd.Series(
+                gs.apply(lambda geom: geom.wkb if geom is not None else None)
+            )
             # initialize the parent class pyspark Series with the pandas Series
             super().__init__(
                 data=pdf,
@@ -591,18 +593,88 @@ class GeoSeries(GeoFrame, pspd.Series):
         ).to_spark_pandas()
 
     @property
-    def is_valid(self):
-        # Implementation of the abstract method
-        raise NotImplementedError("This method is not implemented yet.")
+    def is_valid(self) -> pspd.Series:
+        """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+        geometries that are valid.
+
+        Examples
+        --------
+
+        An example with one invalid polygon (a bowtie geometry crossing itself)
+        and one missing geometry:
+
+        >>> from shapely.geometry import Polygon
+        >>> s = geopandas.GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
+        ...         Polygon([(0,0), (1, 1), (1, 0), (0, 1)]),  # bowtie geometry
+        ...         Polygon([(0, 0), (2, 2), (2, 0)]),
+        ...         None
+        ...     ]
+        ... )
+        >>> s
+        0         POLYGON ((0 0, 1 1, 0 1, 0 0))
+        1    POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))
+        2         POLYGON ((0 0, 2 2, 2 0, 0 0))
+        3                                   None
+        dtype: geometry
+
+        >>> s.is_valid
+        0     True
+        1    False
+        2     True
+        3    False
+        dtype: bool
+
+        See also
+        --------
+        GeoSeries.is_valid_reason : reason for invalidity
+        """
+        return (
+            self._process_geometry_column("ST_IsValid", rename="is_valid")
+            .to_spark_pandas()
+            .astype("bool")
+        )
 
     def is_valid_reason(self):
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
     @property
-    def is_empty(self):
-        # Implementation of the abstract method
-        raise NotImplementedError("This method is not implemented yet.")
+    def is_empty(self) -> pspd.Series:
+        """
+        Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+        empty geometries.
+
+        Examples
+        --------
+        An example of a GeoDataFrame with one empty point, one point and one missing
+        value:
+
+        >>> from shapely.geometry import Point
+        >>> d = {'geometry': [Point(), Point(2, 1), None]}
+        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
+        >>> gdf
+            geometry
+        0  POINT EMPTY
+        1  POINT (2 1)
+        2         None
+
+        >>> gdf.is_empty
+        0     True
+        1    False
+        2    False
+        dtype: bool
+
+        See Also
+        --------
+        GeoSeries.isna : detect missing values
+        """
+        return (
+            self._process_geometry_column("ST_IsEmpty", rename="is_empty")
+            .to_spark_pandas()
+            .astype("bool")
+        )
 
     def count_coordinates(self):
         # Implementation of the abstract method
@@ -617,9 +689,36 @@ class GeoSeries(GeoFrame, pspd.Series):
         raise NotImplementedError("This method is not implemented yet.")
 
     @property
-    def is_simple(self):
-        # Implementation of the abstract method
-        raise NotImplementedError("This method is not implemented yet.")
+    def is_simple(self) -> pspd.Series:
+        """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+        geometries that do not cross themselves.
+
+        This is meaningful only for `LineStrings` and `LinearRings`.
+
+        Examples
+        --------
+        >>> from shapely.geometry import LineString
+        >>> s = geopandas.GeoSeries(
+        ...     [
+        ...         LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
+        ...         LineString([(0, 0), (1, 1), (1, -1)]),
+        ...     ]
+        ... )
+        >>> s
+        0    LINESTRING (0 0, 1 1, 1 -1, 0 1)
+        1         LINESTRING (0 0, 1 1, 1 -1)
+        dtype: geometry
+
+        >>> s.is_simple
+        0    False
+        1     True
+        dtype: bool
+        """
+        return (
+            self._process_geometry_column("ST_IsSimple", rename="is_simple")
+            .to_spark_pandas()
+            .astype("bool")
+        )
 
     @property
     def is_ring(self):
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 8c0805b5f8..99bc9aa654 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -23,7 +23,7 @@ import sedona.geopandas as sgpd
 from sedona.geopandas import GeoSeries
 from tests.test_base import TestBase
 from shapely import wkt
-from shapely.geometry import Point, LineString, Polygon, GeometryCollection
+from shapely.geometry import Point, LineString, Polygon, GeometryCollection, LinearRing
 from pandas.testing import assert_series_equal
 
 
@@ -192,13 +192,29 @@ class TestGeoSeries(TestBase):
         assert_series_equal(result, expected)
 
     def test_is_valid(self):
-        pass
+        geoseries = sgpd.GeoSeries(
+            [
+                Polygon([(0, 0), (1, 1), (0, 1)]),
+                Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]),  # bowtie geometry
+                Polygon([(0, 0), (2, 2), (2, 0)]),
+                None,
+            ]
+        )
+        result = geoseries.is_valid
+        expected = pd.Series([True, False, True, False])
+        assert_series_equal(result.to_pandas(), expected)
 
     def test_is_valid_reason(self):
         pass
 
     def test_is_empty(self):
-        pass
+        geoseries = sgpd.GeoSeries(
+            [Point(), Point(2, 1), Polygon([(0, 0), (1, 1), (0, 1)]), None],
+        )
+
+        result = geoseries.is_empty
+        expected = pd.Series([True, False, False, False])
+        assert_series_equal(result.to_pandas(), expected)
 
     def test_count_coordinates(self):
         pass
@@ -210,7 +226,17 @@ class TestGeoSeries(TestBase):
         pass
 
     def test_is_simple(self):
-        pass
+        s = sgpd.GeoSeries(
+            [
+                LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
+                LineString([(0, 0), (1, 1), (1, -1)]),
+                LinearRing([(0, 0), (1, 1), (1, -1), (0, 1)]),
+                LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]),
+            ]
+        )
+        result = s.is_simple
+        expected = pd.Series([False, True, False, True])
+        assert_series_equal(result.to_pandas(), expected)
 
     def test_is_ring(self):
         pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index ec89ba23bd..c4d6eb9da0 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -32,6 +32,7 @@ from shapely.geometry import (
     LineString,
     MultiPolygon,
     GeometryCollection,
+    LinearRing,
 )
 
 from sedona.geopandas import GeoSeries
@@ -310,13 +311,21 @@ class TestMatchGeopandasSeries(TestBase):
             self.check_pd_series_equal(sgpd_result, gpd_result)
 
     def test_is_valid(self):
-        pass
+        for _, geom in self.geoms:
+            sgpd_result = GeoSeries(geom).is_valid
+            assert isinstance(sgpd_result, ps.Series)
+            gpd_result = gpd.GeoSeries(geom).is_valid
+            self.check_pd_series_equal(sgpd_result, gpd_result)
 
     def test_is_valid_reason(self):
         pass
 
     def test_is_empty(self):
-        pass
+        for _, geom in self.geoms:
+            sgpd_result = GeoSeries(geom).is_empty
+            assert isinstance(sgpd_result, ps.Series)
+            gpd_result = gpd.GeoSeries(geom).is_empty
+            self.check_pd_series_equal(sgpd_result, gpd_result)
 
     def test_count_coordinates(self):
         pass
@@ -328,7 +337,16 @@ class TestMatchGeopandasSeries(TestBase):
         pass
 
     def test_is_simple(self):
-        pass
+        data = [
+            LineString([(0, 0), (0, 0)]),
+            LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
+            LineString([(0, 0), (1, 1), (0, 0)]),
+            LinearRing([(0, 0), (1, 1), (1, 0), (0, 1), (0, 0)]),
+            LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]),
+        ]
+        sgpd_result = GeoSeries(data).is_simple
+        gpd_result = gpd.GeoSeries(data).is_simple
+        self.check_pd_series_equal(sgpd_result, gpd_result)
 
     def test_is_ring(self):
         pass

Reply via email to