This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 69aff58410 [GH-2394] Implement `symmetric_difference` (#2395)
69aff58410 is described below
commit 69aff5841000efdbe152e0141d5fe75d55e5a7f4
Author: Yunchi Pang <[email protected]>
AuthorDate: Mon Oct 13 21:10:23 2025 -0700
[GH-2394] Implement `symmetric_difference` (#2395)
Co-authored-by: Peter Nguyen <[email protected]>
---
python/sedona/spark/geopandas/base.py | 104 +++++++++++++++++++++
python/sedona/spark/geopandas/geoseries.py | 12 +++
python/tests/geopandas/test_geoseries.py | 96 +++++++++++++++++++
.../tests/geopandas/test_match_geopandas_series.py | 22 +++++
4 files changed, 234 insertions(+)
diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 912b3c35f4..5c1d12c90f 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -2140,6 +2140,110 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("difference", self, other, align)
+ def symmetric_difference(self, other, align=None):
+ """Return a ``GeoSeries`` of the symmetric difference of points in
+ each aligned geometry with `other`.
+
+ For each geometry, the symmetric difference consists of points in the
+ geometry not in `other`, and points in `other` not in the geometry.
+
+ The operation works on a 1-to-1 row-wise manner.
+
+ Parameters
+ ----------
+ other : Geoseries or geometric object
+ The Geoseries (elementwise) or geometric object to find the
+ symmetric difference to.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices.
+ If False, the order of elements is preserved. None defaults to True.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... LineString([(0, 0), (2, 2)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(0, 1),
+ ... ],
+ ... )
+ >>> s2 = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 1), (0, 1)]),
+ ... LineString([(1, 0), (1, 3)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(1, 1),
+ ... Point(0, 1),
+ ... ],
+ ... index=range(1, 6),
+ ... )
+
+ >>> s
+ 0 POLYGON ((0 0, 2 2, 0 2, 0 0))
+ 1 POLYGON ((0 0, 2 2, 0 2, 0 0))
+ 2 LINESTRING (0 0, 2 2)
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT (0 1)
+ dtype: geometry
+
+ >>> s2
+ 1 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 2 LINESTRING (1 0, 1 3)
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT (1 1)
+ 5 POINT (0 1)
+ dtype: geometry
+
+ We can do symmetric difference of each geometry and a single
+ shapely geometry:
+
+ >>> s.symmetric_difference(Polygon([(0, 0), (1, 1), (0, 1)]))
+ 0 POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+ 1 POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+ 2 GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+ 3 GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+ 4 POLYGON ((0 1, 1 1, 0 0, 0 1))
+ dtype: geometry
+
+ We can also check two GeoSeries against each other, row by row.
+ The GeoSeries above have different indices. We can either align both GeoSeries
+ based on index values and compare elements with the same index using
+ ``align=True`` or ignore index and compare elements based on their matching
+ order using ``align=False``:
+
+ >>> s.symmetric_difference(s2, align=True)
+ 0 None
+ 1 POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+ 2 MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (1 0,...
+ 3 LINESTRING EMPTY
+ 4 MULTIPOINT ((0 1), (1 1))
+ 5 None
+ dtype: geometry
+
+ >>> s.symmetric_difference(s2, align=False)
+ 0 POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+ 1 GEOMETRYCOLLECTION (POLYGON ((0 0, 0 2, 1 2, 2...
+ 2 MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (2 0,...
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT EMPTY
+ dtype: geometry
+
+ See also
+ --------
+ GeoSeries.difference
+ GeoSeries.union
+ GeoSeries.intersection
+ """
+ return _delegate_to_geometry_column("symmetric_difference", self, other, align)
+
def intersection_all(self):
raise NotImplementedError("This method is not implemented yet.")
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 361b46b197..7fde6aaea7 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -946,6 +946,18 @@ class GeoSeries(GeoFrame, pspd.Series):
returns_geom=True,
)
+ def symmetric_difference(self, other, align=None) -> "GeoSeries":
+ other_series, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ spark_expr = stf.ST_SymDifference(F.col("L"), F.col("R"))
+ return self._row_wise_operation(
+ spark_expr,
+ other_series,
+ align=align,
+ returns_geom=True,
+ )
+
@property
def is_simple(self) -> pspd.Series:
spark_expr = stf.ST_IsSimple(self.spark.column)
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index d9c5fd7db3..2e3f786077 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -848,6 +848,102 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
df_result = s.to_geoframe().difference(s2, align=False)
self.check_sgpd_equals_gpd(df_result, expected)
+ def test_symmetric_difference(self):
+ s = GeoSeries(
+ [
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ LineString([(0, 0), (2, 2)]),
+ LineString([(2, 0), (0, 2)]),
+ Point(0, 1),
+ ],
+ )
+ s2 = GeoSeries(
+ [
+ Polygon([(0, 0), (1, 1), (0, 1)]),
+ LineString([(1, 0), (1, 3)]),
+ LineString([(2, 0), (0, 2)]),
+ Point(1, 1),
+ Point(0, 1),
+ ],
+ index=range(1, 6),
+ )
+
+ # Test with single geometry
+ result = s.symmetric_difference(Polygon([(0, 0), (1, 1), (0, 1)]))
+ expected = gpd.GeoSeries(
+ [
+ Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+ Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+ GeometryCollection(
+ [
+ Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+ LineString([(1, 1), (2, 2)]),
+ ]
+ ),
+ GeometryCollection(
+ [
+ Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+ LineString([(2, 0), (1, 1)]),
+ LineString([(1, 1), (0, 2)]),
+ ]
+ ),
+ Polygon([(0, 1), (1, 1), (0, 0), (0, 1)]),
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Test with align=True
+ result = s.symmetric_difference(s2, align=True)
+ expected = gpd.GeoSeries(
+ [
+ None,
+ Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+ MultiLineString(
+ [
+ LineString([(0, 0), (1, 1)]),
+ LineString([(1, 1), (2, 2)]),
+ LineString([(1, 0), (1, 1)]),
+ LineString([(1, 1), (1, 3)]),
+ ]
+ ),
+ LineString(),
+ MultiPoint([Point(0, 1), Point(1, 1)]),
+ None,
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Test with align=False
+ result = s.symmetric_difference(s2, align=False)
+ expected = gpd.GeoSeries(
+ [
+ Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+ GeometryCollection(
+ [
+ Polygon([(0, 0), (0, 2), (1, 2), (2, 2), (1, 1), (0, 0)]),
+ LineString([(1, 0), (1, 1)]),
+ LineString([(1, 1), (1, 3)]),
+ ]
+ ),
+ MultiLineString(
+ [
+ LineString([(0, 0), (1, 1)]),
+ LineString([(1, 1), (2, 2)]),
+ LineString([(2, 0), (1, 1)]),
+ LineString([(1, 1), (0, 2)]),
+ ]
+ ),
+ LineString([(2, 0), (0, 2)]),
+ Point(),
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().symmetric_difference(s2, align=False)
+ self.check_sgpd_equals_gpd(df_result, expected)
+
def test_is_simple(self):
s = sgpd.GeoSeries(
[
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index ebbc699ca3..3ec311b0d9 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -564,6 +564,28 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+ def test_symmetric_difference(self):
+ for geom, geom2 in self.pairs:
+ # Operation doesn't work on invalid geometries
+ if (
+ not gpd.GeoSeries(geom).is_valid.all()
+ or not gpd.GeoSeries(geom2).is_valid.all()
+ ):
+ continue
+
+ sgpd_result = GeoSeries(geom).symmetric_difference(GeoSeries(geom2))
+ gpd_result = gpd.GeoSeries(geom).symmetric_difference(gpd.GeoSeries(geom2))
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).symmetric_difference(
+ GeoSeries(geom2), align=False
+ )
+ gpd_result = gpd.GeoSeries(geom).symmetric_difference(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
def test_is_simple(self):
# 'is_simple' is meaningful only for `LineStrings` and `LinearRings`
data = [