This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 073488d3e0 [GH-2398] Implement geopandas `union` (#2399)
073488d3e0 is described below
commit 073488d3e09fe5abee6a831a245dee4acfb9d0dd
Author: Yunchi Pang <[email protected]>
AuthorDate: Tue Oct 14 22:00:10 2025 -0700
[GH-2398] Implement geopandas `union` (#2399)
---
python/sedona/spark/geopandas/base.py | 99 ++++++++++++++++++++++
python/sedona/spark/geopandas/geoseries.py | 12 +++
python/tests/geopandas/test_geoseries.py | 95 +++++++++++++++++++++
.../tests/geopandas/test_match_geopandas_series.py | 20 +++++
4 files changed, 226 insertions(+)
diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 5c1d12c90f..1f5d67f5a5 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -2244,6 +2244,105 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("symmetric_difference", self, other, align)
+ def union(self, other, align=None):
+ """Return a ``GeoSeries`` of the union of points in each aligned geometry
+ with `other`.
+
+ The operation works on a 1-to-1 row-wise manner.
+
+ Parameters
+ ----------
+ other : Geoseries or geometric object
+ The Geoseries (elementwise) or geometric object to find the
+ union with.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices.
+ If False, the order of elements is preserved. None defaults to True.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... LineString([(0, 0), (2, 2)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(0, 1),
+ ... ],
+ ... )
+ >>> s2 = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 1), (0, 1)]),
+ ... LineString([(1, 0), (1, 3)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(1, 1),
+ ... Point(0, 1),
+ ... ],
+ ... index=range(1, 6),
+ ... )
+
+ >>> s
+ 0 POLYGON ((0 0, 2 2, 0 2, 0 0))
+ 1 POLYGON ((0 0, 2 2, 0 2, 0 0))
+ 2 LINESTRING (0 0, 2 2)
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT (0 1)
+ dtype: geometry
+
+ >>> s2
+ 1 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 2 LINESTRING (1 0, 1 3)
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT (1 1)
+ 5 POINT (0 1)
+ dtype: geometry
+
+ We can do union of each geometry and a single shapely geometry:
+
+ >>> s.union(Polygon([(0, 0), (1, 1), (0, 1)]))
+ 0 POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+ 1 POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+ 2 GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+ 3 GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+ 4 POLYGON ((0 1, 1 1, 0 0, 0 1))
+ dtype: geometry
+
+ We can also check two GeoSeries against each other, row by row.
+ The GeoSeries above have different indices. We can either align both GeoSeries
+ based on index values and compare elements with the same index using
+ ``align=True`` or ignore index and compare elements based on their matching
+ order using ``align=False``:
+
+ >>> s.union(s2, align=True)
+ 0 POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+ 1 POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+ 2 MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (1 0,...
+ 3 LINESTRING (2 0, 0 2)
+ 4 MULTIPOINT ((0 1), (1 1))
+ dtype: geometry
+
+ >>> s.union(s2, align=False)
+ 0 POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+ 1 GEOMETRYCOLLECTION (POLYGON ((0 0, 0 2, 1 2, 2...
+ 2 MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (2 0,...
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT (0 1)
+ dtype: geometry
+
+ See Also
+ --------
+ GeoSeries.symmetric_difference
+ GeoSeries.difference
+ GeoSeries.intersection
+ """
+ return _delegate_to_geometry_column("union", self, other, align)
+
def intersection_all(self):
raise NotImplementedError("This method is not implemented yet.")
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 7fde6aaea7..21ffd05654 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -958,6 +958,18 @@ class GeoSeries(GeoFrame, pspd.Series):
returns_geom=True,
)
+ def union(self, other, align=None) -> "GeoSeries":
+ other_series, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ spark_expr = stf.ST_Union(F.col("L"), F.col("R"))
+ return self._row_wise_operation(
+ spark_expr,
+ other_series,
+ align=align,
+ returns_geom=True,
+ )
+
@property
def is_simple(self) -> pspd.Series:
spark_expr = stf.ST_IsSimple(self.spark.column)
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 2e3f786077..08e5a1779c 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -944,6 +944,101 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
df_result = s.to_geoframe().symmetric_difference(s2, align=False)
self.check_sgpd_equals_gpd(df_result, expected)
+ def test_union(self):
+ s = GeoSeries(
+ [
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ LineString([(0, 0), (2, 2)]),
+ LineString([(2, 0), (0, 2)]),
+ Point(0, 1),
+ ],
+ )
+ s2 = GeoSeries(
+ [
+ Polygon([(0, 0), (1, 1), (0, 1)]),
+ LineString([(1, 0), (1, 3)]),
+ LineString([(2, 0), (0, 2)]),
+ Point(1, 1),
+ Point(0, 1),
+ ],
+ index=range(1, 6),
+ )
+
+ # Test with single geometry
+ result = s.union(Polygon([(0, 0), (1, 1), (0, 1)]))
+ expected = gpd.GeoSeries(
+ [
+ Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+ Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+ GeometryCollection(
+ [
+ Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+ LineString([(0, 0), (2, 2)]),
+ ]
+ ),
+ GeometryCollection(
+ [
+ Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+ LineString([(2, 0), (0, 2)]),
+ ]
+ ),
+ Polygon([(0, 1), (1, 1), (0, 0), (0, 1)]),
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Test with align=True
+ result = s.union(s2, align=True)
+ expected = gpd.GeoSeries(
+ [
+ None,
+ Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+ MultiLineString(
+ [
+ LineString([(0, 0), (1, 1)]),
+ LineString([(1, 1), (2, 2)]),
+ LineString([(1, 0), (1, 1)]),
+ LineString([(1, 1), (1, 3)]),
+ ]
+ ),
+ LineString([(2, 0), (0, 2)]),
+ MultiPoint([Point(0, 1), Point(1, 1)]),
+ None,
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Test with align=False
+ result = s.union(s2, align=False)
+ expected = gpd.GeoSeries(
+ [
+ Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+ GeometryCollection(
+ [
+ Polygon([(0, 0), (0, 2), (1, 2), (2, 2), (1, 1), (0, 0)]),
+ LineString([(1, 0), (1, 1)]),
+ LineString([(1, 1), (1, 3)]),
+ ]
+ ),
+ MultiLineString(
+ [
+ LineString([(0, 0), (1, 1)]),
+ LineString([(1, 1), (2, 2)]),
+ LineString([(2, 0), (1, 1)]),
+ LineString([(1, 1), (0, 2)]),
+ ]
+ ),
+ LineString([(2, 0), (0, 2)]),
+ Point(0, 1),
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().union(s2, align=False)
+ self.check_sgpd_equals_gpd(df_result, expected)
+
def test_is_simple(self):
s = sgpd.GeoSeries(
[
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 3ec311b0d9..b1692c444f 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -586,6 +586,26 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+ def test_union(self):
+ for geom, geom2 in self.pairs:
+ # Operation doesn't work on invalid geometries
+ if (
+ not gpd.GeoSeries(geom).is_valid.all()
+ or not gpd.GeoSeries(geom2).is_valid.all()
+ ):
+ continue
+
+ sgpd_result = GeoSeries(geom).union(GeoSeries(geom2))
+ gpd_result = gpd.GeoSeries(geom).union(gpd.GeoSeries(geom2))
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).union(GeoSeries(geom2), align=False)
+ gpd_result = gpd.GeoSeries(geom).union(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
def test_is_simple(self):
# 'is_simple' is meaningful only for `LineStrings` and `LinearRings`
data = [