Copilot commented on code in PR #2732:
URL: https://github.com/apache/sedona/pull/2732#discussion_r2922864490
##########
python/sedona/spark/geopandas/geoseries.py:
##########
@@ -1330,6 +1330,89 @@ def distance(self, other, align=None) -> pspd.Series:
)
return result
+ def frechet_distance(self, other, align=None, densify=None) -> pspd.Series:
+ if densify is not None:
+ raise NotImplementedError(
+ "Sedona does not support the densify parameter for
frechet_distance."
+ )
+
+ other_series, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ spark_expr = stf.ST_FrechetDistance(F.col("L"), F.col("R"))
+ result = self._row_wise_operation(
+ spark_expr,
+ other_series,
+ align,
+ default_val=None,
+ )
+ return result
+
+ def hausdorff_distance(self, other, align=None, densify=None) ->
pspd.Series:
+ other_series, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ if densify is not None:
+ spark_expr = stf.ST_HausdorffDistance(F.col("L"), F.col("R"),
densify)
+ else:
+ spark_expr = stf.ST_HausdorffDistance(F.col("L"), F.col("R"))
+ result = self._row_wise_operation(
+ spark_expr,
+ other_series,
+ align,
+ default_val=None,
+ )
+ return result
+
+ def geom_equals(self, other, align=None) -> pspd.Series:
+ other_series, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ spark_expr = stp.ST_Equals(F.col("L"), F.col("R"))
+ result = self._row_wise_operation(
+ spark_expr,
+ other_series,
+ align,
+ returns_geom=False,
+ default_val=False,
+ )
+ return _to_bool(result)
+
+ def interpolate(self, distance, normalized=False) -> "GeoSeries":
+ other_series, extended = self._make_series_of_val(distance)
+ align = not extended
+
+ if normalized:
+ spark_expr = stf.ST_LineInterpolatePoint(F.col("L"), F.col("R"))
+ else:
+ spark_expr = stf.ST_LineInterpolatePoint(
+ F.col("L"), F.col("R") / stf.ST_Length(F.col("L"))
+ )
Review Comment:
For normalized=False, interpolate computes a fraction via `distance /
ST_Length(L)`. If `ST_Length(L)` is 0 (e.g., empty or zero-length lines), this
produces inf/NaN and can lead to errors or invalid results inside
`ST_LineInterpolatePoint`. Consider explicitly handling `ST_Length(L) == 0`
(and/or empty geometries) to match geopandas/shapely behavior instead of
relying on division-by-zero semantics.
```suggestion
len_col = stf.ST_Length(F.col("L"))
frac_col = F.when(len_col == F.lit(0), F.lit(0.0)).otherwise(
F.col("R") / len_col
)
spark_expr = stf.ST_LineInterpolatePoint(F.col("L"), frac_col)
```
##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -2635,6 +2635,177 @@ def test_relate(self):
expected = pd.Series(["FF2F11212", "212101212"])
self.check_pd_series_equal(result, expected)
+ def test_frechet_distance(self):
+ s1 = GeoSeries(
+ [
+ LineString([(0, 0), (1, 0), (2, 0)]),
+ LineString([(0, 0), (1, 1)]),
+ ]
+ )
+ s2 = GeoSeries(
+ [
+ LineString([(0, 1), (1, 2), (2, 1)]),
+ LineString([(1, 0), (2, 1)]),
+ ]
+ )
+
+ result = s1.frechet_distance(s2, align=False)
+ expected = pd.Series([2.0, 1.0])
+ self.check_pd_series_equal(result, expected)
+
+ # Test with single geometry
+ line = LineString([(0, 1), (1, 2), (2, 1)])
+ result = s1.frechet_distance(line)
+ expected = pd.Series([2.0, 1.0])
+ self.check_pd_series_equal(result, expected)
+
+ # Test that GeoDataFrame works too
+ df_result = s1.to_geoframe().frechet_distance(s2, align=False)
+ expected = pd.Series([2.0, 1.0])
+ self.check_pd_series_equal(df_result, expected)
+
+ # Test that densify raises NotImplementedError
+ with pytest.raises(NotImplementedError):
+ s1.frechet_distance(s2, densify=0.5)
+
+ def test_hausdorff_distance(self):
+ s1 = GeoSeries(
+ [
+ LineString([(0, 0), (1, 0), (2, 0)]),
+ LineString([(0, 0), (1, 1)]),
+ ]
+ )
+ s2 = GeoSeries(
+ [
+ LineString([(0, 1), (1, 2), (2, 1)]),
+ LineString([(1, 0), (2, 1)]),
+ ]
+ )
+
+ result = s1.hausdorff_distance(s2, align=False)
+ expected = pd.Series([2.0, 1.0])
+ self.check_pd_series_equal(result, expected)
+
+ # Test with single geometry
+ line = LineString([(0, 1), (1, 2), (2, 1)])
+ result = s1.hausdorff_distance(line)
+ expected = pd.Series([2.0, 1.0])
+ self.check_pd_series_equal(result, expected)
+
+ # Test that GeoDataFrame works too
+ df_result = s1.to_geoframe().hausdorff_distance(s2, align=False)
+ expected = pd.Series([2.0, 1.0])
+ self.check_pd_series_equal(df_result, expected)
+
+ # Test with densify parameter
+ result = s1.hausdorff_distance(s2, densify=0.5, align=False)
+ expected = pd.Series([2.0, 1.0])
+ self.check_pd_series_equal(result, expected)
+
+ def test_geom_equals(self):
+ s1 = GeoSeries(
+ [
+ Point(0, 0),
+ Point(1, 1),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ]
+ )
+ s2 = GeoSeries(
+ [
+ Point(0, 0),
+ Point(2, 2),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ]
+ )
+
+ result = s1.geom_equals(s2, align=False)
+ expected = pd.Series([True, False, True])
+ self.check_pd_series_equal(result, expected)
+
+ # Test with single geometry
+ result = s1.geom_equals(Point(0, 0))
+ expected = pd.Series([True, False, False])
+ self.check_pd_series_equal(result, expected)
+
+ # Test that GeoDataFrame works too
+ df_result = s1.to_geoframe().geom_equals(s2, align=False)
+ expected = pd.Series([True, False, True])
+ self.check_pd_series_equal(df_result, expected)
+
+ def test_interpolate(self):
+ s = GeoSeries(
+ [
+ LineString([(0, 0), (2, 0), (0, 2)]),
+ LineString([(0, 0), (2, 2)]),
+ LineString([(2, 0), (0, 2)]),
+ ]
+ )
Review Comment:
The new unit tests don’t exercise empty or zero-length linear geometries for
interpolate/project. Given the implementation can involve division by
`ST_Length`, adding at least one empty `LineString()` / `LinearRing()` case
(and a degenerate zero-length line) would help catch division-by-zero/edge-case
behavior and ensure compatibility with geopandas.
##########
python/tests/geopandas/test_match_geopandas_series.py:
##########
@@ -1285,6 +1285,100 @@ def test_relate(self):
)
self.check_pd_series_equal(sgpd_result, gpd_result)
+ def test_frechet_distance(self):
+ line_pairs = [
+ (self.linestrings, self.linestrings),
+ (self.linearrings, self.linearrings),
+ (self.linestrings, self.linearrings),
+ ]
+ for geom, geom2 in line_pairs:
+ # Skip pairs containing empty geometries
+ if any(g.is_empty for g in geom) or any(g.is_empty for g in geom2):
+ continue
+
+ sgpd_result = GeoSeries(geom).frechet_distance(GeoSeries(geom2),
align=True)
+ gpd_result = gpd.GeoSeries(geom).frechet_distance(
+ gpd.GeoSeries(geom2), align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).frechet_distance(
+ GeoSeries(geom2), align=False
+ )
+ gpd_result = gpd.GeoSeries(geom).frechet_distance(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ def test_hausdorff_distance(self):
+ for geom, geom2 in self.pairs:
+ sgpd_result = GeoSeries(geom).hausdorff_distance(
+ GeoSeries(geom2), align=True
+ )
+ gpd_result = gpd.GeoSeries(geom).hausdorff_distance(
+ gpd.GeoSeries(geom2), align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).hausdorff_distance(
+ GeoSeries(geom2), align=False
+ )
+ gpd_result = gpd.GeoSeries(geom).hausdorff_distance(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ def test_geom_equals(self):
+ for geom, geom2 in self.pairs:
+ if self.contains_any_geom_collection(geom, geom2):
+ continue
+
+ sgpd_result = GeoSeries(geom).geom_equals(GeoSeries(geom2),
align=True)
+ gpd_result = gpd.GeoSeries(geom).geom_equals(
+ gpd.GeoSeries(geom2), align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).geom_equals(GeoSeries(geom2),
align=False)
+ gpd_result = gpd.GeoSeries(geom).geom_equals(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ def test_interpolate(self):
+ for geom in [self.linestrings, self.linearrings]:
+ # Skip empty geometries
+ non_empty = [g for g in geom if not g.is_empty]
+ if not non_empty:
+ continue
+
+ sgpd_result = GeoSeries(non_empty).interpolate(1.0)
+ gpd_result = gpd.GeoSeries(non_empty).interpolate(1.0)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ sgpd_result = GeoSeries(non_empty).interpolate(0.5,
normalized=True)
+ gpd_result = gpd.GeoSeries(non_empty).interpolate(0.5,
normalized=True)
Review Comment:
These interpolate match tests filter out empty geometries (`LineString()` /
`LinearRing()`), which are present in the test fixtures. This means
empty-geometry behavior is untested and potential runtime errors/semantic
mismatches are hidden. Please add explicit assertions for empty inputs (or
ensure the implementation matches geopandas for empties) instead of skipping
them.
```suggestion
# Ensure behavior matches GeoPandas for both empty and non-empty
geometries
sgpd_result = GeoSeries(geom).interpolate(1.0)
gpd_result = gpd.GeoSeries(geom).interpolate(1.0)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
sgpd_result = GeoSeries(geom).interpolate(0.5, normalized=True)
gpd_result = gpd.GeoSeries(geom).interpolate(0.5,
normalized=True)
```
##########
python/tests/geopandas/test_match_geopandas_series.py:
##########
@@ -1285,6 +1285,100 @@ def test_relate(self):
)
self.check_pd_series_equal(sgpd_result, gpd_result)
+ def test_frechet_distance(self):
+ line_pairs = [
+ (self.linestrings, self.linestrings),
+ (self.linearrings, self.linearrings),
+ (self.linestrings, self.linearrings),
+ ]
+ for geom, geom2 in line_pairs:
+ # Skip pairs containing empty geometries
+ if any(g.is_empty for g in geom) or any(g.is_empty for g in geom2):
+ continue
+
+ sgpd_result = GeoSeries(geom).frechet_distance(GeoSeries(geom2),
align=True)
+ gpd_result = gpd.GeoSeries(geom).frechet_distance(
+ gpd.GeoSeries(geom2), align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).frechet_distance(
+ GeoSeries(geom2), align=False
+ )
+ gpd_result = gpd.GeoSeries(geom).frechet_distance(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ def test_hausdorff_distance(self):
+ for geom, geom2 in self.pairs:
+ sgpd_result = GeoSeries(geom).hausdorff_distance(
+ GeoSeries(geom2), align=True
+ )
+ gpd_result = gpd.GeoSeries(geom).hausdorff_distance(
+ gpd.GeoSeries(geom2), align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).hausdorff_distance(
+ GeoSeries(geom2), align=False
+ )
+ gpd_result = gpd.GeoSeries(geom).hausdorff_distance(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ def test_geom_equals(self):
+ for geom, geom2 in self.pairs:
+ if self.contains_any_geom_collection(geom, geom2):
+ continue
+
+ sgpd_result = GeoSeries(geom).geom_equals(GeoSeries(geom2),
align=True)
+ gpd_result = gpd.GeoSeries(geom).geom_equals(
+ gpd.GeoSeries(geom2), align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).geom_equals(GeoSeries(geom2),
align=False)
+ gpd_result = gpd.GeoSeries(geom).geom_equals(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ def test_interpolate(self):
+ for geom in [self.linestrings, self.linearrings]:
+ # Skip empty geometries
+ non_empty = [g for g in geom if not g.is_empty]
+ if not non_empty:
+ continue
+
+ sgpd_result = GeoSeries(non_empty).interpolate(1.0)
+ gpd_result = gpd.GeoSeries(non_empty).interpolate(1.0)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ sgpd_result = GeoSeries(non_empty).interpolate(0.5,
normalized=True)
+ gpd_result = gpd.GeoSeries(non_empty).interpolate(0.5,
normalized=True)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ def test_project(self):
+ for geom in [self.linestrings, self.linearrings]:
+ # Skip empty geometries
+ non_empty = [g for g in geom if not g.is_empty]
+ if not non_empty:
+ continue
+
+ point = Point(1, 1)
+ sgpd_result = GeoSeries(non_empty).project(point)
+ gpd_result = gpd.GeoSeries(non_empty).project(point)
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ sgpd_result = GeoSeries(non_empty).project(point, normalized=True)
+ gpd_result = gpd.GeoSeries(non_empty).project(point,
normalized=True)
Review Comment:
These project match tests filter out empty geometries (`LineString()` /
`LinearRing()`), which are present in the fixtures. This prevents validating
empty/zero-length behavior (which is a common edge case for linear referencing)
against geopandas. Please add coverage for empties or adjust the implementation
to safely handle them.
```suggestion
sgpd_result = GeoSeries(geom).interpolate(1.0)
gpd_result = gpd.GeoSeries(geom).interpolate(1.0)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
sgpd_result = GeoSeries(geom).interpolate(0.5, normalized=True)
gpd_result = gpd.GeoSeries(geom).interpolate(0.5,
normalized=True)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_project(self):
for geom in [self.linestrings, self.linearrings]:
point = Point(1, 1)
sgpd_result = GeoSeries(geom).project(point)
gpd_result = gpd.GeoSeries(geom).project(point)
self.check_pd_series_equal(sgpd_result, gpd_result)
sgpd_result = GeoSeries(geom).project(point, normalized=True)
gpd_result = gpd.GeoSeries(geom).project(point, normalized=True)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]