jiayuasu commented on code in PR #2732:
URL: https://github.com/apache/sedona/pull/2732#discussion_r2929310794


##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -2635,6 +2635,177 @@ def test_relate(self):
         expected = pd.Series(["FF2F11212", "212101212"])
         self.check_pd_series_equal(result, expected)
 
+    def test_frechet_distance(self):
+        s1 = GeoSeries(
+            [
+                LineString([(0, 0), (1, 0), (2, 0)]),
+                LineString([(0, 0), (1, 1)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                LineString([(0, 1), (1, 2), (2, 1)]),
+                LineString([(1, 0), (2, 1)]),
+            ]
+        )
+
+        result = s1.frechet_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)

Review Comment:
   Sedona GeoSeries cannot hold empty geometries (Spark UDT limitation), so we 
can't add unit tests for empty geometries. The match tests skip empties for the 
same reason.



##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -2635,6 +2635,177 @@ def test_relate(self):
         expected = pd.Series(["FF2F11212", "212101212"])
         self.check_pd_series_equal(result, expected)
 
+    def test_frechet_distance(self):
+        s1 = GeoSeries(
+            [
+                LineString([(0, 0), (1, 0), (2, 0)]),
+                LineString([(0, 0), (1, 1)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                LineString([(0, 1), (1, 2), (2, 1)]),
+                LineString([(1, 0), (2, 1)]),
+            ]
+        )
+
+        result = s1.frechet_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test with single geometry
+        line = LineString([(0, 1), (1, 2), (2, 1)])
+        result = s1.frechet_distance(line)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test that GeoDataFrame works too
+        df_result = s1.to_geoframe().frechet_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(df_result, expected)
+
+        # Test that densify raises NotImplementedError
+        with pytest.raises(NotImplementedError):
+            s1.frechet_distance(s2, densify=0.5)
+
+    def test_hausdorff_distance(self):
+        s1 = GeoSeries(
+            [
+                LineString([(0, 0), (1, 0), (2, 0)]),
+                LineString([(0, 0), (1, 1)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                LineString([(0, 1), (1, 2), (2, 1)]),
+                LineString([(1, 0), (2, 1)]),
+            ]
+        )
+
+        result = s1.hausdorff_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test with single geometry
+        line = LineString([(0, 1), (1, 2), (2, 1)])
+        result = s1.hausdorff_distance(line)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test that GeoDataFrame works too
+        df_result = s1.to_geoframe().hausdorff_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(df_result, expected)
+
+        # Test with densify parameter
+        result = s1.hausdorff_distance(s2, densify=0.5, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+    def test_geom_equals(self):
+        s1 = GeoSeries(
+            [
+                Point(0, 0),
+                Point(1, 1),
+                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                Point(0, 0),
+                Point(2, 2),
+                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+            ]
+        )
+
+        result = s1.geom_equals(s2, align=False)
+        expected = pd.Series([True, False, True])
+        self.check_pd_series_equal(result, expected)

Review Comment:
   Sedona GeoSeries cannot hold empty geometries (Spark UDT limitation), so we 
can't add unit tests for empty geometries. The match tests skip empties for the 
same reason.



##########
python/tests/geopandas/test_geoseries.py:
##########
@@ -2635,6 +2635,177 @@ def test_relate(self):
         expected = pd.Series(["FF2F11212", "212101212"])
         self.check_pd_series_equal(result, expected)
 
+    def test_frechet_distance(self):
+        s1 = GeoSeries(
+            [
+                LineString([(0, 0), (1, 0), (2, 0)]),
+                LineString([(0, 0), (1, 1)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                LineString([(0, 1), (1, 2), (2, 1)]),
+                LineString([(1, 0), (2, 1)]),
+            ]
+        )
+
+        result = s1.frechet_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test with single geometry
+        line = LineString([(0, 1), (1, 2), (2, 1)])
+        result = s1.frechet_distance(line)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test that GeoDataFrame works too
+        df_result = s1.to_geoframe().frechet_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(df_result, expected)
+
+        # Test that densify raises NotImplementedError
+        with pytest.raises(NotImplementedError):
+            s1.frechet_distance(s2, densify=0.5)
+
+    def test_hausdorff_distance(self):
+        s1 = GeoSeries(
+            [
+                LineString([(0, 0), (1, 0), (2, 0)]),
+                LineString([(0, 0), (1, 1)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                LineString([(0, 1), (1, 2), (2, 1)]),
+                LineString([(1, 0), (2, 1)]),
+            ]
+        )
+
+        result = s1.hausdorff_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test with single geometry
+        line = LineString([(0, 1), (1, 2), (2, 1)])
+        result = s1.hausdorff_distance(line)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+        # Test that GeoDataFrame works too
+        df_result = s1.to_geoframe().hausdorff_distance(s2, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(df_result, expected)
+
+        # Test with densify parameter
+        result = s1.hausdorff_distance(s2, densify=0.5, align=False)
+        expected = pd.Series([2.0, 1.0])
+        self.check_pd_series_equal(result, expected)
+
+    def test_geom_equals(self):
+        s1 = GeoSeries(
+            [
+                Point(0, 0),
+                Point(1, 1),
+                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                Point(0, 0),
+                Point(2, 2),
+                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+            ]
+        )
+
+        result = s1.geom_equals(s2, align=False)
+        expected = pd.Series([True, False, True])
+        self.check_pd_series_equal(result, expected)
+
+        # Test with single geometry
+        result = s1.geom_equals(Point(0, 0))
+        expected = pd.Series([True, False, False])
+        self.check_pd_series_equal(result, expected)
+
+        # Test that GeoDataFrame works too
+        df_result = s1.to_geoframe().geom_equals(s2, align=False)
+        expected = pd.Series([True, False, True])
+        self.check_pd_series_equal(df_result, expected)
+
+    def test_interpolate(self):
+        s = GeoSeries(
+            [
+                LineString([(0, 0), (2, 0), (0, 2)]),
+                LineString([(0, 0), (2, 2)]),
+                LineString([(2, 0), (0, 2)]),
+            ]
+        )

Review Comment:
   Sedona GeoSeries cannot hold empty geometries (Spark UDT limitation), so we 
can't add unit tests for empty geometries. The match tests skip empties for the 
same reason.



##########
python/sedona/spark/geopandas/geoseries.py:
##########
@@ -1330,6 +1330,89 @@ def distance(self, other, align=None) -> pspd.Series:
         )
         return result
 
+    def frechet_distance(self, other, align=None, densify=None) -> pspd.Series:
+        if densify is not None:
+            raise NotImplementedError(
+                "Sedona does not support the densify parameter for 
frechet_distance."
+            )
+
+        other_series, extended = self._make_series_of_val(other)
+        align = False if extended else align
+
+        spark_expr = stf.ST_FrechetDistance(F.col("L"), F.col("R"))
+        result = self._row_wise_operation(
+            spark_expr,
+            other_series,
+            align,
+            default_val=None,
+        )
+        return result
+
+    def hausdorff_distance(self, other, align=None, densify=None) -> 
pspd.Series:
+        other_series, extended = self._make_series_of_val(other)
+        align = False if extended else align
+
+        if densify is not None:
+            spark_expr = stf.ST_HausdorffDistance(F.col("L"), F.col("R"), 
densify)
+        else:
+            spark_expr = stf.ST_HausdorffDistance(F.col("L"), F.col("R"))
+        result = self._row_wise_operation(
+            spark_expr,
+            other_series,
+            align,
+            default_val=None,
+        )
+        return result
+
+    def geom_equals(self, other, align=None) -> pspd.Series:
+        other_series, extended = self._make_series_of_val(other)
+        align = False if extended else align
+
+        spark_expr = stp.ST_Equals(F.col("L"), F.col("R"))
+        result = self._row_wise_operation(
+            spark_expr,
+            other_series,
+            align,
+            returns_geom=False,
+            default_val=False,
+        )
+        return _to_bool(result)
+
+    def interpolate(self, distance, normalized=False) -> "GeoSeries":
+        other_series, extended = self._make_series_of_val(distance)
+        align = not extended
+
+        if normalized:
+            spark_expr = stf.ST_LineInterpolatePoint(F.col("L"), F.col("R"))
+        else:
+            spark_expr = stf.ST_LineInterpolatePoint(
+                F.col("L"), F.col("R") / stf.ST_Length(F.col("L"))
+            )

Review Comment:
   Empty geometries cannot be constructed in a Sedona GeoSeries (Spark UDT 
limitation), so the division by zero on empty lines cannot occur in practice. 
For zero-length (non-empty) lines, geopandas returns `POINT(x y)` at the line's 
location regardless of distance, which matches what `ST_LineInterpolatePoint` 
returns when the fraction overflows, because Sedona clamps it to [0, 1].



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to