This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new e65c67fed1 [GH-2506] Fix segmentize array-like input and support lists
as 'array-like' (#2507)
e65c67fed1 is described below
commit e65c67fed199d66cced460da1e8d935b464629de
Author: Peter Nguyen <[email protected]>
AuthorDate: Sat Nov 15 00:56:05 2025 -0800
[GH-2506] Fix segmentize array-like input and support lists as 'array-like'
(#2507)
---
python/sedona/spark/geopandas/geoseries.py | 6 ++++--
python/tests/geopandas/test_geoseries.py | 18 ++++++++++++++++
.../tests/geopandas/test_match_geopandas_series.py | 24 +++++++++++++++++++---
3 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/python/sedona/spark/geopandas/geoseries.py
b/python/sedona/spark/geopandas/geoseries.py
index 95cf7d752b..4fbccdcf96 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -1075,7 +1075,7 @@ class GeoSeries(GeoFrame, pspd.Series):
def segmentize(self, max_segment_length):
other_series, extended = self._make_series_of_val(max_segment_length)
- align = False if extended else align
+ align = not extended
spark_expr = stf.ST_Segmentize(F.col("L"), F.col("R"))
return self._row_wise_operation(
@@ -2833,11 +2833,13 @@ e": "Feature", "properties": {}, "geometry": {"type":
"Point", "coordinates": [3
Returns:
tuple[pspd.Series, bool]:
- The series of the value
- - Whether returned value was a single object extended into a
series (useful for row-wise 'align' parameter)
+ - Whether the returned value was extended into a series (useful
for row-wise 'align' parameter)
"""
# generator instead of an in-memory list
if isinstance(value, GeoDataFrame):
return value.geometry, False
+ elif isinstance(value, (list, np.ndarray)):
+ return pspd.Series(value), True
elif not isinstance(value, pspd.Series):
lst = [value for _ in range(len(self))]
if isinstance(value, BaseGeometry):
diff --git a/python/tests/geopandas/test_geoseries.py
b/python/tests/geopandas/test_geoseries.py
index 8d3455fa8f..74cbff8970 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -1464,6 +1464,24 @@ e": "Feature", "properties": {}, "geometry": {"type":
"Point", "coordinates": [3
df_result = s.to_geoframe().segmentize(5)
self.check_sgpd_equals_gpd(df_result, expected)
+ # Test array-like input
+ result = s.segmentize(ps.Series([5, 10]))
+ expected = gpd.GeoSeries(
+ [
+ LineString([(0, 0), (0, 5), (0, 10)]),
+ Polygon(
+ [
+ (0, 0),
+ (10, 0),
+ (10, 10),
+ (0, 10),
+ (0, 0),
+ ]
+ ),
+ ],
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
def test_transform(self):
pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py
b/python/tests/geopandas/test_match_geopandas_series.py
index 98b0c528a7..fc5bc27187 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -18,6 +18,7 @@ import os
import shutil
import tempfile
import pytest
+import numpy as np
import pandas as pd
import geopandas as gpd
import pyspark.pandas as ps
@@ -387,7 +388,6 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
# Ensure filling with np.nan or pd.NA returns None
# but filling None return empty geometry
- import numpy as np
for fill_val in [np.nan, pd.NA, None]:
sgpd_result = GeoSeries(data).fillna(fill_val)
@@ -419,8 +419,6 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
)
def test_total_bounds(self):
- import numpy as np
-
for geom in self.geoms:
sgpd_result = GeoSeries(geom).total_bounds
gpd_result = gpd.GeoSeries(geom).total_bounds
@@ -832,6 +830,26 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
gpd_result = gpd.GeoSeries(geom).segmentize(2.5)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+ # Test array-like inputs
+ geoms = self.polygons
+ lst = list(range(1, len(geoms) + 1))
+
+ # Traditional python list
+ sgpd_result = GeoSeries(geoms).segmentize(lst)
+ gpd_result = gpd.GeoSeries(geoms).segmentize(lst)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ np_array = np.array(lst)
+ sgpd_result = GeoSeries(geoms).segmentize(np_array)
+ gpd_result = gpd.GeoSeries(geoms).segmentize(np_array)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ # pandas-on-Spark Series
+ psser = ps.Series(lst)
+ sgpd_result = GeoSeries(geoms).segmentize(psser)
+ gpd_result = gpd.GeoSeries(geoms).segmentize(psser.to_pandas())
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
def test_transform(self):
pass