This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new c18dddac87 [GH-2039] Geopandas.GeoSeries: Implement from_wkb, 
from_wkt, from_xy + temp __init__ fix (#2040)
c18dddac87 is described below

commit c18dddac87c1d3a781ce791ed2aee8d8f6779f00
Author: Peter Nguyen <petern0...@gmail.com>
AuthorDate: Thu Jul 3 21:41:57 2025 -0700

    [GH-2039] Geopandas.GeoSeries: Implement from_wkb, from_wkt, from_xy + temp 
__init__ fix (#2040)
---
 python/sedona/geopandas/geoseries.py               | 275 ++++++++++++++++++++-
 python/tests/geopandas/test_geoseries.py           |  40 ++-
 .../tests/geopandas/test_match_geopandas_series.py |  32 ++-
 3 files changed, 335 insertions(+), 12 deletions(-)

diff --git a/python/sedona/geopandas/geoseries.py 
b/python/sedona/geopandas/geoseries.py
index 6adb2f1529..6f68c3c056 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -123,9 +123,6 @@ class GeoSeries(GeoFrame, pspd.Series):
             assert not copy
             assert not fastpath
 
-            self._anchor = data
-            self._col_label = index
-
             data_crs = None
             if hasattr(data, "crs"):
                 data_crs = data.crs
@@ -136,6 +133,20 @@ class GeoSeries(GeoFrame, pspd.Series):
                     "allow_override=True)' to overwrite CRS or "
                     "'GeoSeries.to_crs(crs)' to reproject geometries. "
                 )
+            # This is a temporary workaround since pyspark errors when 
creating a ps.Series from a ps.Series
+            # This is NOT a scalable solution since we call to_pandas() on the 
data and is a hacky solution
+            # but this should be resolved if/once 
https://github.com/apache/spark/pull/51300 is merged in.
+            # For now, we reset self._anchor = data to have keep the geometry 
information (e.g crs) that's lost in to_pandas()
+            super().__init__(
+                data=data.to_pandas(),
+                index=index,
+                dtype=dtype,
+                name=name,
+                copy=copy,
+                fastpath=fastpath,
+            )
+
+            self._anchor = data
         else:
             if isinstance(data, pd.Series):
                 assert index is None
@@ -1360,7 +1371,82 @@ class GeoSeries(GeoFrame, pspd.Series):
         on_invalid="raise",
         **kwargs,
     ) -> "GeoSeries":
-        raise NotImplementedError("GeoSeries.from_wkb() is not implemented 
yet.")
+        r"""
+        Alternate constructor to create a ``GeoSeries``
+        from a list or array of WKB objects
+
+        Parameters
+        ----------
+        data : array-like or Series
+            Series, list or array of WKB objects
+        index : array-like or Index
+            The index for the GeoSeries.
+        crs : value, optional
+            Coordinate Reference System of the geometry objects. Can be 
anything
+            accepted by
+            :meth:`pyproj.CRS.from_user_input() 
<pyproj.crs.CRS.from_user_input>`,
+            such as an authority string (eg "EPSG:4326") or a WKT string.
+        on_invalid: {"raise", "warn", "ignore"}, default "raise"
+            - raise: an exception will be raised if a WKB input geometry is 
invalid.
+            - warn: a warning will be raised and invalid WKB geometries will 
be returned
+              as None.
+            - ignore: invalid WKB geometries will be returned as None without 
a warning.
+            - fix: an effort is made to fix invalid input geometries (e.g. 
close
+              unclosed rings). If this is not possible, they are returned as 
``None``
+              without a warning. Requires GEOS >= 3.11 and shapely >= 2.1.
+
+        kwargs
+            Additional arguments passed to the Series constructor,
+            e.g. ``name``.
+
+        Returns
+        -------
+        GeoSeries
+
+        See Also
+        --------
+        GeoSeries.from_wkt
+
+        Examples
+        --------
+
+        >>> wkbs = [
+        ... (
+        ...     b"\x01\x01\x00\x00\x00\x00\x00\x00\x00"
+        ...     b"\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?"
+        ... ),
+        ... (
+        ...     b"\x01\x01\x00\x00\x00\x00\x00\x00\x00"
+        ...     b"\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00@"
+        ... ),
+        ... (
+        ...    b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00"
+        ...    b"\x00\x08@\x00\x00\x00\x00\x00\x00\x08@"
+        ... ),
+        ... ]
+        >>> s = geopandas.GeoSeries.from_wkb(wkbs)
+        >>> s
+        0    POINT (1 1)
+        1    POINT (2 2)
+        2    POINT (3 3)
+        dtype: geometry
+        """
+        if on_invalid != "raise":
+            raise NotImplementedError(
+                "GeoSeries.from_wkb(): only on_invalid='raise' is implemented"
+            )
+
+        from pyspark.sql.types import StructType, StructField, BinaryType
+
+        schema = StructType([StructField("data", BinaryType(), True)])
+        return cls._create_from_select(
+            f"ST_GeomFromWKB(`data`)",
+            data,
+            schema,
+            index,
+            crs,
+            **kwargs,
+        )
 
     @classmethod
     def from_wkt(
@@ -1371,11 +1457,154 @@ class GeoSeries(GeoFrame, pspd.Series):
         on_invalid="raise",
         **kwargs,
     ) -> "GeoSeries":
-        raise NotImplementedError("GeoSeries.from_wkt() is not implemented 
yet.")
+        """
+        Alternate constructor to create a ``GeoSeries``
+        from a list or array of WKT objects
+
+        Parameters
+        ----------
+        data : array-like, Series
+            Series, list, or array of WKT objects
+        index : array-like or Index
+            The index for the GeoSeries.
+        crs : value, optional
+            Coordinate Reference System of the geometry objects. Can be 
anything
+            accepted by
+            :meth:`pyproj.CRS.from_user_input() 
<pyproj.crs.CRS.from_user_input>`,
+            such as an authority string (eg "EPSG:4326") or a WKT string.
+        on_invalid : {"raise", "warn", "ignore"}, default "raise"
+            - raise: an exception will be raised if a WKT input geometry is 
invalid.
+            - warn: a warning will be raised and invalid WKT geometries will be
+              returned as ``None``.
+            - ignore: invalid WKT geometries will be returned as ``None`` 
without a
+              warning.
+            - fix: an effort is made to fix invalid input geometries (e.g. 
close
+              unclosed rings). If this is not possible, they are returned as 
``None``
+              without a warning. Requires GEOS >= 3.11 and shapely >= 2.1.
+
+        kwargs
+            Additional arguments passed to the Series constructor,
+            e.g. ``name``.
+
+        Returns
+        -------
+        GeoSeries
+
+        See Also
+        --------
+        GeoSeries.from_wkb
+
+        Examples
+        --------
+
+        >>> wkts = [
+        ... 'POINT (1 1)',
+        ... 'POINT (2 2)',
+        ... 'POINT (3 3)',
+        ... ]
+        >>> s = geopandas.GeoSeries.from_wkt(wkts)
+        >>> s
+        0    POINT (1 1)
+        1    POINT (2 2)
+        2    POINT (3 3)
+        dtype: geometry
+        """
+        if on_invalid != "raise":
+            raise NotImplementedError(
+                "GeoSeries.from_wkt(): only on_invalid='raise' is implemented"
+            )
+
+        from pyspark.sql.types import StructType, StructField, StringType
+
+        schema = StructType([StructField("data", StringType(), True)])
+        return cls._create_from_select(
+            f"ST_GeomFromText(`data`)",
+            data,
+            schema,
+            index,
+            crs,
+            **kwargs,
+        )
 
     @classmethod
     def from_xy(cls, x, y, z=None, index=None, crs=None, **kwargs) -> 
"GeoSeries":
-        raise NotImplementedError("GeoSeries.from_xy() is not implemented 
yet.")
+        """
+        Alternate constructor to create a :class:`~geopandas.GeoSeries` of 
Point
+        geometries from lists or arrays of x, y(, z) coordinates
+
+        In case of geographic coordinates, it is assumed that longitude is 
captured
+        by ``x`` coordinates and latitude by ``y``.
+
+        Parameters
+        ----------
+        x, y, z : iterable
+        index : array-like or Index, optional
+            The index for the GeoSeries. If not given and all coordinate inputs
+            are Series with an equal index, that index is used.
+        crs : value, optional
+            Coordinate Reference System of the geometry objects. Can be 
anything
+            accepted by
+            :meth:`pyproj.CRS.from_user_input() 
<pyproj.crs.CRS.from_user_input>`,
+            such as an authority string (eg "EPSG:4326") or a WKT string.
+        **kwargs
+            Additional arguments passed to the Series constructor,
+            e.g. ``name``.
+
+        Returns
+        -------
+        GeoSeries
+
+        See Also
+        --------
+        GeoSeries.from_wkt
+        points_from_xy
+
+        Examples
+        --------
+
+        >>> x = [2.5, 5, -3.0]
+        >>> y = [0.5, 1, 1.5]
+        >>> s = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326")
+        >>> s
+        0    POINT (2.5 0.5)
+        1    POINT (5 1)
+        2    POINT (-3 1.5)
+        dtype: geometry
+        """
+        from pyspark.sql.types import StructType, StructField, DoubleType
+
+        schema = StructType(
+            [StructField("x", DoubleType(), True), StructField("y", 
DoubleType(), True)]
+        )
+
+        # Spark doesn't automatically cast ints to floats for us
+        x = [float(num) for num in x]
+        y = [float(num) for num in y]
+        z = [float(num) for num in z] if z else None
+
+        if z:
+            data = list(zip(x, y, z))
+            select = f"ST_PointZ(`x`, `y`, `z`)"
+            schema.add(StructField("z", DoubleType(), True))
+        else:
+            data = list(zip(x, y))
+            select = f"ST_Point(`x`, `y`)"
+
+        geoseries = cls._create_from_select(
+            select,
+            data,
+            schema,
+            index,
+            crs,
+            **kwargs,
+        )
+
+        if crs:
+            from pyproj import CRS
+
+            geoseries.crs = CRS.from_user_input(crs).to_epsg()
+
+        return geoseries
 
     @classmethod
     def from_shapely(
@@ -1387,6 +1616,40 @@ class GeoSeries(GeoFrame, pspd.Series):
     def from_arrow(cls, arr, **kwargs) -> "GeoSeries":
         raise NotImplementedError("GeoSeries.from_arrow() is not implemented 
yet.")
 
+    @classmethod
+    def _create_from_select(
+        cls, select: str, data, schema, index, crs, **kwargs
+    ) -> "GeoSeries":
+
+        from pyspark.pandas.utils import default_session
+        from pyspark.pandas.internal import InternalField
+        import numpy as np
+
+        if isinstance(data, list) and not isinstance(data[0], (tuple, list)):
+            data = [(obj,) for obj in data]
+
+        select = f"{select} as geometry"
+
+        spark_df = default_session().createDataFrame(data, schema=schema)
+        spark_df = spark_df.selectExpr(select)
+
+        internal = InternalFrame(
+            spark_frame=spark_df,
+            index_spark_columns=None,
+            column_labels=[("geometry",)],
+            data_spark_columns=[scol_for(spark_df, "geometry")],
+            data_fields=[
+                InternalField(np.dtype("object"), spark_df.schema["geometry"])
+            ],
+            column_label_names=[("geometry",)],
+        )
+        return GeoSeries(
+            first_series(PandasOnSparkDataFrame(internal)),
+            index,
+            crs=crs,
+            name=kwargs.get("name", None),
+        )
+
     def to_file(
         self,
         filename: Union[os.PathLike, typing.IO],
diff --git a/python/tests/geopandas/test_geoseries.py 
b/python/tests/geopandas/test_geoseries.py
index 3c7951e941..78cb66812e 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -119,13 +119,47 @@ class TestGeoSeries(TestBase):
         pass
 
     def test_from_wkb(self):
-        pass
+        wkbs = [
+            (
+                b"\x01\x01\x00\x00\x00\x00\x00\x00\x00"
+                b"\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?"
+            ),
+            (
+                b"\x01\x01\x00\x00\x00\x00\x00\x00\x00"
+                b"\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00@"
+            ),
+            (
+                b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00"
+                b"\x00\x08@\x00\x00\x00\x00\x00\x00\x08@"
+            ),
+        ]
+        s = sgpd.GeoSeries.from_wkb(wkbs)
+        expected = gpd.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+        self.check_sgpd_equals_gpd(s, expected)
 
     def test_from_wkt(self):
-        pass
+        wkts = [
+            "POINT (1 1)",
+            "POINT (2 2)",
+            "POINT (3 3)",
+        ]
+        s = sgpd.GeoSeries.from_wkt(wkts)
+        expected = gpd.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+        self.check_sgpd_equals_gpd(s, expected)
 
     def test_from_xy(self):
-        pass
+        x = [2.5, 5, -3.0]
+        y = [0.5, 1, 1.5]
+        s = sgpd.GeoSeries.from_xy(x, y, crs="EPSG:4326")
+        expected = gpd.GeoSeries([Point(2.5, 0.5), Point(5, 1), Point(-3, 
1.5)])
+        self.check_sgpd_equals_gpd(s, expected)
+
+        z = [1, 2, 3]
+        s = sgpd.GeoSeries.from_xy(x, y, z)
+        expected = gpd.GeoSeries(
+            [Point(2.5, 0.5, 1), Point(5, 1, 2), Point(-3, 1.5, 3)]
+        )
+        self.check_sgpd_equals_gpd(s, expected)
 
     def test_from_shapely(self):
         pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py 
b/python/tests/geopandas/test_match_geopandas_series.py
index 4a4a3495ca..00ad6687ee 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -241,13 +241,39 @@ class TestMatchGeopandasSeries(TestBase):
         pass
 
     def test_from_wkb(self):
-        pass
+        for _, geom in self.geoms:
+            wkb = [g.wkb for g in geom]
+            sgpd_result = GeoSeries.from_wkb(wkb)
+            gpd_result = gpd.GeoSeries.from_wkb(wkb)
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
 
     def test_from_wkt(self):
-        pass
+        for _, geom in self.geoms:
+            wkt = [g.wkt for g in geom]
+            sgpd_result = GeoSeries.from_wkt(wkt)
+            gpd_result = gpd.GeoSeries.from_wkt(wkt)
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
 
     def test_from_xy(self):
-        pass
+        tests = [
+            [
+                [2.5, 0.5, 5.0, -2],  # x
+                [5, 10, 0, 1],  # y
+                [-3, 1.5, -1000, 25],  # z
+                "EPSG:4326",
+            ],
+            [
+                [2.5, -0.5, 1, 500],  # x
+                [5, 1, -100, 1000],  # y
+                None,
+                None,
+            ],
+        ]
+        for x, y, z, crs in tests:
+            sgpd_result = GeoSeries.from_xy(x, y, z, crs=crs)
+            gpd_result = gpd.GeoSeries.from_xy(x, y, z, crs=crs)
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+            assert sgpd_result.crs == gpd_result.crs
 
     def test_from_shapely(self):
         pass

Reply via email to