This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new e97f3a20c6 [SEDONA-720] Add GeoPandas Compatible API on Sedona -
framework (#1843)
e97f3a20c6 is described below
commit e97f3a20c638019aa13cfe45bb70b23eb31e0537
Author: Feng Zhang <[email protected]>
AuthorDate: Fri Mar 14 09:29:43 2025 -0700
[SEDONA-720] Add GeoPandas Compatible API on Sedona - framework (#1843)
* [SEDONA-720] Add GeoPandas Compatible API on Sedona - framework
* temporarily disable lower python versions on ci
* fix python tests and revert ci python pipeline
* fix lint issue
* remove numpy dtypes import
* fix TestDataframe
* add more functions to geodataframe implementation
* remove schema print and show
* Update version to 1.8.0
* Remove show and printSchema
---------
Co-authored-by: Jia Yu <[email protected]>
---
python/sedona/geopandas/__init__.py | 26 ++
python/sedona/geopandas/_typing.py | 51 +++
python/sedona/geopandas/base.py | 325 ++++++++++++++
python/sedona/geopandas/geodataframe.py | 672 ++++++++++++++++++++++++++++
python/sedona/geopandas/geoindex.py | 28 ++
python/sedona/geopandas/geoseries.py | 615 +++++++++++++++++++++++++
python/sedona/geopandas/internal.py | 33 ++
python/sedona/geopandas/tools/__init__.py | 22 +
python/sedona/geopandas/tools/sjoin.py | 183 ++++++++
python/tests/geopandas/__init__.py | 16 +
python/tests/geopandas/test_geodataframe.py | 187 ++++++++
python/tests/geopandas/test_geoseries.py | 114 +++++
python/tests/geopandas/test_sjoin.py | 53 +++
13 files changed, 2325 insertions(+)
diff --git a/python/sedona/geopandas/__init__.py
b/python/sedona/geopandas/__init__.py
new file mode 100644
index 0000000000..f56b1699c9
--- /dev/null
+++ b/python/sedona/geopandas/__init__.py
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+.. versionadded:: 1.8.0
+ geopandas API on Sedona
+"""
+
+from sedona.geopandas.geoseries import GeoSeries
+from sedona.geopandas.geodataframe import GeoDataFrame
+
+from sedona.geopandas.tools import sjoin
diff --git a/python/sedona/geopandas/_typing.py
b/python/sedona/geopandas/_typing.py
new file mode 100644
index 0000000000..2a83e127ce
--- /dev/null
+++ b/python/sedona/geopandas/_typing.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
import datetime
import decimal
from typing import Any, Tuple, TypeVar, Union

import numpy as np
from pandas.api.extensions import ExtensionDtype

# TypeVars
T = TypeVar("T")

# The bounds are string forward references: importing GeoFrame /
# GeoIndexOpsMixin here would create a circular import with the modules
# that themselves import these TypeVars.
GeoFrameLike = TypeVar("GeoFrameLike", bound="GeoFrame")
GeoIndexOpsLike = TypeVar("GeoIndexOpsLike", bound="GeoIndexOpsMixin")

# Type aliases

# A scalar cell value as stored in a (geo)dataframe column.
Scalar = Union[
    int,
    float,
    bool,
    str,
    bytes,
    decimal.Decimal,
    datetime.date,
    datetime.datetime,
    None,
]

# A (possibly multi-level) label tuple for an index/column.
Label = Tuple[Any, ...]
# NOTE(review): Union[Any, Label] collapses to Any for type checkers; kept
# for documentation value only.
Name = Union[Any, Label]

# Axis selector: 0/1 or "index"/"columns".
Axis = Union[int, str]
Dtype = Union[np.dtype, ExtensionDtype]

# Forward references resolved at type-check time only (see note above).
DataFrameOrSeries = Union["GeoDataFrame", "GeoSeries"]
SeriesOrIndex = Union["GeoSeries", "GeoIndex"]
diff --git a/python/sedona/geopandas/base.py b/python/sedona/geopandas/base.py
new file mode 100644
index 0000000000..c7cbc39ca3
--- /dev/null
+++ b/python/sedona/geopandas/base.py
@@ -0,0 +1,325 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+A base class of Sedona/Spark DataFrame/Column to behave like geopandas
GeoDataFrame/GeoSeries.
+"""
+from abc import ABCMeta, abstractmethod
+from typing import (
+ Any,
+ Callable,
+ Optional,
+ Union,
+)
+
+import geopandas as gpd
+import pandas as pd
+from pyspark.pandas._typing import (
+ Axis,
+ Dtype,
+ Scalar,
+)
+from pyspark.sql import Column
+
+from sedona.geopandas._typing import GeoFrameLike
+
# Alias of the builtin ``bool`` — presumably kept so the builtin stays
# reachable if a parameter named ``bool`` shadows it; confirm intended use.
bool_type = bool


class GeoFrame(object, metaclass=ABCMeta):
    """
    A base class for both GeoDataFrame and GeoSeries.

    Declares the shared, geopandas-compatible interface. Every member is
    abstract and raises ``NotImplementedError`` until a concrete subclass
    (GeoDataFrame / GeoSeries) overrides it; the signatures and names
    mirror the geopandas API surface.
    """

    # ------------------------------------------------------------------
    # Indexing and reductions
    # ------------------------------------------------------------------

    @abstractmethod
    def __getitem__(self, key: Any) -> Any:
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def _reduce_for_geostat_function(
        self,
        sfun: Callable[["GeoSeries"], Column],
        name: str,
        axis: Optional[Axis] = None,
        numeric_only: bool = True,
        skipna: bool = True,
        **kwargs: Any,
    ) -> Union["GeoSeries", Scalar]:
        # ``sfun`` maps a GeoSeries to a Spark Column expression that
        # implements the reduction named ``name``.
        raise NotImplementedError("This method is not implemented yet.")

    # ------------------------------------------------------------------
    # Conversion and metadata
    # ------------------------------------------------------------------

    @property
    @abstractmethod
    def dtypes(self) -> Union[gpd.GeoSeries, pd.Series, Dtype]:
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def _to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def geoindex(self) -> "GeoIndex":
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def copy(self: GeoFrameLike) -> GeoFrameLike:
        raise NotImplementedError("This method is not implemented yet.")

    # ------------------------------------------------------------------
    # Geometry measures, CRS and predicates (geopandas-compatible)
    # ------------------------------------------------------------------

    @property
    @abstractmethod
    def area(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def crs(self):
        raise NotImplementedError("This method is not implemented yet.")

    @crs.setter
    @abstractmethod
    def crs(self, value):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def geom_type(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def type(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def length(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def is_valid(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def is_valid_reason(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def is_empty(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def count_coordinates(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def count_geometries(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def count_interior_rings(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def is_simple(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def is_ring(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def is_ccw(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def is_closed(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def has_z(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def get_precision(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def get_geometry(self, index):
        raise NotImplementedError("This method is not implemented yet.")

    # ------------------------------------------------------------------
    # Derived geometries and geometry transformations
    # ------------------------------------------------------------------

    @property
    @abstractmethod
    def boundary(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def centroid(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def concave_hull(self, ratio=0.0, allow_holes=False):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def convex_hull(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def delaunay_triangles(self, tolerance=0.0, only_edges=False):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def voronoi_polygons(self, tolerance=0.0, extend_to=None, only_edges=False):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def envelope(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def minimum_rotated_rectangle(self):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def exterior(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def extract_unique_points(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def offset_curve(self, distance, quad_segs=8, join_style="round", mitre_limit=5.0):
        raise NotImplementedError("This method is not implemented yet.")

    @property
    @abstractmethod
    def interiors(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def remove_repeated_points(self, tolerance=0.0):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def set_precision(self, grid_size, mode="valid_output"):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def representative_point(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def minimum_bounding_circle(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def minimum_bounding_radius(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def minimum_clearance(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def normalize(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def make_valid(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def reverse(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def segmentize(self, max_segment_length):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def transform(self, transformation, include_z=False):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def force_2d(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def force_3d(self, z=0):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def line_merge(self, directed=False):
        raise NotImplementedError("This method is not implemented yet.")

    # ------------------------------------------------------------------
    # Set operations, binary predicates and I/O
    # ------------------------------------------------------------------

    @property
    @abstractmethod
    def unary_union(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def union_all(self, method="unary", grid_size=None):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def intersection_all(self):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def contains(self, other, align=None):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def contains_properly(self, other, align=None):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def to_parquet(self, path, **kwargs):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def buffer(
        self,
        distance,
        resolution=16,
        cap_style="round",
        join_style="round",
        mitre_limit=5.0,
        single_sided=False,
        **kwargs,
    ):
        raise NotImplementedError("This method is not implemented yet.")

    @abstractmethod
    def sjoin(self, other, predicate="intersects", **kwargs):
        raise NotImplementedError("This method is not implemented yet.")
diff --git a/python/sedona/geopandas/geodataframe.py
b/python/sedona/geopandas/geodataframe.py
new file mode 100644
index 0000000000..bdef237c1f
--- /dev/null
+++ b/python/sedona/geopandas/geodataframe.py
@@ -0,0 +1,672 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any, Callable, Optional, Union
+
+from pyspark.sql import Column
+
+import pandas as pd
+import geopandas as gpd
+import pyspark.pandas as pspd
+
+from sedona.geopandas.base import GeoFrame
+from sedona.geopandas._typing import GeoFrameLike, Label
+from pyspark.pandas._typing import Axis, Dtype, Scalar
+from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame
+from pyspark.pandas import Series as PandasOnSparkSeries
+
+
class GeoDataFrame(GeoFrame, pspd.DataFrame):
    """
    A class representing a GeoDataFrame, inheriting from GeoFrame and
    pyspark.pandas.DataFrame.

    Geometry columns are stored on the Spark side either as native
    ``geometrytype`` columns or as WKB ``binary`` columns; methods below
    branch on ``field.dataType.typeName()`` to handle both encodings.
    """

    def __getitem__(self, key: Any) -> Any:
        """
        Get item from GeoDataFrame by key.

        Parameters
        ----------
        key : str, list, slice, ndarray or Series
            - If key is a string, returns a Series for that column
            - If key is a list of strings, returns a new GeoDataFrame with
              selected columns
            - If key is a slice or array, returns rows in the GeoDataFrame

        Returns
        -------
        Any
            Series, GeoDataFrame, or other objects depending on the key type.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> from sedona.geopandas import GeoDataFrame
        >>>
        >>> data = {'geometry': [Point(0, 0), Point(1, 1)], 'value': [1, 2]}
        >>> gdf = GeoDataFrame(data)
        >>> gdf['value']
        0    1
        1    2
        Name: value, dtype: int64
        """
        from sedona.geopandas import GeoSeries

        # Handle column access by name
        if isinstance(key, str):
            # Access column directly from the spark DataFrame
            column_name = key

            # Check if column exists
            if column_name not in self.columns:
                raise KeyError(f"Column '{column_name}' does not exist")

            # Get column data from spark_frame.
            # NOTE(review): toPandas() collects the whole column to the
            # driver — potentially expensive for large frames.
            spark_df = self._internal.spark_frame.select(column_name)
            pandas_df = spark_df.toPandas()

            # Check if this is a geometry column
            field = next(
                (f for f in self._internal.spark_frame.schema.fields if f.name == key),
                None,
            )

            if field and (
                field.dataType.typeName() == "geometrytype"
                or field.dataType.typeName() == "binary"
            ):
                # Return as GeoSeries for geometry columns.
                # NOTE(review): for binary columns the raw WKB bytes are
                # handed to GeoSeries without decoding — confirm GeoSeries
                # accepts WKB input.
                return GeoSeries(pandas_df[column_name])
            else:
                # Return as regular pandas Series for non-geometry columns
                from pyspark.pandas import Series

                return Series(pandas_df[column_name])

        # Handle list of column names
        elif isinstance(key, list) and all(isinstance(k, str) for k in key):
            # Check if all columns exist
            missing_cols = [k for k in key if k not in self.columns]
            if missing_cols:
                raise KeyError(f"Columns {missing_cols} do not exist")

            # Select columns from the spark DataFrame
            spark_df = self._internal.spark_frame.select(*key)
            pandas_df = spark_df.toPandas()

            # Return as GeoDataFrame
            return GeoDataFrame(pandas_df)

        # Handle row selection via slice or boolean indexing
        else:
            # For now, convert to pandas first for row-based operations
            # This could be optimized later for better performance
            pandas_df = self._internal.spark_frame.toPandas()
            selected_rows = pandas_df[key]
            return GeoDataFrame(selected_rows)

    def __init__(
        self,
        data=None,
        index=None,
        columns=None,
        dtype=None,
        copy=False,
        geometry: Any | None = None,
        crs: Any | None = None,
        **kwargs,
    ):
        """
        Build a GeoDataFrame either by anchoring to an existing
        (Geo/pandas-on-Spark) frame or from anything ``pd.DataFrame``
        accepts. Geometry columns are serialized to WKB bytes before the
        data is handed to the pyspark.pandas.DataFrame constructor.

        NOTE(review): ``columns``, ``geometry``, ``crs`` and ``**kwargs``
        are accepted but never used below — confirm whether this is
        intentional for this first framework milestone.
        """
        # NOTE(review): ``assert`` is stripped under ``python -O``; this
        # validation disappears in optimized runs.
        assert data is not None

        self._anchor: GeoDataFrame
        self._col_label: Label

        from sedona.geopandas import GeoSeries

        if isinstance(
            data, (GeoDataFrame, GeoSeries, PandasOnSparkSeries, PandasOnSparkDataFrame)
        ):
            assert dtype is None
            assert not copy

            # NOTE(review): this branch only records the anchor and never
            # calls super().__init__(), so pspd.DataFrame state is not set
            # up here — verify how the anchor is consumed downstream.
            # Also, ``index`` is stored as ``_col_label``.
            self._anchor = data
            self._col_label = index
        else:
            if isinstance(data, pd.DataFrame):
                assert index is None
                assert dtype is None
                assert not copy
                df = data
            else:
                df = pd.DataFrame(
                    data=data,
                    index=index,
                    dtype=dtype,
                    copy=copy,
                )
            gdf = gpd.GeoDataFrame(df)
            # convert each geometry column to wkb type
            for col in gdf.columns:
                if isinstance(gdf[col], gpd.GeoSeries):
                    # NOTE(review): ``geom.wkb`` raises on missing (None)
                    # geometries — confirm inputs are assumed non-null.
                    gdf[col] = gdf[col].apply(lambda geom: geom.wkb)
            pdf = pd.DataFrame(gdf)
            # initialize the parent class pyspark Dataframe with the pandas Series
            super().__init__(
                data=pdf,
                index=index,
                dtype=dtype,
                copy=copy,
            )

    def _reduce_for_geostat_function(
        self,
        sfun: Callable[["GeoSeries"], Column],
        name: str,
        axis: Optional[Axis] = None,
        numeric_only: bool = True,
        skipna: bool = True,
        **kwargs: Any,
    ) -> Union["GeoSeries", Scalar]:
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def dtypes(self) -> Union[gpd.GeoSeries, pd.Series, Dtype]:
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def _to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def geoindex(self) -> "GeoIndex":
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def copy(self, deep=False):
        """
        Make a copy of this GeoDataFrame object.

        Parameters:
        - deep: bool, default False
            If True, a deep copy of the data is made. Otherwise, a shallow
            copy is made.

        Returns:
        - GeoDataFrame: A copy of this GeoDataFrame object.

        Examples:
        >>> from shapely.geometry import Point
        >>> import geopandas as gpd
        >>> from sedona.geopandas import GeoDataFrame

        >>> gdf = GeoDataFrame([{"geometry": Point(1, 1), "value1": 2, "value2": 3}])
        >>> gdf_copy = gdf.copy()
        >>> print(gdf_copy)
           geometry  value1  value2
        0  POINT (1 1)       2       3
        """
        # NOTE(review): deep=False returns ``self`` (no copy at all), and
        # the default differs from pandas/geopandas where deep=True is the
        # default — confirm this is the intended contract.
        if deep:
            return GeoDataFrame(
                self._anchor.copy(), dtype=self.dtypes, index=self._col_label
            )
        else:
            return self

    @property
    def area(self) -> "GeoDataFrame":
        """
        Returns a GeoDataFrame containing the area of each geometry expressed
        in the units of the CRS.

        Returns
        -------
        GeoDataFrame
            A GeoDataFrame with the areas of the geometries.

        Examples
        --------
        >>> from shapely.geometry import Polygon
        >>> from sedona.geopandas import GeoDataFrame
        >>>
        >>> data = {
        ...     'geometry': [Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])],
        ...     'value': [1, 2]
        ... }
        >>> gdf = GeoDataFrame(data)
        >>> gdf.area
           geometry_area  value
        0            1.0      1
        1            4.0      2
        """
        # Create a list of all column expressions for the new dataframe
        select_expressions = []

        # Process geometry columns to calculate areas; geometry columns are
        # renamed to "<col>_area" in the result.
        for field in self._internal.spark_frame.schema.fields:
            col_name = field.name

            # Skip index column to avoid duplication
            if col_name == "__index_level_0__" or col_name == "__natural_order__":
                continue

            if (
                field.dataType.typeName() == "geometrytype"
                or field.dataType.typeName() == "binary"
            ):
                # Calculate the area for each geometry column
                if field.dataType.typeName() == "binary":
                    # WKB-encoded columns must be decoded first.
                    area_expr = (
                        f"ST_Area(ST_GeomFromWKB(`{col_name}`)) as {col_name}_area"
                    )
                else:
                    area_expr = f"ST_Area(`{col_name}`) as {col_name}_area"
                select_expressions.append(area_expr)
            else:
                # Keep non-geometry columns as they are
                select_expressions.append(f"`{col_name}`")

        # Execute the query to get all data in one go
        result_df = self._internal.spark_frame.selectExpr(*select_expressions)

        # Convert to pandas DataFrame (collects to the driver)
        pandas_df = result_df.toPandas()

        # Create a new GeoDataFrame with the result
        # Note: This avoids the need to manipulate the index columns separately
        return GeoDataFrame(pandas_df)

    @property
    def crs(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @crs.setter
    def crs(self, value):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def geom_type(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def type(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def length(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def is_valid(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def is_valid_reason(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def is_empty(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def count_coordinates(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def count_geometries(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def count_interior_rings(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def is_simple(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def is_ring(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def is_ccw(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def is_closed(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def has_z(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def get_precision(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def get_geometry(self, index):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def boundary(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def centroid(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def concave_hull(self, ratio=0.0, allow_holes=False):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def convex_hull(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def delaunay_triangles(self, tolerance=0.0, only_edges=False):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def voronoi_polygons(self, tolerance=0.0, extend_to=None, only_edges=False):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def envelope(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def minimum_rotated_rectangle(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def exterior(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def extract_unique_points(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def offset_curve(self, distance, quad_segs=8, join_style="round", mitre_limit=5.0):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def interiors(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def remove_repeated_points(self, tolerance=0.0):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def set_precision(self, grid_size, mode="valid_output"):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def representative_point(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def minimum_bounding_circle(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def minimum_bounding_radius(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def minimum_clearance(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def normalize(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def make_valid(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def reverse(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def segmentize(self, max_segment_length):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def transform(self, transformation, include_z=False):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def force_2d(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def force_3d(self, z=0):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def line_merge(self, directed=False):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    @property
    def unary_union(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def union_all(self, method="unary", grid_size=None):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def intersection_all(self):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def contains(self, other, align=None):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def contains_properly(self, other, align=None):
        # Implementation of the abstract method
        raise NotImplementedError("This method is not implemented yet.")

    def buffer(
        self,
        distance,
        resolution=16,
        cap_style="round",
        join_style="round",
        mitre_limit=5.0,
        single_sided=False,
        **kwargs,
    ) -> "GeoDataFrame":
        """
        Returns a GeoDataFrame with all geometries buffered by the specified
        distance.

        Parameters
        ----------
        distance : float
            The distance to buffer by. Negative distances will create inward
            buffers.
        resolution : int, default 16
            The number of segments used to approximate curves.
        cap_style : str, default "round"
            The style of the buffer cap. One of 'round', 'flat', 'square'.
        join_style : str, default "round"
            The style of the buffer join. One of 'round', 'mitre', 'bevel'.
        mitre_limit : float, default 5.0
            The mitre limit ratio for joins when join_style='mitre'.
        single_sided : bool, default False
            Whether to create a single-sided buffer.

        Returns
        -------
        GeoDataFrame
            A new GeoDataFrame with buffered geometries.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> from sedona.geopandas import GeoDataFrame
        >>>
        >>> data = {
        ...     'geometry': [Point(0, 0), Point(1, 1)],
        ...     'value': [1, 2]
        ... }
        >>> gdf = GeoDataFrame(data)
        >>> buffered = gdf.buffer(0.5)
        """
        # NOTE(review): only ``distance`` is forwarded to ST_Buffer below;
        # resolution/cap_style/join_style/mitre_limit/single_sided are
        # accepted for geopandas compatibility but currently ignored.
        # ``distance`` is interpolated into SQL text — assumed numeric.

        # Create a list of all column expressions for the new dataframe
        select_expressions = []

        # Process each field in the schema
        for field in self._internal.spark_frame.schema.fields:
            col_name = field.name

            # Skip index and order columns
            if col_name == "__index_level_0__" or col_name == "__natural_order__":
                continue

            # Apply buffer to geometry columns
            if (
                field.dataType.typeName() == "geometrytype"
                or field.dataType.typeName() == "binary"
            ):

                if field.dataType.typeName() == "binary":
                    # For binary geometry columns (WKB)
                    buffer_expr = f"ST_Buffer(ST_GeomFromWKB(`{col_name}`), {distance}) as {col_name}"
                else:
                    # For native geometry columns
                    buffer_expr = f"ST_Buffer(`{col_name}`, {distance}) as {col_name}"
                select_expressions.append(buffer_expr)
            else:
                # Keep non-geometry columns as they are
                select_expressions.append(f"`{col_name}`")

        # Execute the query to get all data in one go
        result_df = self._internal.spark_frame.selectExpr(*select_expressions)

        # Convert to pandas DataFrame and create a new GeoDataFrame
        pandas_df = result_df.toPandas()
        return GeoDataFrame(pandas_df)

    def sjoin(
        self,
        other,
        how="inner",
        predicate="intersects",
        lsuffix="left",
        rsuffix="right",
        distance=None,
        on_attribute=None,
        **kwargs,
    ):
        """
        Spatial join of two GeoDataFrames.

        Parameters
        ----------
        other : GeoDataFrame
            The right GeoDataFrame to join with.
        how : str, default 'inner'
            The type of join:
            * 'left': use keys from left_df; retain only left_df geometry column
            * 'right': use keys from right_df; retain only right_df geometry column
            * 'inner': use intersection of keys from both dfs; retain only
              left_df geometry column
        predicate : str, default 'intersects'
            Binary predicate. Valid values: 'intersects', 'contains', 'within', 'dwithin'
        lsuffix : str, default 'left'
            Suffix to apply to overlapping column names (left GeoDataFrame).
        rsuffix : str, default 'right'
            Suffix to apply to overlapping column names (right GeoDataFrame).
        distance : float, optional
            Distance for 'dwithin' predicate. Required if predicate='dwithin'.
        on_attribute : str, list or tuple, optional
            Column name(s) to join on as an additional join restriction.
            These must be found in both DataFrames.

        Returns
        -------
        GeoDataFrame
            A GeoDataFrame with the results of the spatial join.

        Examples
        --------
        >>> from shapely.geometry import Point, Polygon
        >>> from sedona.geopandas import GeoDataFrame
        >>>
        >>> polygons = GeoDataFrame({
        ...     'geometry': [Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])],
        ...     'value': [1]
        ... })
        >>> points = GeoDataFrame({
        ...     'geometry': [Point(0.5, 0.5), Point(2, 2)],
        ...     'value': [1, 2]
        ... })
        >>> joined = points.sjoin(polygons)
        """
        # Delegate to the package-level sjoin tool (imported lazily to
        # avoid a circular import with sedona.geopandas.tools).
        from sedona.geopandas.tools.sjoin import sjoin as sjoin_tool

        return sjoin_tool(
            self,
            other,
            how=how,
            predicate=predicate,
            lsuffix=lsuffix,
            rsuffix=rsuffix,
            distance=distance,
            on_attribute=on_attribute,
            **kwargs,
        )

    def to_parquet(self, path, **kwargs):
        """
        Write the GeoDataFrame to a GeoParquet file.

        Parameters:
        - path: str
            The file path where the GeoParquet file will be written.
        - kwargs: Any
            Additional arguments to pass to the Sedona DataFrame output function.
        """
        # Use the Spark DataFrame's write method to write to GeoParquet format
        self._internal.spark_frame.write.format("geoparquet").save(path, **kwargs)
diff --git a/python/sedona/geopandas/geoindex.py
b/python/sedona/geopandas/geoindex.py
new file mode 100644
index 0000000000..60c6991e83
--- /dev/null
+++ b/python/sedona/geopandas/geoindex.py
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+class GeoIndex:
+ """
+ A placeholder class for GeoIndex.
+ """
+
+ def __init__(self):
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def some_method(self):
+ raise NotImplementedError("This method is not implemented yet.")
diff --git a/python/sedona/geopandas/geoseries.py
b/python/sedona/geopandas/geoseries.py
new file mode 100644
index 0000000000..67f65c71e4
--- /dev/null
+++ b/python/sedona/geopandas/geoseries.py
@@ -0,0 +1,615 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Callable, Optional, Union
+
+from pyspark.pandas._typing import Axis, Dtype, Scalar
+from pyspark.pandas.internal import InternalFrame
+from pyspark.pandas.series import first_series
+from pyspark.pandas.utils import scol_for
+from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame
+from pyspark.pandas import Series as PandasOnSparkSeries
+from pyspark.sql import Column
+
+import pandas as pd
+import geopandas as gpd
+import pyspark.pandas as pspd
+from pyspark.sql.types import BinaryType
+
+from sedona.geopandas.geodataframe import GeoDataFrame
+from sedona.geopandas.base import GeoFrame
+from sedona.geopandas._typing import Label
+from sedona.geopandas.geoindex import GeoIndex
+
+
+class GeoSeries(GeoFrame, pspd.Series):
+ """
+ A class representing a GeoSeries, inheriting from GeoFrame and
pyspark.pandas.DataFrame.
+ """
+
+ def __getitem__(self, key: Any) -> Any:
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def __init__(
+ self,
+ data=None,
+ index=None,
+ dtype=None,
+ name=None,
+ copy=False,
+ fastpath=False,
+ crs=None,
+ **kwargs,
+ ):
+ """
+ Initialize a GeoSeries object.
+
+ Parameters:
+ - data: The input data for the GeoSeries. It can be a GeoDataFrame,
GeoSeries, or pandas Series.
+ - index: The index for the GeoSeries.
+ - crs: Coordinate Reference System for the GeoSeries.
+ - dtype: Data type for the GeoSeries.
+ - name: Name of the GeoSeries.
+ - copy: Whether to copy the input data.
+ - fastpath: Internal parameter for fast initialization.
+
+ Examples:
+ >>> from shapely.geometry import Point
+ >>> import geopandas as gpd
+ >>> from sedona.geopandas import GeoSeries
+
+ # Example 1: Initialize with GeoDataFrame
+ >>> gdf = gpd.GeoDataFrame({'geometry': [Point(1, 1), Point(2, 2)]})
+ >>> gs = GeoSeries(data=gdf)
+ >>> print(gs)
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ Name: geometry, dtype: geometry
+
+ # Example 2: Initialize with GeoSeries
+ >>> gseries = gpd.GeoSeries([Point(1, 1), Point(2, 2)])
+ >>> gs = GeoSeries(data=gseries)
+ >>> print(gs)
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ dtype: geometry
+
+ # Example 3: Initialize with pandas Series
+ >>> pseries = pd.Series([Point(1, 1), Point(2, 2)])
+ >>> gs = GeoSeries(data=pseries)
+ >>> print(gs)
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ dtype: geometry
+ """
+ assert data is not None
+
+ self._anchor: GeoDataFrame
+ self._col_label: Label
+
+ if isinstance(
+ data, (GeoDataFrame, GeoSeries, PandasOnSparkSeries,
PandasOnSparkDataFrame)
+ ):
+ assert dtype is None
+ assert name is None
+ assert not copy
+ assert not fastpath
+
+ self._anchor = data
+ self._col_label = index
+ else:
+ if isinstance(data, pd.Series):
+ assert index is None
+ assert dtype is None
+ assert name is None
+ assert not copy
+ assert not fastpath
+ s = data
+ else:
+ s = pd.Series(
+ data=data,
+ index=index,
+ dtype=dtype,
+ name=name,
+ copy=copy,
+ fastpath=fastpath,
+ )
+ gs = gpd.GeoSeries(s)
+ pdf = pd.Series(gs.apply(lambda geom: geom.wkb))
+ # initialize the parent class pyspark Series with the pandas Series
+ super().__init__(
+ data=pdf,
+ index=index,
+ dtype=dtype,
+ name=name,
+ copy=copy,
+ fastpath=fastpath,
+ )
+
+ def _reduce_for_geostat_function(
+ self,
+ sfun: Callable[["GeoSeries"], Column],
+ name: str,
+ axis: Optional[Axis] = None,
+ numeric_only: bool = True,
+ skipna: bool = True,
+ **kwargs: Any,
+ ) -> Union["GeoSeries", Scalar]:
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def dtypes(self) -> Union[gpd.GeoSeries, pd.Series, Dtype]:
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def _to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def geometry(self) -> "GeoSeries":
+ return self
+
+ @property
+ def geoindex(self) -> "GeoIndex":
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def copy(self, deep=False):
+ """
+ Make a copy of this GeoSeries object.
+
+ Parameters:
+ - deep: bool, default False
+ If True, a deep copy of the data is made. Otherwise, a shallow
copy is made.
+
+ Returns:
+ - GeoSeries: A copy of this GeoSeries object.
+
+ Examples:
+ >>> from shapely.geometry import Point
+ >>> import geopandas as gpd
+ >>> from sedona.geopandas import GeoSeries
+
+ >>> gs = GeoSeries([Point(1, 1), Point(2, 2)])
+ >>> gs_copy = gs.copy()
+ >>> print(gs_copy)
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ dtype: geometry
+ """
+ if deep:
+ # Do not forward dtype: __init__ asserts dtype is None whenever
+ # data is a GeoDataFrame, so passing self.dtype would always raise.
+ return GeoSeries(self._anchor.copy(), index=self._col_label)
+ else:
+ return self
+
+ @property
+ def area(self) -> "GeoSeries":
+ """
+ Returns a Series containing the area of each geometry in the GeoSeries
expressed in the units of the CRS.
+
+ Returns
+ -------
+ Series
+ A Series containing the area of each geometry.
+
+ Examples
+ --------
+ >>> from shapely.geometry import Polygon
+ >>> import geopandas as gpd
+ >>> from sedona.geopandas import GeoSeries
+
+ >>> gs = GeoSeries([Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])])
+ >>> gs.area
+ 0 1.0
+ 1 4.0
+ dtype: float64
+ """
+
+ # Find the first column with BinaryType or GeometryType
+ first_col = self.get_first_geometry_column()
+
+ if first_col is not None:
+ data_type = self._internal.spark_frame.schema[first_col].dataType
+ if isinstance(data_type, BinaryType):
+ sql_expr = f"ST_Area(ST_GeomFromWKB(`{first_col}`)) as area"
+ else:
+ sql_expr = f"ST_Area(`{first_col}`) as area"
+
+ sdf = self._internal.spark_frame.selectExpr(sql_expr)
+ internal = InternalFrame(
+ spark_frame=sdf,
+ index_spark_columns=None,
+ column_labels=[self._column_label],
+ data_spark_columns=[scol_for(sdf, "area")],
+ data_fields=[self._internal.data_fields[0]],
+ column_label_names=self._internal.column_label_names,
+ )
+ return _to_geo_series(first_series(PandasOnSparkDataFrame(internal)))
+
+ @property
+ def crs(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @crs.setter
+ def crs(self, value):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def geom_type(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def type(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def length(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def is_valid(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def is_valid_reason(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def is_empty(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def count_coordinates(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def count_geometries(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def count_interior_rings(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def is_simple(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def is_ring(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def is_ccw(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def is_closed(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def has_z(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def get_precision(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def get_geometry(self, index):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def boundary(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def centroid(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def concave_hull(self, ratio=0.0, allow_holes=False):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def convex_hull(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def delaunay_triangles(self, tolerance=0.0, only_edges=False):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def voronoi_polygons(self, tolerance=0.0, extend_to=None,
only_edges=False):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def envelope(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def minimum_rotated_rectangle(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def exterior(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def extract_unique_points(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def offset_curve(self, distance, quad_segs=8, join_style="round",
mitre_limit=5.0):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def interiors(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def remove_repeated_points(self, tolerance=0.0):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def set_precision(self, grid_size, mode="valid_output"):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def representative_point(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def minimum_bounding_circle(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def minimum_bounding_radius(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def minimum_clearance(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def normalize(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def make_valid(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def reverse(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def segmentize(self, max_segment_length):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def transform(self, transformation, include_z=False):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def force_2d(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def force_3d(self, z=0):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def line_merge(self, directed=False):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ @property
+ def unary_union(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def union_all(self, method="unary", grid_size=None):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def intersection_all(self):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def contains(self, other, align=None):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def contains_properly(self, other, align=None):
+ # Implementation of the abstract method
+ raise NotImplementedError("This method is not implemented yet.")
+
+ def buffer(
+ self,
+ distance,
+ resolution=16,
+ cap_style="round",
+ join_style="round",
+ mitre_limit=5.0,
+ single_sided=False,
+ **kwargs,
+ ):
+ """
+ Returns a GeoSeries of geometries representing all points within a
given distance of each geometric object.
+
+ Parameters
+ ----------
+ distance : float
+ The distance to buffer around each geometry.
+ resolution : int, optional, default 16
+ The resolution of the buffer around each geometry.
+ cap_style : str, optional, default "round"
+ The style of the buffer's cap (round, flat, or square).
+ join_style : str, optional, default "round"
+ The style of the buffer's join (round, mitre, or bevel).
+ mitre_limit : float, optional, default 5.0
+ The mitre limit for the buffer's join style.
+ single_sided : bool, optional, default False
+ Whether to create a single-sided buffer.
+
+ Returns
+ -------
+ GeoSeries
+ A GeoSeries of buffered geometries.
+ """
+ # Find the first column with BinaryType or GeometryType
+ first_col = self.get_first_geometry_column()
+
+ if first_col is not None:
+ data_type = self._internal.spark_frame.schema[first_col].dataType
+ if isinstance(data_type, BinaryType):
+ sql_expr = (
+ f"ST_Buffer(ST_GeomFromWKB(`{first_col}`), {distance}) as
buffer"
+ )
+ else:
+ sql_expr = f"ST_Buffer(`{first_col}`, {distance}) as buffer"
+
+ sdf = self._internal.spark_frame.selectExpr(sql_expr)
+ internal = InternalFrame(
+ spark_frame=sdf,
+ index_spark_columns=None,
+ column_labels=[self._column_label],
+ data_spark_columns=[scol_for(sdf, "buffer")],
+ data_fields=[self._internal.data_fields[0]],
+ column_label_names=self._internal.column_label_names,
+ )
+ return _to_geo_series(first_series(PandasOnSparkDataFrame(internal)))
+
+ def to_parquet(self, path, **kwargs):
+ """
+ Write the GeoSeries to a GeoParquet file.
+
+ Parameters:
+ - path: str
+ The file path where the GeoParquet file will be written.
+ - kwargs: Any
+ Additional arguments to pass to the Sedona DataFrame output
function.
+ """
+ # Use the Spark DataFrame's write method to write to GeoParquet format
+ self._internal.spark_frame.write.format("geoparquet").save(path,
**kwargs)
+
+ def sjoin(
+ self,
+ other,
+ how="inner",
+ predicate="intersects",
+ lsuffix="left",
+ rsuffix="right",
+ distance=None,
+ on_attribute=None,
+ **kwargs,
+ ):
+ """
+ Perform a spatial join between two GeoSeries.
+ Parameters:
+ - other: GeoSeries
+ - how: str, default 'inner'
+ The type of join to perform.
+ - predicate: str, default 'intersects'
+ The spatial predicate to use for the join.
+ - lsuffix: str, default 'left'
+ Suffix to apply to the left GeoSeries' column names.
+ - rsuffix: str, default 'right'
+ Suffix to apply to the right GeoSeries' column names.
+ - distance: float, optional
+ The distance threshold for the join.
+ - on_attribute: str, optional
+ The attribute to join on.
+ - kwargs: Any
+ Additional arguments to pass to the join function.
+ Returns:
+ - GeoSeries
+ """
+ from sedona.geopandas import sjoin
+
+ # Implementation of the abstract method
+ return sjoin(
+ self,
+ other,
+ how,
+ predicate,
+ lsuffix,
+ rsuffix,
+ distance,
+ on_attribute,
+ **kwargs,
+ )
+
+ #
-----------------------------------------------------------------------------
+ # # Utils
+ #
-----------------------------------------------------------------------------
+
+ def get_first_geometry_column(self):
+ first_binary_or_geometry_col = next(
+ (
+ field.name
+ for field in self._internal.spark_frame.schema.fields
+ if isinstance(field.dataType, BinaryType)
+ or field.dataType.typeName() == "geometrytype"
+ ),
+ None,
+ )
+ return first_binary_or_geometry_col
+
+
+# -----------------------------------------------------------------------------
+# # Utils
+# -----------------------------------------------------------------------------
+
+
+def _to_geo_series(df: PandasOnSparkSeries) -> GeoSeries:
+ """
+ Wrap a pandas-on-Spark Series in a GeoSeries.
+
+ Parameters:
+ - df: The input pandas-on-Spark Series.
+
+ Returns:
+ - GeoSeries: A GeoSeries backed by the given Series.
+ """
+ return GeoSeries(data=df)
diff --git a/python/sedona/geopandas/internal.py
b/python/sedona/geopandas/internal.py
new file mode 100644
index 0000000000..e1d4e3fd47
--- /dev/null
+++ b/python/sedona/geopandas/internal.py
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pandas as pd
+from pyspark._typing import F
+from pyspark.pandas.internal import InternalFrame as InternalPySparkFrame
+
+
+class InternalGeoFrame(InternalPySparkFrame):
+
+ @staticmethod
+ def from_pandas(pdf: pd.DataFrame) -> "InternalGeoFrame":
+ internal_frame = InternalPySparkFrame.from_pandas(pdf)
+ sdf = internal_frame.spark_frame.withColumn("geometry", F.lit(None))
+ return InternalGeoFrame(
+ spark_frame=sdf,
+ index_spark_columns=internal_frame.index_spark_columns,
+ data_spark_columns=internal_frame.data_spark_columns,
+ )
diff --git a/python/sedona/geopandas/tools/__init__.py
b/python/sedona/geopandas/tools/__init__.py
new file mode 100644
index 0000000000..f097cc879b
--- /dev/null
+++ b/python/sedona/geopandas/tools/__init__.py
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .sjoin import sjoin
+
+__all__ = [
+ "sjoin",
+]
diff --git a/python/sedona/geopandas/tools/sjoin.py
b/python/sedona/geopandas/tools/sjoin.py
new file mode 100644
index 0000000000..6cc6bbce6e
--- /dev/null
+++ b/python/sedona/geopandas/tools/sjoin.py
@@ -0,0 +1,183 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from pyspark.pandas.internal import InternalFrame
+from pyspark.pandas.series import first_series
+from pyspark.pandas.utils import scol_for
+from pyspark.sql.functions import expr
+
+from sedona.geopandas import GeoDataFrame, GeoSeries
+from sedona.geopandas.geoseries import _to_geo_series
+from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame
+
+
+def _frame_join(left_df, right_df):
+ """Join the GeoDataFrames at the DataFrame level.
+
+ Parameters
+ ----------
+ left_df : GeoDataFrame
+ right_df : GeoDataFrame
+
+ Returns
+ -------
+ GeoDataFrame
+ Joined GeoDataFrame.
+
+ TODO: Implement this function with more details and parameters.
+ """
+ # Get the internal Spark DataFrames
+ left_sdf = left_df._internal.spark_frame
+ right_sdf = right_df._internal.spark_frame
+
+ # Convert WKB to geometry
+ left_geo_df = left_sdf.selectExpr("ST_GeomFromWKB(`0`) as l_geometry")
+ right_geo_df = right_sdf.selectExpr("ST_GeomFromWKB(`0`) as r_geometry")
+
+ # Perform Spatial Join using ST_Intersects
+ spatial_join_df = left_geo_df.alias("l").join(
+ right_geo_df.alias("r"), expr("ST_Intersects(l_geometry, r_geometry)")
+ )
+
+ # Use the provided code template to create an InternalFrame and return a
GeoSeries
+ internal = InternalFrame(
+ spark_frame=spatial_join_df,
+ index_spark_columns=None,
+ column_labels=[left_df._col_label],
+ data_spark_columns=[scol_for(spatial_join_df, "l_geometry")],
+ data_fields=[left_df._internal.data_fields[0]],
+ column_label_names=left_df._internal.column_label_names,
+ )
+ return _to_geo_series(first_series(PandasOnSparkDataFrame(internal)))
+
+
+def sjoin(
+ left_df,
+ right_df,
+ how="inner",
+ predicate="intersects",
+ lsuffix="left",
+ rsuffix="right",
+ distance=None,
+ on_attribute=None,
+ **kwargs,
+):
+ """Spatial join of two GeoDataFrames.
+
+ See the User Guide page :doc:`../../user_guide/mergingdata` for details.
+
+
+ Parameters
+ ----------
+ left_df, right_df : GeoDataFrames
+ how : string, default 'inner'
+ The type of join:
+
+ * 'left': use keys from left_df; retain only left_df geometry column
+ * 'right': use keys from right_df; retain only right_df geometry column
+ * 'inner': use intersection of keys from both dfs; retain only
+ left_df geometry column
+ predicate : string, default 'intersects'
+ Binary predicate. Valid values are determined by the spatial index
used.
+ You can check the valid values in left_df or right_df as
+ ``left_df.sindex.valid_query_predicates`` or
+ ``right_df.sindex.valid_query_predicates``
+ Replaces deprecated ``op`` parameter.
+ lsuffix : string, default 'left'
+ Suffix to apply to overlapping column names (left GeoDataFrame).
+ rsuffix : string, default 'right'
+ Suffix to apply to overlapping column names (right GeoDataFrame).
+ distance : number or array_like, optional
+ Distance(s) around each input geometry within which to query the tree
+ for the 'dwithin' predicate. If array_like, must be
+ one-dimensional with length equal to length of left GeoDataFrame.
+ Required if ``predicate='dwithin'``.
+ on_attribute : string, list or tuple
+ Column name(s) to join on as an additional join restriction on top
+ of the spatial predicate. These must be found in both DataFrames.
+ If set, observations are joined only if the predicate applies
+ and values in specified columns match.
+
+ Examples
+ --------
+ >>> groceries_w_communities = geopandas.sjoin(groceries, chicago)
+ >>> groceries_w_communities.head() # doctest: +SKIP
+ OBJECTID community geometry
+ 0 16 UPTOWN MULTIPOINT ((-87.65661 41.97321))
+ 1 18 MORGAN PARK MULTIPOINT ((-87.68136 41.69713))
+ 2 22 NEAR WEST SIDE MULTIPOINT ((-87.63918 41.86847))
+ 3 23 NEAR WEST SIDE MULTIPOINT ((-87.65495 41.87783))
+ 4 27 CHATHAM MULTIPOINT ((-87.62715 41.73623))
+ [5 rows x 95 columns]
+
+ Notes
+ -----
+ Every operation in GeoPandas is planar, i.e. the potential third
+ dimension is not taken into account.
+ """
+ if kwargs:
+ first = next(iter(kwargs.keys()))
+ raise TypeError(f"sjoin() got an unexpected keyword argument
'{first}'")
+
+ on_attribute = _maybe_make_list(on_attribute)
+
+ _basic_checks(left_df, right_df, how, lsuffix, rsuffix,
on_attribute=on_attribute)
+
+ joined = _frame_join(
+ left_df,
+ right_df,
+ )
+
+ return joined
+
+
+def _maybe_make_list(obj):
+ if isinstance(obj, tuple):
+ return list(obj)
+ if obj is not None and not isinstance(obj, list):
+ return [obj]
+ return obj
+
+
+def _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=None):
+ """Checks the validity of join input parameters.
+
+ `how` must be one of the valid options.
+ `'index_'` concatenated with `lsuffix` or `rsuffix` must not already
+ exist as columns in the left or right data frames.
+
+ Parameters
+ ------------
+ left_df : GeoSeries
+ right_df : GeoSeries
+ how : str, one of 'left', 'right', 'inner'
+ join type
+ lsuffix : str
+ left index suffix
+ rsuffix : str
+ right index suffix
+ on_attribute : list, default None
+ list of column names to merge on along with geometry
+ """
+ if not isinstance(left_df, GeoSeries):
+ raise ValueError(f"'left_df' should be GeoSeries, got {type(left_df)}")
+
+ if not isinstance(right_df, GeoSeries):
+ raise ValueError(f"'right_df' should be GeoSeries, got
{type(right_df)}")
+
+ allowed_hows = ["inner"]
+ if how not in allowed_hows:
+ raise ValueError(f'`how` was "{how}" but is expected to be in
{allowed_hows}')
diff --git a/python/tests/geopandas/__init__.py
b/python/tests/geopandas/__init__.py
new file mode 100644
index 0000000000..a67d5ea255
--- /dev/null
+++ b/python/tests/geopandas/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/python/tests/geopandas/test_geodataframe.py
b/python/tests/geopandas/test_geodataframe.py
new file mode 100644
index 0000000000..502b521526
--- /dev/null
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -0,0 +1,187 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import shutil
+import tempfile
+
+from shapely.geometry import (
+ Point,
+)
+
+from sedona.geopandas import GeoDataFrame
+from tests.test_base import TestBase
+import pyspark.pandas as ps
+
+
class TestDataframe(TestBase):
    """Tests for the Sedona GeoDataFrame wrapper.

    Exercises construction, the pandas-on-Spark bridge, Spark schema typing
    for single and multiple geometry columns, copying, and the ``area`` and
    ``buffer`` geometry operations.
    """

    def test_constructor(self):
        # A plain list of shapely geometries must round-trip into a GeoDataFrame.
        df = GeoDataFrame([Point(x, x) for x in range(3)])
        check_geodataframe(df)

    def test_psdf(self):
        # Sanity check that the Spark session works with the pandas-on-Spark API.
        psdf = ps.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6],
                "b": [100, 200, 300, 400, 500, 600],
                "c": ["one", "two", "three", "four", "five", "six"],
            },
            index=[10, 20, 30, 40, 50, 60],
        )
        # Use `==`, not `is`: identity comparison against an int literal only
        # passes by accident of CPython's small-int caching, and fails outright
        # when the count comes back as a NumPy integer.
        assert psdf.count().count() == 3

    def test_type_single_geometry_column(self):
        # A GeoDataFrame with a single geometry column plus plain attributes.
        points = [Point(x, x) for x in range(3)]
        data = {"geometry1": points, "id": [1, 2, 3], "value": ["a", "b", "c"]}

        df = GeoDataFrame(data)

        # Verify the GeoDataFrame type.
        assert type(df) is GeoDataFrame

        # Check the underlying Spark DataFrame schema.
        schema = df._internal.spark_frame.schema

        # The geometry column must carry Sedona's GeometryType and be non-nullable.
        geometry_field = schema["geometry1"]
        assert geometry_field.dataType.typeName() == "geometrytype"
        assert not geometry_field.nullable

        # Non-geometry columns keep their inferred Spark types.
        assert schema["id"].dataType.typeName().startswith("long")
        assert schema["value"].dataType.typeName().startswith("string")

        # 3 data columns plus Spark-internal bookkeeping columns — TODO confirm
        # which internal columns (index / natural order) make up the total of 5.
        assert len(schema.fields) == 5

    def test_type_multiple_geometry_columns(self):
        # Two independent geometry columns must both map to GeometryType.
        points1 = [Point(x, x) for x in range(3)]
        points2 = [Point(x + 5, x + 5) for x in range(3)]

        data = {"geometry1": points1, "geometry2": points2, "attribute": [1, 2, 3]}

        df = GeoDataFrame(data)
        assert type(df) is GeoDataFrame

        schema = df._internal.spark_frame.schema
        # Assert both geometry columns have the correct type.
        geometry_field1 = schema["geometry1"]
        assert geometry_field1.dataType.typeName() == "geometrytype"
        assert not geometry_field1.nullable

        geometry_field2 = schema["geometry2"]
        assert geometry_field2.dataType.typeName() == "geometrytype"
        assert not geometry_field2.nullable

        # The plain attribute column must NOT be a geometry.
        attribute_field = schema["attribute"]
        assert attribute_field.dataType.typeName() != "geometrytype"

    def test_copy(self):
        # copy() must preserve the GeoDataFrame subtype.
        df = GeoDataFrame([Point(x, x) for x in range(3)], name="test_df")
        df_copy = df.copy()
        assert type(df_copy) is GeoDataFrame

    def test_area(self):
        from shapely.geometry import Polygon

        # Polygons with known areas: 1.0 and 4.0 square units.
        poly1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])  # 1 square unit
        poly2 = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])  # 4 square units

        data = {"geometry1": [poly1, poly2], "id": [1, 2], "value": ["a", "b"]}

        df = GeoDataFrame(data)

        area_df = df.area

        # area must return a GeoDataFrame with the geometry column replaced by
        # a "<name>_area" column while preserving non-geometry columns.
        assert type(area_df) is GeoDataFrame
        assert "geometry1_area" in area_df.columns
        assert "id" in area_df.columns
        assert "value" in area_df.columns

        # Check the actual area values.
        area_values = area_df["geometry1_area"].to_list()
        assert len(area_values) == 2
        self.assert_almost_equal(area_values[0], 1.0)
        self.assert_almost_equal(area_values[1], 4.0)

    def test_buffer(self):
        from shapely.geometry import Polygon, Point

        # Input geometries: a point and a unit square.
        point = Point(0, 0)
        square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])

        data = {"geometry1": [point, square], "id": [1, 2], "value": ["a", "b"]}
        df = GeoDataFrame(data)

        # Apply buffer with distance 0.5.
        buffer_df = df.buffer(0.5)

        # buffer must keep the GeoDataFrame type and all original columns.
        assert type(buffer_df) is GeoDataFrame
        assert "geometry1" in buffer_df.columns
        assert "id" in buffer_df.columns
        assert "value" in buffer_df.columns

        # Pull the buffered geometries back to the driver to inspect them.
        pandas_df = buffer_df._internal.spark_frame.select("geometry1").toPandas()

        # Point buffer with radius 0.5 has area ~= pi * 0.5^2 ~= 0.785; the
        # buffered square grows beyond its original 1.0 area.
        areas = [geom.area for geom in pandas_df["geometry1"]]

        # The buffered square must be strictly larger than the original square.
        assert areas[1] > 1.0
+
+
+# -----------------------------------------------------------------------------
+# # Utils
+# -----------------------------------------------------------------------------
+
+
def check_geodataframe(df):
    """Shared sanity check: *df* must be an instance of GeoDataFrame."""
    assert isinstance(df, GeoDataFrame)
diff --git a/python/tests/geopandas/test_geoseries.py
b/python/tests/geopandas/test_geoseries.py
new file mode 100644
index 0000000000..52e60ab3f7
--- /dev/null
+++ b/python/tests/geopandas/test_geoseries.py
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import shutil
+import tempfile
+
+from shapely.geometry import (
+ Point,
+ Polygon,
+)
+
+from sedona.geopandas import GeoSeries
+from tests.test_base import TestBase
+import pyspark.pandas as ps
+
+
class TestSeries(TestBase):
    """Tests for the Sedona GeoSeries wrapper.

    Builds a few small triangle/square GeoSeries fixtures per test and
    exercises construction, the pandas-on-Spark bridge, internal Sedona SQL
    functions, ``area``, ``buffer``, and GeoParquet output.
    """

    def setup_method(self):
        # Fresh temp directory per test; removed again in teardown_method.
        self.tempdir = tempfile.mkdtemp()
        self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.g1 = GeoSeries([self.t1, self.t2])
        self.g2 = GeoSeries([self.sq, self.t1])
        self.g3 = GeoSeries([self.t1, self.t2], crs="epsg:4326")
        self.g4 = GeoSeries([self.t2, self.t1])

    def teardown_method(self):
        shutil.rmtree(self.tempdir)

    def test_constructor(self):
        s = GeoSeries([Point(x, x) for x in range(3)])
        check_geoseries(s)

    def test_psdf(self):
        # Sanity check that the Spark session works with the pandas-on-Spark API.
        psdf = ps.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6],
                "b": [100, 200, 300, 400, 500, 600],
                "c": ["one", "two", "three", "four", "five", "six"],
            },
            index=[10, 20, 30, 40, 50, 60],
        )
        # Use `==`, not `is`: identity comparison against an int literal only
        # passes by accident of CPython's small-int caching.
        assert psdf.count().count() == 3

    def test_internal_st_function(self):
        # Sanity check that the Spark session exposes internal Sedona UDFs.
        baseDf = self.spark.sql(
            "SELECT ST_GeomFromWKT('POLYGON ((50 50 1, 50 80 2, 80 80 3, 80 50 2, 50 50 1))') as geom"
        )
        actual = baseDf.selectExpr("ST_AsText(ST_Expand(geom, 10))").first()[0]
        expected = "POLYGON Z((40 40 -9, 40 90 -9, 90 90 13, 90 40 13, 40 40 -9))"
        assert expected == actual

    def test_type(self):
        assert type(self.g1) is GeoSeries
        assert type(self.g2) is GeoSeries
        assert type(self.g3) is GeoSeries
        assert type(self.g4) is GeoSeries

    def test_copy(self):
        gc = self.g3.copy()
        assert type(gc) is GeoSeries
        assert self.g3.name == gc.name

    def test_area(self):
        area = self.g1.area
        assert area is not None
        assert type(area) is GeoSeries
        # `==`, not `is`: the count may not be the interned int object 2.
        assert area.count() == 2

    def test_buffer(self):
        buffer = self.g1.buffer(0.2)
        assert buffer is not None
        assert type(buffer) is GeoSeries
        assert buffer.count() == 2

    def test_buffer_then_area(self):
        area = self.g1.buffer(0.2).area
        assert area is not None
        assert type(area) is GeoSeries
        assert area.count() == 2

    def test_buffer_then_geoparquet(self):
        # The per-test temp directory is already unique, so a fixed file name
        # is safe; this avoids the private tempfile._get_candidate_names() API.
        temp_file_path = os.path.join(self.tempdir, "buffered.parquet")
        self.g1.buffer(0.2).to_parquet(temp_file_path)
        assert os.path.exists(temp_file_path)
+
+
+# -----------------------------------------------------------------------------
+# # Utils
+# -----------------------------------------------------------------------------
+
+
def check_geoseries(s):
    """Shared sanity check: *s* and its ``geometry`` accessor are GeoSeries."""
    assert isinstance(s, GeoSeries)
    assert isinstance(s.geometry, GeoSeries)
diff --git a/python/tests/geopandas/test_sjoin.py
b/python/tests/geopandas/test_sjoin.py
new file mode 100644
index 0000000000..f9e1c6f680
--- /dev/null
+++ b/python/tests/geopandas/test_sjoin.py
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import shutil
+import tempfile
+
+from shapely.geometry import Polygon
+from sedona.geopandas import GeoSeries, sjoin
+from tests.test_base import TestBase
+
+
class TestSpatialJoin(TestBase):
    """Tests for spatial joins on GeoSeries.

    Covers both the free function ``sjoin(left, right)`` and the bound
    method ``left.sjoin(right)``; each of the 2x2 candidate pairs of the
    overlapping fixture polygons intersects, giving 4 joined rows.
    """

    def setup_method(self):
        # Fresh temp directory per test; removed again in teardown_method.
        self.tempdir = tempfile.mkdtemp()
        self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.g1 = GeoSeries([self.t1, self.t2])
        self.g2 = GeoSeries([self.sq, self.t1])
        self.g3 = GeoSeries([self.t1, self.t2], crs="epsg:4326")
        self.g4 = GeoSeries([self.t2, self.t1])

    def teardown_method(self):
        shutil.rmtree(self.tempdir)

    def test_sjoin_method1(self):
        # Module-level sjoin(left, right).
        left = self.g1
        right = self.g2
        joined = sjoin(left, right)
        assert joined is not None
        assert type(joined) is GeoSeries
        # `==`, not `is`: identity comparison against an int literal only
        # passes by accident of CPython's small-int caching.
        assert joined.count() == 4

    def test_sjoin_method2(self):
        # Bound method left.sjoin(right) must agree with the free function.
        left = self.g1
        right = self.g2
        joined = left.sjoin(right)
        assert joined is not None
        assert type(joined) is GeoSeries
        assert joined.count() == 4