This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 607c383f4b [SEDONA-708] Sedona should use PyArrow to get GeoPandas (#1794)
607c383f4b is described below

commit 607c383f4bb6df5763a597cb98dbb6c8d390513c
Author: Jia Yu <[email protected]>
AuthorDate: Thu Feb 6 21:18:40 2025 -0800

    [SEDONA-708] Sedona should use PyArrow to get GeoPandas (#1794)
    
    * Initial commit
    
    * Separate the logic between Pandas and GeoPandas
    
    * Work with GeoPandas < 1.0.0
    
    * Refine the code structure
---
 python/sedona/maps/SedonaMapUtils.py      | 16 +++++++++++++---
 python/sedona/raster_utils/SedonaUtils.py |  5 ++++-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/python/sedona/maps/SedonaMapUtils.py b/python/sedona/maps/SedonaMapUtils.py
index 66fa283d1d..ddca721c45 100644
--- a/python/sedona/maps/SedonaMapUtils.py
+++ b/python/sedona/maps/SedonaMapUtils.py
@@ -18,6 +18,8 @@
 import json
 
 from sedona.sql.types import GeometryType
+from sedona.utils.geoarrow import dataframe_to_arrow
+from packaging.version import parse
 
 
 class SedonaMapUtils:
@@ -34,17 +36,25 @@ class SedonaMapUtils:
         """
         if geometry_col is None:
             geometry_col = SedonaMapUtils.__get_geometry_col__(df)
-        pandas_df = df.toPandas()
+
+        # Convert the dataframe to arrow format, then to geopandas dataframe
+        # This is faster than converting directly to geopandas dataframe via toPandas
         if (
             geometry_col is None
         ):  # No geometry column found even after searching schema, return Pandas Dataframe
-            return pandas_df
+            data_pyarrow = dataframe_to_arrow(df)
+            return data_pyarrow.to_pandas()
         try:
             import geopandas as gpd
         except ImportError:
             msg = "GeoPandas is missing. You can install it manually or via apache-sedona[kepler-map] or apache-sedona[pydeck-map]."
             raise ImportError(msg) from None
-        geo_df = gpd.GeoDataFrame(pandas_df, geometry=geometry_col)
+        # From GeoPandas 1.0.0 onwards, the from_arrow method is available
+        if parse(gpd.__version__) >= parse("1.0.0"):
+            data_pyarrow = dataframe_to_arrow(df)
+            geo_df = gpd.GeoDataFrame.from_arrow(data_pyarrow)
+        else:
+            geo_df = gpd.GeoDataFrame(df.toPandas(), geometry=geometry_col)
         if geometry_col != "geometry" and rename is True:
             geo_df.rename_geometry("geometry", inplace=True)
         return geo_df
diff --git a/python/sedona/raster_utils/SedonaUtils.py b/python/sedona/raster_utils/SedonaUtils.py
index 5f7304f3ff..d35fcd6210 100644
--- a/python/sedona/raster_utils/SedonaUtils.py
+++ b/python/sedona/raster_utils/SedonaUtils.py
@@ -15,10 +15,13 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+from sedona.maps.SedonaMapUtils import SedonaMapUtils
+
 
 class SedonaUtils:
     @classmethod
     def display_image(cls, df):
         from IPython.display import HTML, display
 
-        display(HTML(df.toPandas().to_html(escape=False)))
+        pdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df, rename=False)
+        display(HTML(pdf.to_html(escape=False)))

Reply via email to