This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 90601cdb fix(python/sedonadb): Tweaks to support pandas>=3.0 (#538)
90601cdb is described below

commit 90601cdbd11ce952f688700e909f3114ce41ad32
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Jan 22 15:30:20 2026 -0600

    fix(python/sedonadb): Tweaks to support pandas>=3.0 (#538)
---
 python/sedonadb/python/sedonadb/dataframe.py | 3 +++
 python/sedonadb/tests/test_datasource.py     | 9 +++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index f1b641b9..42980063 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -518,7 +518,10 @@ def _qualified_type_name(obj):
 
 SPECIAL_CASED_SCANS = {
     "pyarrow.lib.Table": _scan_collected_default,
+    # pandas < 3.0
     "pandas.core.frame.DataFrame": _scan_collected_default,
+    # pandas >= 3.0
+    "pandas.DataFrame": _scan_collected_default,
     "geopandas.geodataframe.GeoDataFrame": _scan_geopandas,
     "polars.dataframe.frame.DataFrame": _scan_collected_default,
 }
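
For context, here is a minimal sketch of how a qualified-type-name dispatch
like SPECIAL_CASED_SCANS might resolve its keys. The helper below is only an
illustration (not necessarily sedonadb's _qualified_type_name): it assumes the
key is derived from the class's module and name, and that pandas >= 3.0 reports
the DataFrame module as "pandas" rather than "pandas.core.frame", which is why
both keys are registered above.

    import pandas as pd

    def qualified_type_name(obj):
        # Hypothetical helper mirroring the kind of lookup the mapping above
        # relies on: join the class's module and qualified class name.
        cls = type(obj)
        return f"{cls.__module__}.{cls.__qualname__}"

    df = pd.DataFrame({"x": [1, 2, 3]})
    # pandas < 3.0  -> "pandas.core.frame.DataFrame"
    # pandas >= 3.0 -> "pandas.DataFrame" (assumed, per the new key above)
    print(qualified_type_name(df))
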
diff --git a/python/sedonadb/tests/test_datasource.py b/python/sedonadb/tests/test_datasource.py
index 64e5b0b8..59578949 100644
--- a/python/sedonadb/tests/test_datasource.py
+++ b/python/sedonadb/tests/test_datasource.py
@@ -74,8 +74,13 @@ def test_read_ogr_multi_file(con):
 
     with tempfile.TemporaryDirectory() as td:
         # Create partitioned files by writing Parquet first and translating
-        # one file at a time
-        con.create_data_frame(gdf).to_parquet(td, partition_by="partition")
+        # one file at a time. We need to cast partition in pandas>=3.0 because
+        # the default translation of a string column is LargeUtf8 and this is not
+        # currently supported by DataFusion partition_by.
+        con.create_data_frame(gdf).to_view("tmp_gdf", overwrite=True)
+        con.sql(
+            """SELECT idx, partition::VARCHAR AS partition, wkb_geometry FROM 
tmp_gdf"""
+        ).to_parquet(td, partition_by="partition")
         for parquet_path in Path(td).rglob("*.parquet"):
             fgb_path = str(parquet_path).replace(".parquet", ".fgb")
             con.read_parquet(parquet_path).to_pandas().to_file(fgb_path)
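
As an illustration of the type mismatch the new comment describes, the sketch
below uses pyarrow directly (not the sedonadb code path): a string column that
arrives as large_string (LargeUtf8) is cast to plain string (Utf8, i.e.
VARCHAR), mirroring the partition::VARCHAR cast in the test above. The claim
that pandas >= 3.0 produces LargeUtf8 by default is taken from the comment in
the diff; exact dtypes may vary by pandas/pyarrow build.

    import pyarrow as pa

    # Simulate a string column that arrived as LargeUtf8 (large_string).
    col = pa.chunked_array([pa.array(["a", "b", "b"], type=pa.large_string())])
    print(col.type)  # large_string

    # Casting to Utf8 (VARCHAR in SQL) yields a partition column type that
    # DataFusion's partition_by accepts.
    print(col.cast(pa.string()).type)  # string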
