This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 90601cdb fix(python/sedonadb): Tweaks to support pandas>=3.0 (#538)
90601cdb is described below
commit 90601cdbd11ce952f688700e909f3114ce41ad32
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Jan 22 15:30:20 2026 -0600
fix(python/sedonadb): Tweaks to support pandas>=3.0 (#538)
---
python/sedonadb/python/sedonadb/dataframe.py | 3 +++
python/sedonadb/tests/test_datasource.py | 9 +++++++--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index f1b641b9..42980063 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -518,7 +518,10 @@ def _qualified_type_name(obj):
SPECIAL_CASED_SCANS = {
"pyarrow.lib.Table": _scan_collected_default,
+ # pandas < 3.0
"pandas.core.frame.DataFrame": _scan_collected_default,
+ # pandas >= 3.0
+ "pandas.DataFrame": _scan_collected_default,
"geopandas.geodataframe.GeoDataFrame": _scan_geopandas,
"polars.dataframe.frame.DataFrame": _scan_collected_default,
}
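The added "pandas.DataFrame" key accounts for pandas>=3.0, where the DataFrame class is reported under the top-level pandas module instead of pandas.core.frame, so the lookup key changes while the scan behavior stays the same. The body of _qualified_type_name is not part of this diff; the following is only a minimal sketch of how such a lookup key is typically derived, not the actual implementation:

    def _qualified_type_name(obj):
        # Sketch only (assumed, not from this commit): combine the defining
        # module and qualified class name, giving e.g.
        # "pandas.core.frame.DataFrame" on pandas < 3.0 and
        # "pandas.DataFrame" on pandas >= 3.0, which is then used as the key
        # into SPECIAL_CASED_SCANS.
        cls = type(obj)
        return f"{cls.__module__}.{cls.__qualname__}"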
diff --git a/python/sedonadb/tests/test_datasource.py b/python/sedonadb/tests/test_datasource.py
index 64e5b0b8..59578949 100644
--- a/python/sedonadb/tests/test_datasource.py
+++ b/python/sedonadb/tests/test_datasource.py
@@ -74,8 +74,13 @@ def test_read_ogr_multi_file(con):
with tempfile.TemporaryDirectory() as td:
# Create partitioned files by writing Parquet first and translating
- # one file at a time
- con.create_data_frame(gdf).to_parquet(td, partition_by="partition")
+ # one file at a time. We need to cast partition in pandas>=3.0 because
+ # the default translation of a string column is LargeUtf8 and this is not
+ # currently supported by DataFusion partition_by.
+ con.create_data_frame(gdf).to_view("tmp_gdf", overwrite=True)
+ con.sql(
+ """SELECT idx, partition::VARCHAR AS partition, wkb_geometry FROM
tmp_gdf"""
+ ).to_parquet(td, partition_by="partition")
for parquet_path in Path(td).rglob("*.parquet"):
fgb_path = str(parquet_path).replace(".parquet", ".fgb")
con.read_parquet(parquet_path).to_pandas().to_file(fgb_path)
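For reference, the test's workaround amounts to registering the data frame as a view and casting the string partition column to VARCHAR (Utf8) before writing, since DataFusion's partition_by does not currently accept LargeUtf8 columns. A rough standalone sketch of the same pattern follows; the sedonadb.connect() call, the sample GeoDataFrame, the column names, and the output path are illustrative assumptions and not part of this commit:

    import geopandas
    import sedonadb
    from shapely.geometry import Point

    # Hypothetical input: a tiny GeoDataFrame with a string partition column.
    # Under pandas>=3.0 this column may be handed to Arrow as LargeUtf8.
    gdf = geopandas.GeoDataFrame(
        {"idx": [0, 1], "partition": ["a", "b"]},
        geometry=[Point(0, 0), Point(1, 1)],
        crs="EPSG:4326",
    )

    con = sedonadb.connect()  # assumed connection helper
    con.create_data_frame(gdf).to_view("tmp_gdf", overwrite=True)

    # Cast the partition column to VARCHAR before writing so that
    # partition_by accepts it even when the source column is LargeUtf8.
    con.sql(
        "SELECT idx, partition::VARCHAR AS partition, geometry FROM tmp_gdf"
    ).to_parquet("partitioned_output", partition_by="partition")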