This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new e6a6e326 feat(python/sedonadb): Write GDAL/OGR via pyogrio (#632)
e6a6e326 is described below

commit e6a6e32647f2f05c12b4298e61a73cc8957b5fee
Author: Dewey Dunnington <[email protected]>
AuthorDate: Sat Feb 21 11:27:53 2026 -0600

    feat(python/sedonadb): Write GDAL/OGR via pyogrio (#632)
    
    Co-authored-by: Copilot <[email protected]>
---
 python/sedonadb/python/sedonadb/dataframe.py       | 89 ++++++++++++++++++++++
 .../{test_datasource.py => io/test_pyogrio.py}     | 83 ++++++++++++++++++++
 2 files changed, 172 insertions(+)

diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index c32c8600..74941814 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import io
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, 
Optional, Union
 
@@ -445,6 +446,94 @@ class DataFrame:
             single_file_output,
         )
 
+    def to_pyogrio(
+        self,
+        path: Union[str, Path, io.BytesIO],
+        *,
+        driver: Optional[str] = None,
+        geometry_type: Optional[str] = None,
+        geometry_name: Optional[str] = None,
+        crs: Optional[str] = None,
+        append: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """Write using GDAL/OGR via pyogrio
+
+        Writes this DataFrame batchwise to a file using GDAL/OGR using the
+        implementation provided by the pyogrio package. This is the same backend
+        used by GeoPandas and this function is a light wrapper around
+        `pyogrio.raw.write_arrow()` that fills in default values using
+        information available to the DataFrame (e.g., geometry column and CRS).
+
+        Args:
+            path: An output path or `BytesIO` output buffer. A `Path` is
+                converted to `str` before it is handed to pyogrio.
+            driver: An explicit GDAL OGR driver. Usually inferred from `path` but
+                must be provided if path is a `BytesIO`. Not all drivers support
+                writing to `BytesIO`. When omitted and `path` ends with
+                `.fgb.zip`, `"FlatGeoBuf"` is used.
+            geometry_type: A GeoJSON-style geometry type or `None` to provide an
+                inferred default value (which may be `"Unknown"`). This is required
+                to write some types of output (e.g. Shapefiles) and may provide
+                files that are more efficiently read.
+            geometry_name: The column to write as the primary geometry column. If
+                `None`, the name of the geometry column will be inferred.
+            crs: An optional string overriding the CRS of `geometry_name`. If
+                `None` and a geometry column is known, the CRS attached to that
+                column's type (if any) is serialized to JSON and used.
+            append: Use `True` to append to the file for drivers that support
+                appending.
+            kwargs: Extra arguments passed to `pyogrio.raw.write_arrow()`.
+
+        Raises:
+            ValueError: If `path` is a `BytesIO` and `driver` is not provided.
+
+        Examples:
+
+            >>> import tempfile
+            >>> sd = sedona.db.connect()
+            >>> td = tempfile.TemporaryDirectory()
+            >>> sd.sql("SELECT ST_Point(0, 1, 3857)").to_pyogrio(f"{td.name}/tmp.fgb")
+            >>> sd.read_pyogrio(f"{td.name}/tmp.fgb").show()
+            ┌──────────────┐
+            │ wkb_geometry │
+            │   geometry   │
+            ╞══════════════╡
+            │ POINT(0 1)   │
+            └──────────────┘
+        """
+        # Fall back to the DataFrame's primary geometry column; this may still
+        # be None (the CRS lookup below is skipped in that case).
+        if geometry_name is None:
+            geometry_name = self._impl.primary_geometry_column()
+
+        # An explicit crs always wins; otherwise use the column's own CRS, if any.
+        if crs is None and geometry_name is not None:
+            inferred_crs = self.schema.field(geometry_name).type.crs
+            crs = None if inferred_crs is None else inferred_crs.to_json()
+
+        if geometry_type is None:
+            # This is required for pyogrio.raw.write_arrow(). We could try harder
+            # to infer this because some drivers need this information.
+            geometry_type = "Unknown"
+
+        # Normalize Path to str so the suffix check below also applies to it.
+        if isinstance(path, Path):
+            path = str(path)
+
+        # A buffer has no file extension from which a driver could be inferred.
+        if isinstance(path, io.BytesIO) and driver is None:
+            raise ValueError("driver must be provided when path is a BytesIO")
+
+        # There may be more endings worth special-casing here but zipped FlatGeoBuf
+        # is particularly useful and isn't automatically recognized
+        if driver is None and isinstance(path, str) and 
path.endswith(".fgb.zip"):
+            driver = "FlatGeoBuf"
+
+        # Writer: pyogrio.write_arrow() via Cython ogr_write_arrow()
+        # https://github.com/geopandas/pyogrio/blob/3b2d40273b501c10ecf46cbd37c6e555754c89af/pyogrio/raw.py#L755-L897
+        # https://github.com/geopandas/pyogrio/blob/3b2d40273b501c10ecf46cbd37c6e555754c89af/pyogrio/_io.pyx#L2858-L2980
+        # Imported inside the method; presumably so pyogrio is only needed when
+        # this writer is actually used -- confirm against packaging metadata.
+        import pyogrio.raw
+
+        # The DataFrame itself is passed as the data to write.
+        pyogrio.raw.write_arrow(
+            self,
+            path,
+            driver=driver,
+            geometry_type=geometry_type,
+            geometry_name=geometry_name,
+            crs=crs,
+            append=append,
+            **kwargs,
+        )
+
     def show(
         self,
         limit: Optional[int] = 10,
diff --git a/python/sedonadb/tests/test_datasource.py b/python/sedonadb/tests/io/test_pyogrio.py
similarity index 65%
rename from python/sedonadb/tests/test_datasource.py
rename to python/sedonadb/tests/io/test_pyogrio.py
index 4640adfd..297c4198 100644
--- a/python/sedonadb/tests/test_datasource.py
+++ b/python/sedonadb/tests/io/test_pyogrio.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import io
 import tempfile
 from pathlib import Path
 
@@ -138,3 +139,85 @@ def test_read_ogr_file_not_found(con):
             sedonadb._lib.SedonaError, match="Can't infer schema for zero 
objects"
         ):
             con.read_pyogrio(Path(td) / "file_does_not_exist")
+
+
+def test_write_ogr(con):
+    """Round-trip a single point through to_pyogrio() and geopandas.read_file().
+
+    Covers a str path, a Path, a zipped FlatGeoBuf path written without an
+    explicit driver, and a geometry column that has no CRS.
+    """
+    with tempfile.TemporaryDirectory() as td:
+        # Basic write with defaults
+        df = con.sql("SELECT ST_Point(0, 1, 3857)")
+        expected = geopandas.GeoDataFrame(
+            {"geometry": geopandas.GeoSeries.from_wkt(["POINT (0 1)"], 
crs=3857)}
+        )
+
+        df.to_pyogrio(f"{td}/foofy.fgb")
+        geopandas.testing.assert_geodataframe_equal(
+            geopandas.read_file(f"{td}/foofy.fgb"), expected
+        )
+
+        # Ensure Path input works
+        df.to_pyogrio(Path(f"{td}/foofy.fgb"))
+        geopandas.testing.assert_geodataframe_equal(
+            geopandas.read_file(f"{td}/foofy.fgb"), expected
+        )
+
+        # Ensure zipped FlatGeoBuf doesn't require specifying the driver
+        df.to_pyogrio(Path(f"{td}/foofy.fgb.zip"))
+        geopandas.testing.assert_geodataframe_equal(
+            geopandas.read_file(f"{td}/foofy.fgb.zip"), expected
+        )
+
+        # Ensure inferred CRS that is None works
+        con.sql("SELECT ST_Point(0, 1)").to_pyogrio(f"{td}/foofy.fgb")
+        expected = geopandas.GeoDataFrame(
+            {"geometry": geopandas.GeoSeries.from_wkt(["POINT (0 1)"])}
+        )
+        geopandas.testing.assert_geodataframe_equal(
+            geopandas.read_file(f"{td}/foofy.fgb"), expected
+        )
+
+
+def test_write_ogr_buffer(con):
+    """Write to an in-memory BytesIO and check the driver requirement.
+
+    With an explicit driver the buffer contents must read back as the
+    expected GeoDataFrame; without one, to_pyogrio() must raise ValueError.
+    """
+    buf = io.BytesIO()
+    df = con.sql("SELECT ST_Point(0, 1, 3857)")
+    expected = geopandas.GeoDataFrame(
+        {"geometry": geopandas.GeoSeries.from_wkt(["POINT (0 1)"], crs=3857)}
+    )
+
+    df.to_pyogrio(buf, driver="FlatGeoBuf")
+    # The written bytes are read back directly from the buffer's contents.
+    geopandas.testing.assert_geodataframe_equal(
+        geopandas.read_file(buf.getvalue(), driver="FlatGeoBuf"), expected
+    )
+
+    # Ensure reasonable error if driver is not specified
+    with pytest.raises(ValueError, match="driver must be provided"):
+        df.to_pyogrio(buf)
+
+
+def test_write_ogr_no_geometry(con):
+    """Write a DataFrame with no geometry column to CSV and read it back with pandas."""
+    with tempfile.TemporaryDirectory() as td:
+        df = con.sql("SELECT 'one' as one")
+        expected = pd.DataFrame({"one": ["one"]})
+
+        df.to_pyogrio(f"{td}/foofy.csv")
+        pd.testing.assert_frame_equal(pd.read_csv(f"{td}/foofy.csv"), expected)
+
+
+def test_write_ogr_many_batches(con):
+    """Round-trip 50000 random MultiLineStrings through a GeoPackage file.
+
+    The rows are ordered by id and tagged with EPSG:4326 so that the result of
+    geopandas.read_file() can be compared against df.to_pandas().
+    """
+    # Check with a non-trivial number of batches
+    con.funcs.table.sd_random_geometry("MultiLineString", 50000, 
seed=4837).to_view(
+        "pyogrio_test"
+    )
+    df = con.sql(
+        """
+        SELECT id, ST_SetCrs(geometry, 'EPSG:4326') AS geometry
+        FROM pyogrio_test
+        ORDER BY id
+        """
+    )
+    # Materialize the expected frame from the same query before writing.
+    expected = df.to_pandas()
+
+    with tempfile.TemporaryDirectory() as td:
+        df.to_pyogrio(f"{td}/foofy.gpkg")
+        geopandas.testing.assert_geodataframe_equal(
+            geopandas.read_file(f"{td}/foofy.gpkg"), expected
+        )

Reply via email to