This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 00a8dad8 feat(r/sedonadb): Implement GDAL read via sf/arrow in R bindings (#670)
00a8dad8 is described below

commit 00a8dad885ae4e06be35bf52aa09af704cee60eb
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Feb 27 22:22:20 2026 -0600

    feat(r/sedonadb): Implement GDAL read via sf/arrow in R bindings (#670)
    
    Co-authored-by: Copilot <[email protected]>
---
 r/sedonadb/NAMESPACE                               |   2 +
 r/sedonadb/R/000-wrappers.R                        |  16 ++
 r/sedonadb/R/datasource.R                          | 227 +++++++++++++++++++++
 r/sedonadb/bootstrap.R                             |   3 +
 r/sedonadb/inst/files/natural-earth_cities.fgb     | Bin 0 -> 32224 bytes
 r/sedonadb/inst/files/natural-earth_cities.fgb.zip | Bin 0 -> 9636 bytes
 r/sedonadb/man/sd_read_sf.Rd                       |  70 +++++++
 r/sedonadb/src/init.c                              |  12 ++
 r/sedonadb/src/rust/api.h                          |   4 +
 r/sedonadb/src/rust/src/datasource.rs              |  90 ++++++++
 r/sedonadb/src/rust/src/lib.rs                     |   2 +-
 r/sedonadb/tests/testthat/_snaps/datasource.md     |   4 +
 r/sedonadb/tests/testthat/test-dataframe.R         |   2 -
 r/sedonadb/tests/testthat/test-datasource.R        | 146 +++++++++++++
 14 files changed, 575 insertions(+), 3 deletions(-)

diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE
index 7138d90d..2468428d 100644
--- a/r/sedonadb/NAMESPACE
+++ b/r/sedonadb/NAMESPACE
@@ -52,6 +52,7 @@ export(sd_connect)
 export(sd_count)
 export(sd_ctx_drop_view)
 export(sd_ctx_read_parquet)
+export(sd_ctx_read_sf)
 export(sd_ctx_register_udf)
 export(sd_ctx_sql)
 export(sd_ctx_view)
@@ -70,6 +71,7 @@ export(sd_filter)
 export(sd_group_by)
 export(sd_preview)
 export(sd_read_parquet)
+export(sd_read_sf)
 export(sd_register_udf)
 export(sd_select)
 export(sd_sql)
diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R
index 7ef69f70..a09f9349 100644
--- a/r/sedonadb/R/000-wrappers.R
+++ b/r/sedonadb/R/000-wrappers.R
@@ -55,6 +55,22 @@ NULL
 }
 
 
+`apply_crses_to_sf_stream` <- function(
+  `stream_in_xptr`,
+  `geometry_column_names`,
+  `geometry_column_crses`,
+  `stream_out_xptr`
+) {
+  invisible(.Call(
+    savvy_apply_crses_to_sf_stream__impl,
+    `stream_in_xptr`,
+    `geometry_column_names`,
+    `geometry_column_crses`,
+    `stream_out_xptr`
+  ))
+}
+
+
 `configure_proj_shared` <- function(
   `shared_library_path` = NULL,
   `database_path` = NULL,
diff --git a/r/sedonadb/R/datasource.R b/r/sedonadb/R/datasource.R
new file mode 100644
index 00000000..bfa03de1
--- /dev/null
+++ b/r/sedonadb/R/datasource.R
@@ -0,0 +1,227 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Read GDAL/OGR via the sf package
+#'
+#' Uses the ArrowArrayStream interface to GDAL exposed via the sf package
+#' to read GDAL/OGR-based data sources.
+#'
+#' @param ctx A SedonaDB context created using [sd_connect()].
+#' @param dsn,layer Description of datasource and layer. See [sf::read_sf()]
+#'   for details.
+#' @param ... Currently unused and must be empty
+#' @param query A SQL query to pass on to GDAL/OGR.
+#' @param options A character vector with layer open options in the
+#'   form "KEY=VALUE".
+#' @param drivers A list of drivers to try if the dsn cannot be guessed.
+#' @param filter A spatial object that may be used to filter while reading.
+#'   In the future SedonaDB will automatically calculate this value based on
+#'   the query. May be any spatial object that can be converted to WKT via
+#'   [wk::as_wkt()]. This filter's CRS must match that of the data.
+#' @param fid_column_name An optional name for the feature id (FID) column.
+#' @param lazy Use `TRUE` to stream the data from the source rather than collect
+#'   first. This can be faster for large data sources but can also be confusing
+#'   because the data may only be scanned exactly once.
+#'
+#' @returns A SedonaDB DataFrame.
+#' @export
+#'
+#' @examples
+#' nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+#' sd_read_sf(nc_gpkg)
+#'
+sd_read_sf <- function(
+  dsn,
+  layer = NULL,
+  ...,
+  query = NA,
+  options = NULL,
+  drivers = NULL,
+  filter = NULL,
+  fid_column_name = NULL,
+  lazy = FALSE
+) {
+  sd_ctx_read_sf(
+    ctx(),
+    dsn = dsn,
+    layer = layer,
+    ...,
+    query = query,
+    options = options,
+    drivers = drivers,
+    filter = filter,
+    fid_column_name = fid_column_name,
+    lazy = lazy
+  )
+}
+
+#' @rdname sd_read_sf
+#' @export
+sd_ctx_read_sf <- function(
+  ctx,
+  dsn,
+  layer = NULL,
+  ...,
+  query = NA,
+  options = NULL,
+  drivers = NULL,
+  filter = NULL,
+  fid_column_name = NULL,
+  lazy = FALSE
+) {
+  stream <- read_sf_stream(
+    dsn = dsn,
+    layer = layer,
+    ...,
+    query = query,
+    options = options,
+    drivers = drivers,
+    filter = filter,
+    fid_column_name = fid_column_name
+  )
+
+  df <- ctx$data_frame_from_array_stream(stream, collect_now = !lazy)
+  new_sedonadb_dataframe(ctx, df)
+}
+
+
+read_sf_stream <- function(
+  dsn,
+  layer = NULL,
+  ...,
+  query = NA,
+  options = NULL,
+  drivers = NULL,
+  filter = NULL,
+  fid_column_name = NULL
+) {
+  check_dots_empty(..., label = "sd_read_sf()")
+
+  if (is.null(layer)) {
+    layer <- character(0)
+  } else {
+    layer <- enc2utf8(layer)
+  }
+
+  if (length(dsn) != 1 || nchar(dsn) == 0) {
+    stop("Expected non-empty value of length 1 for dsn")
+  }
+
+  dsn_exists <- file.exists(dsn)
+
+  # A heuristic to catch common database DSNs so that we don't try to normalize
+  # them as file paths
+  dsn_isdb <- grepl("^(pg|mssql|pgeo|odbc|postgresql):", tolower(dsn))
+  dsn_is_http <- grepl("^https?://", dsn)
+
+  # Normalize (e.g., replace ~) and ensure internal encoding is UTF-8
+  if (length(dsn) == 1 && dsn_exists && !dsn_isdb && !dsn_is_http) {
+    dsn <- enc2utf8(normalizePath(dsn))
+
+    if (endsWith(dsn, ".zip")) {
+      dsn <- paste0("/vsizip/", dsn)
+    }
+  }
+
+  if (dsn_is_http) {
+    dsn <- paste0("/vsicurl/", enc2utf8(dsn))
+
+    if (endsWith(dsn, ".zip")) {
+      dsn <- paste0("/vsizip/", dsn)
+    }
+  }
+
+  # Rcpp expects these to be character vectors
+  options <- as.character(options)
+  drivers <- as.character(drivers)
+  fid_column_name <- as.character(fid_column_name)
+
+  if (!is.null(filter)) {
+    filter <- wk::as_wkt(filter)
+    if (length(filter) != 1) {
+      stop("Filter must be a geometry-like object of length one")
+    }
+  } else {
+    filter <- character(0)
+  }
+
+  stream <- nanoarrow::nanoarrow_allocate_array_stream()
+  read_fn <- asNamespace("sf")[["CPL_read_gdal_stream"]]
+  info <- read_fn(
+    stream,
+    dsn,
+    layer,
+    query,
+    options,
+    TRUE, # quiet
+    drivers,
+    filter,
+    dsn_exists,
+    dsn_isdb,
+    fid_column_name,
+    getOption("width")
+  )
+
+  # Check filter for CRS equality
+  if (!identical(filter, character())) {
+    filter_crs <- wk::wk_crs(filter)
+
+    for (column_crs in info[[2]]) {
+      column_crs_sf <- sf::st_crs(column_crs)
+      if (!wk::wk_crs_equal(filter_crs, column_crs_sf)) {
+        stop(
+          sprintf(
+            "filter crs (%s) does not match output CRS (%s)",
+            format(filter_crs),
+            format(column_crs_sf)
+          )
+        )
+      }
+    }
+  }
+
+  # sf doesn't currently support GEOMETRY_METADATA_ENCODING=GEOARROW, so we
+  # need to post-process the stream to ensure the CRS is set on the output
+  geometry_column_names <- info[[1]]
+  geometry_column_crses <- vapply(
+    info[[2]],
+    function(x) wk::wk_crs_projjson(sf::st_crs(x)),
+    character(1)
+  )
+
+  # The sf implementation assigns the "missing" geometry column name "geometry"
+  # where the name in the schema is "wkb_geometry".
+  if (
+    "geometry" %in% geometry_column_names && !("wkb_geometry" %in% geometry_column_names)
+  ) {
+    geometry_column_names <- c(geometry_column_names, "wkb_geometry")
+    geometry_column_crses <- c(
+      geometry_column_crses,
+      geometry_column_crses[geometry_column_names == "geometry"]
+    )
+  }
+
+  stream_out <- nanoarrow::nanoarrow_allocate_array_stream()
+  apply_crses_to_sf_stream(
+    stream,
+    geometry_column_names,
+    geometry_column_crses,
+    stream_out
+  )
+
+  stream_out
+}
diff --git a/r/sedonadb/bootstrap.R b/r/sedonadb/bootstrap.R
index d1974f24..6a93919e 100644
--- a/r/sedonadb/bootstrap.R
+++ b/r/sedonadb/bootstrap.R
@@ -43,6 +43,9 @@ file.copy(
 # Remove unused libgpuspatial crate
 unlink("src/c/sedona-libgpuspatial", recursive = TRUE)
 
+# Other unused files
+unlink("src/c/.clang-format")
+
 # Tweak workspace Cargo.toml
 top_cargo_toml <- "src/Cargo.toml"
 lines <- readLines(top_cargo_toml)
diff --git a/r/sedonadb/inst/files/natural-earth_cities.fgb b/r/sedonadb/inst/files/natural-earth_cities.fgb
new file mode 100644
index 00000000..1cdc6cec
Binary files /dev/null and b/r/sedonadb/inst/files/natural-earth_cities.fgb differ
diff --git a/r/sedonadb/inst/files/natural-earth_cities.fgb.zip b/r/sedonadb/inst/files/natural-earth_cities.fgb.zip
new file mode 100644
index 00000000..a5d036da
Binary files /dev/null and b/r/sedonadb/inst/files/natural-earth_cities.fgb.zip differ
diff --git a/r/sedonadb/man/sd_read_sf.Rd b/r/sedonadb/man/sd_read_sf.Rd
new file mode 100644
index 00000000..7622ccc5
--- /dev/null
+++ b/r/sedonadb/man/sd_read_sf.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/datasource.R
+\name{sd_read_sf}
+\alias{sd_read_sf}
+\alias{sd_ctx_read_sf}
+\title{Read GDAL/OGR via the sf package}
+\usage{
+sd_read_sf(
+  dsn,
+  layer = NULL,
+  ...,
+  query = NA,
+  options = NULL,
+  drivers = NULL,
+  filter = NULL,
+  fid_column_name = NULL,
+  lazy = FALSE
+)
+
+sd_ctx_read_sf(
+  ctx,
+  dsn,
+  layer = NULL,
+  ...,
+  query = NA,
+  options = NULL,
+  drivers = NULL,
+  filter = NULL,
+  fid_column_name = NULL,
+  lazy = FALSE
+)
+}
+\arguments{
+\item{dsn, layer}{Description of datasource and layer. See \code{\link[sf:st_read]{sf::read_sf()}}
+for details.}
+
+\item{...}{Currently unused and must be empty}
+
+\item{query}{A SQL query to pass on to GDAL/OGR.}
+
+\item{options}{A character vector with layer open options in the
+form "KEY=VALUE".}
+
+\item{drivers}{A list of drivers to try if the dsn cannot be guessed.}
+
+\item{filter}{A spatial object that may be used to filter while reading.
+In the future SedonaDB will automatically calculate this value based on
+the query. May be any spatial object that can be converted to WKT via
+\code{\link[wk:wkt]{wk::as_wkt()}}. This filter's CRS must match that of the data.}
+
+\item{fid_column_name}{An optional name for the feature id (FID) column.}
+
+\item{lazy}{Use \code{TRUE} to stream the data from the source rather than collect
+first. This can be faster for large data sources but can also be confusing
+because the data may only be scanned exactly once.}
+
+\item{ctx}{A SedonaDB context created using \code{\link[=sd_connect]{sd_connect()}}.}
+}
+\value{
+A SedonaDB DataFrame.
+}
+\description{
+Uses the ArrowArrayStream interface to GDAL exposed via the sf package
+to read GDAL/OGR-based data sources.
+}
+\examples{
+nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+sd_read_sf(nc_gpkg)
+
+}
diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c
index d2b6c572..636dcd61 100644
--- a/r/sedonadb/src/init.c
+++ b/r/sedonadb/src/init.c
@@ -55,6 +55,16 @@ SEXP handle_result(SEXP res_) {
   return (SEXP)res;
 }
 
+SEXP savvy_apply_crses_to_sf_stream__impl(SEXP c_arg__stream_in_xptr,
+                                          SEXP c_arg__geometry_column_names,
+                                          SEXP c_arg__geometry_column_crses,
+                                          SEXP c_arg__stream_out_xptr) {
+  SEXP res = savvy_apply_crses_to_sf_stream__ffi(
+      c_arg__stream_in_xptr, c_arg__geometry_column_names,
+      c_arg__geometry_column_crses, c_arg__stream_out_xptr);
+  return handle_result(res);
+}
+
 SEXP savvy_configure_proj_shared__impl(SEXP c_arg__shared_library_path,
                                        SEXP c_arg__database_path,
                                        SEXP c_arg__search_path) {
@@ -328,6 +338,8 @@ SEXP savvy_SedonaDBExprFactory_scalar_function__impl(SEXP self__,
 }
 
 static const R_CallMethodDef CallEntries[] = {
+    {"savvy_apply_crses_to_sf_stream__impl",
+     (DL_FUNC)&savvy_apply_crses_to_sf_stream__impl, 4},
     {"savvy_configure_proj_shared__impl",
      (DL_FUNC)&savvy_configure_proj_shared__impl, 3},
     {"savvy_init_r_runtime_interrupts__impl",
diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h
index 6820d68e..bd1ac9e5 100644
--- a/r/sedonadb/src/rust/api.h
+++ b/r/sedonadb/src/rust/api.h
@@ -15,6 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
+SEXP savvy_apply_crses_to_sf_stream__ffi(SEXP c_arg__stream_in_xptr,
+                                         SEXP c_arg__geometry_column_names,
+                                         SEXP c_arg__geometry_column_crses,
+                                         SEXP c_arg__stream_out_xptr);
 SEXP savvy_configure_proj_shared__ffi(SEXP c_arg__shared_library_path,
                                       SEXP c_arg__database_path,
                                       SEXP c_arg__search_path);
diff --git a/r/sedonadb/src/rust/src/datasource.rs b/r/sedonadb/src/rust/src/datasource.rs
new file mode 100644
index 00000000..9994a3d0
--- /dev/null
+++ b/r/sedonadb/src/rust/src/datasource.rs
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{collections::HashMap, iter::zip, ptr::swap_nonoverlapping, sync::Arc};
+
+use arrow_array::{
+    ffi_stream::FFI_ArrowArrayStream, RecordBatch, RecordBatchIterator, RecordBatchReader,
+};
+use arrow_schema::{ArrowError, Schema};
+use savvy::{savvy, savvy_err};
+use sedona_schema::{
+    crs::deserialize_crs,
+    datatypes::{Edges, SedonaType},
+};
+
+use crate::ffi::import_array_stream;
+
+#[savvy]
+fn apply_crses_to_sf_stream(
+    stream_in_xptr: savvy::Sexp,
+    geometry_column_names: savvy::StringSexp,
+    geometry_column_crses: savvy::StringSexp,
+    stream_out_xptr: savvy::Sexp,
+) -> savvy::Result<()> {
+    let reader_in = Box::new(import_array_stream(stream_in_xptr)?);
+    let reader_out = apply_crses_to_sf_stream_impl(
+        reader_in,
+        geometry_column_names.iter().collect(),
+        geometry_column_crses.iter().collect(),
+    )?;
+
+    let out_void = unsafe { savvy_ffi::R_ExternalPtrAddr(stream_out_xptr.0) };
+    if out_void.is_null() {
+        return Err(savvy_err!(
+            "external pointer to null in apply_crses_to_sf_stream()"
+        ));
+    }
+
+    let mut ffi_stream = FFI_ArrowArrayStream::new(reader_out);
+    let ffi_out = out_void as *mut FFI_ArrowArrayStream;
+    unsafe { swap_nonoverlapping(&mut ffi_stream, ffi_out, 1) };
+    Ok(())
+}
+
+fn apply_crses_to_sf_stream_impl(
+    stream: Box<dyn RecordBatchReader + Send>,
+    geometry_column_names: Vec<&str>,
+    geometry_column_crses: Vec<&str>,
+) -> savvy::Result<Box<dyn RecordBatchReader + Send>> {
+    let crs_lookup =
+        zip(geometry_column_names, geometry_column_crses).collect::<HashMap<&str, &str>>();
+    let schema = stream.schema();
+
+    let new_fields = schema
+        .fields()
+        .iter()
+        .map(|f| {
+            if f.extension_type_name() == Some("ogc.wkb") {
+                let crs = crs_lookup.get(f.name().as_str()).unwrap_or(&"");
+                let sedona_type = SedonaType::Wkb(Edges::Planar, deserialize_crs(crs)?);
+                Ok(sedona_type.to_storage_field(f.name(), f.is_nullable())?)
+            } else {
+                Ok(f.as_ref().clone())
+            }
+        })
+        .collect::<Result<Vec<_>, savvy::Error>>()?;
+
+    let new_schema = Arc::new(Schema::new(new_fields).with_metadata(schema.metadata().clone()));
+    let schema_iter = new_schema.clone();
+    let iter = stream.map(move |maybe_batch| -> Result<RecordBatch, ArrowError> {
+        let batch = maybe_batch?;
+        RecordBatch::try_new(schema_iter.clone(), batch.columns().to_vec())
+    });
+
+    Ok(Box::new(RecordBatchIterator::new(iter, new_schema)))
+}
diff --git a/r/sedonadb/src/rust/src/lib.rs b/r/sedonadb/src/rust/src/lib.rs
index 84251908..0db3f2b7 100644
--- a/r/sedonadb/src/rust/src/lib.rs
+++ b/r/sedonadb/src/rust/src/lib.rs
@@ -14,7 +14,6 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-// Example functions
 
 use std::ffi::c_void;
 
@@ -26,6 +25,7 @@ use sedona_proj::register::{configure_global_proj_engine, ProjCrsEngineBuilder};
 
 mod context;
 mod dataframe;
+mod datasource;
 mod error;
 mod expression;
 mod ffi;
diff --git a/r/sedonadb/tests/testthat/_snaps/datasource.md b/r/sedonadb/tests/testthat/_snaps/datasource.md
new file mode 100644
index 00000000..e40fc124
--- /dev/null
+++ b/r/sedonadb/tests/testthat/_snaps/datasource.md
@@ -0,0 +1,4 @@
+# sd_read_sf() works with filter
+
+    filter crs (NULL) does not match output CRS (NAD27)
+
diff --git a/r/sedonadb/tests/testthat/test-dataframe.R b/r/sedonadb/tests/testthat/test-dataframe.R
index 86393e4d..ae3cfb56 100644
--- a/r/sedonadb/tests/testthat/test-dataframe.R
+++ b/r/sedonadb/tests/testthat/test-dataframe.R
@@ -325,8 +325,6 @@ test_that("sd_write_parquet validates geoparquet_version parameter", {
 })
 
 test_that("sd_write_parquet accepts max_row_group_size parameter", {
-  skip_if_not_installed("arrow")
-
   tmp_parquet_file <- tempfile(fileext = ".parquet")
   tmp_parquet_file_tiny_groups <- tempfile(fileext = ".parquet")
   on.exit(unlink(c(tmp_parquet_file, tmp_parquet_file_tiny_groups)))
diff --git a/r/sedonadb/tests/testthat/test-datasource.R b/r/sedonadb/tests/testthat/test-datasource.R
new file mode 100644
index 00000000..ef003cc8
--- /dev/null
+++ b/r/sedonadb/tests/testthat/test-datasource.R
@@ -0,0 +1,146 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("sd_read_sf() works for layers with named geometry columns", {
+  skip_if_not_installed("sf")
+
+  nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+
+  from_stream <- sf::st_as_sf(sd_read_sf(nc_gpkg))
+  from_sf <- sf::st_read(nc_gpkg, quiet = TRUE)
+
+  # Expect identical CRS
+  expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+  # Expect identical content without CRS
+  expect_equal(
+    from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+    from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+  )
+})
+
+test_that("sd_read_sf() works for layers with unnamed geometry columns", {
+  skip_if_not_installed("sf")
+
+  nc_shp <- system.file("shape/nc.shp", package = "sf")
+
+  from_stream <- sf::st_as_sf(sd_read_sf(nc_shp))
+  from_sf <- sf::st_read(nc_shp, quiet = TRUE, promote_to_multi = FALSE)
+
+  # Expect identical CRS
+  expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+  # The from_stream version has a geometry column named "wkb_geometry" but
+  # sf renames this internally to "geometry"
+  expect_true("wkb_geometry" %in% names(from_stream))
+  colnames(from_stream)[colnames(from_stream) == "wkb_geometry"] <- "geometry"
+  sf::st_geometry(from_stream) <- "geometry"
+
+  # Expect identical content without CRS
+  expect_equal(
+    from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+    from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+  )
+})
+
+test_that("sd_read_sf() works for database dsns / non-default layers", {
+  skip_if_not_installed("sf")
+
+  # Can be tested using docker compose up with
+  # postgresql://localhost:5432/postgres?user=postgres&password=password
+  test_uri <- Sys.getenv("SEDONADB_POSTGRESQL_TEST_URI", unset = "")
+  if (identical(test_uri, "")) {
+    skip("SEDONADB_POSTGRESQL_TEST_URI is not set")
+  }
+
+  nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+  sf::st_write(
+    sf::st_read(nc_gpkg, quiet = TRUE),
+    test_uri,
+    "test_sf_nc",
+    append = FALSE,
+    driver = "PostgreSQL",
+    quiet = TRUE
+  )
+
+  from_stream <- sf::st_as_sf(sd_read_sf(test_uri, "test_sf_nc"))
+  from_sf <- sf::st_read(test_uri, "test_sf_nc", quiet = TRUE)
+
+  # Expect identical CRS
+  expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+  # Expect identical content without CRS
+  expect_equal(
+    from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+    from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+  )
+})
+
+test_that("sd_read_sf() works with filter", {
+  skip_if_not_installed("sf")
+
+  nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+  filter <- wk::rct(-77.901, 36.162, -77.075, 36.556, crs = sf::st_crs("NAD27"))
+
+  from_stream <- sf::st_as_sf(sd_read_sf(nc_gpkg, filter = filter))
+  from_sf <- sf::st_read(nc_gpkg, quiet = TRUE, wkt_filter = wk::as_wkt(filter))
+
+  # Expect identical CRS
+  expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+  # Expect identical content without CRS
+  expect_equal(
+    from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+    from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+  )
+
+  # Check for error if filtered with an invalid CRS
+  wk::wk_crs(filter) <- NULL
+  expect_snapshot_error(sd_read_sf(nc_gpkg, filter = filter))
+})
+
+test_that("sd_read_sf() works for zipped dsns", {
+  skip_if_not_installed("sf")
+
+  fgb <- system.file("files/natural-earth_cities.fgb", package = "sedonadb")
+  fgb_zip <- paste0(fgb, ".zip")
+
+  from_stream_fgb <- sd_read_sf(fgb) |> sf::st_as_sf()
+  from_stream_fgb_zip <- sd_read_sf(fgb_zip) |> sf::st_as_sf()
+  expect_identical(from_stream_fgb_zip, from_stream_fgb)
+})
+
+test_that("sd_read_sf() works for URL dsns", {
+  skip_on_cran()
+  skip_if_not_installed("sf")
+
+  # nolint start: line_length_linter
+  url <- "https://github.com/geoarrow/geoarrow-data/releases/download/v0.2.0/ns-water_water-point.fgb";
+  # nolint end
+  expect_identical(
+    sd_read_sf(url) |> sd_count(),
+    44690
+  )
+
+  # nolint start: line_length_linter
+  zipped_url <- "https://github.com/geoarrow/geoarrow-data/releases/download/v0.1.0/ns-water-water_point.fgb.zip";
+  # nolint end
+  expect_identical(
+    sd_read_sf(zipped_url) |> sd_count(),
+    44690
+  )
+})

Reply via email to