This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 00a8dad8 feat(r/sedonadb): Implement GDAL read via sf/arrow in R bindings (#670)
00a8dad8 is described below
commit 00a8dad885ae4e06be35bf52aa09af704cee60eb
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Feb 27 22:22:20 2026 -0600
feat(r/sedonadb): Implement GDAL read via sf/arrow in R bindings (#670)
Co-authored-by: Copilot <[email protected]>
---
r/sedonadb/NAMESPACE | 2 +
r/sedonadb/R/000-wrappers.R | 16 ++
r/sedonadb/R/datasource.R | 227 +++++++++++++++++++++
r/sedonadb/bootstrap.R | 3 +
r/sedonadb/inst/files/natural-earth_cities.fgb | Bin 0 -> 32224 bytes
r/sedonadb/inst/files/natural-earth_cities.fgb.zip | Bin 0 -> 9636 bytes
r/sedonadb/man/sd_read_sf.Rd | 70 +++++++
r/sedonadb/src/init.c | 12 ++
r/sedonadb/src/rust/api.h | 4 +
r/sedonadb/src/rust/src/datasource.rs | 90 ++++++++
r/sedonadb/src/rust/src/lib.rs | 2 +-
r/sedonadb/tests/testthat/_snaps/datasource.md | 4 +
r/sedonadb/tests/testthat/test-dataframe.R | 2 -
r/sedonadb/tests/testthat/test-datasource.R | 146 +++++++++++++
14 files changed, 575 insertions(+), 3 deletions(-)
diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE
index 7138d90d..2468428d 100644
--- a/r/sedonadb/NAMESPACE
+++ b/r/sedonadb/NAMESPACE
@@ -52,6 +52,7 @@ export(sd_connect)
export(sd_count)
export(sd_ctx_drop_view)
export(sd_ctx_read_parquet)
+export(sd_ctx_read_sf)
export(sd_ctx_register_udf)
export(sd_ctx_sql)
export(sd_ctx_view)
@@ -70,6 +71,7 @@ export(sd_filter)
export(sd_group_by)
export(sd_preview)
export(sd_read_parquet)
+export(sd_read_sf)
export(sd_register_udf)
export(sd_select)
export(sd_sql)
diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R
index 7ef69f70..a09f9349 100644
--- a/r/sedonadb/R/000-wrappers.R
+++ b/r/sedonadb/R/000-wrappers.R
@@ -55,6 +55,22 @@ NULL
}
+`apply_crses_to_sf_stream` <- function(
+ `stream_in_xptr`,
+ `geometry_column_names`,
+ `geometry_column_crses`,
+ `stream_out_xptr`
+) {
+ invisible(.Call(
+ savvy_apply_crses_to_sf_stream__impl,
+ `stream_in_xptr`,
+ `geometry_column_names`,
+ `geometry_column_crses`,
+ `stream_out_xptr`
+ ))
+}
+
+
`configure_proj_shared` <- function(
`shared_library_path` = NULL,
`database_path` = NULL,
diff --git a/r/sedonadb/R/datasource.R b/r/sedonadb/R/datasource.R
new file mode 100644
index 00000000..bfa03de1
--- /dev/null
+++ b/r/sedonadb/R/datasource.R
@@ -0,0 +1,227 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Read GDAL/OGR via the sf package
+#'
+#' Uses the ArrowArrayStream interface to GDAL exposed via the sf package
+#' to read GDAL/OGR-based data sources.
+#'
+#' @param ctx A SedonaDB context created using [sd_connect()].
+#' @param dsn,layer Description of datasource and layer. See [sf::read_sf()]
+#' for details.
+#' @param ... Currently unused and must be empty
+#' @param query A SQL query to pass on to GDAL/OGR.
+#' @param options A character vector with layer open options in the
+#' form "KEY=VALUE".
+#' @param drivers A list of drivers to try if the dsn cannot be guessed.
+#' @param filter A spatial object that may be used to filter while reading.
+#' In the future SedonaDB will automatically calculate this value based on
+#' the query. May be any spatial object that can be converted to WKT via
+#' [wk::as_wkt()]. This filter's CRS must match that of the data.
+#' @param fid_column_name An optional name for the feature id (FID) column.
+#' @param lazy Use `TRUE` to stream the data from the source rather than collect
+#' first. This can be faster for large data sources but can also be confusing
+#' because the data may only be scanned exactly once.
+#'
+#' @returns A SedonaDB DataFrame.
+#' @export
+#'
+#' @examples
+#' nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+#' sd_read_sf(nc_gpkg)
+#'
+sd_read_sf <- function(
+ dsn,
+ layer = NULL,
+ ...,
+ query = NA,
+ options = NULL,
+ drivers = NULL,
+ filter = NULL,
+ fid_column_name = NULL,
+ lazy = FALSE
+) {
+ sd_ctx_read_sf(
+ ctx(),
+ dsn = dsn,
+ layer = layer,
+ ...,
+ query = query,
+ options = options,
+ drivers = drivers,
+ filter = filter,
+ fid_column_name = fid_column_name,
+ lazy = lazy
+ )
+}
+
+#' @rdname sd_read_sf
+#' @export
+sd_ctx_read_sf <- function(
+ ctx,
+ dsn,
+ layer = NULL,
+ ...,
+ query = NA,
+ options = NULL,
+ drivers = NULL,
+ filter = NULL,
+ fid_column_name = NULL,
+ lazy = FALSE
+) {
+ stream <- read_sf_stream(
+ dsn = dsn,
+ layer = layer,
+ ...,
+ query = query,
+ options = options,
+ drivers = drivers,
+ filter = filter,
+ fid_column_name = fid_column_name
+ )
+
+ df <- ctx$data_frame_from_array_stream(stream, collect_now = !lazy)
+ new_sedonadb_dataframe(ctx, df)
+}
+
+
+read_sf_stream <- function(
+ dsn,
+ layer = NULL,
+ ...,
+ query = NA,
+ options = NULL,
+ drivers = NULL,
+ filter = NULL,
+ fid_column_name = NULL
+) {
+ check_dots_empty(..., label = "sd_read_sf()")
+
+ if (is.null(layer)) {
+ layer <- character(0)
+ } else {
+ layer <- enc2utf8(layer)
+ }
+
+ if (length(dsn) != 1 || nchar(dsn) == 0) {
+ stop("Expected non-empty value of length 1 for dsn")
+ }
+
+ dsn_exists <- file.exists(dsn)
+
+ # A heuristic to catch common database DSNs so that we don't try to normalize
+ # them as file paths
+ dsn_isdb <- grepl("^(pg|mssql|pgeo|odbc|postgresql):", tolower(dsn))
+ dsn_is_http <- grepl("^https?://", dsn)
+
+ # Normalize (e.g., replace ~) and ensure internal encoding is UTF-8
+ if (length(dsn) == 1 && dsn_exists && !dsn_isdb && !dsn_is_http) {
+ dsn <- enc2utf8(normalizePath(dsn))
+
+ if (endsWith(dsn, ".zip")) {
+ dsn <- paste0("/vsizip/", dsn)
+ }
+ }
+
+ if (dsn_is_http) {
+ dsn <- paste0("/vsicurl/", enc2utf8(dsn))
+
+ if (endsWith(dsn, ".zip")) {
+ dsn <- paste0("/vsizip/", dsn)
+ }
+ }
+
+ # Rcpp expects these to be character vectors
+ options <- as.character(options)
+ drivers <- as.character(drivers)
+ fid_column_name <- as.character(fid_column_name)
+
+ if (!is.null(filter)) {
+ filter <- wk::as_wkt(filter)
+ if (length(filter) != 1) {
+ stop("Filter must be a geometry-like object of length one")
+ }
+ } else {
+ filter <- character(0)
+ }
+
+ stream <- nanoarrow::nanoarrow_allocate_array_stream()
+ read_fn <- asNamespace("sf")[["CPL_read_gdal_stream"]]
+ info <- read_fn(
+ stream,
+ dsn,
+ layer,
+ query,
+ options,
+ TRUE, # quiet
+ drivers,
+ filter,
+ dsn_exists,
+ dsn_isdb,
+ fid_column_name,
+ getOption("width")
+ )
+
+ # Check filter for CRS equality
+ if (!identical(filter, character())) {
+ filter_crs <- wk::wk_crs(filter)
+
+ for (column_crs in info[[2]]) {
+ column_crs_sf <- sf::st_crs(column_crs)
+ if (!wk::wk_crs_equal(filter_crs, column_crs_sf)) {
+ stop(
+ sprintf(
+ "filter crs (%s) does not match output CRS (%s)",
+ format(filter_crs),
+ format(column_crs_sf)
+ )
+ )
+ }
+ }
+ }
+
+ # sf doesn't currently support GEOMETRY_METADATA_ENCODING=GEOARROW, so we
+ # need to post-process the stream to ensure the CRS is set on the output
+ geometry_column_names <- info[[1]]
+ geometry_column_crses <- vapply(
+ info[[2]],
+ function(x) wk::wk_crs_projjson(sf::st_crs(x)),
+ character(1)
+ )
+
+ # The sf implementation assigns the "missing" geometry column name "geometry"
+ # where the name in the schema is "wkb_geometry".
+ if (
+ "geometry" %in% geometry_column_names && !("wkb_geometry" %in% geometry_column_names)
+ ) {
+ geometry_column_names <- c(geometry_column_names, "wkb_geometry")
+ geometry_column_crses <- c(
+ geometry_column_crses,
+ geometry_column_crses[geometry_column_names == "geometry"]
+ )
+ }
+
+ stream_out <- nanoarrow::nanoarrow_allocate_array_stream()
+ apply_crses_to_sf_stream(
+ stream,
+ geometry_column_names,
+ geometry_column_crses,
+ stream_out
+ )
+
+ stream_out
+}
diff --git a/r/sedonadb/bootstrap.R b/r/sedonadb/bootstrap.R
index d1974f24..6a93919e 100644
--- a/r/sedonadb/bootstrap.R
+++ b/r/sedonadb/bootstrap.R
@@ -43,6 +43,9 @@ file.copy(
# Remove unused libgpuspatial crate
unlink("src/c/sedona-libgpuspatial", recursive = TRUE)
+# Other unused files
+unlink("src/c/.clang-format")
+
# Tweak workspace Cargo.toml
top_cargo_toml <- "src/Cargo.toml"
lines <- readLines(top_cargo_toml)
diff --git a/r/sedonadb/inst/files/natural-earth_cities.fgb b/r/sedonadb/inst/files/natural-earth_cities.fgb
new file mode 100644
index 00000000..1cdc6cec
Binary files /dev/null and b/r/sedonadb/inst/files/natural-earth_cities.fgb differ
diff --git a/r/sedonadb/inst/files/natural-earth_cities.fgb.zip b/r/sedonadb/inst/files/natural-earth_cities.fgb.zip
new file mode 100644
index 00000000..a5d036da
Binary files /dev/null and b/r/sedonadb/inst/files/natural-earth_cities.fgb.zip differ
diff --git a/r/sedonadb/man/sd_read_sf.Rd b/r/sedonadb/man/sd_read_sf.Rd
new file mode 100644
index 00000000..7622ccc5
--- /dev/null
+++ b/r/sedonadb/man/sd_read_sf.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/datasource.R
+\name{sd_read_sf}
+\alias{sd_read_sf}
+\alias{sd_ctx_read_sf}
+\title{Read GDAL/OGR via the sf package}
+\usage{
+sd_read_sf(
+ dsn,
+ layer = NULL,
+ ...,
+ query = NA,
+ options = NULL,
+ drivers = NULL,
+ filter = NULL,
+ fid_column_name = NULL,
+ lazy = FALSE
+)
+
+sd_ctx_read_sf(
+ ctx,
+ dsn,
+ layer = NULL,
+ ...,
+ query = NA,
+ options = NULL,
+ drivers = NULL,
+ filter = NULL,
+ fid_column_name = NULL,
+ lazy = FALSE
+)
+}
+\arguments{
+\item{dsn, layer}{Description of datasource and layer. See \code{\link[sf:st_read]{sf::read_sf()}}
+for details.}
+
+\item{...}{Currently unused and must be empty}
+
+\item{query}{A SQL query to pass on to GDAL/OGR.}
+
+\item{options}{A character vector with layer open options in the
+form "KEY=VALUE".}
+
+\item{drivers}{A list of drivers to try if the dsn cannot be guessed.}
+
+\item{filter}{A spatial object that may be used to filter while reading.
+In the future SedonaDB will automatically calculate this value based on
+the query. May be any spatial object that can be converted to WKT via
+\code{\link[wk:wkt]{wk::as_wkt()}}. This filter's CRS must match that of the data.}
+
+\item{fid_column_name}{An optional name for the feature id (FID) column.}
+
+\item{lazy}{Use \code{TRUE} to stream the data from the source rather than collect
+first. This can be faster for large data sources but can also be confusing
+because the data may only be scanned exactly once.}
+
+\item{ctx}{A SedonaDB context created using \code{\link[=sd_connect]{sd_connect()}}.}
+}
+\value{
+A SedonaDB DataFrame.
+}
+\description{
+Uses the ArrowArrayStream interface to GDAL exposed via the sf package
+to read GDAL/OGR-based data sources.
+}
+\examples{
+nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+sd_read_sf(nc_gpkg)
+
+}
diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c
index d2b6c572..636dcd61 100644
--- a/r/sedonadb/src/init.c
+++ b/r/sedonadb/src/init.c
@@ -55,6 +55,16 @@ SEXP handle_result(SEXP res_) {
return (SEXP)res;
}
+SEXP savvy_apply_crses_to_sf_stream__impl(SEXP c_arg__stream_in_xptr,
+ SEXP c_arg__geometry_column_names,
+ SEXP c_arg__geometry_column_crses,
+ SEXP c_arg__stream_out_xptr) {
+ SEXP res = savvy_apply_crses_to_sf_stream__ffi(
+ c_arg__stream_in_xptr, c_arg__geometry_column_names,
+ c_arg__geometry_column_crses, c_arg__stream_out_xptr);
+ return handle_result(res);
+}
+
SEXP savvy_configure_proj_shared__impl(SEXP c_arg__shared_library_path,
SEXP c_arg__database_path,
SEXP c_arg__search_path) {
@@ -328,6 +338,8 @@ SEXP savvy_SedonaDBExprFactory_scalar_function__impl(SEXP self__,
}
static const R_CallMethodDef CallEntries[] = {
+ {"savvy_apply_crses_to_sf_stream__impl",
+ (DL_FUNC)&savvy_apply_crses_to_sf_stream__impl, 4},
{"savvy_configure_proj_shared__impl",
(DL_FUNC)&savvy_configure_proj_shared__impl, 3},
{"savvy_init_r_runtime_interrupts__impl",
diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h
index 6820d68e..bd1ac9e5 100644
--- a/r/sedonadb/src/rust/api.h
+++ b/r/sedonadb/src/rust/api.h
@@ -15,6 +15,10 @@
// specific language governing permissions and limitations
// under the License.
+SEXP savvy_apply_crses_to_sf_stream__ffi(SEXP c_arg__stream_in_xptr,
+ SEXP c_arg__geometry_column_names,
+ SEXP c_arg__geometry_column_crses,
+ SEXP c_arg__stream_out_xptr);
SEXP savvy_configure_proj_shared__ffi(SEXP c_arg__shared_library_path,
SEXP c_arg__database_path,
SEXP c_arg__search_path);
diff --git a/r/sedonadb/src/rust/src/datasource.rs b/r/sedonadb/src/rust/src/datasource.rs
new file mode 100644
index 00000000..9994a3d0
--- /dev/null
+++ b/r/sedonadb/src/rust/src/datasource.rs
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{collections::HashMap, iter::zip, ptr::swap_nonoverlapping, sync::Arc};
+
+use arrow_array::{
+ ffi_stream::FFI_ArrowArrayStream, RecordBatch, RecordBatchIterator, RecordBatchReader,
+};
+use arrow_schema::{ArrowError, Schema};
+use savvy::{savvy, savvy_err};
+use sedona_schema::{
+ crs::deserialize_crs,
+ datatypes::{Edges, SedonaType},
+};
+
+use crate::ffi::import_array_stream;
+
+#[savvy]
+fn apply_crses_to_sf_stream(
+ stream_in_xptr: savvy::Sexp,
+ geometry_column_names: savvy::StringSexp,
+ geometry_column_crses: savvy::StringSexp,
+ stream_out_xptr: savvy::Sexp,
+) -> savvy::Result<()> {
+ let reader_in = Box::new(import_array_stream(stream_in_xptr)?);
+ let reader_out = apply_crses_to_sf_stream_impl(
+ reader_in,
+ geometry_column_names.iter().collect(),
+ geometry_column_crses.iter().collect(),
+ )?;
+
+ let out_void = unsafe { savvy_ffi::R_ExternalPtrAddr(stream_out_xptr.0) };
+ if out_void.is_null() {
+ return Err(savvy_err!(
+ "external pointer to null in apply_crses_to_sf_stream()"
+ ));
+ }
+
+ let mut ffi_stream = FFI_ArrowArrayStream::new(reader_out);
+ let ffi_out = out_void as *mut FFI_ArrowArrayStream;
+ unsafe { swap_nonoverlapping(&mut ffi_stream, ffi_out, 1) };
+ Ok(())
+}
+
+fn apply_crses_to_sf_stream_impl(
+ stream: Box<dyn RecordBatchReader + Send>,
+ geometry_column_names: Vec<&str>,
+ geometry_column_crses: Vec<&str>,
+) -> savvy::Result<Box<dyn RecordBatchReader + Send>> {
+ let crs_lookup =
+ zip(geometry_column_names, geometry_column_crses).collect::<HashMap<&str, &str>>();
+ let schema = stream.schema();
+
+ let new_fields = schema
+ .fields()
+ .iter()
+ .map(|f| {
+ if f.extension_type_name() == Some("ogc.wkb") {
+ let crs = crs_lookup.get(f.name().as_str()).unwrap_or(&"");
+ let sedona_type = SedonaType::Wkb(Edges::Planar, deserialize_crs(crs)?);
+ Ok(sedona_type.to_storage_field(f.name(), f.is_nullable())?)
+ } else {
+ Ok(f.as_ref().clone())
+ }
+ })
+ .collect::<Result<Vec<_>, savvy::Error>>()?;
+
+ let new_schema = Arc::new(Schema::new(new_fields).with_metadata(schema.metadata().clone()));
+ let schema_iter = new_schema.clone();
+ let iter = stream.map(move |maybe_batch| -> Result<RecordBatch, ArrowError> {
+ let batch = maybe_batch?;
+ RecordBatch::try_new(schema_iter.clone(), batch.columns().to_vec())
+ });
+
+ Ok(Box::new(RecordBatchIterator::new(iter, new_schema)))
+}
diff --git a/r/sedonadb/src/rust/src/lib.rs b/r/sedonadb/src/rust/src/lib.rs
index 84251908..0db3f2b7 100644
--- a/r/sedonadb/src/rust/src/lib.rs
+++ b/r/sedonadb/src/rust/src/lib.rs
@@ -14,7 +14,6 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-// Example functions
use std::ffi::c_void;
@@ -26,6 +25,7 @@ use sedona_proj::register::{configure_global_proj_engine, ProjCrsEngineBuilder};
mod context;
mod dataframe;
+mod datasource;
mod error;
mod expression;
mod ffi;
diff --git a/r/sedonadb/tests/testthat/_snaps/datasource.md b/r/sedonadb/tests/testthat/_snaps/datasource.md
new file mode 100644
index 00000000..e40fc124
--- /dev/null
+++ b/r/sedonadb/tests/testthat/_snaps/datasource.md
@@ -0,0 +1,4 @@
+# sd_read_sf() works with filter
+
+ filter crs (NULL) does not match output CRS (NAD27)
+
diff --git a/r/sedonadb/tests/testthat/test-dataframe.R b/r/sedonadb/tests/testthat/test-dataframe.R
index 86393e4d..ae3cfb56 100644
--- a/r/sedonadb/tests/testthat/test-dataframe.R
+++ b/r/sedonadb/tests/testthat/test-dataframe.R
@@ -325,8 +325,6 @@ test_that("sd_write_parquet validates geoparquet_version parameter", {
})
test_that("sd_write_parquet accepts max_row_group_size parameter", {
- skip_if_not_installed("arrow")
-
tmp_parquet_file <- tempfile(fileext = ".parquet")
tmp_parquet_file_tiny_groups <- tempfile(fileext = ".parquet")
on.exit(unlink(c(tmp_parquet_file, tmp_parquet_file_tiny_groups)))
diff --git a/r/sedonadb/tests/testthat/test-datasource.R b/r/sedonadb/tests/testthat/test-datasource.R
new file mode 100644
index 00000000..ef003cc8
--- /dev/null
+++ b/r/sedonadb/tests/testthat/test-datasource.R
@@ -0,0 +1,146 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("sd_read_sf() works for layers with named geometry columns", {
+ skip_if_not_installed("sf")
+
+ nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+
+ from_stream <- sf::st_as_sf(sd_read_sf(nc_gpkg))
+ from_sf <- sf::st_read(nc_gpkg, quiet = TRUE)
+
+ # Expect identical CRS
+ expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+ # Expect identical content without CRS
+ expect_equal(
+ from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+ from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+ )
+})
+
+test_that("sd_read_sf() works for layers with unnamed geometry columns", {
+ skip_if_not_installed("sf")
+
+ nc_shp <- system.file("shape/nc.shp", package = "sf")
+
+ from_stream <- sf::st_as_sf(sd_read_sf(nc_shp))
+ from_sf <- sf::st_read(nc_shp, quiet = TRUE, promote_to_multi = FALSE)
+
+ # Expect identical CRS
+ expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+ # The from_stream version has a geometry column named "wkb_geometry" but
+ # sf renames this internally to "geometry"
+ expect_true("wkb_geometry" %in% names(from_stream))
+ colnames(from_stream)[colnames(from_stream) == "wkb_geometry"] <- "geometry"
+ sf::st_geometry(from_stream) <- "geometry"
+
+ # Expect identical content without CRS
+ expect_equal(
+ from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+ from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+ )
+})
+
+test_that("sd_read_sf() works for database dsns / non-default layers", {
+ skip_if_not_installed("sf")
+
+ # Can be tested using docker compose up with
+ # postgresql://localhost:5432/postgres?user=postgres&password=password
+ test_uri <- Sys.getenv("SEDONADB_POSTGRESQL_TEST_URI", unset = "")
+ if (identical(test_uri, "")) {
+ skip("SEDONADB_POSTGRESQL_TEST_URI is not set")
+ }
+
+ nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+ sf::st_write(
+ sf::st_read(nc_gpkg, quiet = TRUE),
+ test_uri,
+ "test_sf_nc",
+ append = FALSE,
+ driver = "PostgreSQL",
+ quiet = TRUE
+ )
+
+ from_stream <- sf::st_as_sf(sd_read_sf(test_uri, "test_sf_nc"))
+ from_sf <- sf::st_read(test_uri, "test_sf_nc", quiet = TRUE)
+
+ # Expect identical CRS
+ expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+ # Expect identical content without CRS
+ expect_equal(
+ from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+ from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+ )
+})
+
+test_that("sd_read_sf() works with filter", {
+ skip_if_not_installed("sf")
+
+ nc_gpkg <- system.file("gpkg/nc.gpkg", package = "sf")
+ filter <- wk::rct(-77.901, 36.162, -77.075, 36.556, crs = sf::st_crs("NAD27"))
+
+ from_stream <- sf::st_as_sf(sd_read_sf(nc_gpkg, filter = filter))
+ from_sf <- sf::st_read(nc_gpkg, quiet = TRUE, wkt_filter = wk::as_wkt(filter))
+
+ # Expect identical CRS
+ expect_true(sf::st_crs(from_stream) == sf::st_crs(from_sf))
+
+ # Expect identical content without CRS
+ expect_equal(
+ from_stream |> sf::st_set_crs(NA) |> as.data.frame(),
+ from_sf |> sf::st_set_crs(NA) |> as.data.frame()
+ )
+
+ # Check for error if filtered with an invalid CRS
+ wk::wk_crs(filter) <- NULL
+ expect_snapshot_error(sd_read_sf(nc_gpkg, filter = filter))
+})
+
+test_that("sd_read_sf() works for zipped dsns", {
+ skip_if_not_installed("sf")
+
+ fgb <- system.file("files/natural-earth_cities.fgb", package = "sedonadb")
+ fgb_zip <- paste0(fgb, ".zip")
+
+ from_stream_fgb <- sd_read_sf(fgb) |> sf::st_as_sf()
+ from_stream_fgb_zip <- sd_read_sf(fgb_zip) |> sf::st_as_sf()
+ expect_identical(from_stream_fgb_zip, from_stream_fgb)
+})
+
+test_that("sd_read_sf() works for URL dsns", {
+ skip_on_cran()
+ skip_if_not_installed("sf")
+
+ # nolint start: line_length_linter
+ url <- "https://github.com/geoarrow/geoarrow-data/releases/download/v0.2.0/ns-water_water-point.fgb"
+ # nolint end
+ expect_identical(
+ sd_read_sf(url) |> sd_count(),
+ 44690
+ )
+
+ # nolint start: line_length_linter
+ zipped_url <- "https://github.com/geoarrow/geoarrow-data/releases/download/v0.1.0/ns-water-water_point.fgb.zip"
+ # nolint end
+ expect_identical(
+ sd_read_sf(zipped_url) |> sd_count(),
+ 44690
+ )
+})