This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new f633cd64 feat(r/sedonadb): Implement `dplyr::select()` for sedonadb_dataframe (#419)
f633cd64 is described below

commit f633cd64cdbb56828982895b6d5661cb339dd2f9
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Dec 9 10:53:49 2025 -0600

    feat(r/sedonadb): Implement `dplyr::select()` for sedonadb_dataframe (#419)
---
 r/sedonadb/DESCRIPTION                     |  3 +++
 r/sedonadb/R/000-wrappers.R                |  7 ++++++
 r/sedonadb/R/pkg-dplyr.R                   | 30 ++++++++++++++++++++++++
 r/sedonadb/R/zzz.R                         |  3 +++
 r/sedonadb/src/init.c                      | 10 ++++++++
 r/sedonadb/src/rust/api.h                  |  2 ++
 r/sedonadb/src/rust/src/dataframe.rs       | 26 +++++++++++++++++----
 r/sedonadb/tests/testthat/test-pkg-dplyr.R | 37 ++++++++++++++++++++++++++++++
 8 files changed, 113 insertions(+), 5 deletions(-)

diff --git a/r/sedonadb/DESCRIPTION b/r/sedonadb/DESCRIPTION
index 1f941adf..4876e3be 100644
--- a/r/sedonadb/DESCRIPTION
+++ b/r/sedonadb/DESCRIPTION
@@ -13,9 +13,12 @@ SystemRequirements: Cargo (Rust's package manager), rustc
 Depends: R (>= 4.1.0)
 Suggests:
     adbcdrivermanager,
+    dplyr,
     rlang,
+    tibble,
     sf,
     testthat (>= 3.0.0),
+    tidyselect,
     withr,
     wk
 Imports:
diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R
index 72e50d28..df1f61fd 100644
--- a/r/sedonadb/R/000-wrappers.R
+++ b/r/sedonadb/R/000-wrappers.R
@@ -186,6 +186,12 @@ class(`InternalContext`) <- c("sedonadb::InternalContext__bundle", "savvy_sedona
   }
 }
 
+`InternalDataFrame_select_indices` <- function(self) {
+  function(`names`, `indices`) {
+    .savvy_wrap_InternalDataFrame(.Call(savvy_InternalDataFrame_select_indices__impl, `self`, `names`, `indices`))
+  }
+}
+
 `InternalDataFrame_show` <- function(self) {
   function(`ctx`, `width_chars`, `ascii`, `limit` = NULL) {
     `ctx` <- .savvy_extract_ptr(`ctx`, "sedonadb::InternalContext")
@@ -233,6 +239,7 @@ class(`InternalContext`) <- c("sedonadb::InternalContext__bundle", "savvy_sedona
   e$`count` <- `InternalDataFrame_count`(ptr)
   e$`limit` <- `InternalDataFrame_limit`(ptr)
   e$`primary_geometry_column_index` <- `InternalDataFrame_primary_geometry_column_index`(ptr)
+  e$`select_indices` <- `InternalDataFrame_select_indices`(ptr)
   e$`show` <- `InternalDataFrame_show`(ptr)
   e$`to_arrow_schema` <- `InternalDataFrame_to_arrow_schema`(ptr)
   e$`to_arrow_stream` <- `InternalDataFrame_to_arrow_stream`(ptr)
diff --git a/r/sedonadb/R/pkg-dplyr.R b/r/sedonadb/R/pkg-dplyr.R
new file mode 100644
index 00000000..817dca65
--- /dev/null
+++ b/r/sedonadb/R/pkg-dplyr.R
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+collect.sedonadb_dataframe <- function(x, ...) {
+  rlang::check_dots_empty()
+  tibble::as_tibble(sd_collect(x))
+}
+
+select.sedonadb_dataframe <- function(.data, ...) {
+  schema <- nanoarrow::infer_nanoarrow_schema(.data)
+  ptype <- nanoarrow::infer_nanoarrow_ptype(schema)
+  loc <- tidyselect::eval_select(rlang::expr(c(...)), data = ptype)
+
+  df <- .data$df$select_indices(names(loc), loc - 1L)
+  new_sedonadb_dataframe(.data$ctx, df)
+}
diff --git a/r/sedonadb/R/zzz.R b/r/sedonadb/R/zzz.R
index e286d550..9f9720cc 100644
--- a/r/sedonadb/R/zzz.R
+++ b/r/sedonadb/R/zzz.R
@@ -21,6 +21,9 @@
 
   s3_register("sf::st_as_sf", "sedonadb_dataframe")
 
+  s3_register("dplyr::collect", "sedonadb_dataframe")
+  s3_register("dplyr::select", "sedonadb_dataframe")
+
   # Inject what we need to reduce the Rust code to a simple Rf_eval()
   ns <- asNamespace("sedonadb")
   call <- call("check_interrupts")
diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c
index f72a3cab..8405b2cc 100644
--- a/r/sedonadb/src/init.c
+++ b/r/sedonadb/src/init.c
@@ -156,6 +156,14 @@ SEXP savvy_InternalDataFrame_primary_geometry_column_index__impl(SEXP self__) {
   return handle_result(res);
 }
 
+SEXP savvy_InternalDataFrame_select_indices__impl(SEXP self__,
+                                                  SEXP c_arg__names,
+                                                  SEXP c_arg__indices) {
+  SEXP res = savvy_InternalDataFrame_select_indices__ffi(self__, c_arg__names,
+                                                         c_arg__indices);
+  return handle_result(res);
+}
+
 SEXP savvy_InternalDataFrame_show__impl(SEXP self__, SEXP c_arg__ctx,
                                         SEXP c_arg__width_chars,
                                         SEXP c_arg__ascii, SEXP c_arg__limit) {
@@ -236,6 +244,8 @@ static const R_CallMethodDef CallEntries[] = {
      (DL_FUNC)&savvy_InternalDataFrame_limit__impl, 2},
     {"savvy_InternalDataFrame_primary_geometry_column_index__impl",
      (DL_FUNC)&savvy_InternalDataFrame_primary_geometry_column_index__impl, 1},
+    {"savvy_InternalDataFrame_select_indices__impl",
+     (DL_FUNC)&savvy_InternalDataFrame_select_indices__impl, 3},
     {"savvy_InternalDataFrame_show__impl",
      (DL_FUNC)&savvy_InternalDataFrame_show__impl, 5},
     {"savvy_InternalDataFrame_to_arrow_schema__impl",
diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h
index 268c046a..201039e1 100644
--- a/r/sedonadb/src/rust/api.h
+++ b/r/sedonadb/src/rust/api.h
@@ -44,6 +44,8 @@ SEXP savvy_InternalDataFrame_compute__ffi(SEXP self__, SEXP c_arg__ctx);
 SEXP savvy_InternalDataFrame_count__ffi(SEXP self__);
 SEXP savvy_InternalDataFrame_limit__ffi(SEXP self__, SEXP c_arg__n);
 SEXP savvy_InternalDataFrame_primary_geometry_column_index__ffi(SEXP self__);
+SEXP savvy_InternalDataFrame_select_indices__ffi(SEXP self__, SEXP c_arg__names,
+                                                 SEXP c_arg__indices);
 SEXP savvy_InternalDataFrame_show__ffi(SEXP self__, SEXP c_arg__ctx,
                                        SEXP c_arg__width_chars,
                                        SEXP c_arg__ascii, SEXP c_arg__limit);
diff --git a/r/sedonadb/src/rust/src/dataframe.rs b/r/sedonadb/src/rust/src/dataframe.rs
index 6f6f98c2..45a3cf97 100644
--- a/r/sedonadb/src/rust/src/dataframe.rs
+++ b/r/sedonadb/src/rust/src/dataframe.rs
@@ -14,23 +14,22 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use std::ptr::swap_nonoverlapping;
-use std::sync::Arc;
 
 use arrow_array::ffi::FFI_ArrowSchema;
 use arrow_array::ffi_stream::FFI_ArrowArrayStream;
 use arrow_array::{RecordBatchIterator, RecordBatchReader};
 use datafusion::catalog::MemTable;
-use datafusion::{logical_expr::SortExpr, prelude::DataFrame};
+use datafusion::prelude::DataFrame;
 use datafusion_common::Column;
-use datafusion_expr::Expr;
+use datafusion_expr::{select_expr::SelectExpr, Expr, SortExpr};
 use datafusion_ffi::table_provider::FFI_TableProvider;
-use savvy::{savvy, savvy_err, IntoExtPtrSexp, Result};
+use savvy::{savvy, savvy_err, sexp, IntoExtPtrSexp, Result};
 use sedona::context::{SedonaDataFrame, SedonaWriteOptions};
 use sedona::reader::SedonaStreamReader;
 use sedona::show::{DisplayMode, DisplayTableOptions};
 use sedona_geoparquet::options::{GeoParquetVersion, TableGeoParquetOptions};
 use sedona_schema::schema::SedonaSchema;
+use std::{iter::zip, ptr::swap_nonoverlapping, sync::Arc};
 use tokio::runtime::Runtime;
 
 use crate::context::InternalContext;
@@ -292,4 +291,21 @@ impl InternalDataFrame {
 
         Ok(())
     }
+
+    fn select_indices(&self, names: sexp::Sexp, indices: sexp::Sexp) -> Result<InternalDataFrame> {
+        let names_strsxp = savvy::StringSexp::try_from(names)?;
+        let indices_intsxp = savvy::IntegerSexp::try_from(indices)?;
+
+        let df_schema = self.inner.schema();
+        let exprs = zip(names_strsxp.iter(), indices_intsxp.iter())
+            .map(|(name, index)| {
+                let (table_ref, field) = df_schema.qualified_field(usize::try_from(*index)?);
+                let column = Column::new(table_ref.cloned(), field.name());
+                Ok(SelectExpr::Expression(Expr::Column(column).alias(name)))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let inner = self.inner.clone().select(exprs)?;
+        Ok(new_data_frame(inner, self.runtime.clone()))
+    }
 }
diff --git a/r/sedonadb/tests/testthat/test-pkg-dplyr.R b/r/sedonadb/tests/testthat/test-pkg-dplyr.R
new file mode 100644
index 00000000..399a718a
--- /dev/null
+++ b/r/sedonadb/tests/testthat/test-pkg-dplyr.R
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("select() works for sedonadb_dataframe", {
+  skip_if_not_installed("dplyr")
+
+  df <- sd_sql("SELECT 1 as one, 'two' as two, 3.0 as \"THREE\"")
+
+  expect_identical(
+    df |> dplyr::select(2:3) |> dplyr::collect(),
+    tibble::tibble(two = "two", THREE = 3.0)
+  )
+
+  expect_identical(
+    df |> dplyr::select(three_renamed = THREE, one) |> dplyr::collect(),
+    tibble::tibble(three_renamed = 3.0, one = 1)
+  )
+
+  expect_identical(
+    df |> dplyr::select(TWO = two) |> dplyr::collect(),
+    tibble::tibble(TWO = "two")
+  )
+})

Reply via email to