This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 929894e415 GH-35035: [R] Implement names<- for Schemas (#35172)
929894e415 is described below
commit 929894e415bdb633f135903c1fdcbc83cb223fac
Author: Nic Crane <[email protected]>
AuthorDate: Fri Apr 21 15:35:16 2023 +0300
GH-35035: [R] Implement names<- for Schemas (#35172)
* Closes: #35035
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/NAMESPACE | 1 +
r/R/arrowExports.R | 4 ++++
r/R/schema.R | 29 +++++++++++++++++++++++++++++
r/src/arrowExports.cpp | 10 ++++++++++
r/src/schema.cpp | 6 ++++++
r/tests/testthat/test-schema.R | 16 ++++++++++++++++
6 files changed, 66 insertions(+)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 7ab8d5c902..7a8efe0ca3 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -21,6 +21,7 @@ S3method("[[",StructArray)
S3method("[[<-",ArrowTabular)
S3method("[[<-",Schema)
S3method("names<-",ArrowTabular)
+S3method("names<-",Schema)
S3method(Math,ArrowDatum)
S3method(Ops,ArrowDatum)
S3method(Ops,Expression)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index a8e8f5b8af..b494623eed 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1984,6 +1984,10 @@ Schema__WithMetadata <- function(schema, metadata) {
.Call(`_arrow_Schema__WithMetadata`, schema, metadata)
}
+Schema__WithNames <- function(schema, names) {
+ .Call(`_arrow_Schema__WithNames`, schema, names)
+}
+
Schema__serialize <- function(schema) {
.Call(`_arrow_Schema__serialize`, schema)
}
diff --git a/r/R/schema.R b/r/R/schema.R
index 93e826eff2..c8316854c2 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -130,6 +130,32 @@ Schema <- R6Class("Schema",
codes <- set_names(codes, names)
call2("schema", !!!codes)
+ },
+ WithNames = function(names) {
+ if (!inherits(names, "character")) {
+ abort(
+ paste("Replacement names must be character vector, not", class(names)[1])
+ )
+ }
+
+ if (length(names) != length(self$names)) {
+ abort(
+ c(
+ "Replacement names must contain same number of items as current names",
+ i = paste("Current names length:", length(self$names)),
+ x = paste("Replacement names length:", length(names))
+ )
+ )
+ }
+
+ existing_metadata <- self$metadata
+ renamed_schema <- Schema__WithNames(self, names)
+
+ # if we have R metadata containing column names, update names there too
+ if (!is.null(existing_metadata$r$columns)) {
+ names(existing_metadata$r$columns) <- names
+ }
+ renamed_schema$WithMetadata(existing_metadata)
}
),
active = list(
@@ -388,3 +414,6 @@ as_schema.StructType <- function(x, ...) {
as.data.frame.Schema <- function(x, row.names = NULL, optional = FALSE, ...) {
as.data.frame(Table__from_schema(x))
}
+
+#' @export
+`names<-.Schema` <- function(x, value) x$WithNames(value)
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 55c59f4b38..80877e827d 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -5150,6 +5150,15 @@ BEGIN_CPP11
END_CPP11
}
// schema.cpp
+std::shared_ptr<arrow::Schema> Schema__WithNames(const std::shared_ptr<arrow::Schema>& schema, const std::vector<std::string>& names);
+extern "C" SEXP _arrow_Schema__WithNames(SEXP schema_sexp, SEXP names_sexp){
+BEGIN_CPP11
+ arrow::r::Input<const std::shared_ptr<arrow::Schema>&>::type schema(schema_sexp);
+ arrow::r::Input<const std::vector<std::string>&>::type names(names_sexp);
+ return cpp11::as_sexp(Schema__WithNames(schema, names));
+END_CPP11
+}
+// schema.cpp
cpp11::writable::raws Schema__serialize(const std::shared_ptr<arrow::Schema>& schema);
extern "C" SEXP _arrow_Schema__serialize(SEXP schema_sexp){
BEGIN_CPP11
@@ -5994,6 +6003,7 @@ static const R_CallMethodDef CallEntries[] = {
{ "_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1},
{ "_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1},
{ "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2},
+ { "_arrow_Schema__WithNames", (DL_FUNC) &_arrow_Schema__WithNames, 2},
{ "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1},
{ "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3},
{ "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1},
diff --git a/r/src/schema.cpp b/r/src/schema.cpp
index 0dac188ec0..cf95970730 100644
--- a/r/src/schema.cpp
+++ b/r/src/schema.cpp
@@ -143,6 +143,12 @@ std::shared_ptr<arrow::Schema> Schema__WithMetadata(
return schema->WithMetadata(std::move(kv));
}
+// [[arrow::export]]
+std::shared_ptr<arrow::Schema> Schema__WithNames(
+ const std::shared_ptr<arrow::Schema>& schema, const std::vector<std::string>& names) {
+ return ValueOrStop(schema->WithNames(names));
+}
+
// [[arrow::export]]
cpp11::writable::raws Schema__serialize(const std::shared_ptr<arrow::Schema>& schema) {
auto out = ValueOrStop(arrow::ipc::SerializeSchema(*schema));
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index 3a35569f7f..24776e6d0c 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -260,3 +260,19 @@ test_that("as_schema() works for StructType objects", {
struct_type <- struct(col1 = int32())
expect_equal(as_schema(struct_type), schema(col1 = int32()))
})
+
+test_that("schema name assignment", {
+ schm <- schema(x = int8(), y = string(), z = double())
+ expect_identical(names(schm), c("x", "y", "z"))
+ names(schm) <- c("a", "b", "c")
+ expect_identical(names(schm), c("a", "b", "c"))
+ expect_error(names(schm) <- "f", regexp = "Replacement names must contain same number of items as current names")
+ expect_error(names(schm) <- NULL, regexp = "Replacement names must be character vector, not NULL")
+
+ # Test that R metadata is updated appropriately
+ df <- data.frame(x = 1:3, y = c("a", "b", "c"))
+ schm2 <- arrow_table(df)$schema
+ names(schm2) <- c("col1", "col2")
+ expect_identical(names(schm2), c("col1", "col2"))
+ expect_identical(names(schm2$r_metadata$columns), c("col1", "col2"))
+})