This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 230859b79d GH-42220: [R] handle vctrs_rcrd extension type in metadata
cleaning (#42226)
230859b79d is described below
commit 230859b79dbd6d138f6521961c7b54c1874299a0
Author: Neal Richardson <[email protected]>
AuthorDate: Sat Jun 22 04:01:46 2024 -0400
GH-42220: [R] handle vctrs_rcrd extension type in metadata cleaning (#42226)
### Rationale for this change
See https://github.com/apache/arrow/issues/42220. Extension type metadata
serialization works slightly differently from the general R metadata path: it
uses a vctrs::ptype object, which is 0-length with attributes. The sanitization
needs to happen on `attributes()`, not the object being serialized.
The error was caused because `vctrs_rcrd` has a special `[<-` method, so
iterating over it and assigning back in the sanitized version (even if it were
the right thing to do) failed.
### What changes are included in this PR?
Sanitize the `attributes()` of vctrs ptypes.
### Are these changes tested?
yes
### Are there any user-facing changes?
other than the bug being fixed, no
* GitHub Issue: #42220
Authored-by: Neal Richardson <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/extension.R | 7 +++++--
r/tests/testthat/test-extension.R | 8 ++++++++
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/r/R/extension.R b/r/R/extension.R
index 3529144e11..e5b12b80ae 100644
--- a/r/R/extension.R
+++ b/r/R/extension.R
@@ -429,7 +429,8 @@ VctrsExtensionType <- R6Class("VctrsExtensionType",
paste0(capture.output(print(self$ptype())), collapse = "\n")
},
deserialize_instance = function() {
- private$.ptype <-
safe_r_metadata(safe_unserialize(self$extension_metadata()))
+ private$.ptype <- safe_unserialize(self$extension_metadata())
+ attributes(private$.ptype) <- safe_r_metadata(attributes(private$.ptype))
},
ExtensionEquals = function(other) {
inherits(other, "VctrsExtensionType") && identical(self$ptype(),
other$ptype())
@@ -510,11 +511,13 @@ vctrs_extension_array <- function(x, ptype =
vctrs::vec_ptype(x),
vctrs_extension_type <- function(x,
storage_type =
infer_type(vctrs::vec_data(x))) {
ptype <- vctrs::vec_ptype(x)
+ # Make sure there are no unsupported objects buried in there
+ attributes(ptype) <- safe_r_metadata(attributes(ptype))
new_extension_type(
storage_type = storage_type,
extension_name = "arrow.r.vctrs",
- extension_metadata = serialize(ptype, NULL),
+ extension_metadata = serialize(ptype, NULL, ascii = TRUE),
type_class = VctrsExtensionType
)
}
diff --git a/r/tests/testthat/test-extension.R
b/r/tests/testthat/test-extension.R
index 8b3d7d8aaa..db26a70acb 100644
--- a/r/tests/testthat/test-extension.R
+++ b/r/tests/testthat/test-extension.R
@@ -343,3 +343,11 @@ test_that("Dataset/arrow_dplyr_query can roundtrip
extension types", {
expect_identical(unclass(roundtripped$extension), roundtripped$letter)
})
+
+test_that("Handling vctrs_rcrd type", {
+ df <- data.frame(
+ x = vctrs::new_rcrd(fields = list(special = 1:3), class = "special")
+ )
+ tab <- arrow_table(df)
+ expect_identical(as.data.frame(tab), df)
+})