This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 230859b79d GH-42220: [R] handle vctrs_rcrd extension type in metadata 
cleaning (#42226)
230859b79d is described below

commit 230859b79dbd6d138f6521961c7b54c1874299a0
Author: Neal Richardson <[email protected]>
AuthorDate: Sat Jun 22 04:01:46 2024 -0400

    GH-42220: [R] handle vctrs_rcrd extension type in metadata cleaning (#42226)
    
    ### Rationale for this change
    
    See https://github.com/apache/arrow/issues/42220. Extension type metadata 
serialization works slightly differently from the general R metadata path: it 
uses a vctrs::ptype object, which is 0-length with attributes. The sanitization 
needs to happen on `attributes()`, not the object being serialized.
    
    The error was caused because `vctrs_rcrd` has a special `[<-` method, so 
iterating over it and assigning back in the sanitized version (even if it were 
the right thing to do) failed.
    
    ### What changes are included in this PR?
    
    Sanitize the `attributes()` of vctrs ptypes.
    
    ### Are these changes tested?
    
    yes
    
    ### Are there any user-facing changes?
    
    other than the bug being fixed, no
    * GitHub Issue: #42220
    
    Authored-by: Neal Richardson <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/extension.R                   | 7 +++++--
 r/tests/testthat/test-extension.R | 8 ++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/r/R/extension.R b/r/R/extension.R
index 3529144e11..e5b12b80ae 100644
--- a/r/R/extension.R
+++ b/r/R/extension.R
@@ -429,7 +429,8 @@ VctrsExtensionType <- R6Class("VctrsExtensionType",
       paste0(capture.output(print(self$ptype())), collapse = "\n")
     },
     deserialize_instance = function() {
-      private$.ptype <- 
safe_r_metadata(safe_unserialize(self$extension_metadata()))
+      private$.ptype <- safe_unserialize(self$extension_metadata())
+      attributes(private$.ptype) <- safe_r_metadata(attributes(private$.ptype))
     },
     ExtensionEquals = function(other) {
       inherits(other, "VctrsExtensionType") && identical(self$ptype(), 
other$ptype())
@@ -510,11 +511,13 @@ vctrs_extension_array <- function(x, ptype = 
vctrs::vec_ptype(x),
 vctrs_extension_type <- function(x,
                                  storage_type = 
infer_type(vctrs::vec_data(x))) {
   ptype <- vctrs::vec_ptype(x)
+  # Make sure there are no unsupported objects buried in there
+  attributes(ptype) <- safe_r_metadata(attributes(ptype))
 
   new_extension_type(
     storage_type = storage_type,
     extension_name = "arrow.r.vctrs",
-    extension_metadata = serialize(ptype, NULL),
+    extension_metadata = serialize(ptype, NULL, ascii = TRUE),
     type_class = VctrsExtensionType
   )
 }
diff --git a/r/tests/testthat/test-extension.R 
b/r/tests/testthat/test-extension.R
index 8b3d7d8aaa..db26a70acb 100644
--- a/r/tests/testthat/test-extension.R
+++ b/r/tests/testthat/test-extension.R
@@ -343,3 +343,11 @@ test_that("Dataset/arrow_dplyr_query can roundtrip 
extension types", {
 
   expect_identical(unclass(roundtripped$extension), roundtripped$letter)
 })
+
+test_that("Handling vctrs_rcrd type", {
+  df <- data.frame(
+    x = vctrs::new_rcrd(fields = list(special = 1:3), class = "special")
+  )
+  tab <- arrow_table(df)
+  expect_identical(as.data.frame(tab), df)
+})

Reply via email to