This is an automated email from the ASF dual-hosted git repository.

npr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5b968b3090 GH-43748: [R] Handle package_version in safe_r_metadata 
(#43895)
5b968b3090 is described below

commit 5b968b3090c744ae26150c93138ba819ed9ebb8e
Author: Neal Richardson <[email protected]>
AuthorDate: Wed Sep 11 21:13:55 2024 -0400

    GH-43748: [R] Handle package_version in safe_r_metadata (#43895)
    
    ### Rationale for this change
    
    See #43748. There is what appears to be a bug in R's
    `[[.numeric_version` implementation that leads to infinite recursion.
    
    Edit: after some digging in R source, this appears to be as designed.
    And other list subclasses that have methods to make them behave like
    atomic types, like `POSIXlt`, also have this.
    
    ### What changes are included in this PR?
    
    When recursing into list objects, `unclass()` them first to get the raw
    list behavior. Also apply the checking to the `attributes()` before
    reapplying them.
    
    ### Are these changes tested?
    
    yes
    
    ### Are there any user-facing changes?
    
    Fewer bugs!
    
    * GitHub Issue: #43748
---
 r/R/metadata.R                   | 21 ++++++++++++++++++++-
 r/tests/testthat/test-metadata.R | 29 ++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/r/R/metadata.R b/r/R/metadata.R
index ba73f08578..61e412be62 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -107,15 +107,34 @@ safe_r_metadata <- function(metadata, on_save = FALSE) {
   # and mutate the `types_removed` variable outside of it.
   check_r_metadata_types_recursive <- function(x) {
     allowed_types <- c("character", "double", "integer", "logical", "complex", 
"list", "NULL")
+    # Pull out the attributes so we can also check them
+    x_attrs <- attributes(x)
+
     if (is.list(x)) {
+      # Add special handling for some base R classes that are list but
+      # their [[ methods leads to infinite recursion.
+      # We unclass here and then reapply attributes after.
+      x <- unclass(x)
+
       types <- map_chr(x, typeof)
-      x[types == "list"] <- map(x[types == "list"], 
check_r_metadata_types_recursive)
       ok <- types %in% allowed_types
       if (!all(ok)) {
         # Record the invalid types, then remove the offending elements
         types_removed <<- c(types_removed, setdiff(types, allowed_types))
         x <- x[ok]
+        if ("names" %in% names(x_attrs)) {
+          # Also prune from the attributes since we'll re-add later
+          x_attrs[["names"]] <- x_attrs[["names"]][ok]
+        }
       }
+      # For the rest, recurse
+      x <- map(x, check_r_metadata_types_recursive)
+    }
+
+    # attributes() of a named list will return a list with a "names" attribute,
+    # so it will recurse indefinitely.
+    if (!is.null(x_attrs) && !identical(x_attrs, list(names = names(x)))) {
+      attributes(x) <- check_r_metadata_types_recursive(x_attrs)
     }
     x
   }
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 175e7ef3b6..06aa1535e0 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -149,6 +149,15 @@ arbitrary\040code\040was\040just\040executed
   )
 })
 
+test_that("R metadata processing doesn't choke on packageVersion() output", {
+  metadata <- list(version = packageVersion("base"))
+  expect_identical(safe_r_metadata(metadata), metadata)
+
+  df <- example_data[1:6]
+  attr(df, "version") <- packageVersion("base")
+  expect_equal_data_frame(Table$create(df), df)
+})
+
 test_that("Complex or unsafe attributes are pruned from R metadata, if they 
exist", {
   tab <- Table$create(example_data[1:6])
   bad <- new.env()
@@ -161,18 +170,24 @@ i Type: \"environment\"
 > If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)` 
 > to preserve them.",
     fixed = TRUE
   )
+  # Try hiding it even further, in attributes
+  bad_meta <- list(attributes = structure(list(), hidden_attr = bad))
+  tab$metadata <- list(r = rawToChar(serialize(bad_meta, NULL, ascii = TRUE)))
+  expect_warning(
+    as.data.frame(tab),
+    "Potentially unsafe or invalid elements have been discarded from R 
metadata.
+i Type: \"environment\"
+> If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)` 
to preserve them.",
+    fixed = TRUE
+  )
+
   # You can set an option to allow them through.
   # It still warns, just differently, and it doesn't prune the attributes
   withr::local_options(list("arrow.unsafe_metadata" = TRUE))
   expect_warning(
-    expect_warning(
-      as.data.frame(tab),
-      "R metadata may have unsafe or invalid elements
+    as.data.frame(tab),
+    "R metadata may have unsafe or invalid elements
 i Type: \"environment\""
-    ),
-    # This particular example ultimately fails because it's not a list
-    "Invalid metadata$r",
-    fixed = TRUE
   )
 })
 

Reply via email to