This is an automated email from the ASF dual-hosted git repository.
npr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5b968b3090 GH-43748: [R] Handle package_version in safe_r_metadata
(#43895)
5b968b3090 is described below
commit 5b968b3090c744ae26150c93138ba819ed9ebb8e
Author: Neal Richardson <[email protected]>
AuthorDate: Wed Sep 11 21:13:55 2024 -0400
GH-43748: [R] Handle package_version in safe_r_metadata (#43895)
### Rationale for this change
See #43748. There is what appears to be a bug in R's
`[[.numeric_version` implementation that leads to infinite recursion.
Edit: after some digging in R source, this appears to be as designed.
And other list subclasses that have methods to make them behave like
atomic types, like `POSIXlt`, also have this.
### What changes are included in this PR?
When recursing into list objects, `unclass()` them first to get the raw
list behavior. Also apply the checking to the `attributes()` before
reapplying them.
### Are these changes tested?
yes
### Are there any user-facing changes?
Fewer bugs!
* GitHub Issue: #43748
---
r/R/metadata.R | 21 ++++++++++++++++++++-
r/tests/testthat/test-metadata.R | 29 ++++++++++++++++++++++-------
2 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/r/R/metadata.R b/r/R/metadata.R
index ba73f08578..61e412be62 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -107,15 +107,34 @@ safe_r_metadata <- function(metadata, on_save = FALSE) {
# and mutate the `types_removed` variable outside of it.
check_r_metadata_types_recursive <- function(x) {
allowed_types <- c("character", "double", "integer", "logical", "complex",
"list", "NULL")
+ # Pull out the attributes so we can also check them
+ x_attrs <- attributes(x)
+
if (is.list(x)) {
+ # Add special handling for some base R classes that are list but
+ # their [[ methods leads to infinite recursion.
+ # We unclass here and then reapply attributes after.
+ x <- unclass(x)
+
types <- map_chr(x, typeof)
- x[types == "list"] <- map(x[types == "list"],
check_r_metadata_types_recursive)
ok <- types %in% allowed_types
if (!all(ok)) {
# Record the invalid types, then remove the offending elements
types_removed <<- c(types_removed, setdiff(types, allowed_types))
x <- x[ok]
+ if ("names" %in% names(x_attrs)) {
+ # Also prune from the attributes since we'll re-add later
+ x_attrs[["names"]] <- x_attrs[["names"]][ok]
+ }
}
+ # For the rest, recurse
+ x <- map(x, check_r_metadata_types_recursive)
+ }
+
+ # attributes() of a named list will return a list with a "names" attribute,
+ # so it will recurse indefinitely.
+ if (!is.null(x_attrs) && !identical(x_attrs, list(names = names(x)))) {
+ attributes(x) <- check_r_metadata_types_recursive(x_attrs)
}
x
}
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 175e7ef3b6..06aa1535e0 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -149,6 +149,15 @@ arbitrary\040code\040was\040just\040executed
)
})
+test_that("R metadata processing doesn't choke on packageVersion() output", {
+ metadata <- list(version = packageVersion("base"))
+ expect_identical(safe_r_metadata(metadata), metadata)
+
+ df <- example_data[1:6]
+ attr(df, "version") <- packageVersion("base")
+ expect_equal_data_frame(Table$create(df), df)
+})
+
test_that("Complex or unsafe attributes are pruned from R metadata, if they
exist", {
tab <- Table$create(example_data[1:6])
bad <- new.env()
@@ -161,18 +170,24 @@ i Type: \"environment\"
> If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)`
> to preserve them.",
fixed = TRUE
)
+ # Try hiding it even further, in attributes
+ bad_meta <- list(attributes = structure(list(), hidden_attr = bad))
+ tab$metadata <- list(r = rawToChar(serialize(bad_meta, NULL, ascii = TRUE)))
+ expect_warning(
+ as.data.frame(tab),
+ "Potentially unsafe or invalid elements have been discarded from R
metadata.
+i Type: \"environment\"
+> If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)`
to preserve them.",
+ fixed = TRUE
+ )
+
# You can set an option to allow them through.
# It still warns, just differently, and it doesn't prune the attributes
withr::local_options(list("arrow.unsafe_metadata" = TRUE))
expect_warning(
- expect_warning(
- as.data.frame(tab),
- "R metadata may have unsafe or invalid elements
+ as.data.frame(tab),
+ "R metadata may have unsafe or invalid elements
i Type: \"environment\""
- ),
- # This particular example ultimately fails because it's not a list
- "Invalid metadata$r",
- fixed = TRUE
)
})