This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 9ea91ca01f GH-48057: [R] Slow reading performance caused by apply_arrow_r_metadata() looping through all columns, including NULL ones (#48104)
9ea91ca01f is described below

commit 9ea91ca01fb300d8b74d19652d1ea68d39c2564d
Author: Nic Crane <[email protected]>
AuthorDate: Thu Nov 13 17:49:14 2025 +0000

    GH-48057: [R] Slow reading performance caused by apply_arrow_r_metadata() looping through all columns, including NULL ones (#48104)
    
    ### Rationale for this change
    
    Reading is slow because apply_arrow_r_metadata() loops through the metadata of every column, including columns whose metadata is NULL.
    
    ### What changes are included in this PR?
    
    Skip the per-column loop in apply_arrow_r_metadata() when the metadata of every column is NULL.
    
    ### Are these changes tested?
    
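    The performance improvement is not covered by unit tests (see the comment below with microbenchmarks), but a regression test for the behaviour is added in r/tests/testthat/test-metadata.R.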
    
    ### Are there any user-facing changes?
    
    No
    
    * GitHub Issue: #48057
    
    Authored-by: Nic Crane <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/metadata.R                   |  2 +-
 r/tests/testthat/test-metadata.R | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/r/R/metadata.R b/r/R/metadata.R
index 93aa5018f6..206a18d09f 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -175,7 +175,7 @@ apply_arrow_r_metadata <- function(x, r_metadata) {
       columns_metadata <- r_metadata$columns
       if (is.data.frame(x)) {
         # if columns metadata exists, apply it here
-        if (length(names(x)) && !is.null(columns_metadata)) {
+        if (length(names(x)) && !is.null(columns_metadata) && 
!all(map_lgl(columns_metadata, is.null))) {
           for (name in intersect(names(columns_metadata), names(x))) {
             x[[name]] <- apply_arrow_r_metadata(x[[name]], 
columns_metadata[[name]])
           }
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 869bde5b4d..90b9f599ec 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -490,3 +490,30 @@ test_that("data.frame class attribute is not saved", {
   df_arrow <- arrow_table(df)
   expect_identical(df_arrow$r_metadata, list(attributes = list(foo = "bar"), 
columns = list(x = NULL)))
 })
+
+test_that("apply_arrow_r_metadata doesn't add in metadata from plain 
data.frame objects - GH48057", {
+  # with just a plain df the (empty) column metadata is not preserved
+  plain_df <- data.frame(x = 1:5)
+  plain_df_arrow <- arrow_table(plain_df)
+
+  expect_equal(plain_df_arrow$metadata$r$columns, list(x = NULL))
+
+  plain_df_no_metadata <- plain_df_arrow$to_data_frame()
+  plain_df_with_metadata <- apply_arrow_r_metadata(plain_df_no_metadata, 
plain_df_arrow$metadata$r)
+
+  expect_identical(plain_df_no_metadata, plain_df_with_metadata)
+
+  # with more complex column metadata - it preserves it
+  spicy_df_arrow <- arrow_table(haven_data)
+
+  expect_equal(
+    spicy_df_arrow$metadata$r$columns,
+    list(num = list(attributes = list(format.spss = "F8.2"), columns = NULL), 
cat_int = NULL, cat_chr = NULL)
+  )
+
+  spicy_df_no_metadata <- spicy_df_arrow$to_data_frame()
+  spicy_df_with_metadata <- apply_arrow_r_metadata(spicy_df_no_metadata, 
spicy_df_arrow$metadata$r)
+
+  expect_null(attr(spicy_df_no_metadata$num, "format.spss"))
+  expect_equal(attr(spicy_df_with_metadata$num, "format.spss"), "F8.2")
+})

Reply via email to