This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new f525b99b0f GH-38084: [R] Do not memory map when explicitly checking 
for file removal (#38085)
f525b99b0f is described below

commit f525b99b0fa0c49da656b0f45c4a0689934621d5
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Oct 6 17:09:26 2023 -0300

    GH-38084: [R] Do not memory map when explicitly checking for file removal 
(#38085)
    
    ### Rationale for this change
    
    We have failing CI on Windows because removing files that have memory 
mapped sections is not allowed.
    
    ### What changes are included in this PR?
    
    Pass `mmap = FALSE` when we explicitly check for removal. I think the other 
cases are OK because `unlink()` fails silently (which is maybe why we haven't 
seen mass CI failures because of this issue before).
    
    ### Are these changes tested?
    
    The changes are covered by existing tests.
    
    ### Are there any user-facing changes?
    
    No.
    
    * Closes: #38084
    
    Authored-by: Dewey Dunnington <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 r/R/parquet.R                             |  6 ++++--
 r/man/read_parquet.Rd                     |  3 +++
 r/tests/testthat/test-parquet.R           | 20 +++++++++++++-------
 r/tests/testthat/test-read-record-batch.R |  2 +-
 4 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/r/R/parquet.R b/r/R/parquet.R
index a58f7810b6..74f51767a2 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -22,6 +22,7 @@
 #'
 #' @inheritParams read_feather
 #' @param props [ParquetArrowReaderProperties]
+#' @param mmap Use TRUE to use memory mapping where possible
 #' @param ... Additional arguments passed to `ParquetFileReader$create()`
 #'
 #' @return A `tibble` if `as_data_frame` is `TRUE` (the default), or an
@@ -43,14 +44,15 @@ read_parquet <- function(file,
                          # Assembling `props` yourself is something you do with
                          # ParquetFileReader but not here.
                          props = ParquetArrowReaderProperties$create(),
+                         mmap = TRUE,
                          ...) {
   if (!inherits(file, "RandomAccessFile")) {
     # Compression is handled inside the parquet file format, so we don't need
     # to detect from the file extension and wrap in a CompressedInputStream
-    file <- make_readable_file(file)
+    file <- make_readable_file(file, mmap = mmap)
     on.exit(file$close())
   }
-  reader <- ParquetFileReader$create(file, props = props, ...)
+  reader <- ParquetFileReader$create(file, props = props, mmap = mmap, ...)
 
   col_select <- enquo(col_select)
   if (!quo_is_null(col_select)) {
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
index 3bb76cc2e3..4f19365295 100644
--- a/r/man/read_parquet.Rd
+++ b/r/man/read_parquet.Rd
@@ -9,6 +9,7 @@ read_parquet(
   col_select = NULL,
   as_data_frame = TRUE,
   props = ParquetArrowReaderProperties$create(),
+  mmap = TRUE,
   ...
 )
 }
@@ -29,6 +30,8 @@ an Arrow \link{Table}?}
 
 \item{props}{\link{ParquetArrowReaderProperties}}
 
+\item{mmap}{Use TRUE to use memory mapping where possible}
+
 \item{...}{Additional arguments passed to \code{ParquetFileReader$create()}}
 }
 \value{
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index dbc4a04735..f2359116fd 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -32,7 +32,7 @@ test_that("simple int column roundtrip", {
   pq_tmp_file <- tempfile() # You can specify the .parquet here but that's 
probably not necessary
 
   write_parquet(df, pq_tmp_file)
-  df_read <- read_parquet(pq_tmp_file)
+  df_read <- read_parquet(pq_tmp_file, mmap = FALSE)
   expect_equal(df, df_read)
   # Make sure file connection is cleaned up
   expect_error(file.remove(pq_tmp_file), NA)
@@ -288,15 +288,21 @@ test_that("write_parquet() returns its input", {
 
 test_that("write_parquet() handles version argument", {
   df <- tibble::tibble(x = 1:5)
-  tf <- tempfile()
-  on.exit(unlink(tf))
 
-  purrr::walk(list("1.0", "2.4", "2.6", "latest", 1.0, 2.4, 2.6, 1L), ~ {
-    write_parquet(df, tf, version = .x)
+  versions <- list("1.0", "2.4", "2.6", "latest", 1.0, 2.4, 2.6, 1L)
+  purrr::walk(versions, function(x) {
+    tf <- tempfile()
+    on.exit(unlink(tf))
+
+    write_parquet(df, tf, version = x)
     expect_identical(read_parquet(tf), df)
   })
-  purrr::walk(list("3.0", 3.0, 3L, "A"), ~ {
-    expect_error(write_parquet(df, tf, version = .x))
+
+  invalid_versions <- list("3.0", 3.0, 3L, "A")
+  purrr::walk(invalid_versions, function(x) {
+    tf <- tempfile()
+    on.exit(unlink(tf))
+    expect_error(write_parquet(df, tf, version = x))
   })
 })
 
diff --git a/r/tests/testthat/test-read-record-batch.R 
b/r/tests/testthat/test-read-record-batch.R
index ba109da6c6..f62330ae0e 100644
--- a/r/tests/testthat/test-read-record-batch.R
+++ b/r/tests/testthat/test-read-record-batch.R
@@ -38,7 +38,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader 
roundtrips", {
   writer$close()
   stream$close()
 
-  expect_equal(read_feather(tf, as_data_frame = FALSE), tab)
+  expect_equal(read_feather(tf, as_data_frame = FALSE, mmap = FALSE), tab)
   # Make sure connections are closed
   expect_error(file.remove(tf), NA)
   skip_on_os("windows") # This should pass, we've closed the stream

Reply via email to