This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f525b99b0f GH-38084: [R] Do not memory map when explicitly checking
for file removal (#38085)
f525b99b0f is described below
commit f525b99b0fa0c49da656b0f45c4a0689934621d5
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Oct 6 17:09:26 2023 -0300
GH-38084: [R] Do not memory map when explicitly checking for file removal
(#38085)
### Rationale for this change
We have failing CI on Windows because removing files that have memory
mapped sections is not allowed.
### What changes are included in this PR?
Pass `mmap = FALSE` when we explicitly check for removal. I think the other
cases are OK because `unlink()` fails silently (which is maybe why we haven't
seen mass CI failures because of this issue before).
### Are these changes tested?
The changes are covered by existing tests.
### Are there any user-facing changes?
No.
* Closes: #38084
Authored-by: Dewey Dunnington <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
r/R/parquet.R | 6 ++++--
r/man/read_parquet.Rd | 3 +++
r/tests/testthat/test-parquet.R | 20 +++++++++++++-------
r/tests/testthat/test-read-record-batch.R | 2 +-
4 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/r/R/parquet.R b/r/R/parquet.R
index a58f7810b6..74f51767a2 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -22,6 +22,7 @@
#'
#' @inheritParams read_feather
#' @param props [ParquetArrowReaderProperties]
+#' @param mmap Use TRUE to use memory mapping where possible
#' @param ... Additional arguments passed to `ParquetFileReader$create()`
#'
#' @return A `tibble` if `as_data_frame` is `TRUE` (the default), or an
@@ -43,14 +44,15 @@ read_parquet <- function(file,
# Assembling `props` yourself is something you do with
# ParquetFileReader but not here.
props = ParquetArrowReaderProperties$create(),
+ mmap = TRUE,
...) {
if (!inherits(file, "RandomAccessFile")) {
# Compression is handled inside the parquet file format, so we don't need
# to detect from the file extension and wrap in a CompressedInputStream
- file <- make_readable_file(file)
+ file <- make_readable_file(file, mmap = mmap)
on.exit(file$close())
}
- reader <- ParquetFileReader$create(file, props = props, ...)
+ reader <- ParquetFileReader$create(file, props = props, mmap = mmap, ...)
col_select <- enquo(col_select)
if (!quo_is_null(col_select)) {
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
index 3bb76cc2e3..4f19365295 100644
--- a/r/man/read_parquet.Rd
+++ b/r/man/read_parquet.Rd
@@ -9,6 +9,7 @@ read_parquet(
col_select = NULL,
as_data_frame = TRUE,
props = ParquetArrowReaderProperties$create(),
+ mmap = TRUE,
...
)
}
@@ -29,6 +30,8 @@ an Arrow \link{Table}?}
\item{props}{\link{ParquetArrowReaderProperties}}
+\item{mmap}{Use TRUE to use memory mapping where possible}
+
\item{...}{Additional arguments passed to \code{ParquetFileReader$create()}}
}
\value{
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index dbc4a04735..f2359116fd 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -32,7 +32,7 @@ test_that("simple int column roundtrip", {
pq_tmp_file <- tempfile() # You can specify the .parquet here but that's
probably not necessary
write_parquet(df, pq_tmp_file)
- df_read <- read_parquet(pq_tmp_file)
+ df_read <- read_parquet(pq_tmp_file, mmap = FALSE)
expect_equal(df, df_read)
# Make sure file connection is cleaned up
expect_error(file.remove(pq_tmp_file), NA)
@@ -288,15 +288,21 @@ test_that("write_parquet() returns its input", {
test_that("write_parquet() handles version argument", {
df <- tibble::tibble(x = 1:5)
- tf <- tempfile()
- on.exit(unlink(tf))
- purrr::walk(list("1.0", "2.4", "2.6", "latest", 1.0, 2.4, 2.6, 1L), ~ {
- write_parquet(df, tf, version = .x)
+ versions <- list("1.0", "2.4", "2.6", "latest", 1.0, 2.4, 2.6, 1L)
+ purrr::walk(versions, function(x) {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write_parquet(df, tf, version = x)
expect_identical(read_parquet(tf), df)
})
- purrr::walk(list("3.0", 3.0, 3L, "A"), ~ {
- expect_error(write_parquet(df, tf, version = .x))
+
+ invalid_versions <- list("3.0", 3.0, 3L, "A")
+ purrr::walk(invalid_versions, function(x) {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ expect_error(write_parquet(df, tf, version = x))
})
})
diff --git a/r/tests/testthat/test-read-record-batch.R
b/r/tests/testthat/test-read-record-batch.R
index ba109da6c6..f62330ae0e 100644
--- a/r/tests/testthat/test-read-record-batch.R
+++ b/r/tests/testthat/test-read-record-batch.R
@@ -38,7 +38,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader
roundtrips", {
writer$close()
stream$close()
- expect_equal(read_feather(tf, as_data_frame = FALSE), tab)
+ expect_equal(read_feather(tf, as_data_frame = FALSE, mmap = FALSE), tab)
# Make sure connections are closed
expect_error(file.remove(tf), NA)
skip_on_os("windows") # This should pass, we've closed the stream