This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ad70464b76 ARROW-16783: [R] Explicit check for supported classes in
arrow_dplyr_query
ad70464b76 is described below
commit ad70464b76ec5e4f5cebaa34ca15ae04801cc4ac
Author: Andy Teucher <[email protected]>
AuthorDate: Tue Jun 28 20:01:34 2022 +0100
ARROW-16783: [R] Explicit check for supported classes in arrow_dplyr_query
Check for supported classes in `arrow_dplyr_query()` rather than via
`tryCatch()` in `write_dataset()`.
I also added `RecordBatchReader` to the list of supported classes, since
`as_adq()` can be called on `RecordBatchReader`s (e.g.,
https://github.com/apache/arrow/blob/0d5cf1882228624271062e6c19583c8b0c361a20/r/tests/testthat/test-dataset.R#L1000),
and `write_dataset()` also works on `RecordBatchReader`s.
Addresses https://issues.apache.org/jira/browse/ARROW-16783
Closes #13336 from ateucher/r-ds-dup-names-error
Lead-authored-by: Andy Teucher <[email protected]>
Co-authored-by: Andy Teucher <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/dataset-write.R | 16 +---------------
r/R/dplyr.R | 17 ++++++++++++++++-
r/tests/testthat/test-dataset-write.R | 4 +++-
3 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R
index 041adc7afd..496aaad205 100644
--- a/r/R/dataset-write.R
+++ b/r/R/dataset-write.R
@@ -143,21 +143,7 @@ write_dataset <- function(dataset,
# now to construct `partitioning` and don't want it in the metadata$r
dataset <- dplyr::ungroup(dataset)
}
- dataset <- tryCatch(
- as_adq(dataset),
- error = function(e) {
- supported <- c(
- "Dataset", "RecordBatch", "Table", "arrow_dplyr_query", "data.frame"
- )
- stop(
- "'dataset' must be a ",
- oxford_paste(supported, "or", quote = FALSE),
- ", not ",
- deparse(class(dataset)),
- call. = FALSE
- )
- }
- )
+ dataset <- as_adq(dataset)
}
plan <- ExecPlan$create()
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index c9650fb065..8018cb5a60 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -24,6 +24,21 @@ arrow_dplyr_query <- function(.data) {
# RecordBatch, or Dataset) and the state of the user's dplyr query--things
# like selected columns, filters, and group vars.
# An arrow_dplyr_query can contain another arrow_dplyr_query in .data
+
+ supported <- c(
+ "Dataset", "RecordBatch", "RecordBatchReader",
+ "Table", "arrow_dplyr_query", "data.frame"
+ )
+ if (!inherits(.data, supported)) {
+ stop(
+ "You must supply a ",
+ oxford_paste(supported, "or", quote = FALSE),
+ ", not an object of type ",
+ deparse(class(.data)),
+ call. = FALSE
+ )
+ }
+
gv <- tryCatch(
# If dplyr is not available, or if the input doesn't have a group_vars
# method, assume no group vars
@@ -38,7 +53,7 @@ arrow_dplyr_query <- function(.data) {
dupes <- duplicated(names(.data))
if (any(dupes)) {
abort(c(
- "Duplicated field names",
+ "Field names must be unique.",
x = paste0(
"The following field names were found more than once in the data: ",
oxford_paste(names(.data)[dupes])
diff --git a/r/tests/testthat/test-dataset-write.R
b/r/tests/testthat/test-dataset-write.R
index e755370662..2f4ff7e649 100644
--- a/r/tests/testthat/test-dataset-write.R
+++ b/r/tests/testthat/test-dataset-write.R
@@ -457,7 +457,9 @@ test_that("Writing a dataset: CSV format options", {
test_that("Dataset writing: unsupported features/input validation", {
skip_if_not_available("parquet")
- expect_error(write_dataset(4), "'dataset' must be a Dataset, ")
+ expect_error(write_dataset(4), "You must supply a")
+ expect_error(write_dataset(data.frame(x = 1, x = 2, check.names = FALSE)),
+ "Field names must be unique")
ds <- open_dataset(hive_dir)
expect_error(