This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2ab2c428c5 GH-39076: [R] Fix tests that trigger confusing dplyr warnings (#39077)
2ab2c428c5 is described below
commit 2ab2c428c55ba41bbb9e51368b93070fca85e49b
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Dec 5 14:58:46 2023 -0400
GH-39076: [R] Fix tests that trigger confusing dplyr warnings (#39077)
### Rationale for this change
Running our test suite results in many spurious warnings being printed that
make it difficult to spot actual warnings.
### What changes are included in this PR?
The data used for specific tests involving `summarise()` was updated to not
trigger the warnings.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* Closes: #39076
Authored-by: Dewey Dunnington <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
r/tests/testthat/test-dplyr-summarize.R | 44 +++++++++++++++------------------
1 file changed, 20 insertions(+), 24 deletions(-)
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 2999371192..d39c800f3f 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -414,6 +414,8 @@ test_that("median()", {
})
test_that("quantile()", {
+ skip_if_not_available("dataset")
+
# The default method for stats::quantile() throws an error when na.rm = FALSE
# and the input contains NA or NaN, whereas the Arrow tdigest kernels return
# null in this situation. To work around this known difference, the tests
@@ -510,9 +512,9 @@ test_that("quantile()", {
)
# with a vector of 2+ probs
- expect_warning(
- Table$create(tbl) %>%
- summarize(q = quantile(dbl, probs = c(0.2, 0.8), na.rm = TRUE)),
+ expect_error(
+ InMemoryDataset$create(data.frame(x = 1)) %>%
+ summarize(q = quantile(x, probs = c(0.2, 0.8), na.rm = TRUE)),
"quantile() with length(probs) != 1 not supported in Arrow",
fixed = TRUE
)
@@ -910,28 +912,24 @@ test_that("Not (yet) supported: implicit join", {
compare_dplyr_binding(
.input %>%
- group_by(some_grouping) %>%
- summarize(
- dbl - mean(dbl)
- ) %>%
+ group_by(x) %>%
+ summarize(y - mean(y)) %>%
collect(),
- tbl,
+ data.frame(x = 1, y = 2),
warning = paste(
- "Expression dbl - mean\\(dbl\\) is not an aggregate expression",
+ "Expression y - mean\\(y\\) is not an aggregate expression",
"or is not supported in Arrow; pulling data into R"
)
)
compare_dplyr_binding(
.input %>%
- group_by(some_grouping) %>%
- summarize(
- dbl
- ) %>%
+ group_by(x) %>%
+ summarize(y) %>%
collect(),
- tbl,
+ data.frame(x = 1, y = 2),
warning = paste(
- "Expression dbl is not an aggregate expression",
+ "Expression y is not an aggregate expression",
"or is not supported in Arrow; pulling data into R"
)
)
@@ -939,14 +937,12 @@ test_that("Not (yet) supported: implicit join", {
# This one could possibly be supported--in mutate()
compare_dplyr_binding(
.input %>%
- group_by(some_grouping) %>%
- summarize(
- dbl - int
- ) %>%
+ group_by(x) %>%
+ summarize(x - y) %>%
collect(),
- tbl,
+ data.frame(x = 1, y = 2, z = 3),
warning = paste(
- "Expression dbl - int is not an aggregate expression",
+ "Expression x - y is not an aggregate expression",
"or is not supported in Arrow; pulling data into R"
)
)
@@ -1188,12 +1184,12 @@ test_that("Can use across() within summarise()", {
# across() doesn't work in summarise when input expressions evaluate to bare field references
expect_warning(
- example_data %>%
+ data.frame(x = 1, y = 2) %>%
arrow_table() %>%
- group_by(lgl) %>%
+ group_by(x) %>%
summarise(across(everything())) %>%
collect(),
- regexp = "Expression int is not an aggregate expression or is not supported in Arrow; pulling data into R"
+ regexp = "Expression y is not an aggregate expression or is not supported in Arrow; pulling data into R"
)
})