This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2ab2c428c5 GH-39076: [R] Fix tests that trigger confusing dplyr warnings (#39077)
2ab2c428c5 is described below

commit 2ab2c428c55ba41bbb9e51368b93070fca85e49b
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Dec 5 14:58:46 2023 -0400

    GH-39076: [R] Fix tests that trigger confusing dplyr warnings (#39077)
    
    ### Rationale for this change
    
    Running our test suite results in many spurious warnings being printed that make it difficult to spot actual warnings.
    
    ### What changes are included in this PR?
    
    The data used for specific tests involving `summarise()` was updated to not trigger the warnings.
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    No
    * Closes: #39076
    
    Authored-by: Dewey Dunnington <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 r/tests/testthat/test-dplyr-summarize.R | 44 +++++++++++++++------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 2999371192..d39c800f3f 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -414,6 +414,8 @@ test_that("median()", {
 })
 
 test_that("quantile()", {
+  skip_if_not_available("dataset")
+
   # The default method for stats::quantile() throws an error when na.rm = FALSE
   # and the input contains NA or NaN, whereas the Arrow tdigest kernels return
   # null in this situation. To work around this known difference, the tests
@@ -510,9 +512,9 @@ test_that("quantile()", {
   )
 
   # with a vector of 2+ probs
-  expect_warning(
-    Table$create(tbl) %>%
-      summarize(q = quantile(dbl, probs = c(0.2, 0.8), na.rm = TRUE)),
+  expect_error(
+    InMemoryDataset$create(data.frame(x = 1)) %>%
+      summarize(q = quantile(x, probs = c(0.2, 0.8), na.rm = TRUE)),
     "quantile() with length(probs) != 1 not supported in Arrow",
     fixed = TRUE
   )
@@ -910,28 +912,24 @@ test_that("Not (yet) supported: implicit join", {
 
   compare_dplyr_binding(
     .input %>%
-      group_by(some_grouping) %>%
-      summarize(
-        dbl - mean(dbl)
-      ) %>%
+      group_by(x) %>%
+      summarize(y - mean(y)) %>%
       collect(),
-    tbl,
+    data.frame(x = 1, y = 2),
     warning = paste(
-      "Expression dbl - mean\\(dbl\\) is not an aggregate expression",
+      "Expression y - mean\\(y\\) is not an aggregate expression",
       "or is not supported in Arrow; pulling data into R"
     )
   )
 
   compare_dplyr_binding(
     .input %>%
-      group_by(some_grouping) %>%
-      summarize(
-        dbl
-      ) %>%
+      group_by(x) %>%
+      summarize(y) %>%
       collect(),
-    tbl,
+    data.frame(x = 1, y = 2),
     warning = paste(
-      "Expression dbl is not an aggregate expression",
+      "Expression y is not an aggregate expression",
       "or is not supported in Arrow; pulling data into R"
     )
   )
@@ -939,14 +937,12 @@ test_that("Not (yet) supported: implicit join", {
   # This one could possibly be supported--in mutate()
   compare_dplyr_binding(
     .input %>%
-      group_by(some_grouping) %>%
-      summarize(
-        dbl - int
-      ) %>%
+      group_by(x) %>%
+      summarize(x - y) %>%
       collect(),
-    tbl,
+    data.frame(x = 1, y = 2, z = 3),
     warning = paste(
-      "Expression dbl - int is not an aggregate expression",
+      "Expression x - y is not an aggregate expression",
       "or is not supported in Arrow; pulling data into R"
     )
   )
@@ -1188,12 +1184,12 @@ test_that("Can use across() within summarise()", {
 
   # across() doesn't work in summarise when input expressions evaluate to bare field references
   expect_warning(
-    example_data %>%
+    data.frame(x = 1, y = 2) %>%
       arrow_table() %>%
-      group_by(lgl) %>%
+      group_by(x) %>%
       summarise(across(everything())) %>%
       collect(),
-    regexp = "Expression int is not an aggregate expression or is not supported in Arrow; pulling data into R"
+    regexp = "Expression y is not an aggregate expression or is not supported in Arrow; pulling data into R"
   )
 })
 

Reply via email to