This is an automated email from the ASF dual-hosted git repository. kou pushed a commit to branch maint-6.0.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 88683e829759b3689dfa48cc7caa950e8019250b Author: Nic Crane <[email protected]> AuthorDate: Thu Oct 28 17:12:56 2021 +0100 ARROW-14310: [R] Make expect_dplyr_equal() more intuitive * Renamed "input" to ".input" in test helpers to make more obvious that it's a "special" variable * Renamed functions that don't fit the `testthat::expect_*()` pattern to prevent confusion * Added additional documentation to helper functions Closes #11403 from thisisnic/ARROW-14310_expect_dplyr_equal Lead-authored-by: Nic Crane <[email protected]> Co-authored-by: Nic <[email protected]> Signed-off-by: Nic Crane <[email protected]> --- r/tests/testthat/helper-expectation.R | 120 +++++-- r/tests/testthat/test-Array.R | 8 +- r/tests/testthat/test-compute-aggregate.R | 62 ++-- r/tests/testthat/test-compute-sort.R | 60 ++-- r/tests/testthat/test-dplyr-arrange.R | 80 ++--- r/tests/testthat/test-dplyr-collapse.R | 12 +- r/tests/testthat/test-dplyr-count.R | 32 +- r/tests/testthat/test-dplyr-distinct.R | 32 +- r/tests/testthat/test-dplyr-filter.R | 138 +++---- r/tests/testthat/test-dplyr-funcs-conditional.R | 100 +++--- r/tests/testthat/test-dplyr-funcs-datetime.R | 116 +++--- r/tests/testthat/test-dplyr-funcs-math.R | 98 ++--- r/tests/testthat/test-dplyr-funcs-string.R | 455 ++++++++++++------------ r/tests/testthat/test-dplyr-funcs-type.R | 84 ++--- r/tests/testthat/test-dplyr-group-by.R | 54 +-- r/tests/testthat/test-dplyr-join.R | 36 +- r/tests/testthat/test-dplyr-mutate.R | 116 +++--- r/tests/testthat/test-dplyr-query.R | 29 +- r/tests/testthat/test-dplyr-select.R | 72 ++-- r/tests/testthat/test-dplyr-summarize.R | 280 +++++++-------- r/tests/testthat/test-metadata.R | 24 +- r/tests/testthat/test-na-omit.R | 12 +- 22 files changed, 1032 insertions(+), 988 deletions(-) diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index b262db4..ef6142b 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -28,6 +28,8 @@ expect_r6_class <- function(object, class) { expect_s3_class(object, "R6") } +#' Mask `testthat::expect_equal()` in order to compare ArrowObjects using their +#' `Equals` methods from the C++ library. expect_equal <- function(object, expected, ignore_attr = FALSE, ..., info = NULL, label = NULL) { if (inherits(object, "ArrowObject") && inherits(expected, "ArrowObject")) { mc <- match.call() @@ -64,24 +66,35 @@ verify_output <- function(...) { testthat::verify_output(...) } -#' @param expr A dplyr pipeline with `input` as its start -#' @param tbl A tbl/df as reference, will make RB/Table with -#' @param skip_record_batch string skip message, if should skip RB test -#' @param skip_table string skip message, if should skip Table test -#' @param warning string expected warning from the RecordBatch and Table paths, -#' passed to `expect_warning()`. Special values: +#' Ensure that dplyr methods on Arrow objects return the same as for data frames +#' +#' This function compares the output of running a dplyr expression on a tibble +#' or data.frame object against the output of the same expression run on +#' Arrow Table and RecordBatch objects. +#' +#' +#' @param expr A dplyr pipeline which must have `.input` as its start +#' @param tbl A tibble or data.frame which will be substituted for `.input` +#' @param skip_record_batch The skip message to show (if you should skip the +#' RecordBatch test) +#' @param skip_table The skip message to show (if you should skip the Table test) +#' @param warning The expected warning from the RecordBatch and Table comparison +#' paths, passed to `expect_warning()`. Special values: #' * `NA` (the default) for ensuring no warning message #' * `TRUE` is a special case to mean to check for the #' "not supported in Arrow; pulling data into R" message. #' @param ... additional arguments, passed to `expect_equal()` -expect_dplyr_equal <- function(expr, - tbl, - skip_record_batch = NULL, - skip_table = NULL, - warning = NA, - ...) { +compare_dplyr_binding <- function(expr, + tbl, + skip_record_batch = NULL, + skip_table = NULL, + warning = NA, + ...) { + + # Quote the contents of `expr` so that we can evaluate it a few different ways expr <- rlang::enquo(expr) - expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl))) + # Get the expected output by evaluating expr on the .input data.frame using regular dplyr + expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl))) if (isTRUE(warning)) { # Special-case the simple warning: @@ -91,11 +104,12 @@ expect_dplyr_equal <- function(expr, skip_msg <- NULL + # Evaluate `expr` on a RecordBatch object and compare with `expected` if (is.null(skip_record_batch)) { expect_warning( via_batch <- rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = record_batch(tbl))) + rlang::new_data_mask(rlang::env(.input = record_batch(tbl))) ), warning ) @@ -104,11 +118,12 @@ expect_dplyr_equal <- function(expr, skip_msg <- c(skip_msg, skip_record_batch) } + # Evaluate `expr` on a Table object and compare with `expected` if (is.null(skip_table)) { expect_warning( via_table <- rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = arrow_table(tbl))) + rlang::new_data_mask(rlang::env(.input = arrow_table(tbl))) ), warning ) @@ -122,21 +137,30 @@ expect_dplyr_equal <- function(expr, } } -expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its start - tbl, # A tbl/df as reference, will make RB/Table with - ...) { +#' Assert that Arrow dplyr methods error in the same way as methods on data.frame +#' +#' Comparing the error message generated when running expressions on R objects +#' against the error message generated by running the same expression on Arrow +#' Tables and RecordBatches. +#' +#' @param expr A dplyr pipeline which must have `.input` as its start +#' @param tbl A tibble or data.frame which will be substituted for `.input` +#' @param ... additional arguments, passed to `expect_error()` +compare_dplyr_error <- function(expr, + tbl, + ...) { # ensure we have supplied tbl force(tbl) expr <- rlang::enquo(expr) msg <- tryCatch( - rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl))), + rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl))), error = function(e) { msg <- conditionMessage(e) # The error here is of the form: # - # Problem with `filter()` input `..1`. + # Problem with `filter()` .input `..1`. # x object 'b_var' not found # ℹ Input `..1` is `chr == b_var`. # @@ -158,7 +182,7 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star expect_error( rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = record_batch(tbl))) + rlang::new_data_mask(rlang::env(.input = record_batch(tbl))) ), msg, ... @@ -166,27 +190,36 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star expect_error( rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = arrow_table(tbl))) + rlang::new_data_mask(rlang::env(.input = arrow_table(tbl))) ), msg, ... ) } -expect_vector_equal <- function(expr, # A vectorized R expression containing `input` as its input - vec, # A vector as reference, will make Array/ChunkedArray with - skip_array = NULL, # Msg, if should skip Array test - skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test - ignore_attr = FALSE, # ignore attributes? - ...) { +#' Comparing the output of running expressions on R vectors against the same +#' expression run on Arrow Arrays and ChunkedArrays. +#' +#' @param expr A vectorized R expression which must have `.input` as its start +#' @param vec A vector which will be substituted for `.input` +#' @param skip_array The skip message to show (if you should skip the Array test) +#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test) +#' @param ignore_attr Ignore differences in specified attributes? +#' @param ... additional arguments, passed to `expect_as_vector()` +compare_expression <- function(expr, + vec, + skip_array = NULL, + skip_chunked_array = NULL, + ignore_attr = FALSE, + ...) { expr <- rlang::enquo(expr) - expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))) + expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec))) skip_msg <- NULL if (is.null(skip_array)) { via_array <- rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = Array$create(vec))) + rlang::new_data_mask(rlang::env(.input = Array$create(vec))) ) expect_as_vector(via_array, expected, ignore_attr, ...) } else { @@ -199,7 +232,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in via_chunked <- rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) + rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) ) expect_as_vector(via_chunked, expected, ignore_attr, ...) } else { @@ -211,15 +244,24 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in } } -expect_vector_error <- function(expr, # A vectorized R expression containing `input` as its input - vec, # A vector as reference, will make Array/ChunkedArray with - skip_array = NULL, # Msg, if should skip Array test - skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test - ...) { +#' Comparing the error message generated when running expressions on R objects +#' against the error message generated by running the same expression on Arrow +#' Arrays and ChunkedArrays. +#' +#' @param expr An R expression which must have `.input` as its start +#' @param vec A vector which will be substituted for `.input` +#' @param skip_array The skip message to show (if you should skip the Array test) +#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test) +#' @param ... additional arguments, passed to `expect_error()` +compare_expression_error <- function(expr, + vec, + skip_array = NULL, + skip_chunked_array = NULL, + ...) { expr <- rlang::enquo(expr) msg <- tryCatch( - rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))), + rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec))), error = function(e) { msg <- conditionMessage(e) @@ -240,7 +282,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in expect_error( rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = Array$create(vec))) + rlang::new_data_mask(rlang::env(.input = Array$create(vec))) ), msg, ... @@ -256,7 +298,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in expect_error( rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) + rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) ), msg, ... diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index a4f8239..ce23c26 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -318,19 +318,19 @@ test_that("support for NaN (ARROW-3615)", { test_that("is.nan() evalutes to FALSE on NA (for consistency with base R)", { x <- c(1.0, NA, NaN, -1.0) - expect_vector_equal(is.nan(input), x) + compare_expression(is.nan(.input), x) }) test_that("is.nan() evalutes to FALSE on non-floats (for consistency with base R)", { x <- c(1L, 2L, 3L) y <- c("foo", "bar") - expect_vector_equal(is.nan(input), x) - expect_vector_equal(is.nan(input), y) + compare_expression(is.nan(.input), x) + compare_expression(is.nan(.input), y) }) test_that("is.na() evalutes to TRUE on NaN (for consistency with base R)", { x <- c(1, NA, NaN, -1) - expect_vector_equal(is.na(input), x) + compare_expression(is.na(.input), x) }) test_that("integer types casts (ARROW-3741)", { diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R index 5054d04..018279d 100644 --- a/r/tests/testthat/test-compute-aggregate.R +++ b/r/tests/testthat/test-compute-aggregate.R @@ -295,40 +295,40 @@ test_that("median passes ... args to quantile", { }) test_that("median.Array and median.ChunkedArray", { - expect_vector_equal( - median(input), + compare_expression( + median(.input), 1:4 ) - expect_vector_equal( - median(input), + compare_expression( + median(.input), 1:5 ) - expect_vector_equal( - median(input), + compare_expression( + median(.input), numeric(0) ) - expect_vector_equal( - median(input, na.rm = FALSE), + compare_expression( + median(.input, na.rm = FALSE), c(1, 2, NA) ) - expect_vector_equal( - median(input, na.rm = TRUE), + compare_expression( + median(.input, na.rm = TRUE), c(1, 2, NA) ) - expect_vector_equal( - median(input, na.rm = TRUE), + compare_expression( + median(.input, na.rm = TRUE), NA_real_ ) - expect_vector_equal( - median(input, na.rm = FALSE), + compare_expression( + median(.input, na.rm = FALSE), c(1, 2, NA) ) - expect_vector_equal( - median(input, na.rm = TRUE), + compare_expression( + median(.input, na.rm = TRUE), c(1, 2, NA) ) - expect_vector_equal( - median(input, na.rm = TRUE), + compare_expression( + median(.input, na.rm = TRUE), NA_real_ ) }) @@ -388,31 +388,31 @@ test_that("value_counts", { test_that("any.Array and any.ChunkedArray", { data <- c(1:10, NA, NA) - expect_vector_equal(any(input > 5), data) - expect_vector_equal(any(input > 5, na.rm = TRUE), data) - expect_vector_equal(any(input < 1), data) - expect_vector_equal(any(input < 1, na.rm = TRUE), data) + compare_expression(any(.input > 5), data) + compare_expression(any(.input > 5, na.rm = TRUE), data) + compare_expression(any(.input < 1), data) + compare_expression(any(.input < 1, na.rm = TRUE), data) data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) - expect_vector_equal(any(input), data_logical) - expect_vector_equal(any(input, na.rm = FALSE), data_logical) - expect_vector_equal(any(input, na.rm = TRUE), data_logical) + compare_expression(any(.input), data_logical) + compare_expression(any(.input, na.rm = FALSE), data_logical) + compare_expression(any(.input, na.rm = TRUE), data_logical) }) test_that("all.Array and all.ChunkedArray", { data <- c(1:10, NA, NA) - expect_vector_equal(all(input > 5), data) - expect_vector_equal(all(input > 5, na.rm = TRUE), data) + compare_expression(all(.input > 5), data) + compare_expression(all(.input > 5, na.rm = TRUE), data) - expect_vector_equal(all(input < 11), data) - expect_vector_equal(all(input < 11, na.rm = TRUE), data) + compare_expression(all(.input < 11), data) + compare_expression(all(.input < 11, na.rm = TRUE), data) data_logical <- c(TRUE, TRUE, NA) - expect_vector_equal(all(input), data_logical) - expect_vector_equal(all(input, na.rm = TRUE), data_logical) + compare_expression(all(.input), data_logical) + compare_expression(all(.input, na.rm = TRUE), data_logical) }) test_that("variance", { diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R index 0b6dd3e..e3574d8 100644 --- a/r/tests/testthat/test-compute-sort.R +++ b/r/tests/testthat/test-compute-sort.R @@ -66,70 +66,70 @@ test_that("ChunkedArray$SortIndices()", { }) test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on integers", { - expect_vector_equal( - sort(input), + compare_expression( + sort(.input), tbl$int ) - expect_vector_equal( - sort(input, na.last = NA), + compare_expression( + sort(.input, na.last = NA), tbl$int ) - expect_vector_equal( - sort(input, na.last = TRUE), + compare_expression( + sort(.input, na.last = TRUE), tbl$int ) - expect_vector_equal( - sort(input, na.last = FALSE), + compare_expression( + sort(.input, na.last = FALSE), tbl$int ) - expect_vector_equal( - sort(input, decreasing = TRUE), + compare_expression( + sort(.input, decreasing = TRUE), tbl$int, ) - expect_vector_equal( - sort(input, decreasing = TRUE, na.last = TRUE), + compare_expression( + sort(.input, decreasing = TRUE, na.last = TRUE), tbl$int, ) - expect_vector_equal( - sort(input, decreasing = TRUE, na.last = FALSE), + compare_expression( + sort(.input, decreasing = TRUE, na.last = FALSE), tbl$int, ) }) test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on strings", { - expect_vector_equal( - sort(input, decreasing = TRUE, na.last = FALSE), + compare_expression( + sort(.input, decreasing = TRUE, na.last = FALSE), tbl$chr ) - expect_vector_equal( - sort(input, decreasing = TRUE, na.last = FALSE), + compare_expression( + sort(.input, decreasing = TRUE, na.last = FALSE), tbl$chr ) }) test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on floats", { - expect_vector_equal( - sort(input, decreasing = TRUE, na.last = TRUE), + compare_expression( + sort(.input, decreasing = TRUE, na.last = TRUE), tbl$dbl ) - expect_vector_equal( - sort(input, decreasing = FALSE, na.last = TRUE), + compare_expression( + sort(.input, decreasing = FALSE, na.last = TRUE), tbl$dbl ) - expect_vector_equal( - sort(input, decreasing = TRUE, na.last = NA), + compare_expression( + sort(.input, decreasing = TRUE, na.last = NA), tbl$dbl ) - expect_vector_equal( - sort(input, decreasing = TRUE, na.last = FALSE), + compare_expression( + sort(.input, decreasing = TRUE, na.last = FALSE), tbl$dbl, ) - expect_vector_equal( - sort(input, decreasing = FALSE, na.last = NA), + compare_expression( + sort(.input, decreasing = FALSE, na.last = NA), tbl$dbl ) - expect_vector_equal( - sort(input, decreasing = FALSE, na.last = FALSE), + compare_expression( + sort(.input, decreasing = FALSE, na.last = FALSE), tbl$dbl, ) }) diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R index ae747ef..d22f64a 100644 --- a/r/tests/testthat/test-dplyr-arrange.R +++ b/r/tests/testthat/test-dplyr-arrange.R @@ -23,118 +23,118 @@ library(dplyr, warn.conflicts = FALSE) tbl <- slice_sample(example_data_for_sorting, prop = 1L) test_that("arrange() on integer, double, and character columns", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(int, chr) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(int, desc(dbl)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(int, desc(desc(dbl))) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(int) %>% arrange(desc(dbl)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(int + dbl, chr) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(zzz = int + dbl, ) %>% arrange(zzz, chr) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(zzz = int + dbl) %>% arrange(int + dbl, chr) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(int + dbl) %>% arrange(int + dbl, chr) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(grp) %>% arrange(int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(grp) %>% arrange(int, dbl, .by_group = TRUE) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(grp, grp2) %>% arrange(int, dbl, .by_group = TRUE) %>% collect(), tbl %>% mutate(grp2 = ifelse(is.na(lgl), 1L, as.integer(lgl))) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(grp) %>% arrange(.by_group = TRUE) %>% pull(grp), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange() %>% collect(), tbl %>% group_by(grp) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(grp) %>% arrange() %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange() %>% collect(), tbl ) test_sort_col <- "chr" - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(!!sym(test_sort_col)) %>% collect(), tbl %>% select(chr, lgl) ) test_sort_cols <- c("int", "dbl") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(!!!syms(test_sort_cols)) %>% collect(), tbl @@ -142,14 +142,14 @@ test_that("arrange() on integer, double, and character columns", { }) test_that("arrange() on datetime columns", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(dttm, int) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(dttm) %>% collect(), tbl %>% @@ -158,8 +158,8 @@ test_that("arrange() on datetime columns", { }) test_that("arrange() on logical columns", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% arrange(lgl, int) %>% collect(), tbl diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index 13d870f..c7281b6 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -94,8 +94,8 @@ test_that("collapse", { expect_true(is_collapsed(collapse(q))) expect_false(is_collapsed(collapse(q)$.data)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2, chr == "d" | chr == "f") %>% select(chr, int, lgl) %>% mutate(twice = int * 2L) %>% @@ -106,8 +106,8 @@ test_that("collapse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2, chr == "d" | chr == "f") %>% collapse() %>% select(chr, int, lgl) %>% @@ -118,8 +118,8 @@ test_that("collapse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2, chr == "d" | chr == "f") %>% collapse() %>% group_by(chr) %>% diff --git a/r/tests/testthat/test-dplyr-count.R b/r/tests/testthat/test-dplyr-count.R index 1a852e1..8af9b57 100644 --- a/r/tests/testthat/test-dplyr-count.R +++ b/r/tests/testthat/test-dplyr-count.R @@ -24,15 +24,15 @@ tbl <- example_data tbl$some_grouping <- rep(c(1, 2), 5) test_that("count/tally", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% count() %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% tally() %>% collect(), tbl @@ -40,16 +40,16 @@ test_that("count/tally", { }) test_that("count/tally with wt and grouped data", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% count(wt = int) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% tally(wt = int) %>% collect(), @@ -58,16 +58,16 @@ test_that("count/tally with wt and grouped data", { }) test_that("count/tally with sort", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% count(wt = int, sort = TRUE) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% tally(wt = int, sort = TRUE) %>% collect(), @@ -76,15 +76,15 @@ test_that("count/tally with sort", { }) test_that("count/tally with name arg", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% count(name = "new_col") %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% tally(name = "new_col") %>% collect(), tbl diff --git a/r/tests/testthat/test-dplyr-distinct.R b/r/tests/testthat/test-dplyr-distinct.R index 4e85a5a..3a44c73 100644 --- a/r/tests/testthat/test-dplyr-distinct.R +++ b/r/tests/testthat/test-dplyr-distinct.R @@ -23,8 +23,8 @@ tbl <- example_data tbl$some_grouping <- rep(c(1, 2), 5) test_that("distinct()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% distinct(some_grouping, lgl) %>% collect() %>% arrange(some_grouping, lgl), @@ -33,16 +33,16 @@ test_that("distinct()", { }) test_that("distinct() works without any variables", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% distinct() %>% arrange(int) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(x = int + 1) %>% distinct() %>% # Even though we have group_by(x), all cols (including int) are kept @@ -53,8 +53,8 @@ test_that("distinct() works without any variables", { }) test_that("distinct() can retain groups", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping, int) %>% distinct(lgl) %>% collect() %>% @@ -63,8 +63,8 @@ test_that("distinct() can retain groups", { ) # With expressions here - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(y = some_grouping, int) %>% distinct(x = lgl) %>% collect() %>% @@ -74,16 +74,16 @@ test_that("distinct() can retain groups", { }) test_that("distinct() can contain expressions", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% distinct(lgl, x = some_grouping + 1) %>% collect() %>% arrange(lgl, x), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(lgl, int) %>% distinct(x = some_grouping + 1) %>% collect() %>% @@ -94,8 +94,8 @@ test_that("distinct() can contain expressions", { test_that("distinct() can return all columns", { skip("ARROW-13993 - need this to return correct rows from other cols") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% distinct(lgl, .keep_all = TRUE) %>% collect() %>% arrange(int), diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 74b7cf3..72a6422 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -29,8 +29,8 @@ tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, si tbl$some_negative <- tbl$int * (-1)^(1:nrow(tbl)) # nolint test_that("filter() on is.na()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(is.na(lgl)) %>% select(chr, int, lgl) %>% collect(), @@ -39,8 +39,8 @@ test_that("filter() on is.na()", { }) test_that("filter() with NAs in selection", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(lgl) %>% select(chr, int, lgl) %>% collect(), @@ -49,8 +49,8 @@ test_that("filter() with NAs in selection", { }) test_that("Filter returning an empty Table should not segfault (ARROW-8354)", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(false) %>% select(chr, int, lgl) %>% collect(), @@ -60,8 +60,8 @@ test_that("Filter returning an empty Table should not segfault (ARROW-8354)", { test_that("filtering with expression", { char_sym <- "b" - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(chr == char_sym) %>% select(string = chr, int) %>% collect(), @@ -70,56 +70,56 @@ test_that("filtering with expression", { }) test_that("filtering with arithmetic", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl + 1 > 3) %>% select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl / 2 > 3) %>% select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl / 2L > 3) %>% select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int / 2 > 3) %>% select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int / 2L > 3) %>% select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl %/% 2 > 3) %>% select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl^2 > 3) %>% select(string = chr, int, dbl) %>% collect(), @@ -128,24 +128,24 @@ test_that("filtering with arithmetic", { }) test_that("filtering with expression + autocasting", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl + 1 > 3L) %>% # test autocasting with comparison to 3L select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int + 1 > 3) %>% select(string = chr, int, dbl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int^2 > 3) %>% select(string = chr, int, dbl) %>% collect(), @@ -154,8 +154,8 @@ test_that("filtering with expression + autocasting", { }) test_that("More complex select/filter", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2, chr == "d" | chr == "f") %>% select(chr, int, lgl) %>% filter(int < 5) %>% @@ -166,8 +166,8 @@ test_that("More complex select/filter", { }) test_that("filter() with %in%", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2, chr %in% c("d", "f")) %>% collect(), tbl @@ -175,20 +175,20 @@ test_that("filter() with %in%", { }) test_that("Negative scalar values", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(some_negative > -2) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(some_negative %in% -1) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int == -some_negative) %>% collect(), tbl @@ -196,15 +196,15 @@ test_that("Negative scalar values", { }) test_that("filter() with between()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(between(dbl, 1, 2)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(between(dbl, 0.5, 2)) %>% collect(), tbl @@ -243,15 +243,15 @@ test_that("filter() with between()", { test_that("filter() with string ops", { skip_if_not_available("utf8proc") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2, str_length(verses) > 25) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2, str_length(str_trim(padded_strings, "left")) > 5) %>% collect(), tbl @@ -260,31 +260,31 @@ test_that("filter() with string ops", { test_that("filter environment scope", { # "object 'b_var' not found" - expect_dplyr_error(input %>% filter(chr == b_var), tbl) + compare_dplyr_error(.input %>% filter(chr == b_var), tbl) b_var <- "b" - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(chr == b_var) %>% collect(), tbl ) # Also for functions # 'could not find function "isEqualTo"' because we haven't defined it yet - expect_dplyr_error(input %>% filter(isEqualTo(int, 4)), tbl) + compare_dplyr_error(.input %>% filter(isEqualTo(int, 4)), tbl) # This works but only because there are S3 methods for those operations isEqualTo <- function(x, y) x == y & !is.na(x) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(-fct) %>% # factor levels aren't identical filter(isEqualTo(int, 4)) %>% collect(), tbl ) # Try something that needs to call another nse_func - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(-fct) %>% filter(nchar(padded_strings) < 10) %>% collect(), @@ -292,8 +292,8 @@ test_that("filter environment scope", { ) isShortString <- function(x) nchar(x) < 10 skip("TODO: 14071") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(-fct) %>% filter(isShortString(padded_strings)) %>% collect(), @@ -327,15 +327,15 @@ test_that("Filtering on a column that doesn't exist errors correctly", { }) test_that("Filtering with unsupported functions", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int > 2, pnorm(dbl) > .99) %>% collect(), tbl, warning = "Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter( nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg int > 2, # good @@ -361,7 +361,7 @@ test_that("Calling Arrow compute functions 'directly'", { select(string = chr, int, dbl) ) - expect_dplyr_equal( + compare_dplyr_binding( tbl %>% record_batch() %>% filter(arrow_greater(arrow_add(dbl, 1), 3L)) %>% @@ -374,16 +374,16 @@ test_that("Calling Arrow compute functions 'directly'", { }) test_that("filter() with .data pronoun", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(.data$dbl > 4) %>% select(.data$chr, .data$int, .data$lgl) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(is.na(.data$lgl)) %>% select(.data$chr, .data$int, .data$lgl) %>% collect(), @@ -392,8 +392,8 @@ test_that("filter() with .data pronoun", { # and the .env pronoun too! chr <- 4 - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(.data$dbl > .env$chr) %>% select(.data$chr, .data$int, .data$lgl) %>% collect(), @@ -402,8 +402,8 @@ test_that("filter() with .data pronoun", { skip("test now faulty - code no longer gives error & outputs a empty tibble") # but there is an error if we don't override the masking with `.env` - expect_dplyr_error( - input %>% + compare_dplyr_error( + .input %>% filter(.data$dbl > chr) %>% select(.data$chr, .data$int, .data$lgl) %>% collect(), diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index e597d36..4f27007 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -26,8 +26,8 @@ tbl$verses <- verses[[1]] tbl$another_chr <- tail(letters, 10) test_that("if_else and ifelse", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(int > 5, 1, 0) ) %>% @@ -35,8 +35,8 @@ test_that("if_else and ifelse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(int > 5, int, 0L) ) %>% @@ -53,8 +53,8 @@ test_that("if_else and ifelse", { "NotImplemented: Function if_else has no kernel matching input types" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(int > 5, 1, NA_real_) ) %>% @@ -62,8 +62,8 @@ test_that("if_else and ifelse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = ifelse(int > 5, 1, 0) ) %>% @@ -71,8 +71,8 @@ test_that("if_else and ifelse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(dbl > 5, TRUE, FALSE) ) %>% @@ -80,8 +80,8 @@ test_that("if_else and ifelse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(chr %in% letters[1:3], 1L, 3L) ) %>% @@ -89,8 +89,8 @@ test_that("if_else and ifelse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(int > 5, "one", "zero") ) %>% @@ -98,8 +98,8 @@ test_that("if_else and ifelse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(int > 5, chr, another_chr) ) %>% @@ -107,8 +107,8 @@ test_that("if_else and ifelse", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(int > 5, "true", chr, missing = "MISSING") ) %>% @@ -118,8 +118,8 @@ test_that("if_else and ifelse", { # TODO: remove the mutate + warning after ARROW-13358 is merged and Arrow # supports factors in if(_)else - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(int > 5, fct, factor("a")) ) %>% @@ -131,8 +131,8 @@ test_that("if_else and ifelse", { ) # detecting NA and NaN works just fine - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(is.na(dbl), chr, "false", missing = "MISSING") ) %>% @@ -142,8 +142,8 @@ test_that("if_else and ifelse", { # However, currently comparisons with NaNs return false and not NaNs or NAs skip("ARROW-13364") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( y = if_else(dbl > 5, chr, another_chr, missing = "MISSING") ) %>% @@ -152,8 +152,8 @@ test_that("if_else and ifelse", { ) skip("TODO: could? should? we support the autocasting in ifelse") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = ifelse(int > 5, 1, FALSE)) %>% collect(), tbl @@ -161,26 +161,26 @@ test_that("if_else and ifelse", { }) test_that("case_when()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(cw = case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(cw = case_when(int > 5 ~ 1, TRUE ~ 0)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(cw = case_when(chr %in% letters[1:3] ~ 1L) + 41L) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(case_when( dbl + int - 1.1 == dbl2 ~ TRUE, NA ~ NA, @@ -256,21 +256,21 @@ test_that("case_when()", { ) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(cw = case_when(lgl ~ "abc")) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(cw = case_when(lgl ~ verses, !false ~ paste(chr, chr))) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( cw = case_when(!(!(!(lgl))) ~ factor(chr), TRUE ~ fct) ) %>% @@ -288,8 +288,8 @@ test_that("coalesce()", { y = c(NA_character_, "b", "c"), z = c("a", "b", "c") ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( cw = coalesce(w), cz = coalesce(z), @@ -308,8 +308,8 @@ test_that("coalesce()", { y = c(NA_integer_, 2L, 3L), z = 1:3 ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( cw = coalesce(w), cz = coalesce(z), @@ -328,8 +328,8 @@ test_that("coalesce()", { y = c(NA_real_, 2.2, 3.3), z = c(1.1, 2.2, 3.3) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( cw = coalesce(w), cz = coalesce(z), @@ -369,8 +369,8 @@ test_that("coalesce()", { float32() ) # with R literal values - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( c1 = coalesce(4.4), c2 = coalesce(NA_real_), @@ -390,8 +390,8 @@ test_that("coalesce()", { x = factor("a", levels = c("a", "z")), y = factor("b", levels = c("a", "b", "c")) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(c = coalesce(x, y)) %>% collect() %>% # This is a no-op on the Arrow side, but necessary to make the results equal diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index b18898e..5cb515e 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -47,8 +47,8 @@ test_df <- tibble::tibble( # These tests test component extraction from timestamp objects test_that("extract year from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = year(datetime)) %>% collect(), test_df @@ -56,8 +56,8 @@ test_that("extract year from timestamp", { }) test_that("extract isoyear from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = isoyear(datetime)) %>% collect(), test_df @@ -65,8 +65,8 @@ test_that("extract isoyear from timestamp", { }) test_that("extract quarter from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = quarter(datetime)) %>% collect(), test_df @@ -74,8 +74,8 @@ test_that("extract quarter from timestamp", { }) test_that("extract month from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = month(datetime)) %>% collect(), test_df @@ -83,8 +83,8 @@ test_that("extract month from timestamp", { }) test_that("extract isoweek from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = isoweek(datetime)) %>% collect(), test_df @@ -92,8 +92,8 @@ test_that("extract isoweek from timestamp", { }) test_that("extract epiweek from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = epiweek(datetime)) %>% collect(), test_df @@ -101,8 +101,8 @@ test_that("extract epiweek from timestamp", { }) test_that("extract day from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = day(datetime)) %>% collect(), test_df @@ -110,22 +110,22 @@ test_that("extract day from timestamp", { }) test_that("extract wday from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(datetime)) %>% collect(), test_df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date, week_start = 3)) %>% collect(), test_df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date, week_start = 1)) %>% collect(), test_df @@ -133,16 +133,16 @@ test_that("extract wday from timestamp", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date, label = TRUE)) %>% mutate(x = as.character(x)) %>% collect(), test_df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(datetime, label = TRUE, abbr = TRUE)) %>% mutate(x = as.character(x)) %>% collect(), @@ -151,8 +151,8 @@ test_that("extract wday from timestamp", { }) test_that("extract yday from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = yday(datetime)) %>% collect(), test_df @@ -160,8 +160,8 @@ test_that("extract yday from timestamp", { }) test_that("extract hour from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = hour(datetime)) %>% collect(), test_df @@ -169,8 +169,8 @@ test_that("extract hour from timestamp", { }) test_that("extract minute from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = minute(datetime)) %>% collect(), test_df @@ -178,8 +178,8 @@ test_that("extract minute from timestamp", { }) test_that("extract second from timestamp", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = second(datetime)) %>% collect(), test_df, @@ -191,8 +191,8 @@ test_that("extract second from timestamp", { # These tests test extraction of components from date32 objects test_that("extract year from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = year(date)) %>% collect(), test_df @@ -200,8 +200,8 @@ test_that("extract year from date", { }) test_that("extract isoyear from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = isoyear(date)) %>% collect(), test_df @@ -209,8 +209,8 @@ test_that("extract isoyear from date", { }) test_that("extract quarter from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = quarter(date)) %>% collect(), test_df @@ -218,8 +218,8 @@ test_that("extract quarter from date", { }) test_that("extract month from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = month(date)) %>% collect(), test_df @@ -227,8 +227,8 @@ test_that("extract month from date", { }) test_that("extract isoweek from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = isoweek(date)) %>% collect(), test_df @@ -236,8 +236,8 @@ test_that("extract isoweek from date", { }) test_that("extract epiweek from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = epiweek(date)) %>% collect(), test_df @@ -245,8 +245,8 @@ test_that("extract epiweek from date", { }) test_that("extract day from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = day(date)) %>% collect(), test_df @@ -254,22 +254,22 @@ test_that("extract day from date", { }) test_that("extract wday from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date)) %>% collect(), test_df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date, week_start = 3)) %>% collect(), test_df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date, week_start = 1)) %>% collect(), test_df @@ -277,16 +277,16 @@ test_that("extract wday from date", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>% mutate(x = as.character(x)) %>% collect(), test_df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = wday(date, label = TRUE)) %>% mutate(x = as.character(x)) %>% collect(), @@ -295,8 +295,8 @@ test_that("extract wday from date", { }) test_that("extract yday from date", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = yday(date)) %>% collect(), test_df diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 2f2de18..b666306 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -23,8 +23,8 @@ library(dplyr, warn.conflicts = FALSE) test_that("abs()", { df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(abs = abs(x)) %>% collect(), df @@ -34,8 +34,8 @@ test_that("abs()", { test_that("sign()", { df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(sign = sign(x)) %>% collect(), df @@ -45,8 +45,8 @@ test_that("sign()", { test_that("ceiling(), floor(), trunc(), round()", { df <- tibble(x = c(-1, -0.55, -0.5, -0.1, 0, 0.1, 0.5, 0.55, 1, NA, NaN)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( c = ceiling(x), f = floor(x), @@ -58,8 +58,8 @@ test_that("ceiling(), floor(), trunc(), round()", { ) # with digits set to 1 - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(x %% 0.5 == 0) %>% # filter out indeterminate cases (see below) mutate(r = round(x, 1)) %>% collect(), @@ -67,8 +67,8 @@ test_that("ceiling(), floor(), trunc(), round()", { ) # with digits set to -1 - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( rd = round(floor(x * 111), -1), # double y = ifelse(is.nan(x), NA_integer_, x), @@ -100,8 +100,8 @@ test_that("ceiling(), floor(), trunc(), round()", { skip_on_cran() skip_on_os("windows") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(r = round(x, 1)) %>% collect(), df @@ -139,37 +139,37 @@ test_that("ceiling(), floor(), trunc(), round()", { test_that("log functions", { df <- tibble(x = c(1:10, NA, NA)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log(x, base = exp(1))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log(x, base = 2)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log(x, base = 10)) %>% collect(), df ) # test log(, base = (length == 1)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log(x, base = 5)) %>% collect(), df @@ -190,8 +190,8 @@ test_that("log functions", { ) # test log(, base = Expression) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% # test cases where base = 1 below filter(x != 1) %>% mutate( @@ -205,8 +205,8 @@ test_that("log functions", { # log(1, base = 1) is NaN in both R and Arrow # suppress the R warning because R warns but Arrow does not suppressWarnings( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log(x, base = y)) %>% collect(), tibble(x = 1, y = 1) @@ -214,36 +214,36 @@ test_that("log functions", { ) # log(n != 1, base = 1) is Inf in R and Arrow - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log(x, base = y)) %>% collect(), tibble(x = 10, y = 1) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = logb(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log1p(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log2(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = log10(x)) %>% collect(), df @@ -253,36 +253,36 @@ test_that("log functions", { test_that("trig functions", { df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = sin(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = cos(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = tan(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = asin(x)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = acos(x)) %>% collect(), df @@ -292,15 +292,15 @@ test_that("trig functions", { test_that("arith functions ", { df <- tibble(x = c(1:5, NA)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( int_div = x %/% 2, addition = x + 1, multiplication = x * 3, subtraction = x - 5, division = x / 2, - power = x ^ 3, + power = x^3, modulo = x %% 3 ) %>% collect(), diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 333735b..5e092f4 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -35,84 +35,84 @@ test_that("paste, paste0, and str_c", { y <- Expression$field_ref("y") # no NAs in data - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste(v, w)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste(v, w, sep = "-")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste0(v, w)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(str_c(v, w)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(str_c(v, w, sep = "+")) %>% collect(), df ) # NAs in data - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste(x, y)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste(x, y, sep = "-")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(str_c(x, y)) %>% collect(), df ) # non-character column in dots - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste0(x, y, z)) %>% collect(), df ) # literal string in dots - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste(x, "foo", y)) %>% collect(), df ) # literal NA in dots - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste(x, NA, y)) %>% collect(), df ) # expressions in dots - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste0(x, toupper(y), as.character(z))) %>% collect(), df @@ -125,16 +125,16 @@ test_that("paste, paste0, and str_c", { "Invalid separator" ) # emits null in str_c() (consistent with stringr::str_c()) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(str_c(x, y, sep = NA_character_)) %>% collect(), df ) # sep passed in dots to paste0 (which doesn't take a sep argument) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(paste0(x, y, sep = "-")) %>% collect(), df @@ -181,8 +181,8 @@ test_that("paste, paste0, and str_c", { test_that("grepl with ignore.case = FALSE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(grepl("o", x, fixed = TRUE)) %>% collect(), df @@ -191,14 +191,14 @@ test_that("grepl with ignore.case = FALSE and fixed = TRUE", { test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = sub("Foo", "baz", x, fixed = TRUE)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = gsub("o", "u", x, fixed = TRUE)) %>% collect(), df @@ -212,20 +212,20 @@ test_that("grepl", { df <- tibble(x = c("Foo", "bar")) for (fixed in c(TRUE, FALSE)) { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(grepl("Foo", x, fixed = fixed)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = grepl("^B.+", x, ignore.case = FALSE, fixed = fixed)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(grepl("Foo", x, ignore.case = FALSE, fixed = fixed)) %>% collect(), df @@ -237,7 +237,7 @@ test_that("grepl with ignore.case = TRUE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar")) # base::grepl() ignores ignore.case = TRUE with a warning when fixed = TRUE, - # so we can't use expect_dplyr_equal() for these tests + # so we can't use compare_dplyr_binding() for these tests expect_equal( df %>% Table$create() %>% @@ -257,44 +257,44 @@ test_that("grepl with ignore.case = TRUE and fixed = TRUE", { test_that("str_detect", { df <- tibble(x = c("Foo", "bar")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_detect(x, regex("^F"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE), negate = TRUE)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_detect(x, fixed("o"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_detect(x, fixed("O"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_detect(x, fixed("O", ignore_case = TRUE))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_detect(x, fixed("O", ignore_case = TRUE), negate = TRUE)) %>% collect(), df @@ -305,20 +305,20 @@ test_that("sub and gsub", { df <- tibble(x = c("Foo", "bar")) for (fixed in c(TRUE, FALSE)) { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = sub("^B.+", "baz", x, ignore.case = FALSE, fixed = fixed)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = sub("Foo", "baz", x, ignore.case = FALSE, fixed = fixed)) %>% collect(), df @@ -330,7 +330,7 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar")) # base::sub() and base::gsub() ignore ignore.case = TRUE with a warning when - # fixed = TRUE, so we can't use expect_dplyr_equal() for these tests + # fixed = TRUE, so we can't use compare_dplyr_binding() for these tests expect_equal( df %>% Table$create() %>% @@ -357,47 +357,47 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", { test_that("str_replace and str_replace_all", { df <- tibble(x = c("Foo", "bar")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace_all(x, "^F", "baz")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace_all(x, regex("^F"), "baz")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_replace(x, "^F[a-z]{2}", "baz")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace(x, regex("^f[A-Z]{2}", ignore_case = TRUE), "baz")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace_all(x, fixed("o"), "u")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace(x, fixed("O"), "u")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace(x, fixed("O", ignore_case = TRUE), "u")) %>% collect(), df @@ -407,8 +407,8 @@ test_that("str_replace and str_replace_all", { test_that("strsplit and str_split", { df <- tibble(x = c("Foo and bar", "baz and qux and quux")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strsplit(x, "and")) %>% collect(), df, @@ -416,50 +416,50 @@ test_that("strsplit and str_split", { # has type information in it, but it's just a bare list from R/dplyr. ignore_attr = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>% collect(), df, ignore_attr = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strsplit(x, " +and +")) %>% collect(), df, ignore_attr = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_split(x, "and")) %>% collect(), df, ignore_attr = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_split(x, "and", n = 2)) %>% collect(), df, ignore_attr = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_split(x, fixed("and"), n = 2)) %>% collect(), df, ignore_attr = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_split(x, regex("and"), n = 2)) %>% collect(), df, ignore_attr = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_split(x, "Foo|bar", n = 2)) %>% collect(), df, @@ -469,8 +469,8 @@ test_that("strsplit and str_split", { test_that("str_to_lower, str_to_upper, and str_to_title", { df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( x_lower = str_to_lower(x), x_upper = str_to_upper(x), @@ -596,8 +596,8 @@ test_that("backreferences in pattern in string detection", { skip("RE2 does not support backreferences in pattern (https://github.com/google/re2/issues/101)") df <- tibble(x = c("Foo", "bar")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_detect(x, regex("F([aeiou])\\1"))) %>% collect(), df @@ -607,8 +607,8 @@ test_that("backreferences in pattern in string detection", { test_that("backreferences (substitutions) in string replacement", { df <- tibble(x = c("Foo", "bar")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(desc = sub( "(?:https?|ftp)://([^/\r\n]+)(/[^\r\n]*)?", "path `\\2` on server `\\1`", @@ -617,20 +617,20 @@ test_that("backreferences (substitutions) in string replacement", { collect(), tibble(url = "https://arrow.apache.org/docs/r/") ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace(x, "^(\\w)o(.*)", "\\1\\2p")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>% collect(), df @@ -640,7 +640,7 @@ test_that("backreferences (substitutions) in string replacement", { test_that("edge cases in string detection and replacement", { # in case-insensitive fixed match/replace, test that "\\E" in the search # string and backslashes in the replacement string are interpreted literally. - # this test does not use expect_dplyr_equal() because base::sub() and + # this test does not use compare_dplyr_binding() because base::sub() and # base::grepl() do not support ignore.case = TRUE when fixed = TRUE. expect_equal( tibble(x = c("\\Q\\e\\D")) %>% @@ -659,14 +659,14 @@ test_that("edge cases in string detection and replacement", { # test that a user's "(?i)" prefix does not break the "(?i)" prefix that's # added in case-insensitive regex match/replace - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(grepl("(?i)^[abc]{3}$", x, ignore.case = TRUE, fixed = FALSE)) %>% collect(), tibble(x = c("ABC")) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = sub("(?i)^[abc]{3}$", "123", x, ignore.case = TRUE, fixed = FALSE)) %>% collect(), tibble(x = c("ABC")) @@ -762,38 +762,39 @@ test_that("strftime", { formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G %V %u" - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strftime(datetime, format = formats)) %>% collect(), times ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strftime(date, format = formats_date)) %>% collect(), times ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strftime(datetime, format = formats, tz = "Pacific/Marquesas")) %>% collect(), times ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strftime(datetime, format = formats, tz = "EST", usetz = TRUE)) %>% collect(), times ) withr::with_timezone( - "Pacific/Marquesas", { - expect_dplyr_equal( - input %>% + "Pacific/Marquesas", + { + compare_dplyr_binding( + .input %>% mutate( x = strftime(datetime, format = formats, tz = "EST"), x_date = strftime(date, format = formats_date, tz = "EST") @@ -802,8 +803,8 @@ test_that("strftime", { times ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( x = strftime(datetime, format = formats), x_date = strftime(date, format = formats_date) @@ -828,8 +829,8 @@ test_that("strftime", { # Timestamps with second precision are represented as integers while # milliseconds, microsecond and nanoseconds are represented as fixed floating # point numbers with 3, 6 and 9 decimal places respectively. - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = strftime(datetime, format = "%S")) %>% transmute(as.double(substr(x, 1, 2))) %>% collect(), @@ -842,8 +843,8 @@ test_that("format_ISO8601", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% collect(), times @@ -870,16 +871,16 @@ test_that("format_ISO8601", { "Timezone not present, cannot convert to string with timezone: %Y-%m-%dT%H:%M:%S%z" ) } else { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = format_ISO8601(x, precision = "ymd", usetz = TRUE)) %>% collect(), times ) # See comment regarding %S flag in strftime tests - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = TRUE)) %>% mutate(x = gsub("\\.0*", "", x)) %>% collect(), @@ -889,8 +890,8 @@ test_that("format_ISO8601", { # See comment regarding %S flag in strftime tests - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = FALSE)) %>% mutate(x = gsub("\\.0*", "", x)) %>% collect(), @@ -945,15 +946,15 @@ test_that("stri_reverse and arrow_ascii_reverse functions", { df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = stri_reverse(x)) %>% collect(), df_utf8 ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = stri_reverse(x)) %>% collect(), df_ascii @@ -980,7 +981,7 @@ test_that("str_like", { df <- tibble(x = c("Foo and bar", "baz and qux and quux")) # TODO: After new version of stringr with str_like has been released, update all - # these tests to use expect_dplyr_equal + # these tests to use compare_dplyr_binding # No match - entire string expect_equal( @@ -1029,8 +1030,8 @@ test_that("str_like", { # This will give an error until a new version of stringr with str_like has been released skip_if_not(packageVersion("stringr") > "1.4.0") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_like(x, "%baz%")) %>% collect(), df @@ -1040,36 +1041,36 @@ test_that("str_like", { test_that("str_pad", { df <- tibble(x = c("Foo and bar", "baz and qux and quux")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_pad(x, width = 31)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_pad(x, width = 30, side = "right")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_pad(x, width = 31, side = "left", pad = "+")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_pad(x, width = 10, side = "left", pad = "+")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(x = str_pad(x, width = 31, side = "both")) %>% collect(), df @@ -1079,64 +1080,64 @@ test_that("str_pad", { test_that("substr", { df <- tibble(x = "Apache Arrow") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, 1, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, 0, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, -1, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, 6, 1)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, -1, -2)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, 9, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, 1, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, 8, 12)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substr(x, -5, -1)) %>% collect(), df @@ -1157,8 +1158,8 @@ test_that("substring", { # nse_funcs$substring just calls nse_funcs$substr, tested extensively above df <- tibble(x = "Apache Arrow") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = substring(x, 1, 6)) %>% collect(), df @@ -1168,71 +1169,71 @@ test_that("substring", { test_that("str_sub", { df <- tibble(x = "Apache Arrow") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, 1, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, 0, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, -1, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, 6, 1)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, -1, -2)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, -1, 3)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, 9, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, 1, 6)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, 8, 12)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(y = str_sub(x, -5, -1)) %>% collect(), df @@ -1252,85 +1253,85 @@ test_that("str_sub", { test_that("str_starts, str_ends, startsWith, endsWith", { df <- tibble(x = c("Foo", "bar", "baz", "qux")) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_starts(x, "b.*")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_starts(x, "b.*", negate = TRUE)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_starts(x, fixed("b.*"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_starts(x, fixed("b"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_ends(x, "r")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_ends(x, "r", negate = TRUE)) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_ends(x, fixed("r$"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(str_ends(x, fixed("r"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(startsWith(x, "b")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(endsWith(x, "r")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(startsWith(x, "b.*")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(endsWith(x, "r$")) %>% collect(), df @@ -1343,22 +1344,22 @@ test_that("str_count", { dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d..") ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(a_count = str_count(cities, pattern = "a")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(p_count = str_count(cities, pattern = "d")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(p_count = str_count(cities, pattern = regex("d", ignore_case = TRUE) )) %>% @@ -1366,31 +1367,31 @@ test_that("str_count", { df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(e_count = str_count(cities, pattern = "u")) %>% collect(), df ) # nse_funcs$str_count() is not vectorised over pattern - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(let_count = str_count(cities, pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>% collect(), df, warning = TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(dots_count = str_count(dots, ".")) %>% collect(), df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(dots_count = str_count(dots, fixed("."))) %>% collect(), df diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 7f7396d..859dc14 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -88,8 +88,8 @@ test_that("explicit type conversions with cast()", { test_that("explicit type conversions with as.*()", { library(bit64) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( int2chr = as.character(int), int2dbl = as.double(int), @@ -103,8 +103,8 @@ test_that("explicit type conversions with as.*()", { collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( chr2chr = as.character(chr), chr2dbl = as.double(chr), @@ -114,8 +114,8 @@ test_that("explicit type conversions with as.*()", { collect(), tibble(chr = c("1", "2", "3")) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( chr2i64 = as.integer64(chr), dbl2i64 = as.integer64(dbl), @@ -124,8 +124,8 @@ test_that("explicit type conversions with as.*()", { collect(), tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10)) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( chr2lgl = as.logical(chr), dbl2lgl = as.logical(dbl), @@ -138,8 +138,8 @@ test_that("explicit type conversions with as.*()", { int = c(1L, 0L, -99L, 0L) ) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( dbl2chr = as.character(dbl), dbl2dbl = as.double(dbl), @@ -170,8 +170,8 @@ test_that("is.finite(), is.infinite(), is.nan()", { -4.94065645841246544e-324, 1.79769313486231570e+308, 0, NA_real_, NaN, Inf, -Inf )) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( is_fin = is.finite(x), is_inf = is.infinite(x) @@ -180,8 +180,8 @@ test_that("is.finite(), is.infinite(), is.nan()", { df ) # is.nan() evaluates to FALSE on NA_real_ (ARROW-12850) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( is_nan = is.nan(x) ) %>% @@ -192,8 +192,8 @@ test_that("is.finite(), is.infinite(), is.nan()", { test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", { df <- tibble(x = c(1.1, 2.2, NA_real_, 4.4, NaN, 6.6, 7.7)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( is_na = is.na(x) ) %>% @@ -306,8 +306,8 @@ test_that("type checks with is() giving Arrow types", { test_that("type checks with is() giving R types", { library(bit64) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( chr_is_chr = is(chr, "character"), chr_is_fct = is(chr, "factor"), @@ -348,8 +348,8 @@ test_that("type checks with is() giving R types", { collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( i64_is_chr = is(i64, "character"), i64_is_fct = is(i64, "factor"), @@ -378,8 +378,8 @@ test_that("type checks with is() giving R types", { test_that("type checks with is.*()", { library(bit64) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( chr_is_chr = is.character(chr), chr_is_dbl = is.double(chr), @@ -425,8 +425,8 @@ test_that("type checks with is.*()", { collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( i64_is_chr = is.character(i64), # TODO: investigate why this is not matching when testthat runs it @@ -457,8 +457,8 @@ test_that("type checks with is.*()", { test_that("type checks with is_*()", { library(rlang, warn.conflicts = FALSE) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( chr_is_chr = is_character(chr), chr_is_dbl = is_double(chr), @@ -487,8 +487,8 @@ test_that("type checks with is_*()", { }) test_that("type checks on expressions", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( a = is.character(as.character(int)), b = is.integer(as.character(int)), @@ -503,8 +503,8 @@ test_that("type checks on expressions", { # the code in the expectation below depends on RE2 skip_if_not_available("re2") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( a = is.logical(grepl("[def]", chr)) ) %>% @@ -514,8 +514,8 @@ test_that("type checks on expressions", { }) test_that("type checks on R scalar literals", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( chr_is_chr = is.character("foo"), int_is_chr = is.character(42L), @@ -543,16 +543,16 @@ test_that("as.factor()/dictionary_encode()", { df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B")) df2 <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = as.factor(x)) %>% collect(), df1 ) expect_warning( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(x = as.factor(x)) %>% collect(), df2 @@ -593,8 +593,8 @@ test_that("bad explicit type conversions with as.*()", { # Arrow returns lowercase "true", "false" (instead of "TRUE", "FALSE" like R) expect_error( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(lgl2chr = as.character(lgl)) %>% collect(), tibble(lgl = c(TRUE, FALSE, NA)) @@ -605,8 +605,8 @@ test_that("bad explicit type conversions with as.*()", { # a warning like R does) expect_error( expect_warning( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(chr2num = as.numeric(chr)) %>% collect(), tibble(chr = c("l.O", "S.S", "")) @@ -617,8 +617,8 @@ test_that("bad explicit type conversions with as.*()", { # Arrow fails to parse these strings as Booleans (instead of returning NAs # like R does) expect_error( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(chr2lgl = as.logical(chr)) %>% collect(), tibble(chr = c("TRU", "FAX", "")) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index 9f1d385..7cfcfb5 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -23,8 +23,8 @@ library(stringr) tbl <- example_data test_that("group_by groupings are recorded", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(chr) %>% select(int, chr) %>% filter(int > 5) %>% @@ -34,20 +34,20 @@ test_that("group_by groupings are recorded", { }) test_that("group_by supports creating/renaming", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(chr, numbers = int) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(chr, numbers = int * 4) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(int > 4, lgl, foo = int > 5) %>% collect(), tbl @@ -55,8 +55,8 @@ test_that("group_by supports creating/renaming", { }) test_that("ungroup", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(chr) %>% select(int, chr) %>% ungroup() %>% @@ -66,11 +66,11 @@ test_that("ungroup", { ) # to confirm that the above expectation is actually testing what we think it's - # testing, verify that expect_dplyr_equal() distinguishes between grouped and + # testing, verify that compare_dplyr_binding() distinguishes between grouped and # ungrouped tibbles expect_error( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(chr) %>% select(int, chr) %>% (function(x) if (inherits(x, "tbl_df")) ungroup(x) else x) %>% @@ -82,8 +82,8 @@ test_that("ungroup", { }) test_that("group_by then rename", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(chr) %>% select(string = chr, int) %>% collect(), @@ -93,14 +93,14 @@ test_that("group_by then rename", { test_that("group_by with .drop", { test_groups <- c("starting_a_fight", "consoling_a_child", "petting_a_dog") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(!!!syms(test_groups), .drop = TRUE) %>% collect(), example_with_logical_factors ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(!!!syms(test_groups), .drop = FALSE) %>% collect(), example_with_logical_factors @@ -131,25 +131,25 @@ test_that("group_by with .drop", { group_by_drop_default(), TRUE ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(.drop = FALSE) %>% # no group by vars group_by_drop_default(), example_with_logical_factors ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by_drop_default(), example_with_logical_factors ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(!!!syms(test_groups)) %>% group_by_drop_default(), example_with_logical_factors ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(!!!syms(test_groups), .drop = FALSE) %>% ungroup() %>% group_by_drop_default(), diff --git a/r/tests/testthat/test-dplyr-join.R b/r/tests/testthat/test-dplyr-join.R index 03c1705..3ff9ad8 100644 --- a/r/tests/testthat/test-dplyr-join.R +++ b/r/tests/testthat/test-dplyr-join.R @@ -40,8 +40,8 @@ to_join_tab <- Table$create(to_join) test_that("left_join", { expect_message( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% left_join(to_join) %>% collect(), left @@ -51,14 +51,14 @@ test_that("left_join", { }) test_that("left_join `by` args", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% left_join(to_join, by = "some_grouping") %>% collect(), left ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% left_join( to_join %>% rename(the_grouping = some_grouping), @@ -70,8 +70,8 @@ test_that("left_join `by` args", { # TODO: allow renaming columns on the right side as well skip("ARROW-14184") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% rename(the_grouping = some_grouping) %>% left_join( to_join, @@ -108,8 +108,8 @@ test_that("Error handling", { # TODO: casting: int and float columns? test_that("right_join", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% right_join(to_join, by = "some_grouping") %>% collect(), left @@ -117,8 +117,8 @@ test_that("right_join", { }) test_that("inner_join", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% inner_join(to_join, by = "some_grouping") %>% collect(), left @@ -126,8 +126,8 @@ test_that("inner_join", { }) test_that("full_join", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% full_join(to_join, by = "some_grouping") %>% collect(), left @@ -135,8 +135,8 @@ test_that("full_join", { }) test_that("semi_join", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% semi_join(to_join, by = "some_grouping") %>% collect(), left @@ -144,8 +144,8 @@ test_that("semi_join", { }) test_that("anti_join", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% anti_join(to_join, by = "some_grouping") %>% collect(), left diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 419c583..886ec9e 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -35,8 +35,8 @@ test_that("mutate() is lazy", { }) test_that("basic mutate", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% filter(int > 5) %>% mutate(int = int + 6L) %>% @@ -46,8 +46,8 @@ test_that("basic mutate", { }) test_that("mutate() with NULL inputs", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(int = NULL) %>% collect(), tbl @@ -55,8 +55,8 @@ test_that("mutate() with NULL inputs", { }) test_that("empty mutate()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate() %>% collect(), tbl @@ -64,8 +64,8 @@ test_that("empty mutate()", { }) test_that("transmute", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% filter(int > 5) %>% transmute(int = int + 6L) %>% @@ -75,8 +75,8 @@ test_that("transmute", { }) test_that("transmute() with NULL inputs", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute(int = NULL) %>% collect(), tbl @@ -84,8 +84,8 @@ test_that("transmute() with NULL inputs", { }) test_that("empty transmute()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute() %>% collect(), tbl @@ -128,8 +128,8 @@ test_that("transmute() defuses dots arguments (ARROW-13262)", { }) test_that("mutate and refer to previous mutants", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, verses) %>% mutate( line_lengths = nchar(verses), @@ -142,8 +142,8 @@ test_that("mutate and refer to previous mutants", { }) test_that("nchar() arguments", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, verses) %>% mutate( line_lengths = nchar(verses, type = "bytes"), @@ -154,8 +154,8 @@ test_that("nchar() arguments", { tbl ) # This tests the whole abandon_ship() machinery - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, verses) %>% mutate( line_lengths = nchar(verses, type = "bytes", allowNA = TRUE), @@ -172,8 +172,8 @@ test_that("nchar() arguments", { }) test_that("mutate with .data pronoun", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, verses) %>% mutate( line_lengths = str_length(verses), @@ -186,8 +186,8 @@ test_that("mutate with .data pronoun", { }) test_that("mutate with unnamed expressions", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, padded_strings) %>% mutate( int, # bare column name @@ -200,8 +200,8 @@ test_that("mutate with unnamed expressions", { }) test_that("mutate with reassigning same name", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% transmute( new = lgl, new = chr @@ -212,8 +212,8 @@ test_that("mutate with reassigning same name", { }) test_that("mutate with single value for recycling", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, padded_strings) %>% mutate( dr_bronner = 1 # ALL ONE! @@ -225,8 +225,8 @@ test_that("mutate with single value for recycling", { test_that("dplyr::mutate's examples", { # Newly created variables are available immediately - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(name, mass) %>% mutate( mass2 = mass * 2, @@ -238,8 +238,8 @@ test_that("dplyr::mutate's examples", { # As well as adding new variables, you can use mutate() to # remove variables and modify existing variables. - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(name, height, mass, homeworld) %>% mutate( mass = NULL, @@ -253,8 +253,8 @@ test_that("dplyr::mutate's examples", { # but warn that they're pulling data into R to do so # across and autosplicing: ARROW-11699 - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(name, homeworld, species) %>% mutate(across(!name, as.factor)) %>% collect(), @@ -263,8 +263,8 @@ test_that("dplyr::mutate's examples", { ) # group_by then mutate - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(name, mass, homeworld) %>% group_by(homeworld) %>% mutate(rank = min_rank(desc(mass))) %>% @@ -275,8 +275,8 @@ test_that("dplyr::mutate's examples", { # `.before` and `.after` experimental args: ARROW-11701 df <- tibble(x = 1, y = 2) - expect_dplyr_equal( - input %>% mutate(z = x + y) %>% collect(), + compare_dplyr_binding( + .input %>% mutate(z = x + y) %>% collect(), df ) #> # A tibble: 1 x 3 @@ -284,16 +284,16 @@ test_that("dplyr::mutate's examples", { #> <dbl> <dbl> <dbl> #> 1 1 2 3 - expect_dplyr_equal( - input %>% mutate(z = x + y, .before = 1) %>% collect(), + compare_dplyr_binding( + .input %>% mutate(z = x + y, .before = 1) %>% collect(), df ) #> # A tibble: 1 x 3 #> z x y #> <dbl> <dbl> <dbl> #> 1 3 1 2 - expect_dplyr_equal( - input %>% mutate(z = x + y, .after = x) %>% collect(), + compare_dplyr_binding( + .input %>% mutate(z = x + y, .after = x) %>% collect(), df ) #> # A tibble: 1 x 3 @@ -304,32 +304,32 @@ test_that("dplyr::mutate's examples", { # By default, mutate() keeps all columns from the input data. # Experimental: You can override with `.keep` df <- tibble(x = 1, y = 2, a = "a", b = "b") - expect_dplyr_equal( - input %>% mutate(z = x + y, .keep = "all") %>% collect(), # the default + compare_dplyr_binding( + .input %>% mutate(z = x + y, .keep = "all") %>% collect(), # the default df ) #> # A tibble: 1 x 5 #> x y a b z #> <dbl> <dbl> <chr> <chr> <dbl> #> 1 1 2 a b 3 - expect_dplyr_equal( - input %>% mutate(z = x + y, .keep = "used") %>% collect(), + compare_dplyr_binding( + .input %>% mutate(z = x + y, .keep = "used") %>% collect(), df ) #> # A tibble: 1 x 3 #> x y z #> <dbl> <dbl> <dbl> #> 1 1 2 3 - expect_dplyr_equal( - input %>% mutate(z = x + y, .keep = "unused") %>% collect(), + compare_dplyr_binding( + .input %>% mutate(z = x + y, .keep = "unused") %>% collect(), df ) #> # A tibble: 1 x 3 #> a b z #> <chr> <chr> <dbl> #> 1 a b 3 - expect_dplyr_equal( - input %>% mutate(z = x + y, .keep = "none") %>% collect(), # same as transmute() + compare_dplyr_binding( + .input %>% mutate(z = x + y, .keep = "none") %>% collect(), # same as transmute() df ) #> # A tibble: 1 x 1 @@ -342,8 +342,8 @@ test_that("dplyr::mutate's examples", { # tibbles because the expressions are computed within groups. # The following normalises `mass` by the global average: # TODO: ARROW-13926 - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>% collect(), @@ -353,16 +353,16 @@ test_that("dplyr::mutate's examples", { }) test_that("Can mutate after group_by as long as there are no aggregations", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% group_by(chr) %>% mutate(int = int + 6L) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(mean = int, chr) %>% # rename `int` to `mean` and use `mean` in `mutate()` to test that # `all_funs()` does not incorrectly identify it as an aggregate function @@ -498,8 +498,8 @@ test_that("mutate and pmin/pmax", { val3 = c(0, NA, NA) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( max_val_1 = pmax(val1, val2, val3), max_val_2 = pmax(val1, val2, val3, na.rm = TRUE), @@ -510,8 +510,8 @@ test_that("mutate and pmin/pmax", { df ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate( max_val_1 = pmax(val1 - 100, 200, val1 * 100, na.rm = TRUE), min_val_1 = pmin(val1 - 100, 100, val1 * 100, na.rm = TRUE), diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index 07cdf08..21a55f4 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -43,8 +43,8 @@ test_that("basic select/filter/collect", { }) test_that("dim() on query", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int > 5) %>% select(int, chr) %>% dim(), @@ -71,20 +71,20 @@ See $.data for the source Arrow object', }) test_that("pull", { - expect_dplyr_equal( - input %>% pull(), + compare_dplyr_binding( + .input %>% pull(), tbl ) - expect_dplyr_equal( - input %>% pull(1), + compare_dplyr_binding( + .input %>% pull(1), tbl ) - expect_dplyr_equal( - input %>% pull(chr), + compare_dplyr_binding( + .input %>% pull(chr), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int > 4) %>% rename(strng = chr) %>% pull(strng), @@ -224,8 +224,9 @@ test_that("head", { }) test_that("arrange then head returns the right data (ARROW-14162)", { - expect_dplyr_equal( - input %>% + + compare_dplyr_binding( + .input %>% # mpg has ties so we need to sort by two things to get deterministic order arrange(mpg, disp) %>% head(4) %>% @@ -236,8 +237,8 @@ test_that("arrange then head returns the right data (ARROW-14162)", { }) test_that("arrange then tail returns the right data", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% # mpg has ties so we need to sort by two things to get deterministic order arrange(mpg, disp) %>% tail(4) %>% diff --git a/r/tests/testthat/test-dplyr-select.R b/r/tests/testthat/test-dplyr-select.R index 35c149c..2ca2b10 100644 --- a/r/tests/testthat/test-dplyr-select.R +++ b/r/tests/testthat/test-dplyr-select.R @@ -23,16 +23,16 @@ library(stringr) tbl <- example_data test_that("Empty select returns no columns", { - expect_dplyr_equal( - input %>% select() %>% collect(), + compare_dplyr_binding( + .input %>% select() %>% collect(), tbl, skip_table = "Table with 0 cols doesn't know how many rows it should have" ) }) test_that("Empty select still includes the group_by columns", { expect_message( - expect_dplyr_equal( - input %>% group_by(chr) %>% select() %>% collect(), + compare_dplyr_binding( + .input %>% group_by(chr) %>% select() %>% collect(), tbl ), "Adding missing grouping variables" @@ -40,20 +40,20 @@ test_that("Empty select still includes the group_by columns", { }) test_that("select/rename", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(string = chr, int) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% rename(string = chr) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% rename(strng = chr) %>% rename(other = strng) %>% collect(), @@ -66,8 +66,8 @@ test_that("select/rename with selection helpers", { # TODO: add some passing tests here expect_error( - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(where(is.numeric)) %>% collect(), tbl @@ -77,15 +77,15 @@ test_that("select/rename with selection helpers", { }) test_that("filtering with rename", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(chr == "b") %>% select(string = chr, int) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(string = chr, int) %>% filter(string == "b") %>% collect(), @@ -95,49 +95,49 @@ test_that("filtering with rename", { test_that("relocate", { df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a") - expect_dplyr_equal( - input %>% relocate(f) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(f) %>% collect(), df, ) - expect_dplyr_equal( - input %>% relocate(a, .after = c) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(a, .after = c) %>% collect(), df, ) - expect_dplyr_equal( - input %>% relocate(f, .before = b) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(f, .before = b) %>% collect(), df, ) - expect_dplyr_equal( - input %>% relocate(a, .after = last_col()) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(a, .after = last_col()) %>% collect(), df, ) - expect_dplyr_equal( - input %>% relocate(ff = f) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(ff = f) %>% collect(), df, ) }) test_that("relocate with selection helpers", { df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a") - expect_dplyr_equal( - input %>% relocate(any_of(c("a", "e", "i", "o", "u"))) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(any_of(c("a", "e", "i", "o", "u"))) %>% collect(), df ) - expect_dplyr_equal( - input %>% relocate(where(is.character)) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(where(is.character)) %>% collect(), df ) - expect_dplyr_equal( - input %>% relocate(a, b, c, .after = where(is.character)) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(a, b, c, .after = where(is.character)) %>% collect(), df ) - expect_dplyr_equal( - input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(), + compare_dplyr_binding( + .input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(), df ) # works after other dplyr verbs - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(c = as.character(c)) %>% relocate(d, e, f, .after = where(is.numeric)) %>% collect(), diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index a722e52..3988412 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -45,14 +45,14 @@ test_that("summarize() doesn't evaluate eagerly", { }) test_that("Can aggregate in Arrow", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(total = sum(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(total = sum(int)) %>% collect(), tbl @@ -60,24 +60,24 @@ test_that("Can aggregate in Arrow", { }) test_that("Group by sum on dataset", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(total = sum(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(total = sum(int * 4, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(total = sum(int)) %>% collect(), @@ -86,16 +86,16 @@ test_that("Group by sum on dataset", { }) test_that("Group by mean on dataset", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(mean = mean(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(mean = mean(int, na.rm = FALSE)) %>% collect(), @@ -104,16 +104,16 @@ test_that("Group by mean on dataset", { }) test_that("Group by sd on dataset", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(sd = sd(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(sd = sd(int, na.rm = FALSE)) %>% collect(), @@ -122,16 +122,16 @@ test_that("Group by sd on dataset", { }) test_that("Group by var on dataset", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(var = var(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(var = var(int, na.rm = FALSE)) %>% collect(), @@ -140,15 +140,15 @@ test_that("Group by var on dataset", { }) test_that("n()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(counts = n()) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(counts = n()) %>% arrange(some_grouping) %>% @@ -158,53 +158,53 @@ test_that("n()", { }) test_that("Group by any/all", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(any(lgl, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(all(lgl, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(any(lgl, na.rm = FALSE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(all(lgl, na.rm = FALSE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(has_words = nchar(verses) < 0) %>% group_by(some_grouping) %>% summarize(any(has_words, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(has_words = nchar(verses) < 0) %>% group_by(some_grouping) %>% summarize(all(has_words, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(has_words = all(nchar(verses) < 0, na.rm = TRUE)) %>% collect(), @@ -214,43 +214,43 @@ test_that("Group by any/all", { test_that("n_distinct() on dataset", { # With groupby - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(distinct = n_distinct(lgl, na.rm = FALSE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>% collect(), tbl ) # Without groupby - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(distinct = n_distinct(lgl, na.rm = FALSE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(distinct = n_distinct(int, lgl)) %>% collect(), tbl, warning = "Multiple arguments" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(distinct = n_distinct(int, lgl)) %>% collect(), @@ -260,15 +260,15 @@ test_that("n_distinct() on dataset", { }) test_that("Functions that take ... but we only accept a single arg", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(distinct = n_distinct()) %>% collect(), tbl, warning = "0 arguments" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(distinct = n_distinct(int, lgl)) %>% collect(), tbl, @@ -301,8 +301,8 @@ test_that("median()", { local_edition(2) # with groups - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( med_dbl = median(dbl), @@ -318,8 +318,8 @@ test_that("median()", { warning = "median\\(\\) currently returns an approximate median in Arrow" ) # without groups, with na.rm = TRUE - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize( med_dbl_narmt = median(dbl, na.rm = TRUE), med_int_narmt = as.double(median(int, TRUE)) @@ -329,8 +329,8 @@ test_that("median()", { warning = "median\\(\\) currently returns an approximate median in Arrow" ) # without groups, with na.rm = FALSE (the default) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize( med_dbl = median(dbl), med_int = as.double(median(int)), @@ -354,8 +354,8 @@ test_that("quantile()", { # controls whether the result has a names attribute. It defaults to # names = TRUE. With Arrow, it is not possible to give the result a names # attribute, so the quantile() binding in Arrow does not accept a `names` - # argument. Differences in this names attribute cause expect_dplyr_equal() to - # report that the objects are not equal, so we do not use expect_dplyr_equal() + # argument. Differences in this names attribute cause compare_dplyr_binding() to + # report that the objects are not equal, so we do not use compare_dplyr_binding() # in the tests below. # The tests below all use probs = 0.5 because other values cause differences @@ -443,16 +443,16 @@ test_that("quantile()", { }) test_that("summarize() with min() and max()", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% filter(int > 5) %>% # this filters out the NAs in `int` summarize(min_int = min(int), max_int = max(int)) %>% collect(), tbl, ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% filter(int > 5) %>% # this filters out the NAs in `int` summarize( @@ -462,15 +462,15 @@ test_that("summarize() with min() and max()", { collect(), tbl, ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% summarize(min_int = min(int), max_int = max(int)) %>% collect(), tbl, ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int) %>% summarize( min_int = min(int, na.rm = TRUE), @@ -479,8 +479,8 @@ test_that("summarize() with min() and max()", { collect(), tbl, ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(dbl, int) %>% summarize( min_int = -min(log(ceiling(dbl)), na.rm = TRUE), @@ -491,15 +491,15 @@ test_that("summarize() with min() and max()", { ) # multiple dots arguments to min(), max() not supported - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize(min_mult = min(dbl, int)) %>% collect(), tbl, warning = "Multiple arguments to min\\(\\) not supported by Arrow" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(int, dbl, dbl2) %>% summarize(max_mult = max(int, dbl, dbl2)) %>% collect(), @@ -509,8 +509,8 @@ test_that("summarize() with min() and max()", { # min(logical) or max(logical) yields integer in R # min(Boolean) or max(Boolean) yields Boolean in Arrow - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(lgl) %>% summarize( max_lgl = as.logical(max(lgl, na.rm = TRUE)), @@ -522,8 +522,8 @@ test_that("summarize() with min() and max()", { }) test_that("min() and max() on character strings", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% summarize( min_chr = min(chr, na.rm = TRUE), max_chr = max(chr, na.rm = TRUE) @@ -532,8 +532,8 @@ test_that("min() and max() on character strings", { tbl, ) skip("Strings not supported by hash_min_max (ARROW-13988)") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(fct) %>% summarize( min_chr = min(chr, na.rm = TRUE), @@ -548,8 +548,8 @@ test_that("summarise() with !!sym()", { test_chr_col <- "int" test_dbl_col <- "dbl" test_lgl_col <- "lgl" - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(false) %>% summarise( sum = sum(!!sym(test_dbl_col)), @@ -568,24 +568,24 @@ test_that("summarise() with !!sym()", { }) test_that("Filter and aggregate", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(some_grouping == 2) %>% summarize(total = sum(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int > 5) %>% summarize(total = sum(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(some_grouping == 2) %>% group_by(some_grouping) %>% summarize(total = sum(int, na.rm = TRUE)) %>% @@ -593,8 +593,8 @@ test_that("Filter and aggregate", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(int > 5) %>% group_by(some_grouping) %>% summarize(total = sum(int, na.rm = TRUE)) %>% @@ -604,16 +604,16 @@ test_that("Filter and aggregate", { }) test_that("Group by edge cases", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping * 2) %>% summarize(total = sum(int, na.rm = TRUE)) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(alt = some_grouping * 2) %>% summarize(total = sum(int, na.rm = TRUE)) %>% collect(), @@ -629,8 +629,8 @@ test_that("Do things after summarize", { pull() %>% tail(1) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% filter(int > 5) %>% summarize(total = sum(int, na.rm = TRUE)) %>% @@ -640,8 +640,8 @@ test_that("Do things after summarize", { tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% filter(dbl > 2) %>% select(chr, int, lgl) %>% mutate(twice = int * 2L) %>% @@ -658,8 +658,8 @@ test_that("Do things after summarize", { test_that("Expressions on aggregations", { # This is what it effectively is - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( any = any(lgl), @@ -672,8 +672,8 @@ test_that("Expressions on aggregations", { tbl ) # More concisely: - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize(any(lgl) & !all(lgl)) %>% collect(), @@ -681,8 +681,8 @@ test_that("Expressions on aggregations", { ) # Save one of the aggregates first - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( any_lgl = any(lgl), @@ -693,8 +693,8 @@ test_that("Expressions on aggregations", { ) # Make sure order of columns in result is correct - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( any_lgl = any(lgl), @@ -707,8 +707,8 @@ test_that("Expressions on aggregations", { # Aggregate on an aggregate (trivial but dplyr allows) skip("Aggregate on an aggregate not supported") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( any_lgl = any(any(lgl)) @@ -719,8 +719,8 @@ test_that("Expressions on aggregations", { }) test_that("Summarize with 0 arguments", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize() %>% collect(), @@ -730,8 +730,8 @@ test_that("Summarize with 0 arguments", { test_that("Not (yet) supported: implicit join", { withr::local_options(list(arrow.debug = TRUE)) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( sum((dbl - mean(dbl))^2) @@ -740,8 +740,8 @@ test_that("Not (yet) supported: implicit join", { tbl, warning = "Expression sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\) not supported in Arrow; pulling data into R" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( sum(dbl - mean(dbl)) @@ -750,8 +750,8 @@ test_that("Not (yet) supported: implicit join", { tbl, warning = "Expression sum\\(dbl - mean\\(dbl\\)\\) not supported in Arrow; pulling data into R" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( sqrt(sum((dbl - mean(dbl))^2) / (n() - 1L)) @@ -761,8 +761,8 @@ test_that("Not (yet) supported: implicit join", { warning = "Expression sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\) not supported in Arrow; pulling data into R" ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( dbl - mean(dbl) @@ -773,8 +773,8 @@ test_that("Not (yet) supported: implicit join", { ) # This one could possibly be supported--in mutate() - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping) %>% summarize( dbl - int @@ -786,36 +786,36 @@ test_that("Not (yet) supported: implicit join", { }) test_that(".groups argument", { - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping, int < 6) %>% summarize(count = n()) %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping, int < 6) %>% summarize(count = n(), .groups = "drop_last") %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping, int < 6) %>% summarize(count = n(), .groups = "keep") %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping, int < 6) %>% summarize(count = n(), .groups = "drop") %>% collect(), tbl ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(some_grouping, int < 6) %>% summarize(count = n(), .groups = "rowwise") %>% collect(), @@ -844,8 +844,8 @@ test_that("summarize() handles group_by .drop", { x = 1:10, y = factor(rep(c("a", "c"), each = 5), levels = c("a", "b", "c")) ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(y) %>% count() %>% collect() %>% @@ -853,8 +853,8 @@ test_that("summarize() handles group_by .drop", { tbl ) # Not supported: check message - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(y, .drop = FALSE) %>% count() %>% collect() %>% @@ -867,8 +867,8 @@ test_that("summarize() handles group_by .drop", { ) # But this is ok because there is no factor group - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(y, .drop = FALSE) %>% count() %>% collect() %>% diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index c560da7..4c4d8a7 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -315,26 +315,26 @@ test_that("When we encounter SF cols, we warn", { test_that("dplyr with metadata", { skip_if_not_available("dataset") - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% collect(), example_with_metadata ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% select(a) %>% collect(), example_with_metadata ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(z = b * 4) %>% select(z, a) %>% collect(), example_with_metadata ) - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(z = nchar(a)) %>% select(z, a) %>% collect(), @@ -342,8 +342,8 @@ test_that("dplyr with metadata", { ) # dplyr drops top-level attributes if you do summarize, though attributes # of grouping columns appear to come through - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% group_by(a) %>% summarize(n()) %>% collect(), @@ -351,8 +351,8 @@ test_that("dplyr with metadata", { ) # Same name in output but different data, so the column metadata shouldn't # carry through - expect_dplyr_equal( - input %>% + compare_dplyr_binding( + .input %>% mutate(a = nchar(a)) %>% select(a) %>% collect(), diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index 894dbe3..fafebb4 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -26,18 +26,18 @@ test_that("na.fail on Scalar", { }) test_that("na.omit on Array and ChunkedArray", { - expect_vector_equal(na.omit(input), data_no_na) - expect_vector_equal(na.omit(input), data_na, ignore_attr = TRUE) + compare_expression(na.omit(.input), data_no_na) + compare_expression(na.omit(.input), data_na, ignore_attr = TRUE) }) test_that("na.exclude on Array and ChunkedArray", { - expect_vector_equal(na.exclude(input), data_no_na) - expect_vector_equal(na.exclude(input), data_na, ignore_attr = TRUE) + compare_expression(na.exclude(.input), data_no_na) + compare_expression(na.exclude(.input), data_na, ignore_attr = TRUE) }) test_that("na.fail on Array and ChunkedArray", { - expect_vector_equal(na.fail(input), data_no_na, ignore_attr = TRUE) - expect_vector_error(na.fail(input), data_na) + compare_expression(na.fail(.input), data_no_na, ignore_attr = TRUE) + compare_expression_error(na.fail(.input), data_na) }) test_that("na.fail on Scalar", {
