nealrichardson commented on a change in pull request #11403:
URL: https://github.com/apache/arrow/pull/11403#discussion_r736507402
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -64,24 +65,34 @@ verify_output <- function(...) {
testthat::verify_output(...)
}
-#' @param expr A dplyr pipeline with `input` as its start
-#' @param tbl A tbl/df as reference, will make RB/Table with
-#' @param skip_record_batch string skip message, if should skip RB test
-#' @param skip_table string skip message, if should skip Table test
-#' @param warning string expected warning from the RecordBatch and Table paths,
-#' passed to `expect_warning()`. Special values:
+#' Compare dplyr binding
+#'
+#' This function compares the output of running a dplyr expression on a tibble
+#' or data.frame object against the output of the same expression run on
+#' Arrow Table and RecordBatch objects.
+#'
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param skip_record_batch The skip message to show (if you should skip the
+#' RecordBatch test)
+#' @param skip_table The skip message to show (if you should skip the Table
test)
+#' @param warning The expected warning from the RecordBatch and Table
comparison
+#' paths, passed to `expect_warning()`. Special values:
#' * `NA` (the default) for ensuring no warning message
#' * `TRUE` is a special case to mean to check for the
#' "not supported in Arrow; pulling data into R" message.
#' @param ... additional arguments, passed to `expect_equal()`
-expect_dplyr_equal <- function(expr,
- tbl,
- skip_record_batch = NULL,
- skip_table = NULL,
- warning = NA,
- ...) {
+compare_dplyr_binding <- function(expr,
+ tbl,
+ skip_record_batch = NULL,
+ skip_table = NULL,
+ warning = NA,
+ ...) {
+
+ # Quote the contents of `expr` and evaluate it to get the expected value
expr <- rlang::enquo(expr)
- expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input =
tbl)))
+ expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input =
tbl)))
Review comment:
```suggestion
# Get the expected output by evaluating expr on the .input data.frame
using regular dplyr
expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input
= tbl)))
```
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -122,21 +135,30 @@ expect_dplyr_equal <- function(expr,
}
}
-expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its
start
- tbl, # A tbl/df as reference, will make
RB/Table with
- ...) {
+#' Compare dplyr error
Review comment:
```suggestion
#' Assert that Arrow dplyr methods error in the same way as methods on
data.frame
```
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -64,24 +65,31 @@ verify_output <- function(...) {
testthat::verify_output(...)
}
-#' @param expr A dplyr pipeline with `input` as its start
-#' @param tbl A tbl/df as reference, will make RB/Table with
-#' @param skip_record_batch string skip message, if should skip RB test
-#' @param skip_table string skip message, if should skip Table test
-#' @param warning string expected warning from the RecordBatch and Table paths,
+#' Compare dplyr binding
+#'
+#' Comparing the output of running expressions on R objects against the same
+#' expression run on Arrow Tables and RecordBatches.
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param skip_record_batch The skip message to show (if you should skip the
RecordBatch test)
+#' @param skip_table The skip message to show (if you should skip the Table
test)
+#' @param warning The expected warning from the RecordBatch and Table
comparison paths,
#' passed to `expect_warning()`. Special values:
#' * `NA` (the default) for ensuring no warning message
#' * `TRUE` is a special case to mean to check for the
#' "not supported in Arrow; pulling data into R" message.
#' @param ... additional arguments, passed to `expect_equal()`
-expect_dplyr_equal <- function(expr,
- tbl,
- skip_record_batch = NULL,
- skip_table = NULL,
- warning = NA,
- ...) {
+compare_dplyr_binding <- function(expr,
+ tbl,
+ skip_record_batch = NULL,
+ skip_table = NULL,
+ warning = NA,
+ ...) {
+
+ # Quote the contents of `expr` and evaluate it to get the expected value
Review comment:
```suggestion
# Quote the contents of `expr` so that we can evaluate it a few different
ways
```
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -158,35 +180,46 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline
with `input` as its star
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = record_batch(tbl)))
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
),
msg,
...
)
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = arrow_table(tbl)))
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
),
msg,
...
)
}
-expect_vector_equal <- function(expr, # A vectorized R expression containing
`input` as its input
- vec, # A vector as reference, will make
Array/ChunkedArray with
- skip_array = NULL, # Msg, if should skip Array
test
- skip_chunked_array = NULL, # Msg, if should
skip ChunkedArray test
- ignore_attr = FALSE, # ignore attributes?
- ...) {
+#' Compare vector
+#'
+#' Comparing the output of running expressions on R vectors against the same
+#' expression run on Arrow Arrays and ChunkedArrays.
+#'
+#' @param expr A vectorized R expression which must have `.input` as its start
+#' @param tbl A vector which will be substituted for `.input`
Review comment:
```suggestion
#' @param vec A vector which will be substituted for `.input`
```
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -64,24 +65,31 @@ verify_output <- function(...) {
testthat::verify_output(...)
}
-#' @param expr A dplyr pipeline with `input` as its start
-#' @param tbl A tbl/df as reference, will make RB/Table with
-#' @param skip_record_batch string skip message, if should skip RB test
-#' @param skip_table string skip message, if should skip Table test
-#' @param warning string expected warning from the RecordBatch and Table paths,
+#' Compare dplyr binding
Review comment:
Since that is just the name of the function written without underscores,
let's give a more descriptive title
```suggestion
#' Ensure that dplyr methods on Arrow objects return the same as for data
frames
```
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -158,35 +180,46 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline
with `input` as its star
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = record_batch(tbl)))
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
),
msg,
...
)
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = arrow_table(tbl)))
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
),
msg,
...
)
}
-expect_vector_equal <- function(expr, # A vectorized R expression containing
`input` as its input
- vec, # A vector as reference, will make
Array/ChunkedArray with
- skip_array = NULL, # Msg, if should skip Array
test
- skip_chunked_array = NULL, # Msg, if should
skip ChunkedArray test
- ignore_attr = FALSE, # ignore attributes?
- ...) {
+#' Compare vector
Review comment:
Can you give the rest of these better single line titles (or delete
these "titles" that just restate the function name)?
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -158,35 +180,46 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline
with `input` as its star
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = record_batch(tbl)))
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
),
msg,
...
)
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = arrow_table(tbl)))
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
),
msg,
...
)
}
-expect_vector_equal <- function(expr, # A vectorized R expression containing
`input` as its input
- vec, # A vector as reference, will make
Array/ChunkedArray with
- skip_array = NULL, # Msg, if should skip Array
test
- skip_chunked_array = NULL, # Msg, if should
skip ChunkedArray test
- ignore_attr = FALSE, # ignore attributes?
- ...) {
+#' Compare vector
+#'
+#' Comparing the output of running expressions on R vectors against the same
+#' expression run on Arrow Arrays and ChunkedArrays.
+#'
+#' @param expr A vectorized R expression which must have `.input` as its start
+#' @param tbl A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array
test)
+#' @param skip_chunked_array The skip message to show (if you should skip the
ChunkedArray test)
+#' @param ignore_attr Ignore differences in specified attributes?
+#' @param ... additional arguments, passed to `expect_as_vector()`
+compare_vector <- function(expr,
Review comment:
Not that `expect_vector_equal` is great, but IMO `compare_vector` isn't
an improvement in clarity.
##########
File path: r/tests/testthat/helper-expectation.R
##########
@@ -28,6 +28,7 @@ expect_r6_class <- function(object, class) {
expect_s3_class(object, "R6")
}
+#' Redefines `expect_equal()` so it can run on objects with class `ArrowObject`
Review comment:
```suggestion
#' Mask `testthat::expect_equal()` in order to compare ArrowObjects using
their
#' `Equals` methods from the C++ library.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]