ianmcook commented on a change in pull request #9999:
URL: https://github.com/apache/arrow/pull/9999#discussion_r620578035
##########
File path: r/tests/testthat/test-dplyr-mutate.R
##########
@@ -415,3 +412,154 @@ test_that("mutate and write_dataset", {
summarize(mean = mean(integer))
)
})
+
+# PACHA ADDITIONS ----
+# READ THIS CAREFULLY PLEASE, IT'S MY 1ST DAY WRITING THIS KIND OF SENSITIVE
TEST
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L1-L10
+# the rest of that test belongs in L55-62 here
+test_that("empty mutate returns input", {
+ # dbl2 = 5, so I'm grouping by a constant
+ gtbl <- group_by(tbl, dbl2)
+
+ expect_dplyr_equal(input %>% mutate() %>% collect(), tbl)
+ expect_dplyr_equal(input %>% mutate(!!!list()) %>% collect(), tbl)
+ expect_dplyr_equal(input %>% mutate() %>% collect(), gtbl)
+ expect_dplyr_equal(input %>% mutate(!!!list()) %>% collect(), gtbl)
+})
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L12-L16
+test_that("rownames preserved", {
+ skip("Row names are not preserved")
+ df <- data.frame(x = c(1, 2), row.names = c("a", "b"))
+ expect_dplyr_equal(input %>% mutate(y = c(3, 4)) %>% collect() %>%
rownames(), df)
+})
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L18-L29
+test_that("mutations applied progressively", {
+ df <- tibble(x = 1)
+
+ expect_dplyr_equal(
+ input %>% mutate(y = x + 1, z = y + 1) %>% collect(),
+ df
+ )
+ expect_dplyr_equal(
+ input %>% mutate(x = x + 1, x = x + 1) %>% collect(),
+ df
+ )
+ expect_dplyr_equal(
+ input %>% mutate(y = x + 1, z = y + 1) %>% collect(),
+ df
+ )
+
+ df <- data.frame(x = 1, y = 2)
+ expect_equal(
+ df %>% Table$create() %>% mutate(x2 = x, x3 = x2 + 1) %>% collect(),
+ df %>% Table$create() %>% mutate(x2 = x + 0, x3 = x2 + 1) %>% collect()
+ )
+})
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L37-L54
+test_that("can remove variables with NULL (dplyr #462)", {
+ df <- tibble(x = 1:3, y = 1:3)
+ gf <- group_by(df, x)
+
+ expect_dplyr_equal(input %>% mutate(y = NULL) %>% collect(), df)
+ expect_dplyr_equal(input %>% mutate(y = NULL) %>% collect(), gf)
+
+ # even if it doesn't exist
+ expect_dplyr_equal(input %>% mutate(z = NULL) %>% collect(), df)
+ # or was just created
+ expect_dplyr_equal(input %>% mutate(z = rep(1, nrow(input)), z = NULL) %>%
collect(), df)
+
+ # regression test for https://github.com/tidyverse/dplyr/issues/4974
+ expect_dplyr_equal(
+ input %>% mutate(z = 1, x = NULL, y = NULL) %>% collect(),
+ data.frame(x = 1, y = 1)
+ )
+})
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L71-L75
+# test_that("assignments don't overwrite variables (dplyr #315)", {
+# expect_dplyr_equal(
+# tibble(x = 1, y = 2) %>% mutate(z = {x <- 10; x}) %>% collect(),
+# tibble(x = 1, y = 2, z = 10)
+# )
+# })
+# NOT SURE ABOUT THIS!
+test_that("assignments don't overwrite variables (dplyr #315)", {
+ expect_dplyr_equal(
+ input %>% mutate(z = {x <- 10; x}) %>% collect(),
+ tibble(x = 1, y = 2, z = 10)
+ )
+})
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L77-L81
+# NOT SURE ABOUT THIS!
+# test_that("can mutate a data frame with zero columns and `NULL` column
names", {
+# df <- vctrs::new_data_frame(n = 2L)
+# colnames(df) <- NULL
+# expect_dplyr_equal(
+# input %>% mutate(x = 1) %>% collect(),
+# df
+# )
+# })
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L95-L100
+# glue is a dependency of tidyselect
+test_that("glue() is supported", {
+ expect_dplyr_equal(
+ input %>% mutate(y = glue::glue("")) %>% collect(),
+ tibble(x = 1, y = glue::glue(""))
+ )
+})
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L102-L106
+test_that("mutate disambiguates NA and NaN (#1448)", {
+ expect_dplyr_equal(
+ input %>% mutate(y = x * 1) %>% select(y) %>% collect(),
+ tibble(x = c(1, NA, NaN))
+ )
+})
+
+# similar to
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L102-L106
+# this is somewhat "contained" in the previous test
+# test_that("mutate handles data frame columns", {
+# expect_dplyr_equal(
+# input %>% mutate(new_col = data.frame(x = 1:3)) %>% select(new_col) %>%
collect(),
+# data.frame(x = 1:3)
+# )
+#
+# # mutate() on grouped data not supported in Arrow; this will be pulling
data into R
+# # expect_dplyr_equal(
+# # input %>%
+# # group_by(x) %>%
+# # mutate(new_col = x) %>%
+# # ungroup() %>%
+# # select(new_col) %>%
+# # collect(),
+# # data.frame(x = 1:3)
+# # )
+#
+# # ROWWISE IS NOT IMPLEMENTED
+# # expect_dplyr_equal(
+# # input %>%
+# # rowwise(x) %>%
+# # mutate(new_col = x) %>%
+# # ungroup() %>%
+# # select(new_col) %>%
+# # collect(),
+# # data.frame(x = 1:3)
+# # )
+# })
+
+# QUESTIONS SO FAR ----
+
+#
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L56-L59
+#
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L108-L115
+# does it make sense to create expect_dplyr_named() to mimic the behaviour
from dplyr tests?
+
+#
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L61-L69
+#
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L83-L91
+#
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r#L129-L142
+# does it make sense to create expect_dplyr_identical() to mimic the behaviour
from dplyr tests?
Review comment:
I don't think we need a function `expect_dplyr_identical()`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]