jonkeane commented on a change in pull request #12339:
URL: https://github.com/apache/arrow/pull/12339#discussion_r804911698



##########
File path: r/tests/testthat/test-dplyr-join.R
##########
@@ -249,3 +249,24 @@ test_that("arrow dplyr query correctly filters then 
joins", {
     )
   )
 })
+
+
+test_that("arrow dplyr query can join with tibble", {
+  # ARROW-14908
+  existing_use_threads <- getOption("arrow.use_threads")
+  options(arrow.use_threads = FALSE)
+  dir_out <- tempdir()
+
+  # Note: Species is a DictionaryArray, but this still fails even if we 
convert to StringArray.
+  write_dataset(iris, file.path(dir_out, "iris"))
+  species_codes <- data.frame(Species = c("setosa", "versicolor", "virginica"),
+                              code = c("SET", "VER", "VIR"))
+
+  iris <- open_dataset(file.path(dir_out, "iris"))
+
+  res <- left_join(iris, species_codes) %>% collect() # We should not segfault 
here.
+  expect_equal(nrow(res), 150) 

Review comment:
       ```suggestion
     expect_equal(nrow(res), 150)
   ```

##########
File path: r/tests/testthat/test-dplyr-join.R
##########
@@ -249,3 +249,24 @@ test_that("arrow dplyr query correctly filters then 
joins", {
     )
   )
 })
+
+
+test_that("arrow dplyr query can join with tibble", {
+  # ARROW-14908
+  existing_use_threads <- getOption("arrow.use_threads")
+  options(arrow.use_threads = FALSE)

Review comment:
       Nice test! It might be better to use 
`withr::with_options(list(arrow.use_threads = FALSE), { ... })` so that you 
don't need to worry about resetting the option later.
   
   Similar to 
https://github.com/apache/arrow/blob/3b9462a4ffc9f1d20ffc4ba578adec0f0ed8ffbd/r/tests/testthat/test-parquet.R#L302-L313

##########
File path: r/tests/testthat/test-dplyr-join.R
##########
@@ -249,3 +249,24 @@ test_that("arrow dplyr query correctly filters then 
joins", {
     )
   )
 })
+
+
+test_that("arrow dplyr query can join with tibble", {
+  # ARROW-14908
+  existing_use_threads <- getOption("arrow.use_threads")
+  options(arrow.use_threads = FALSE)
+  dir_out <- tempdir()
+
+  # Note: Species is a DictionaryArray, but this still fails even if we 
convert to StringArray.
+  write_dataset(iris, file.path(dir_out, "iris"))
+  species_codes <- data.frame(Species = c("setosa", "versicolor", "virginica"),
+                              code = c("SET", "VER", "VIR"))
+
+  iris <- open_dataset(file.path(dir_out, "iris"))
+
+  res <- left_join(iris, species_codes) %>% collect() # We should not segfault 
here.
+  expect_equal(nrow(res), 150) 
+
+  # Reset
+  options(arrow.use_threads = existing_use_threads)
+})

Review comment:
       ```suggestion
   })
   
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to