dragosmg commented on code in PR #13541:
URL: https://github.com/apache/arrow/pull/13541#discussion_r923311158
##########
r/tests/testthat/test-dplyr-query.R:
##########
@@ -433,3 +433,131 @@ test_that("query_can_stream()", {
query_can_stream()
)
})
+
+test_that("show_exec_plan()", {
+ # minimal test - this fails if we don't coerce the input to `show_exec_plan()`
+ # to be an `arrow_dplyr_query`
+ expect_output(
+ mtcars %>%
+ arrow_table() %>%
+ show_exec_plan(),
+ regexp = paste0(
+ "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
+ "ProjectNode.*", # this node would evaluate Expressions to
produce new columns
+ "TableSourceNode" # the entry point
+ )
+ )
+
+ # arrow_table and mutate
+ expect_output(
+ tbl %>%
+ arrow_table() %>%
+ filter(dbl > 2, chr != "e") %>%
+ select(chr, int, lgl) %>%
+ mutate(int_plus_ten = int + 10) %>%
+ show_exec_plan(),
+ regexp = paste0(
+ "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
+ "chr, int, lgl, \"int_plus_ten\".*", # selected columns
+ "FilterNode.*", # the filter node
+ "(dbl > 2).*", # the filter expressions
+ "chr != \"e\".*",
+ "TableSourceNode" # the entry point
+ )
+ )
+
+ # record_batch and mutate
+ expect_output(
+ tbl %>%
+ record_batch() %>%
+ filter(dbl > 2, chr != "e") %>%
+ select(chr, int, lgl) %>%
+ mutate(int_plus_ten = int + 10) %>%
+ show_exec_plan(),
+ regexp = paste0(
+ "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
+ "chr, int, lgl, \"int_plus_ten\".*", # selected columns
+ "(dbl > 2).*", # the filter expressions
+ "chr != \"e\".*",
+ "TableSourceNode" # the entry point"
+ )
+ )
+
+ # test with group_by and summarise
+ expect_output(
+ tbl %>%
+ arrow_table() %>%
+ group_by(lgl) %>%
+ summarise(avg = mean(dbl, na.rm = TRUE)) %>%
+ ungroup() %>%
+ show_exec_plan(),
+ regexp = paste0(
+ "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
+ "GroupByNode.*", # the group_by statement
+ "keys.*\"lgl\".*", # the key for the aggregations
+ "aggregates.*\\thash_mean.*avg.*skip_nulls=true, min_count=0.*", # the
aggregations
+ "ProjectNode.*", # the output columns
+ "TableSourceNode" # the entry point
+ )
+ )
+
+ # test with join
+ expect_output(
+ tbl %>%
+ arrow_table() %>%
+ left_join(
+ example_data %>%
+ arrow_table() %>%
+ mutate(doubled_dbl = dbl * 2) %>%
+ select(int, doubled_dbl),
+ by = "int"
+ ) %>%
+ select(int, verses, doubled_dbl) %>%
+ show_exec_plan(),
+ regexp = paste0(
+ "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
+ "HashJoinNode.*", # the join
+ "ProjectNode.*", # the output columns for the
second table
+ "\"doubled_dbl\"\\: multiply_checked\\(dbl, 2\\).*", # the mutate
+ "TableSourceNode.*", # the second table
+ "ProjectNode.*", # output columns for the first
table
+ "TableSourceNode" # the first table
+ )
+ )
+
+ expect_output(
+ mtcars %>%
+ arrow_table() %>%
+ filter(mpg > 20) %>%
+ arrange(desc(wt)) %>%
+ show_exec_plan(),
+ regexp = paste0(
+ "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
+ "ProjectNode.*", # output columns
+ "order_by_sink.*", # there should be something here regarding
Review Comment:
I'm going to mark this as resolved, as these were only placeholders.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]