nealrichardson commented on code in PR #12826:
URL: https://github.com/apache/arrow/pull/12826#discussion_r941574663
##########
r/tests/testthat/test-dataset.R:
##########
@@ -1349,3 +1348,93 @@ test_that("FileSystemFactoryOptions input validation", {
fixed = TRUE
)
})
+
+test_that("can add in augmented fields", {
+ ds <- open_dataset(hive_dir)
+
+ observed <- ds %>%
+ mutate(file_name = add_filename()) %>%
+ collect()
+
+ expect_named(
+ observed,
+ c("int", "dbl", "lgl", "chr", "fct", "ts", "group", "other", "file_name")
+ )
+
+ expect_equal(
+ sort(unique(observed$file_name)),
+ list.files(hive_dir, full.names = TRUE, recursive = TRUE)
+ )
+
+ error_regex <- paste(
+ "Augmented fields such as 'filename' must",
+ "only be used with with Dataset objects which have",
+ "not been aggregated or joined."
+ )
+
+ # errors appropriately with ArrowTabular objects
+ expect_error(
+ arrow_table(mtcars) %>%
+ mutate(file = add_filename()) %>%
+ collect(),
+ regexp = error_regex
+ )
+
+ # errors appropriately with aggregation
+ expect_error(
+ ds %>%
+ summarise(max_int = max(int)) %>%
+ mutate(file_name = add_filename()) %>%
+ collect(),
+ regexp = error_regex
+ )
+
+ # joins to tables
+ another_table <- select(example_data, int, dbl2)
+ expect_error(
+ ds %>%
+ left_join(another_table, by = "int") %>%
+ mutate(file = add_filename()) %>%
+ collect(),
+ regexp = error_regex
+ )
+
+ # and on joins to datasets
+ another_dataset <- write_dataset(another_table, "another_dataset")
+ expect_error(
+ ds %>%
+ left_join(open_dataset("another_dataset"), by = "int") %>%
+ mutate(file = add_filename()) %>%
+ collect(),
+ regexp = error_regex
+ )
+
+ # this hits the implicit_schema path by joining afterwards
+ join_after <- ds %>%
+ mutate(file = add_filename()) %>%
Review Comment:
The indentation here and in the next example is off.
##########
r/R/util.R:
##########
@@ -217,8 +225,27 @@ handle_csv_read_error <- function(e, schema, call) {
"header being read in as data."
)
)
+ abort(msg, call = call)
+ }
+}
+
+# This function only raises an error if the appropriate string is found in
+# the error message, so callers must raise the error manually after calling
+# this if no matching error is found.
+# TODO: Refactor as part of ARROW-17355 to prevent potential missed errors
+handle_augmented_field_misuse <- function(e, call) {
+ msg <- conditionMessage(e)
+ if (grepl("No match for FieldRef.Name(__filename)", msg, fixed = TRUE)) {
+ msg <- c(
+ msg,
+ i = paste(
+ "Augmented fields such as 'filename' must",
+ "only be used with with Dataset objects which have",
+ "not been aggregated or joined."
Review Comment:
Wordsmithing here, how about something like "'filename' can only be used
with Dataset objects, and it can only be added before doing an aggregation or a
join"?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]