This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fa0af70b21 MINOR: [R][Docs] Improve error message around add_filename (#37372)
fa0af70b21 is described below
commit fa0af70b21d7d0cbd94ae0bd27ea34b936f7f347
Author: Bryce Mecum <[email protected]>
AuthorDate: Fri Aug 25 10:32:15 2023 -0800
MINOR: [R][Docs] Improve error message around add_filename (#37372)
### Rationale for this change
Before this change, users could easily get stuck when they ran into an
error while trying to use the result of add_filename in subsequent
pipeline steps.
### What changes are included in this PR?
- Updated the error message string to include advice
- Updated docs page for add_filename including an example
### Are these changes tested?
Yes. Tests were updated and confirmed to pass.
### Are there any user-facing changes?
No.
Authored-by: Bryce Mecum <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
r/R/dplyr-funcs-augmented.R | 24 +++++++++++++++++++-----
r/R/util.R | 5 +++--
r/man/add_filename.Rd | 21 ++++++++++++++++++---
r/tests/testthat/test-dataset.R | 5 +++--
4 files changed, 43 insertions(+), 12 deletions(-)
diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R
index 1067f15573..116248d2dd 100644
--- a/r/R/dplyr-funcs-augmented.R
+++ b/r/R/dplyr-funcs-augmented.R
@@ -20,13 +20,27 @@
#' This function only exists inside `arrow` `dplyr` queries, and it only is
#' valid when quering on a `FileSystemDataset`.
#'
-#' @return A `FieldRef` `Expression` that refers to the filename augmented
-#' column.
-#' @examples
-#' \dontrun{
+#' To use filenames generated by this function in subsequent pipeline steps, you
+#' must either call \code{\link[dplyr:compute]{compute()}} or
+#' \code{\link[dplyr:collect]{collect()}} first. See Examples.
+#'
+#' @return A `FieldRef` \code{\link{Expression}} that refers to the filename
+#' augmented column.
+#'
+#' @examples \dontrun{
+#' open_dataset("nyc-taxi") %>% mutate(
+#' file =
+#' add_filename()
+#' )
+#'
+#' # To use a verb like mutate() with add_filename() we need to first call
+#' # compute()
#' open_dataset("nyc-taxi") %>%
-#' mutate(file = add_filename())
+#' mutate(file = add_filename()) %>%
+#' compute() %>%
+#' mutate(filename_length = nchar(file))
#' }
+#'
#' @keywords internal
add_filename <- function() Expression$field_ref("__filename")
diff --git a/r/R/util.R b/r/R/util.R
index 46ce92d0c1..a7cb5b3792 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -223,8 +223,9 @@ handle_augmented_field_misuse <- function(msg, call) {
msg,
i = paste(
"`add_filename()` or use of the `__filename` augmented field can only",
- "be used with with Dataset objects, and can only be added before doing",
- "an aggregation or a join."
+ "be used with Dataset objects, can only be added before doing",
+ "an aggregation or a join, and cannot be referenced in subsequent",
+ "pipeline steps until either compute() or collect() is called."
)
)
abort(msg, call = call)
diff --git a/r/man/add_filename.Rd b/r/man/add_filename.Rd
index ca7ed0e4b1..93718435a2 100644
--- a/r/man/add_filename.Rd
+++ b/r/man/add_filename.Rd
@@ -7,17 +7,32 @@
add_filename()
}
\value{
-A \code{FieldRef} \code{Expression} that refers to the filename augmented
-column.
+A \code{FieldRef} \code{\link{Expression}} that refers to the filename
+augmented column.
}
\description{
This function only exists inside \code{arrow} \code{dplyr} queries, and it only is
valid when quering on a \code{FileSystemDataset}.
}
+\details{
+To use filenames generated by this function in subsequent pipeline steps, you
+must either call \code{\link[dplyr:compute]{compute()}} or
+\code{\link[dplyr:collect]{collect()}} first. See Examples.
+}
\examples{
\dontrun{
+open_dataset("nyc-taxi") \%>\% mutate(
+ file =
+ add_filename()
+)
+
+# To use a verb like mutate() with add_filename() we need to first call
+# compute()
open_dataset("nyc-taxi") \%>\%
- mutate(file = add_filename())
+ mutate(file = add_filename()) \%>\%
+ compute() \%>\%
+ mutate(filename_length = nchar(file))
}
+
}
\keyword{internal}
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index cbeb081d0b..b7632084e4 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1440,8 +1440,9 @@ test_that("can add in augmented fields", {
error_regex <- paste(
"`add_filename()` or use of the `__filename` augmented field can only",
- "be used with with Dataset objects, and can only be added before doing",
- "an aggregation or a join."
+ "be used with Dataset objects, can only be added before doing",
+ "an aggregation or a join, and cannot be referenced in subsequent",
+ "pipeline steps until either compute() or collect() is called."
)
# errors appropriately with ArrowTabular objects