This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository r-cran-tidyselect.
commit 04f9298a565af505716ef26b1b04d8b9193e1eb3 Author: Andreas Tille <[email protected]> Date: Fri Oct 13 13:47:41 2017 +0200 New upstream version 0.2.2 --- DESCRIPTION | 29 ++++ MD5 | 30 ++++ NAMESPACE | 33 ++++ NEWS.md | 189 +++++++++++++++++++++ R/RcppExports.R | 7 + R/reexport-rlang.R | 11 ++ R/select-helpers.R | 185 ++++++++++++++++++++ R/tidyselect.R | 44 +++++ R/utils-errors.R | 144 ++++++++++++++++ R/utils.R | 53 ++++++ R/vars-pull.R | 74 ++++++++ R/vars-rename.R | 76 +++++++++ R/vars-select.R | 317 +++++++++++++++++++++++++++++++++++ R/vars.R | 112 +++++++++++++ README.md | 18 ++ man/poke_vars.Rd | 94 +++++++++++ man/reexports.Rd | 22 +++ man/select_helpers.Rd | 81 +++++++++ man/tidyselect-package.Rd | 27 +++ man/vars_pull.Rd | 47 ++++++ man/vars_select.Rd | 132 +++++++++++++++ man/vars_select_helpers.Rd | 36 ++++ src/RcppExports.cpp | 29 ++++ src/combine_variables.cpp | 147 ++++++++++++++++ tests/testthat.R | 5 + tests/testthat/test-inds-combine.R | 59 +++++++ tests/testthat/test-select-helpers.R | 231 +++++++++++++++++++++++++ tests/testthat/test-vars-pull.R | 36 ++++ tests/testthat/test-vars-rename.R | 61 +++++++ tests/testthat/test-vars-select.R | 98 +++++++++++ tests/testthat/test-vars.R | 33 ++++ 31 files changed, 2460 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..8361a44 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,29 @@ +Package: tidyselect +Title: Select from a Set of Strings +Version: 0.2.2 +Authors@R: c( + person("Lionel", "Henry", ,"[email protected]", c("aut", "cre")), + person("Hadley", "Wickham", ,"[email protected]", "aut"), + person("RStudio", role = "cph") + ) +Description: A backend for the selecting functions of the 'tidyverse'. + It makes it easy to implement select-like functions in your own + packages in a way that is consistent with other 'tidyverse' + interfaces for selection. +Depends: R (>= 3.1.0) +Imports: glue, purrr, rlang (>= 0.1), Rcpp (>= 0.12.0) +Suggests: dplyr, testthat +LinkingTo: Rcpp (>= 0.12.0), +License: GPL-3 +Encoding: UTF-8 +LazyData: true +ByteCompile: true +RoxygenNote: 6.0.1 +NeedsCompilation: yes +Packaged: 2017-10-10 21:06:50 UTC; lionel +Author: Lionel Henry [aut, cre], + Hadley Wickham [aut], + RStudio [cph] +Maintainer: Lionel Henry <[email protected]> +Repository: CRAN +Date/Publication: 2017-10-10 22:04:35 UTC diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..989ca79 --- /dev/null +++ b/MD5 @@ -0,0 +1,30 @@ +f6ff8a856cebd09a22426911e9abcee3 *DESCRIPTION +23c69ab4f5d97760fbbf36f884dfc09e *NAMESPACE +6fc49d7cbe8e2d50762185cae068e610 *NEWS.md +bafe96321ddcd176588766edadcf9845 *R/RcppExports.R +69a92531f4b91811f4ae95266ed9694a *R/reexport-rlang.R +94226287f0bb798ab8129ecfb5978adc *R/select-helpers.R +5c261ba21836f29ecc1e8a73bb316227 *R/tidyselect.R +7154ddb8e62c49ad9d28cddaf3e884e6 *R/utils-errors.R +30e621ee66d039207f240fd067c9e534 *R/utils.R +792a0f15ccbd1e7500ffb4ca0b75d211 *R/vars-pull.R +42290f9407fd1bcf7da82afa89bae8d9 *R/vars-rename.R +a5aab0d035245e7c13f701948aa90376 *R/vars-select.R +abcbfb6056d51afd7511ee726d5489f3 *R/vars.R +03e436d518158095d74235b602e55ea1 *README.md +e23020fdd709b092691c07f04ce4903a *man/poke_vars.Rd +335b613500b581dbf2a8e3ac2addb99d *man/reexports.Rd +8516e4bc9ab89f932fce61c0bc112dbd *man/select_helpers.Rd +9a2e175212b3e30ae2b3ffd348b2296b *man/tidyselect-package.Rd +0e7c7f47a2990758110e7cbd0eef8cab *man/vars_pull.Rd +a6abe04beaf42c2d9ed6cc17fb6317f0 *man/vars_select.Rd +8bb04e2c571254d8ee3a0155927ab759 *man/vars_select_helpers.Rd +cf9919d2432c4879b5f3f84f7aaf8530 *src/RcppExports.cpp +e24e6997d1c8ab932377b38d88d31fe5 *src/combine_variables.cpp +2dfb04adf6a141cf668b2ac7db3ba2f9 *tests/testthat.R +4e68e35e1ea4e25993986ad4d0460a87 *tests/testthat/test-inds-combine.R +9e063f42aa6fa98dc97b7cd1b15b44a0 *tests/testthat/test-select-helpers.R +7c1959a37e59594dfd9744d456d64965 *tests/testthat/test-vars-pull.R +307e1e026e52ef23be69cc78b5c5cc4c *tests/testthat/test-vars-rename.R +f6edd9daa4f0061806e9fefd7fd5e450 *tests/testthat/test-vars-select.R +4450b843bcd598686cd4cec050f9ce73 *tests/testthat/test-vars.R diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..2542d22 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,33 @@ +# Generated by roxygen2: do not edit by hand + +export(contains) +export(ends_with) +export(enquo) +export(everything) +export(last_col) +export(matches) +export(num_range) +export(one_of) +export(peek_vars) +export(poke_vars) +export(quo) +export(quo_name) +export(quos) +export(scoped_vars) +export(starts_with) +export(vars_pull) +export(vars_rename) +export(vars_select) +export(vars_select_helpers) +export(with_vars) +import(rlang) +importFrom(Rcpp,cppFunction) +importFrom(glue,glue) +importFrom(purrr,discard) +importFrom(purrr,map) +importFrom(purrr,map2) +importFrom(purrr,map2_chr) +importFrom(purrr,map_chr) +importFrom(purrr,map_if) +importFrom(purrr,map_lgl) +useDynLib(tidyselect, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..599c293 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,189 @@ + +# tidyselect 0.2.2 + +* `dplyr` is now correctly mentioned as suggested package. + + +# tidyselect 0.2.1 + +* `-` now supports character vectors in addition to strings. This + makes it easy to unquote column names to exclude from the set: + + ```{r} + vars <- c("cyl", "am", "disp", "drat") + vars_select(names(mtcars), - (!! vars)) + ``` + +* `last_col()` now issues an error when the variable vector is empty. + +* `last_col()` now returns column positions rather than column names + for consistency with other helpers. This also makes it compatible + with functions like `seq()`. + +* `c()` now supports character vectors the same way as `-` and `seq()`. + (#37 @gergness) + + +# tidyselect 0.2.0 + +The main point of this release is to revert a troublesome behaviour +introduced in tidyselect 0.1.0. It also includes a few features. + + +## Evaluation rules + +The special evaluation semantics for selection have been changed +back to the old behaviour because the new rules were causing too +much trouble and confusion. From now on data expressions (symbols +and calls to `:` and `c()`) can refer to both registered variables +and to objects from the context. + +However the semantics for context expressions (any calls other than +to `:` and `c()`) remain the same. Those expressions are evaluated +in the context only and cannot refer to registered variables. + +If you're writing functions and refer to contextual objects, it is +still a good idea to avoid data expressions. Since registered +variables are change as a function of user input and you never know +if your local objects might be shadowed by a variable. Consider: + +``` +n <- 2 +vars_select(letters, 1:n) +``` + +Should that select up to the second element of `letters` or up to +the 14th? Since the variables have precedence in a data expression, +this will select the 14 first letters. This can be made more robust +by turning the data expression into a context expression: + +``` +vars_select(letters, seq(1, n)) +``` + +You can also use quasiquotation since unquoted arguments are +guaranteed to be evaluated without any user data in scope. While +equivalent because of the special rules for context expressions, +this may be clearer to the reader accustomed to tidy eval: + +```{r} +vars_select(letters, seq(1, !! n)) +``` + +Finally, you may want to be more explicit in the opposite direction. +If you expect a variable to be found in the data but not in the +context, you can use the `.data` pronoun: + +```{r} +vars_select(names(mtcars), .data$cyl : .data$drat) +``` + +## New features + +* The new select helper `last_col()` is helpful to select over a + custom range: `vars_select(vars, 3:last_col())`. + +* `:` and `-` now handle strings as well. This makes it easy to + unquote a column name: `(!! name) : last_col()` or `-(!! name)`. + +* `vars_select()` gains a `.strict` argument similar to + `rename_vars()`. If set to `FALSE`, errors about unknown variables + are ignored. + +* `vars_select()` now treats `NULL` as empty inputs. This follows a + trend in the tidyverse tools. + +* `vars_rename()` now handles variable positions (integers or round + doubles) just like `vars_select()` (#20). + +* `vars_rename()` is now implemented with the tidy eval framework. + Like `vars_select()`, expressions are evaluated without any user + data in scope. In addition a variable context is now established so + you can write rename helpers. Those should return a single round + number or a string (variable position or variable name). + +* `has_vars()` is a predicate that tests whether a variable context + has been set (#21). + +* The selection helpers are now exported in a list + `vars_select_helpers`. This is intended for APIs that embed the + helpers in the evaluation environment. + + +## Fixes + +* `one_of()` argument `vars` has been renamed to `.vars` to avoid + spurious matching. + + +# tidyselect 0.1.1 + +tidyselect is the new home for the legacy functions +`dplyr::select_vars()`, `dplyr::rename_vars()` and +`dplyr::select_var()`. + + +## API changes + +We took this opportunity to make a few changes to the API: + +* `select_vars()` and `rename_vars()` are now `vars_select()` and + `vars_rename()`. This follows the tidyverse convention that a prefix + corresponds to the input type while suffixes indicate the output + type. Similarly, `select_var()` is now `vars_pull()`. + +* The arguments are now prefixed with dots to limit argument matching + issues. While the dots help, it is still a good idea to splice a + list of captured quosures to make sure dotted arguments are never + matched to `vars_select()`'s named arguments: + + ``` + vars_select(vars, !!! quos(...)) + ``` + +* Error messages can now be customised. For consistency with dplyr, + error messages refer to "columns" by default. This assumes that the + variables being selected come from a data frame. If this is not + appropriate for your DSL, you can now add an attribute `vars_type` + to the `.vars` vector to specify alternative names. This must be a + character vector of length 2 whose first component is the singular + form and the second is the plural. For example, `c("variable", + "variables")`. + + +## Establishing a variable context + +tidyselect provides a few more ways of establishing a variable +context: + +* `scoped_vars()` sets up a variable context along with an an exit + hook that automatically restores the previous variables. It is the + preferred way of changing the variable context. + + `with_vars()` takes variables and an expression and evaluates the + latter in the context of the former. + +* `poke_vars()` establishes a new variable context. It returns the + previous context invisibly and it is your responsibility to restore + it after you are done. This is for expert use only. + + `current_vars()` has been renamed to `peek_vars()`. This naming is a + reference to [peek and poke](https://en.wikipedia.org/wiki/PEEK_and_POKE) + from legacy languages. + + +## New evaluation semantics + +The evaluation semantics for selecting verbs have changed. Symbols are +now evaluated in a data-only context that is isolated from the calling +environment. This means that you can no longer refer to local variables +unless you are explicitly unquoting these variables with `!!`, which +is mostly for expert use. + +Note that since dplyr 0.7, helper calls (like `starts_with()`) obey +the opposite behaviour and are evaluated in the calling context +isolated from the data context. To sum up, symbols can only refer to +data frame objects, while helpers can only refer to contextual +objects. This differs from usual R evaluation semantics where both +the data and the calling environment are in scope (with the former +prevailing over the latter). diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..77ec5c9 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,7 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +inds_combine <- function(vars, xs) { + .Call(`_tidyselect_inds_combine`, vars, xs) +} + diff --git a/R/reexport-rlang.R b/R/reexport-rlang.R new file mode 100644 index 0000000..641e951 --- /dev/null +++ b/R/reexport-rlang.R @@ -0,0 +1,11 @@ +#' @export +rlang::quo + +#' @export +rlang::quos + +#' @export +rlang::enquo + +#' @export +rlang::quo_name diff --git a/R/select-helpers.R b/R/select-helpers.R new file mode 100644 index 0000000..3afb2a3 --- /dev/null +++ b/R/select-helpers.R @@ -0,0 +1,185 @@ +#' Select helpers +#' +#' These functions allow you to select variables based on their names. +#' * `starts_with()`: starts with a prefix +#' * `ends_with()`: ends with a prefix +#' * `contains()`: contains a literal string +#' * `matches()`: matches a regular expression +#' * `num_range()`: a numerical range like x01, x02, x03. +#' * `one_of()`: variables in character vector. +#' * `everything()`: all variables. +#' * `last_col()`: last variable, possibly with an offset. +#' +#' @param match A string. +#' @param ignore.case If `TRUE`, the default, ignores case when matching +#' names. +#' @param vars,.vars A character vector of variable names. When called +#' from inside [select()] these are automatically set to the names +#' of the table. +#' @name select_helpers +#' @return An integer vector giving the position of the matched variables. +#' @examples +#' nms <- names(iris) +#' vars_select(nms, starts_with("Petal")) +#' vars_select(nms, ends_with("Width")) +#' vars_select(nms, contains("etal")) +#' vars_select(nms, matches(".t.")) +#' vars_select(nms, Petal.Length, Petal.Width) +#' vars_select(nms, everything()) +#' vars_select(nms, last_col()) +#' vars_select(nms, last_col(offset = 2)) +#' +#' vars <- c("Petal.Length", "Petal.Width") +#' vars_select(nms, one_of(vars)) +NULL + +#' @export +#' @rdname select_helpers +starts_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { + stopifnot(is_string(match), !is.na(match), nchar(match) > 0) + + if (ignore.case) match <- tolower(match) + n <- nchar(match) + + if (ignore.case) vars <- tolower(vars) + which_vars(match, substr(vars, 1, n)) +} + +#' @export +#' @rdname select_helpers +ends_with <- function(match, ignore.case = TRUE, vars = peek_vars()) { + stopifnot(is_string(match), !is.na(match), nchar(match) > 0) + + if (ignore.case) match <- tolower(match) + n <- nchar(match) + + if (ignore.case) vars <- tolower(vars) + length <- nchar(vars) + + which_vars(match, substr(vars, pmax(1, length - n + 1), length)) +} + +#' @export +#' @rdname select_helpers +contains <- function(match, ignore.case = TRUE, vars = peek_vars()) { + stopifnot(is_string(match), nchar(match) > 0) + + if (ignore.case) { + vars <- tolower(vars) + match <- tolower(match) + } + grep_vars(match, vars, fixed = TRUE) +} + +#' @export +#' @rdname select_helpers +matches <- function(match, ignore.case = TRUE, vars = peek_vars()) { + stopifnot(is_string(match), nchar(match) > 0) + + grep_vars(match, vars, ignore.case = ignore.case) +} + +#' @export +#' @rdname select_helpers +#' @param prefix A prefix that starts the numeric range. +#' @param range A sequence of integers, like `1:5` +#' @param width Optionally, the "width" of the numeric range. For example, +#' a range of 2 gives "01", a range of three "001", etc. +num_range <- function(prefix, range, width = NULL, vars = peek_vars()) { + if (!is_null(width)) { + range <- sprintf(paste0("%0", width, "d"), range) + } + match_vars(paste0(prefix, range), vars) +} + +#' @export +#' @rdname select_helpers +#' @param ... One or more character vectors. +one_of <- function(..., .vars = peek_vars()) { + keep <- c(...) + + if (!is_character(keep)) { + bad("All arguments must be character vectors, not {type_of(keep)}") + } + + if (!all(keep %in% .vars)) { + bad <- setdiff(keep, .vars) + warn(glue("Unknown { plural(.vars) }: ", paste0("`", bad, "`", collapse = ", "))) + } + + match_vars(keep, .vars) +} + +#' @export +#' @rdname select_helpers +everything <- function(vars = peek_vars()) { + seq_along(vars) +} + + +#' @export +#' @param offset Set it to `n` to select the nth var from the end. +#' @rdname select_helpers +last_col <- function(offset = 0L, vars = peek_vars()) { + stopifnot(is_integerish(offset)) + n <- length(vars) + + if (offset && n <= offset) { + abort(glue("`offset` must be smaller than the number of { plural(vars) }")) + } else if (n == 0) { + abort(glue("Can't select last { singular(vars) } when input is empty")) + } else { + n - as.integer(offset) + } +} + +match_vars <- function(needle, haystack) { + x <- match(needle, haystack) + x[!is.na(x)] +} + +grep_vars <- function(needle, haystack, ...) { + grep(needle, haystack, ...) +} + +which_vars <- function(needle, haystack) { + which(needle == haystack) +} + + +#' List of selection helpers +#' +#' This list contains all selection helpers exported in tidyselect. It +#' is useful when you want to embed the helpers in your API without +#' having to track addition of new helpers in tidyselect. +#' +#' @export +#' @examples +#' # You can easily embed the helpers by burying them in the scopes of +#' # input quosures. For this example we need an environment where +#' # tidyselect is not attached: +#' local(envir = baseenv(), { +#' vars <- c("foo", "bar", "baz") +#' helpers <- tidyselect::vars_select_helpers +#' +#' my_select <- function(...) { +#' quos <- rlang::quos(...) +#' quos <- lapply(quos, rlang::env_bury, !!! helpers) +#' +#' tidyselect::vars_select(vars, !!! quos) +#' } +#' +#' # The user can now call my_select() with helpers without having +#' # to attach tidyselect: +#' my_select(starts_with("b")) +#' }) +vars_select_helpers <- list( + starts_with = starts_with, + ends_with = ends_with, + contains = contains, + matches = matches, + num_range = num_range, + one_of = one_of, + everything = everything, + last_col = last_col +) diff --git a/R/tidyselect.R b/R/tidyselect.R new file mode 100644 index 0000000..6270dd4 --- /dev/null +++ b/R/tidyselect.R @@ -0,0 +1,44 @@ +#' @import rlang +#' @importFrom glue glue +#' @importFrom purrr discard map map_chr map_if map_lgl map2 map2_chr +#' @importFrom Rcpp cppFunction +#' @useDynLib tidyselect, .registration = TRUE +"_PACKAGE" + + +maybe_hotpatch_dplyr <- function(...) { + if (!is_installed("dplyr") || env_has(ns_env("dplyr"), "peek_vars")) { + return(FALSE) + } + + fns <- list( + current_vars = peek_vars, + set_current_vars = poke_vars + ) + env <- ns_env("dplyr") + nms <- names(fns) + + for (i in seq_along(fns)) { + hotpatch_binding(nms[[i]], fns[[i]], env) + } + + TRUE +} +hotpatch_binding <- function(binding, fn, env) { + unlock <- env_get(base_env(), "unlockBinding") + unlock(binding, env) + + env_bind(env, !! binding := fn) + + lock <- env_get(base_env(), "lockBinding") + lock(binding, env = env) +} + +push_dplyr_hotpatch <- function(...) { + maybe_hotpatch_dplyr() + setHook(packageEvent("dplyr", "onLoad"), maybe_hotpatch_dplyr) +} + +.onLoad <- function(...) { + setHook(packageEvent("tidyselect", "onLoad"), push_dplyr_hotpatch) +} diff --git a/R/utils-errors.R b/R/utils-errors.R new file mode 100644 index 0000000..4ff8c98 --- /dev/null +++ b/R/utils-errors.R @@ -0,0 +1,144 @@ +check_pkg <- function(name, reason) { + if (is_installed(name)) + return(invisible(TRUE)) + + glubort(NULL, 'The {name} package is required to {reason}. + Please install it with `install.packages("{name}")`' + ) +} + +# ngettext() does extra work, this function is a simpler version +pluralise <- function(n, singular, plural) { + if (n == 1) { + singular + } else { + plural + } +} +pluralise_len <- function(x, singular, plural) { + pluralise(length(x), singular, plural) +} + +bad <- function(..., .envir = parent.frame()) { + glubort(NULL, ..., .envir = parent.frame()) +} + +bad_args <- function(args, ..., .envir = parent.frame()) { + glubort(fmt_args(args), ..., .envir = .envir) +} + +bad_pos_args <- function(pos_args, ..., .envir = parent.frame()) { + glubort(fmt_pos_args(pos_args), ..., .envir = .envir) +} + +bad_calls <- function(calls, ..., .envir = parent.frame()) { + glubort(fmt_calls(calls), ..., .envir = .envir) +} + +bad_named_calls <- function(named_calls, ..., .envir = parent.frame()) { + glubort(fmt_named_calls(named_calls), ..., .envir = .envir) +} + +bad_eq_ops <- function(named_calls, ..., .envir = parent.frame()) { + glubort(fmt_wrong_eq_ops(named_calls), ..., .envir = .envir) +} + +bad_cols <- function(cols, ..., .envir = parent.frame()) { + glubort(fmt_cols(cols), ..., .envir = .envir) +} + +bad_measures <- function(measures, ..., .envir = parent.frame()) { + glubort(fmt_measures(measures), ..., .envir = .envir) +} + +glubort <- function(header, ..., .envir = parent.frame(), .abort = abort) { + text <- glue(..., .envir = .envir) + if (!is_null(header)) text <- paste0(header, " ", text) + .abort(text) +} + +fmt_args <- function(x) { + x <- parse_args(x) + fmt_obj(x) +} + +fmt_pos_args <- function(x) { + args <- pluralise_len(x, "Argument", "Arguments") + glue("{args} {fmt_comma(x)}") +} + +fmt_calls <- function(...) { + x <- parse_named_call(...) + fmt_obj(x) +} + +fmt_named_calls <- function(...) { + x <- parse_named_call(...) + fmt_named(x) +} + +fmt_wrong_eq_ops <- function(...) { + x <- parse_named_call(...) + fmt_comma( + paste0(fmt_obj1(names2(x)), " (", fmt_obj1(paste0(names2(x), " = ", x)), ")") + ) +} + +fmt_cols <- function(x) { + cols <- pluralise_len(x, "Column", "Columns") + glue("{cols} {fmt_obj(x)}") +} + +fmt_measures <- function(x) { + measures <- pluralise_len(x, "Measure", "Measures") + glue("{measures} {fmt_obj(x)}") +} + +fmt_named <- function(x) { + fmt_comma(paste0(fmt_obj1(names2(x)), " = ", x)) +} + +fmt_obj <- function(x) { + fmt_comma(fmt_obj1(x)) +} + +fmt_obj1 <- function(x) { + paste0("`", x, "`") +} + +fmt_classes <- function(x) { + paste(class(x), collapse = "/") +} + +fmt_dims <- function(x) { + paste0("[", paste0(x, collapse = " x "), "]") +} + +fmt_comma <- function(...) { + MAX_ITEMS <- 6L + + x <- paste0(...) + if (length(x) > MAX_ITEMS) { + length(x) <- MAX_ITEMS + x[[MAX_ITEMS]] <- "..." + } + + glue::collapse(x, sep = ", ", last = " and ") +} + +parse_args <- function(x) { + # convert single formula to list of length 1 + x <- unlist(list(x), recursive = FALSE) + is_fml <- map_lgl(x, is_formula) + x[is_fml] <- map_chr(map(x[is_fml], "[[", 2), as_string) + unlist(x) +} + +parse_named_call <- function(x) { + map_chr(x, quo_text) +} + +bad_unknown_vars <- function(vars, unknown) { + thing <- vars_pluralise_len(vars, unknown) + abort(glue("Unknown { thing } { fmt_args(unknown) } ")) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..837b5a2 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,53 @@ + +is_negated <- function(x) { + is_lang(x, "-", n = 1) +} + +sym_dollar <- quote(`$`) +sym_brackets2 <- quote(`[[`) +is_data_pronoun <- function(expr) { + is_lang(expr, list(sym_dollar, sym_brackets2)) && + identical(node_cadr(expr), quote(.data)) +} + +singular <- function(vars) { + nm <- attr(vars, "type") %||% c("column", "columns") + if (!is_character(nm, 2)) { + abort("The `type` attribute must be a character vector of length 2") + } + nm[[1]] +} +plural <- function(vars) { + nm <- attr(vars, "type") %||% c("column", "columns") + if (!is_character(nm, 2)) { + abort("The `type` attribute must be a character vector of length 2") + } + nm[[2]] +} +Singular <- function(vars) { + capitalise_first(singular(vars)) +} +Plural <- function(vars) { + capitalise_first(plural(vars)) +} + +vars_pluralise <- function(vars) { + pluralise(vars, singular(vars), plural(vars)) +} +vars_pluralise_len <- function(vars, x) { + pluralise_len(x, singular(vars), plural(vars)) +} + +capitalise_first <- function(chr) { + gsub("(^[[:alpha:]])", "\\U\\1", chr, perl = TRUE) +} + +paren_sym <- quote(`(`) +minus_sym <- quote(`-`) +colon_sym <- quote(`:`) +c_sym <- quote(`c`) + +is_language <- is_lang +quo_is_language <- function(quo, name = NULL, n = NULL, ns = NULL) { + is_language(f_rhs(quo), name = name, n = n, ns = ns) +} diff --git a/R/vars-pull.R b/R/vars-pull.R new file mode 100644 index 0000000..cfd6589 --- /dev/null +++ b/R/vars-pull.R @@ -0,0 +1,74 @@ +#' Select variable +#' +#' This function powers [dplyr::pull()] and various functions of the +#' tidyr package. It is similar to [vars_select()] but returns only +#' one column name and has slightly different semantics: it allows +#' negative numbers to select columns from the end. +#' +#' @inheritParams vars_select +#' @param var A variable specified as: +#' * a literal variable name +#' * a positive integer, giving the position counting from the left +#' * a negative integer, giving the position counting from the right. +#' +#' The default returns the last column (on the assumption that's the +#' column you've created most recently). +#' +#' This argument is taken by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names and column positions). +#' @return The selected column name as an unnamed string. +#' @seealso [dplyr::pull()], [vars_select()] +#' @export +#' @keywords internal +#' @examples +#' # It takes its argument by expression: +#' vars_pull(letters, c) +#' +#' # Negative numbers select from the end: +#' vars_pull(letters, -3) +#' +#' # You can unquote variables: +#' var <- 10 +#' vars_pull(letters, !! var) +vars_pull <- function(vars, var = -1) { + var_env <- set_names(as_list(seq_along(vars)), vars) + var <- eval_tidy(enquo(var), var_env) + n <- length(vars) + + # Fall degenerate values like `Inf` through integerish branch + if (is_double(var, 1) && !is.finite(var)) { + var <- na_int + } + + if (is_string(var)) { + pos <- match_var(var, vars) + } else if (is_integerish(var, 1)) { + if (is_na(var) || abs(var) > n || var == 0L) { + abort(glue( + "`var` must be a value between {-n} and {n} (excluding zero), not {var}" + )) + } + if (var < 0) { + pos <- var + n + 1 + } else { + pos <- var + } + } else { + type <- friendly_type(type_of(var)) + abort(glue( + "`var` must evaluate to a single number or a { singular(vars) } name, not {type}" + )) + } + + vars[[pos]] +} + +# FIXME: Workaround rlang bug +is_integerish <- function(x, n = NULL) { + if (typeof(x) == "integer") return(TRUE) + if (typeof(x) != "double") return(FALSE) + if (!is.finite(x)) return(FALSE) + if (!is_null(n) && length(x) != n) return(FALSE) + all(x == as.integer(x)) +} diff --git a/R/vars-rename.R b/R/vars-rename.R new file mode 100644 index 0000000..ce7e579 --- /dev/null +++ b/R/vars-rename.R @@ -0,0 +1,76 @@ +#' @export +#' @rdname vars_select +#' @param .strict If `TRUE`, will throw an error if you attempt to rename a +#' variable that doesn't exist. +vars_rename <- function(.vars, ..., .strict = TRUE) { + quos <- quos(...) + + if (any(names2(quos) == "")) { + abort("All arguments must be named") + } + if (!.strict) { + quos <- discard(quos, is_unknown_symbol, .vars) + } + + new_vars <- names(quos) + old_vars <- vars_rename_eval(quos, .vars) + + known <- old_vars %in% .vars + + if (!all(known)) { + if (.strict) { + unknown <- old_vars[!known] + bad_unknown_vars(.vars, unknown) + } else { + old_vars <- old_vars[known] + new_vars <- new_vars[known] + } + } + + select <- set_names(.vars, .vars) + renamed_idx <- match(old_vars, .vars) + names(select)[renamed_idx] <- new_vars + + select +} + +vars_rename_eval <- function(quos, vars) { + scoped_vars(vars) + + # Only symbols have data in scope + is_symbol <- map_lgl(quos, is_symbol_expr) + data <- set_names(as.list(seq_along(vars)), vars) + renamed <- map_if(quos, is_symbol, eval_tidy, data) + + # All expressions are evaluated in the context only + renamed <- map_if(renamed, !is_symbol, eval_tidy) + + renamed <- map2_chr(renamed, names(quos), validate_renamed_var, vars) + renamed +} +is_symbol_expr <- function(quo) { + expr <- get_expr(quo) + is_symbol(expr) || is_data_pronoun(expr) +} + +validate_renamed_var <- function(expr, name, vars) { + switch_type(expr, + integer = , + double = + if (!is_integerish(expr)) { + abort(glue("{ Singular(vars) } positions must be round numbers")) + } else if (length(expr) != 1) { + abort(glue("{ Singular(vars) } positions must be scalar")) + } else { + return(vars[[expr]]) + }, + string = + return(expr) + ) + + actual_type <- friendly_type(type_of(expr)) + named_call <- ll(!! name := expr) + bad_named_calls(named_call, + "must be a { singular(vars) } name or position, not {actual_type}" + ) +} diff --git a/R/vars-select.R b/R/vars-select.R new file mode 100644 index 0000000..4183bf4 --- /dev/null +++ b/R/vars-select.R @@ -0,0 +1,317 @@ +#' Select or rename variables +#' +#' These functions power [dplyr::select()] and [dplyr::rename()]. They +#' enable dplyr selecting or renaming semantics in your own functions. +#' +#' @section Customising error messages: +#' +#' For consistency with dplyr, error messages refer to "columns" by +#' default. This assumes that the variables being selected come from a +#' data frame. If this is not appropriate for your DSL, you can add an +#' attribute `type` to the `.vars` vector to specify alternative +#' names. This must be a character vector of length 2 whose first +#' component is the singular form and the second is the plural. For +#' example, `c("variable", "variables")`. +#' +#' @param .vars A character vector of existing column names. +#' @param ...,args Expressions to compute +#' +#' These arguments are automatically [quoted][rlang::quo] and +#' [evaluated][rlang::eval_tidy] in a context where elements of +#' `vars` are objects representing their positions within +#' `vars`. They support [unquoting][rlang::quasiquotation] and +#' splicing. See `vignette("programming")` for an introduction to +#' these concepts. +#' +#' Note that except for `:`, `-` and `c()`, all complex expressions +#' are evaluated outside that context. This is to prevent accidental +#' matching to `vars` elements when you refer to variables from the +#' calling context. +#' @param .include,.exclude Character vector of column names to always +#' include/exclude. +#' @param .strict If `FALSE`, errors about unknown columns are ignored. +#' @seealso [vars_pull()] +#' @export +#' @keywords internal +#' @return A named character vector. Values are existing column names, +#' names are new names. +#' @examples +#' # Keep variables +#' vars_select(names(iris), everything()) +#' vars_select(names(iris), starts_with("Petal")) +#' vars_select(names(iris), ends_with("Width")) +#' vars_select(names(iris), contains("etal")) +#' vars_select(names(iris), matches(".t.")) +#' vars_select(names(iris), Petal.Length, Petal.Width) +#' vars_select(names(iris), one_of("Petal.Length", "Petal.Width")) +#' +#' df <- as.data.frame(matrix(runif(100), nrow = 10)) +#' df <- df[c(3, 4, 7, 1, 9, 8, 5, 2, 6, 10)] +#' vars_select(names(df), num_range("V", 4:6)) +#' +#' # Drop variables +#' vars_select(names(iris), -starts_with("Petal")) +#' vars_select(names(iris), -ends_with("Width")) +#' vars_select(names(iris), -contains("etal")) +#' vars_select(names(iris), -matches(".t.")) +#' vars_select(names(iris), -Petal.Length, -Petal.Width) +#' +#' # Rename variables +#' vars_select(names(iris), petal_length = Petal.Length) +#' vars_select(names(iris), petal = starts_with("Petal")) +#' +#' # Rename variables preserving all existing +#' vars_rename(names(iris), petal_length = Petal.Length) +#' +#' # You can unquote symbols or quosures +#' vars_select(names(iris), !! quote(Petal.Length)) +#' +#' # And unquote-splice lists of symbols or quosures +#' vars_select(names(iris), !!! list(quo(Petal.Length), quote(Petal.Width))) +#' +#' +#' # If you want to avoid ambiguity about where to find objects you +#' # have two solutions provided by the tidy eval framework. If you +#' # want to refer to local objects, you can explicitly unquote +#' # them. They must contain variable positions (integers) or variable +#' # names (strings): +#' Species <- 2 +#' vars_select(names(iris), Species) # Picks up `Species` from the data frame +#' vars_select(names(iris), !! Species) # Picks up the local object referring to column 2 +#' +#' # If you want to make sure that a variable is picked up from the +#' # data, you can use the `.data` pronoun: +#' vars_select(names(iris), .data$Species) +#' +#' +#' # If you're writing a wrapper around vars_select(), pass the dots +#' # via splicing to avoid matching dotted arguments to vars_select() +#' # named arguments (`vars`, `include` and `exclude`): +#' wrapper <- function(...) { +#' vars_select(names(mtcars), !!! quos(...)) +#' } +#' +#' # This won't partial-match on `vars`: +#' wrapper(var = cyl) +#' +#' # This won't match on `include`: +#' wrapper(include = cyl) +#' +#' +#' # If your wrapper takes named arguments, you need to capture then +#' # unquote to pass them to vars_select(). See the vignette on +#' # programming with dplyr for more on this: +#' wrapper <- function(var1, var2) { +#' vars_select(names(mtcars), !! enquo(var1), !! enquo(var2)) +#' } +#' wrapper(starts_with("d"), starts_with("c")) +vars_select <- function(.vars, ..., + .include = character(), + .exclude = character(), + .strict = TRUE) { + quos <- quos(...) + + if (!.strict) { + quos <- ignore_unknown_symbols(.vars, quos) + } + + ind_list <- vars_select_eval(.vars, quos) + + # This takes care of NULL inputs and of ignored errors when + # `.strict` is FALSE + is_empty <- map_lgl(ind_list, is_null) + ind_list <- discard(ind_list, is_empty) + quos <- discard(quos, is_empty) + + if (is_empty(ind_list)) { + .vars <- setdiff(.include, .exclude) + return(set_names(.vars, .vars)) + } + + # if the first selector is exclusive (negative), start with all columns + first <- f_rhs(quos[[1]]) + initial_case <- if (is_negated(first)) list(seq_along(.vars)) else integer(0) + + ind_list <- c(initial_case, ind_list) + names(ind_list) <- c(names2(initial_case), names2(quos)) + + # Match strings to variable positions + ind_list <- map_if(ind_list, is_character, match_var, table = .vars) + + is_integerish <- map_lgl(ind_list, is_integerish) + if (any(!is_integerish)) { + bad <- quos[!is_integerish] + first <- ind_list[!is_integerish][[1]] + first_type <- friendly_type(type_of(first)) + bad_calls(bad, + "must evaluate to { singular(.vars) } positions or names, \\ + not { first_type }" + ) + } + + incl <- inds_combine(.vars, ind_list) + + # Include/.exclude specified variables + sel <- set_names(.vars[incl], names(incl)) + sel <- c(setdiff2(.include, sel), sel) + sel <- setdiff2(sel, .exclude) + + # Ensure all output .vars named + if (is_empty(sel)) { + cnd_signal("tidyselect_empty", .mufflable = FALSE) + names(sel) <- sel + } else { + unnamed <- names2(sel) == "" + names(sel)[unnamed] <- sel[unnamed] + } + + sel +} + +ignore_unknown_symbols <- function(vars, quos) { + quos <- discard(quos, is_ignored, vars) + quos <- map_if(quos, is_concat_lang, lang_ignore_unknown_symbols, vars) + quos +} +lang_ignore_unknown_symbols <- function(quo, vars) { + expr <- get_expr(quo) + + args <- lang_args(expr) + args <- discard(args, is_unknown_symbol, vars) + expr <- lang(node_car(expr), !!! args) + + set_expr(quo, expr) +} + +is_ignored <- function(quo, vars) { + is_unknown_symbol(quo, vars) || is_ignored_minus_lang(quo, vars) +} +is_ignored_minus_lang <- function(quo, vars) { + expr <- get_expr(quo) + + if (!is_language(expr, quote(`-`), 1L)) { + return(FALSE) + } + + is_unknown_symbol(node_cadr(expr), vars) +} +is_unknown_symbol <- function(quo, vars) { + expr <- get_expr(quo) + + if (!is_symbol(expr) && !is_string(expr)) { + return(FALSE) + } + + !as_string(expr) %in% vars +} +is_concat_lang <- function(quo) { + quo_is_language(quo, quote(`c`)) +} + +vars_select_eval <- function(vars, quos) { + scoped_vars(vars) + + # Overscope `c`, `:` and `-` with versions that handle strings + data_helpers <- list(`:` = vars_colon, `-` = vars_minus, `c` = vars_c) + overscope_top <- new_environment(data_helpers) + + # Symbols and calls to `:` and `c()` are evaluated with data in scope + is_helper <- map_lgl(quos, quo_is_helper) + data <- set_names(as.list(seq_along(vars)), vars) + data <- data[!names(data) == ""] + overscope <- env_bury(overscope_top, !!! data) + + overscope <- new_overscope(overscope, overscope_top) + overscope$.data <- data + + ind_list <- map_if(quos, !is_helper, overscope_eval_next, overscope = overscope) + + # All other calls are evaluated in the context only + # They are always evaluated strictly + ind_list <- map_if(ind_list, is_helper, eval_tidy) + + ind_list +} + +vars_colon <- function(x, y) { + if (is_string(x)) { + x <- match_strings(x) + } + if (is_string(y)) { + y <- match_strings(y) + } + + x:y +} +vars_minus <- function(x, y) { + if (!missing(y)) { + return(x - y) + } + + if (is_character(x)) { + x <- match_strings(x) + } + + -x +} +vars_c <- function(...) { + dots <- map_if(list(...), is_character, match_strings) + do.call(`c`, dots) +} +match_strings <- function(x) { + vars <- peek_vars() + out <- match(x, vars) + + if (any(are_na(out))) { + unknown <- x[are_na(out)] + bad_unknown_vars(vars, unknown) + } + + out +} + +extract_expr <- function(expr) { + expr <- get_expr(expr) + while(is_lang(expr, paren_sym)) { + expr <- get_expr(expr[[2]]) + } + expr +} + +quo_is_helper <- function(quo) { + expr <- extract_expr(quo) + + if (!is_lang(expr)) { + return(FALSE) + } + + if (is_data_pronoun(expr)) { + return(FALSE) + } + + if (is_lang(expr, minus_sym, n = 1)) { + operand <- extract_expr(expr[[2]]) + return(quo_is_helper(operand)) + } + + if (is_lang(expr, list(colon_sym, c_sym))) { + return(FALSE) + } + + TRUE +} +match_var <- function(chr, table) { + pos <- match(chr, table) + if (any(are_na(pos))) { + chr <- glue::collapse(chr[are_na(pos)], ", ") + abort(glue( + "Strings must match { singular(table) } names. \\ + Unknown { plural(table) }: { chr }" + )) + } + pos +} + +setdiff2 <- function(x, y) { + x[match(x, y, 0L) == 0L] +} diff --git a/R/vars.R b/R/vars.R new file mode 100644 index 0000000..ba72ae2 --- /dev/null +++ b/R/vars.R @@ -0,0 +1,112 @@ +#' Replace or get current variables +#' +#' @description +#' +#' Variables are made available to [select helpers][select_helpers] by +#' registering them in a special placeholder. +#' +#' * `scoped_vars()` changes the current variables and sets up a +#' function exit hook that automatically restores the previous +#' variables once the current function returns. +#' +#' * `with_vars()` takes an expression to be evaluated in a variable +#' context. +#' +#' * `poke_vars()` changes the contents of the placeholder with a new +#' set of variables. It returns the previous variables invisibly and +#' it is your responsibility to restore them after you are +#' done. This is for expert use only. +#' +#' * `peek_vars()` returns the variables currently registered. +#' +#' * `has_vars()` returns `TRUE` if a variable context has been set, +#' `FALSE` otherwise. +#' +#' @param vars A character vector of variable names. +#' @return For `poke_vars()` and `scoped_vars()`, the old variables +#' invisibly. For `peek_vars()`, the variables currently +#' registered. +#' @export +#' @examples +#' poke_vars(letters) +#' peek_vars() +#' +#' # Now that the variables are registered, the helpers can figure out +#' # the positions of elements within the variable vector: +#' one_of(c("d", "z")) +#' +#' # In a function be sure to restore the previous variables. An exit +#' # hook is the best way to do it: +#' fn <- function(vars) { +#' old <- poke_vars(vars) +#' on.exit(poke_vars(old)) +#' +#' one_of("d") +#' } +#' fn(letters) +#' fn(letters[3:5]) +#' +#' # The previous variables are still registered after fn() was +#' # called: +#' peek_vars() +#' +#' +#' # It is recommended to use the scoped variant as it restores the +#' # state automatically when the function returns: +#' fn <- function(vars) { +#' scoped_vars(vars) +#' starts_with("r") +#' } +#' fn(c("red", "blue", "rose")) +#' +#' # The with_vars() helper makes it easy to pass an expression that +#' # should be evaluated in a variable context. Thanks to lazy +#' # evaluation, you can just pass the expression argument from your +#' # wrapper to with_vars(): +#' fn <- function(expr) { +#' vars <- c("red", "blue", "rose") +#' with_vars(vars, expr) +#' } +#' fn(starts_with("r")) +poke_vars <- function(vars) { + stopifnot(is_character(vars) || is_null(vars)) + + old <- vars_env$selected + vars_env$selected <- vars + + invisible(old) +} +#' @rdname poke_vars +#' @export +peek_vars <- function() { + vars_env$selected %||% abort("No tidyselect variables were registered") +} + +#' @rdname poke_vars +#' @param frame The frame environment where the exit hook for +#' restoring the old variables should be registered. +#' @export +scoped_vars <- function(vars, frame = caller_env()) { + old <- poke_vars(vars) + + # Inline everything so the call will succeed in any environment + expr <- lang(on.exit, lang(poke_vars, old), add = TRUE) + eval_bare(expr, frame) + + invisible(old) +} +#' @rdname poke_vars +#' @param expr An expression to be evaluated within the variable +#' context. +#' @export +with_vars <- function(vars, expr) { + scoped_vars(vars) + expr +} + +#' @rdname poke_vars +has_vars <- function() { + !is_null(vars_env$selected) +} + +vars_env <- new_environment() diff --git a/README.md b/README.md new file mode 100644 index 0000000..281fb13 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# tidyselect + +## Overview + +The tidyselect package is the backend of functions like `dplyr::select()` +or `dplyr::pull()` as well as several tidyr verbs. It allows you to +create selecting verbs that are consistent with other tidyverse packages. + + +## Installation + +tidyselect is on CRAN. You can also install the development version +from github with: + +```r +# install.packages("devtools") +devtools::install_github("tidyverse/tidyselect") +``` diff --git a/man/poke_vars.Rd b/man/poke_vars.Rd new file mode 100644 index 0000000..055e073 --- /dev/null +++ b/man/poke_vars.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vars.R +\name{poke_vars} +\alias{poke_vars} +\alias{peek_vars} +\alias{scoped_vars} +\alias{with_vars} +\alias{has_vars} +\title{Replace or get current variables} +\usage{ +poke_vars(vars) + +peek_vars() + +scoped_vars(vars, frame = caller_env()) + +with_vars(vars, expr) + +has_vars() +} +\arguments{ +\item{vars}{A character vector of variable names.} + +\item{frame}{The frame environment where the exit hook for +restoring the old variables should be registered.} + +\item{expr}{An expression to be evaluated within the variable +context.} +} +\value{ +For \code{poke_vars()} and \code{scoped_vars()}, the old variables +invisibly. For \code{peek_vars()}, the variables currently +registered. +} +\description{ +Variables are made available to \link[=select_helpers]{select helpers} by +registering them in a special placeholder. +\itemize{ +\item \code{scoped_vars()} changes the current variables and sets up a +function exit hook that automatically restores the previous +variables once the current function returns. +\item \code{with_vars()} takes an expression to be evaluated in a variable +context. +\item \code{poke_vars()} changes the contents of the placeholder with a new +set of variables. It returns the previous variables invisibly and +it is your responsibility to restore them after you are +done. This is for expert use only. +\item \code{peek_vars()} returns the variables currently registered. +\item \code{has_vars()} returns \code{TRUE} if a variable context has been set, +\code{FALSE} otherwise. +} +} +\examples{ +poke_vars(letters) +peek_vars() + +# Now that the variables are registered, the helpers can figure out +# the positions of elements within the variable vector: +one_of(c("d", "z")) + +# In a function be sure to restore the previous variables. An exit +# hook is the best way to do it: +fn <- function(vars) { + old <- poke_vars(vars) + on.exit(poke_vars(old)) + + one_of("d") +} +fn(letters) +fn(letters[3:5]) + +# The previous variables are still registered after fn() was +# called: +peek_vars() + + +# It is recommended to use the scoped variant as it restores the +# state automatically when the function returns: +fn <- function(vars) { + scoped_vars(vars) + starts_with("r") +} +fn(c("red", "blue", "rose")) + +# The with_vars() helper makes it easy to pass an expression that +# should be evaluated in a variable context. Thanks to lazy +# evaluation, you can just pass the expression argument from your +# wrapper to with_vars(): +fn <- function(expr) { + vars <- c("red", "blue", "rose") + with_vars(vars, expr) +} +fn(starts_with("r")) +} diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 0000000..8ea8178 --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reexport-rlang.R +\docType{import} +\name{reexports} +\alias{reexports} +\alias{quo} +\alias{reexports} +\alias{quos} +\alias{reexports} +\alias{enquo} +\alias{reexports} +\alias{quo_name} +\title{Objects exported from other packages} +\keyword{internal} +\description{ +These objects are imported from other packages. Follow the links +below to see their documentation. + +\describe{ + \item{rlang}{\code{\link[rlang]{quo}}, \code{\link[rlang]{quos}}, \code{\link[rlang]{enquo}}, \code{\link[rlang]{quo_name}}} +}} + diff --git a/man/select_helpers.Rd b/man/select_helpers.Rd new file mode 100644 index 0000000..f549ac6 --- /dev/null +++ b/man/select_helpers.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/select-helpers.R +\name{select_helpers} +\alias{select_helpers} +\alias{starts_with} +\alias{ends_with} +\alias{contains} +\alias{matches} +\alias{num_range} +\alias{one_of} +\alias{everything} +\alias{last_col} +\title{Select helpers} +\usage{ +starts_with(match, ignore.case = TRUE, vars = peek_vars()) + +ends_with(match, ignore.case = TRUE, vars = peek_vars()) + +contains(match, ignore.case = TRUE, vars = peek_vars()) + +matches(match, ignore.case = TRUE, vars = peek_vars()) + +num_range(prefix, range, width = NULL, vars = peek_vars()) + +one_of(..., .vars = peek_vars()) + +everything(vars = peek_vars()) + +last_col(offset = 0L, vars = peek_vars()) +} +\arguments{ +\item{match}{A string.} + +\item{ignore.case}{If \code{TRUE}, the default, ignores case when matching +names.} + +\item{vars, .vars}{A character vector of variable names. When called +from inside \code{\link[=select]{select()}} these are automatically set to the names +of the table.} + +\item{prefix}{A prefix that starts the numeric range.} + +\item{range}{A sequence of integers, like \code{1:5}} + +\item{width}{Optionally, the "width" of the numeric range. For example, +a range of 2 gives "01", a range of three "001", etc.} + +\item{...}{One or more character vectors.} + +\item{offset}{Set it to \code{n} to select the nth var from the end.} +} +\value{ +An integer vector giving the position of the matched variables. +} +\description{ +These functions allow you to select variables based on their names. +\itemize{ +\item \code{starts_with()}: starts with a prefix +\item \code{ends_with()}: ends with a prefix +\item \code{contains()}: contains a literal string +\item \code{matches()}: matches a regular expression +\item \code{num_range()}: a numerical range like x01, x02, x03. +\item \code{one_of()}: variables in character vector. +\item \code{everything()}: all variables. +\item \code{last_col()}: last variable, possibly with an offset. +} +} +\examples{ +nms <- names(iris) +vars_select(nms, starts_with("Petal")) +vars_select(nms, ends_with("Width")) +vars_select(nms, contains("etal")) +vars_select(nms, matches(".t.")) +vars_select(nms, Petal.Length, Petal.Width) +vars_select(nms, everything()) +vars_select(nms, last_col()) +vars_select(nms, last_col(offset = 2)) + +vars <- c("Petal.Length", "Petal.Width") +vars_select(nms, one_of(vars)) +} diff --git a/man/tidyselect-package.Rd b/man/tidyselect-package.Rd new file mode 100644 index 0000000..00e62d3 --- /dev/null +++ b/man/tidyselect-package.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyselect.R +\docType{package} +\name{tidyselect-package} +\alias{tidyselect} +\alias{tidyselect-package} +\title{tidyselect: Select from a Set of Strings} +\description{ +A backend for the selecting functions of the 'tidyverse'. +It makes it easy to implement select-like functions in your own +packages in a way that is consistent with other 'tidyverse' +interfaces for selection. +} +\author{ +\strong{Maintainer}: Lionel Henry \email{[email protected]} + +Authors: +\itemize{ + \item Hadley Wickham \email{[email protected]} +} + +Other contributors: +\itemize{ + \item RStudio [copyright holder] +} + +} diff --git a/man/vars_pull.Rd b/man/vars_pull.Rd new file mode 100644 index 0000000..37c8c75 --- /dev/null +++ b/man/vars_pull.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vars-pull.R +\name{vars_pull} +\alias{vars_pull} +\title{Select variable} +\usage{ +vars_pull(vars, var = -1) +} +\arguments{ +\item{var}{A variable specified as: +\itemize{ +\item a literal variable name +\item a positive integer, giving the position counting from the left +\item a negative integer, giving the position counting from the right. +} + +The default returns the last column (on the assumption that's the +column you've created most recently). + +This argument is taken by expression and supports +\link[rlang:quasiquotation]{quasiquotation} (you can unquote column +names and column positions).} +} +\value{ +The selected column name as an unnamed string. +} +\description{ +This function powers \code{\link[dplyr:pull]{dplyr::pull()}} and various functions of the +tidyr package. It is similar to \code{\link[=vars_select]{vars_select()}} but returns only +one column name and has slightly different semantics: it allows +negative numbers to select columns from the end. +} +\examples{ +# It takes its argument by expression: +vars_pull(letters, c) + +# Negative numbers select from the end: +vars_pull(letters, -3) + +# You can unquote variables: +var <- 10 +vars_pull(letters, !! var) +} +\seealso{ +\code{\link[dplyr:pull]{dplyr::pull()}}, \code{\link[=vars_select]{vars_select()}} +} +\keyword{internal} diff --git a/man/vars_select.Rd b/man/vars_select.Rd new file mode 100644 index 0000000..b78e2ad --- /dev/null +++ b/man/vars_select.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vars-rename.R, R/vars-select.R +\name{vars_rename} +\alias{vars_rename} +\alias{vars_select} +\title{Select or rename variables} +\usage{ +vars_rename(.vars, ..., .strict = TRUE) + +vars_select(.vars, ..., .include = character(), .exclude = character(), + .strict = TRUE) +} +\arguments{ +\item{.vars}{A character vector of existing column names.} + +\item{..., args}{Expressions to compute + +These arguments are automatically \link[rlang:quo]{quoted} and +\link[rlang:eval_tidy]{evaluated} in a context where elements of +\code{vars} are objects representing their positions within +\code{vars}. They support \link[rlang:quasiquotation]{unquoting} and +splicing. See \code{vignette("programming")} for an introduction to +these concepts. + +Note that except for \code{:}, \code{-} and \code{c()}, all complex expressions +are evaluated outside that context. This is to prevent accidental +matching to \code{vars} elements when you refer to variables from the +calling context.} + +\item{.strict}{If \code{TRUE}, will throw an error if you attempt to rename a +variable that doesn't exist.} + +\item{.include, .exclude}{Character vector of column names to always +include/exclude.} + +\item{.strict}{If \code{FALSE}, errors about unknown columns are ignored.} +} +\value{ +A named character vector. Values are existing column names, +names are new names. +} +\description{ +These functions power \code{\link[dplyr:select]{dplyr::select()}} and \code{\link[dplyr:rename]{dplyr::rename()}}. They +enable dplyr selecting or renaming semantics in your own functions. +} +\section{Customising error messages}{ + + +For consistency with dplyr, error messages refer to "columns" by +default. This assumes that the variables being selected come from a +data frame. If this is not appropriate for your DSL, you can add an +attribute \code{type} to the \code{.vars} vector to specify alternative +names. This must be a character vector of length 2 whose first +component is the singular form and the second is the plural. For +example, \code{c("variable", "variables")}. +} + +\examples{ +# Keep variables +vars_select(names(iris), everything()) +vars_select(names(iris), starts_with("Petal")) +vars_select(names(iris), ends_with("Width")) +vars_select(names(iris), contains("etal")) +vars_select(names(iris), matches(".t.")) +vars_select(names(iris), Petal.Length, Petal.Width) +vars_select(names(iris), one_of("Petal.Length", "Petal.Width")) + +df <- as.data.frame(matrix(runif(100), nrow = 10)) +df <- df[c(3, 4, 7, 1, 9, 8, 5, 2, 6, 10)] +vars_select(names(df), num_range("V", 4:6)) + +# Drop variables +vars_select(names(iris), -starts_with("Petal")) +vars_select(names(iris), -ends_with("Width")) +vars_select(names(iris), -contains("etal")) +vars_select(names(iris), -matches(".t.")) +vars_select(names(iris), -Petal.Length, -Petal.Width) + +# Rename variables +vars_select(names(iris), petal_length = Petal.Length) +vars_select(names(iris), petal = starts_with("Petal")) + +# Rename variables preserving all existing +vars_rename(names(iris), petal_length = Petal.Length) + +# You can unquote symbols or quosures +vars_select(names(iris), !! quote(Petal.Length)) + +# And unquote-splice lists of symbols or quosures +vars_select(names(iris), !!! list(quo(Petal.Length), quote(Petal.Width))) + + +# If you want to avoid ambiguity about where to find objects you +# have two solutions provided by the tidy eval framework. If you +# want to refer to local objects, you can explicitly unquote +# them. They must contain variable positions (integers) or variable +# names (strings): +Species <- 2 +vars_select(names(iris), Species) # Picks up `Species` from the data frame +vars_select(names(iris), !! Species) # Picks up the local object referring to column 2 + +# If you want to make sure that a variable is picked up from the +# data, you can use the `.data` pronoun: +vars_select(names(iris), .data$Species) + + +# If you're writing a wrapper around vars_select(), pass the dots +# via splicing to avoid matching dotted arguments to vars_select() +# named arguments (`vars`, `include` and `exclude`): +wrapper <- function(...) { + vars_select(names(mtcars), !!! quos(...)) +} + +# This won't partial-match on `vars`: +wrapper(var = cyl) + +# This won't match on `include`: +wrapper(include = cyl) + + +# If your wrapper takes named arguments, you need to capture then +# unquote to pass them to vars_select(). See the vignette on +# programming with dplyr for more on this: +wrapper <- function(var1, var2) { + vars_select(names(mtcars), !! enquo(var1), !! enquo(var2)) +} +wrapper(starts_with("d"), starts_with("c")) +} +\seealso{ +\code{\link[=vars_pull]{vars_pull()}} +} +\keyword{internal} diff --git a/man/vars_select_helpers.Rd b/man/vars_select_helpers.Rd new file mode 100644 index 0000000..a483e12 --- /dev/null +++ b/man/vars_select_helpers.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/select-helpers.R +\docType{data} +\name{vars_select_helpers} +\alias{vars_select_helpers} +\title{List of selection helpers} +\format{An object of class \code{list} of length 8.} +\usage{ +vars_select_helpers +} +\description{ +This list contains all selection helpers exported in tidyselect. It +is useful when you want to embed the helpers in your API without +having to track addition of new helpers in tidyselect. +} +\examples{ +# You can easily embed the helpers by burying them in the scopes of +# input quosures. For this example we need an environment where +# tidyselect is not attached: +local(envir = baseenv(), { + vars <- c("foo", "bar", "baz") + helpers <- tidyselect::vars_select_helpers + + my_select <- function(...) { + quos <- rlang::quos(...) + quos <- lapply(quos, rlang::env_bury, !!! helpers) + + tidyselect::vars_select(vars, !!! quos) + } + + # The user can now call my_select() with helpers without having + # to attach tidyselect: + my_select(starts_with("b")) +}) +} +\keyword{datasets} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..2dcf3e7 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,29 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include <Rcpp.h> + +using namespace Rcpp; + +// inds_combine +SEXP inds_combine(CharacterVector vars, ListOf<IntegerVector> xs); +RcppExport SEXP _tidyselect_inds_combine(SEXP varsSEXP, SEXP xsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< CharacterVector >::type vars(varsSEXP); + Rcpp::traits::input_parameter< ListOf<IntegerVector> >::type xs(xsSEXP); + rcpp_result_gen = Rcpp::wrap(inds_combine(vars, xs)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_tidyselect_inds_combine", (DL_FUNC) &_tidyselect_inds_combine, 2}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_tidyselect(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/combine_variables.cpp b/src/combine_variables.cpp new file mode 100644 index 0000000..1845cc0 --- /dev/null +++ b/src/combine_variables.cpp @@ -0,0 +1,147 @@ +#include <Rcpp.h> +using namespace Rcpp; + +int vector_sign(IntegerVector x) { + bool pos = false, neg = false; + + int n = x.size(); + for (int i = 0; i < n; ++i) { + if (x[i] < 0) neg = true; + if (x[i] > 0) pos = true; + + if (neg && pos) break; + } + + if (neg == pos) { + // Either mixed, or all zeros + return 0; + } else if (neg) { + return -1; + } else { + return 1; + } +} + +class VarList { + + std::vector<int> out_indx; + std::vector<String> out_name; + + int find(int i) { + std::vector<int>::iterator pos = std::find(out_indx.begin(), out_indx.end(), i); + if (pos == out_indx.end()) { + return -1; + } else { + return pos - out_indx.begin(); + } + } + +public: + explicit VarList(int n) : out_indx(), out_name() { + out_indx.reserve(n); + out_name.reserve(n); + } + + bool has(int i) { + return find(i) != -1; + } + + void add(int i, String name) { + out_indx.push_back(i); + out_name.push_back(name); + } + void remove(int i) { + int pos = find(i); + if (pos == -1) return; + + out_indx.erase(out_indx.begin() + pos); + out_name.erase(out_name.begin() + pos); + } + void update(int i, String name) { + int pos = find(i); + if (pos == -1) { + add(i, name); + } else { + out_name[pos] = name; + } + } + + operator SEXP() { + IntegerVector out(out_indx.begin(), out_indx.end()); + CharacterVector out_names(out_name.begin(), out_name.end()); + out.names() = out_names; + + return out; + } +}; + +// [[Rcpp::export]] +SEXP inds_combine(CharacterVector vars, ListOf<IntegerVector> xs) { + VarList selected(vars.size()); + if (xs.size() == 0) + return IntegerVector::create(); + + // Workaround bug in ListOf<>; can't access attributes + SEXP raw_names = Rf_getAttrib(xs, Rf_mkString("names")); + CharacterVector xs_names; + if (raw_names == R_NilValue) { + xs_names = CharacterVector(xs.size(), ""); + } else { + xs_names = raw_names; + } + + // If first component is negative, pre-fill with existing vars + if (vector_sign(xs[0]) == -1) { + for (int j = 0; j < vars.size(); ++j) { + selected.add(j + 1, vars[j]); + } + } + + for (int i = 0; i < xs.size(); ++i) { + IntegerVector x = xs[i]; + if (x.size() == 0) continue; + + int sign = vector_sign(x); + + if (sign == 0) + stop("Each argument must yield either positive or negative integers"); + + if (sign == 1) { + bool group_named = xs_names[i] != ""; + bool has_names = x.attr("names") != R_NilValue; + if (group_named) { + if (x.size() == 1) { + selected.update(x[0], xs_names[i]); + } else { + // If the group is named, children are numbered sequentially + for (int j = 0; j < x.size(); ++j) { + std::stringstream out; + out << xs_names[i] << j + 1; + selected.update(x[j], out.str()); + } + } + } else if (has_names) { + CharacterVector names = x.names(); + for (int j = 0; j < x.size(); ++j) { + selected.update(x[j], names[j]); + } + } else { + for (int j = 0; j < x.size(); ++j) { + int pos = x[j]; + if (pos < 1 || pos > vars.size()) + stop("Position must be between 0 and n"); + + // Add default name, if not all ready present + if (!selected.has(pos)) + selected.update(pos, vars[pos - 1]); + } + } + } else { + for (int j = 0; j < x.size(); ++j) { + selected.remove(-x[j]); + } + } + } + + return selected; +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..e92aa96 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,5 @@ +library("testthat") +library("rlang") +library("tidyselect") + +test_check("tidyselect") diff --git a/tests/testthat/test-inds-combine.R b/tests/testthat/test-inds-combine.R new file mode 100644 index 0000000..7284bc4 --- /dev/null +++ b/tests/testthat/test-inds-combine.R @@ -0,0 +1,59 @@ +context("combine indices") + +# This is the low C++ function which works on integer indices + +test_that("empty index gives empty output", { + vars <- inds_combine(letters, list()) + expect_equal(length(vars), 0) + + vars <- inds_combine(letters, list(numeric())) + expect_equal(length(vars), 0) +}) + +test_that("positive indexes kept", { + expect_equal(inds_combine(letters, list(1)), c(a = 1)) + expect_equal(inds_combine(letters, list(1, 26)), c(a = 1, z = 26)) + expect_equal(inds_combine(letters, list(c(1, 26))), c(a = 1, z = 26)) +}) + +test_that("indexes returned in order they appear", { + expect_equal(inds_combine(letters, list(26, 1)), c(z = 26, a = 1)) +}) + + +test_that("negative index in first position includes all others", { + vars <- inds_combine(letters[1:3], list(-1)) + expect_equal(vars, c(b = 2, c = 3)) +}) + +test_that("named inputs rename outputs", { + expect_equal(inds_combine(letters[1:3], list(d = 1)), c(d = 1)) + expect_equal(inds_combine(letters[1:3], list(c(d = 1))), c(d = 1)) +}) + +test_that("if multiple names, last kept", { + expect_equal(inds_combine(letters[1:3], list(d = 1, e = 1)), c(e = 1)) + expect_equal(inds_combine(letters[1:3], list(c(d = 1, e = 1))), c(e = 1)) +}) + +test_that("if one name for multiple vars, use integer index", { + expect_equal(inds_combine(letters[1:3], list(x = 1:3)), c(x1 = 1, x2 = 2, x3 = 3)) +}) + +test_that("invalid inputs raise error", { + expect_error( + inds_combine(names(mtcars), list(0)), + "Each argument must yield either positive or negative integers", + fixed = TRUE + ) + expect_error( + inds_combine(names(mtcars), list(c(-1, 1))), + "Each argument must yield either positive or negative integers", + fixed = TRUE + ) + expect_error( + inds_combine(names(mtcars), list(12)), + "Position must be between 0 and n", + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-select-helpers.R b/tests/testthat/test-select-helpers.R new file mode 100644 index 0000000..4b15777 --- /dev/null +++ b/tests/testthat/test-select-helpers.R @@ -0,0 +1,231 @@ +context("select helpers") + +test_that("no set variables throws warning", { + expect_error(starts_with("z"), "No tidyselect variables were registered") +}) + +test_that("failed match removes all columns", { + scoped_vars(c("x", "y")) + + expect_equal(starts_with("z"), integer(0)) + expect_equal(ends_with("z"), integer(0)) + expect_equal(contains("z"), integer(0)) + expect_equal(matches("z"), integer(0)) + expect_equal(num_range("z", 1:3), integer(0)) +}) + + +test_that("matches return integer positions", { + scoped_vars(c("abc", "acd", "bbc", "bbd", "eee")) + + expect_equal(starts_with("a"), c(1L, 2L)) + expect_equal(ends_with("d"), c(2L, 4L)) + expect_equal(contains("eee"), 5L) + expect_equal(matches(".b."), c(1L, 3L, 4L)) +}) + +test_that("throws with empty pattern is provided", { + # error messages from rlang + expect_error(starts_with("")) + expect_error(ends_with("")) + expect_error(contains("")) + expect_error(matches("")) +}) + +test_that("can use a variable", { + vars <- "x" + names(vars) <- vars + + expect_equal(vars_select(vars, starts_with(vars)), c(x = "x")) + expect_equal(vars_select(vars, ends_with(vars)), c(x = "x")) + expect_equal(vars_select(vars, contains(vars)), c(x = "x")) + expect_equal(vars_select(vars, matches(vars)), c(x = "x")) +}) + +test_that("can use a variable even if it exists in the data (#2266)", { + vars <- c("x", "y") + names(vars) <- vars + + y <- "x" + expected_result <- c(x = "x") + + expect_equal(vars_select(vars, starts_with(y)), expected_result) + expect_equal(vars_select(vars, ends_with(y)), expected_result) + expect_equal(vars_select(vars, contains(y)), expected_result) + expect_equal(vars_select(vars, matches(y)), expected_result) +}) + +test_that("num_range selects numeric ranges", { + vars <- c("x1", "x2", "x01", "x02", "x10", "x11") + names(vars) <- vars + + expect_equal(vars_select(vars, num_range("x", 1:2)), vars[1:2]) + expect_equal(vars_select(vars, num_range("x", 1:2, width = 2)), vars[3:4]) + expect_equal(vars_select(vars, num_range("x", 10:11)), vars[5:6]) + expect_equal(vars_select(vars, num_range("x", 10:11, width = 2)), vars[5:6]) +}) + +test_that("position must resolve to numeric variables throws error", { + expect_error( + vars_select(letters, !! list()), + 'must evaluate to column positions or names', + fixed = TRUE + ) +}) + + +# one_of ------------------------------------------------------------------ + +test_that("one_of gives useful errors", { + expect_error( + one_of(1L, .vars = c("x", "y")), + "All arguments must be character vectors, not integer", + fixed = TRUE + ) +}) + +test_that("one_of tolerates but warns for unknown columns", { + vars <- c("x", "y") + + expect_warning(res <- one_of("z", .vars = vars), "Unknown columns: `z`") + expect_equal(res, integer(0)) + expect_warning(res <- one_of(c("x", "z"), .vars = vars), "Unknown columns: `z`") + expect_equal(res, 1L) + +}) + +test_that("one_of converts names to positions", { + expect_equal(one_of("a", "z", .vars = letters), c(1L, 26L)) +}) + +test_that("one_of works with variables", { + vars <- c("x", "y") + expected_result <- c(x = "x") + var <- "x" + expect_equal(vars_select(vars, one_of(var)), expected_result) + # error messages from rlang + expect_error(vars_select(vars, one_of(`_x`)), "not found") + expect_error(vars_select(vars, one_of(`_y`)), "not found") +}) + +test_that("one_of works when passed variable name matches the column name (#2266)", { + vars <- c("x", "y") + expected_result <- c(x = "x") + x <- "x" + y <- "x" + expect_equal(vars_select(vars, one_of(!! x)), expected_result) + expect_equal(vars_select(vars, one_of(!! y)), expected_result) + expect_equal(vars_select(vars, one_of(y)), expected_result) +}) + +# first-selector ---------------------------------------------------------- + +test_that("initial (single) selector defaults correctly (issue #2275)", { + cn <- setNames(nm = c("x", "y", "z")) + + ### Single Column Selected + + # single columns (present), explicit + expect_equal(vars_select(cn, x), cn["x"]) + expect_equal(vars_select(cn, -x), cn[c("y", "z")]) + + # single columns (present), matched + expect_equal(vars_select(cn, contains("x")), cn["x"]) + expect_equal(vars_select(cn, -contains("x")), cn[c("y", "z")]) + + # single columns (not present), explicit + expect_error(vars_select(cn, foo), "not found") + expect_error(vars_select(cn, -foo), "not found") + + # single columns (not present), matched + expect_equal(vars_select(cn, contains("foo")), cn[integer()]) + expect_equal(vars_select(cn, -contains("foo")), cn) +}) + +test_that("initial (of multiple) selectors default correctly (issue #2275)", { + cn <- setNames(nm = c("x", "y", "z")) + + ### Multiple Columns Selected + + # explicit(present) + matched(present) + expect_equal(vars_select(cn, x, contains("y")), cn[c("x", "y")]) + expect_equal(vars_select(cn, x, -contains("y")), cn["x"]) + expect_equal(vars_select(cn, -x, contains("y")), cn[c("y", "z")]) + expect_equal(vars_select(cn, -x, -contains("y")), cn["z"]) + + # explicit(present) + matched(not present) + expect_equal(vars_select(cn, x, contains("foo")), cn["x"]) + expect_equal(vars_select(cn, x, -contains("foo")), cn["x"]) + expect_equal(vars_select(cn, -x, contains("foo")), cn[c("y", "z")]) + expect_equal(vars_select(cn, -x, -contains("foo")), cn[c("y", "z")]) + + # matched(present) + explicit(present) + expect_equal(vars_select(cn, contains("x"), y), cn[c("x", "y")]) + expect_equal(vars_select(cn, contains("x"), -y), cn["x"]) + expect_equal(vars_select(cn, -contains("x"), y), cn[c("y", "z")]) + expect_equal(vars_select(cn, -contains("x"), -y), cn["z"]) + + # matched(not present) + explicit(not present) + expect_error(vars_select(cn, contains("foo"), bar), "object 'bar' not found") + expect_error(vars_select(cn, contains("foo"), -bar), "object 'bar' not found") + expect_error(vars_select(cn, -contains("foo"), bar), "object 'bar' not found") + expect_error(vars_select(cn, -contains("foo"), -bar), "object 'bar' not found") + + # matched(present) + matched(present) + expect_equal(vars_select(cn, contains("x"), contains("y")), cn[c("x", "y")]) + expect_equal(vars_select(cn, contains("x"), -contains("y")), cn["x"]) + expect_equal(vars_select(cn, -contains("x"), contains("y")), cn[c("y", "z")]) + expect_equal(vars_select(cn, -contains("x"), -contains("y")), cn["z"]) + + # matched(present) + matched(not present) + expect_equal(vars_select(cn, contains("x"), contains("foo")), cn["x"]) + expect_equal(vars_select(cn, contains("x"), -contains("foo")), cn["x"]) + expect_equal(vars_select(cn, -contains("x"), contains("foo")), cn[c("y", "z")]) + expect_equal(vars_select(cn, -contains("x"), -contains("foo")), cn[c("y", "z")]) + + # matched(not present) + matched(present) + expect_equal(vars_select(cn, contains("foo"), contains("x")), cn["x"]) + expect_equal(vars_select(cn, contains("foo"), -contains("x")), cn[integer()]) + expect_equal(vars_select(cn, -contains("foo"), contains("x")), cn) + expect_equal(vars_select(cn, -contains("foo"), -contains("x")), cn[c("y", "z")]) + + # matched(not present) + matched(not present) + expect_equal(vars_select(cn, contains("foo"), contains("bar")), cn[integer()]) + expect_equal(vars_select(cn, contains("foo"), -contains("bar")), cn[integer()]) + expect_equal(vars_select(cn, -contains("foo"), contains("bar")), cn) + expect_equal(vars_select(cn, -contains("foo"), -contains("bar")), cn) +}) + +test_that("middle (no-match) selector should not clear previous selectors (issue #2275)", { + cn <- setNames(nm = c("x", "y", "z")) + + expect_equal( + vars_select(cn, contains("x"), contains("foo"), contains("z")), + cn[c("x", "z")] + ) + expect_equal( + vars_select(cn, contains("x"), -contains("foo"), contains("z")), + cn[c("x", "z")] + ) +}) + +test_that("can select with c() (#2685)", { + expect_identical(vars_select(letters, c(a, z)), c(a = "a", z = "z")) +}) + +test_that("can select with .data pronoun (#2715)", { + expect_identical(vars_select("foo", .data$foo), c(foo = "foo")) + expect_identical(vars_select("foo", .data[["foo"]]), c(foo = "foo")) + + expect_identical(vars_select(c("a", "b", "c"), .data$a : .data$b), c(a = "a", b = "b")) + expect_identical(vars_select(c("a", "b", "c"), .data[["a"]] : .data[["b"]]), c(a = "a", b = "b")) +}) + +test_that("last_col() selects last argument with offset", { + vars <- letters[1:3] + expect_identical(last_col(0, vars), 3L) + expect_identical(last_col(2, vars), 1L) + + expect_error(last_col(3, vars), "`offset` must be smaller than the number of columns") + expect_error(last_col(vars = chr()), "Can't select last column when input is empty") +}) diff --git a/tests/testthat/test-vars-pull.R b/tests/testthat/test-vars-pull.R new file mode 100644 index 0000000..862ae69 --- /dev/null +++ b/tests/testthat/test-vars-pull.R @@ -0,0 +1,36 @@ +context("pull var") + +test_that("errors for bad inputs", { + expect_error( + vars_pull(letters, letters), + "`var` must evaluate to a single number", + fixed = TRUE + ) + + expect_error( + vars_pull(letters, aa), + "object 'aa' not found", + fixed = TRUE + ) + + expect_error( + vars_pull(letters, 0), + "`var` must be a value between -26 and 26 (excluding zero), not 0", + fixed = TRUE + ) + expect_error( + vars_pull(letters, 100), + "`var` must be a value between -26 and 26 (excluding zero), not 100", + fixed = TRUE + ) + expect_error( + vars_pull(letters, -Inf), + "`var` must be a value between -26 and 26 (excluding zero), not NA", + fixed = TRUE + ) + expect_error( + vars_pull(letters, NA_integer_), + "`var` must be a value between -26 and 26 (excluding zero), not NA", + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-vars-rename.R b/tests/testthat/test-vars-rename.R new file mode 100644 index 0000000..111d172 --- /dev/null +++ b/tests/testthat/test-vars-rename.R @@ -0,0 +1,61 @@ +context("rename vars") + +test_that("when .strict = FALSE, vars_rename always succeeds", { + expect_error( + vars_rename(c("a", "b"), d = e, .strict = TRUE), + "object 'e' not found", + fixed = TRUE + ) + + expect_error( + vars_rename(c("a", "b"), d = e, f = g, .strict = TRUE), + "object 'e' not found", + fixed = TRUE + ) + + expect_equal( + vars_rename(c("a", "b"), d = e, .strict = FALSE), + c("a" = "a", "b" = "b") + ) + + expect_identical( + vars_rename("x", A = x, B = y, .strict = FALSE), + c(A = "x") + ) + + expect_error( + vars_rename(c("a", "b"), d = "e", f = "g", .strict = TRUE), + "Unknown columns `e` and `g`", + fixed = TRUE + ) + + expect_identical( + vars_rename("x", A = "x", B = "y", .strict = FALSE), + c(A = "x") + ) +}) + +test_that("vars_rename() works with positions", { + expect_identical(vars_rename(letters[1:4], new1 = 2, new2 = 4), c(a = "a", new1 = "b", c = "c", new2 = "d")) + expect_error(vars_rename(letters, new = 1.5), "Column positions must be round numbers") +}) + +test_that("vars_rename() expects symbol or string", { + expect_error( + vars_rename(letters, d = !! list()), + '`d` = list() must be a column name or position, not a list', + fixed = TRUE + ) +}) + +test_that("vars_rename() sets variable context", { + expect_identical(vars_rename(c("a", "b"), B = one_of("b")), c(a = "a", B = "b")) +}) + +test_that("vars_rename() fails with vectors", { + expect_error(vars_rename(letters, A = 1:2), "Column positions must be scalar") +}) + +test_that("vars_rename() supports `.data` pronoun", { + expect_identical(vars_rename(c("a", "b"), B = .data$b), c(a = "a", B = "b")) +}) diff --git a/tests/testthat/test-vars-select.R b/tests/testthat/test-vars-select.R new file mode 100644 index 0000000..f11b158 --- /dev/null +++ b/tests/testthat/test-vars-select.R @@ -0,0 +1,98 @@ +context("select vars") + +test_that("vars_select can rename variables", { + vars <- c("a", "b") + expect_equal(vars_select(vars, b = a, a = b), c("b" = "a", "a" = "b")) +}) + +test_that("last rename wins", { + vars <- c("a", "b") + + expect_equal(vars_select(vars, b = a, c = a), c("c" = "a")) +}) + +test_that("negative index removes values", { + vars <- letters[1:3] + + expect_equal(vars_select(vars, -c), c(a = "a", b = "b")) + expect_equal(vars_select(vars, a:c, -c), c(a = "a", b = "b")) + expect_equal(vars_select(vars, a, b, c, -c), c(a = "a", b = "b")) + expect_equal(vars_select(vars, -c, a, b), c(a = "a", b = "b")) +}) + +test_that("can select with character vectors", { + expect_identical(vars_select(letters, "b", !! "z", c("b", "c")), set_names(c("b", "z", "c"))) +}) + +test_that("abort on unknown columns", { + expect_error(vars_select(letters, "foo"), "must match column names") + expect_error(vars_select(letters, c("a", "bar", "foo", "d")), "`bar`") +}) + +test_that("symbol overscope is not isolated from context", { + foo <- 10 + expect_identical(vars_select(letters, foo), c(j = "j")) + expect_identical(vars_select(letters, ((foo))), c(j = "j")) +}) + +test_that("symbol overscope works with parenthesised expressions", { + expect_identical(vars_select(letters, ((((a)):((w))))), vars_select(letters, a:w)) + expect_identical(vars_select(letters, -((((a)):((y))))), c(z = "z")) +}) + +test_that("can select with unnamed elements", { + expect_identical(vars_select(c("a", ""), a), c(a = "a")) +}) + +test_that("can customise error messages", { + vars <- set_attrs(letters, type = c("variable", "variables")) + + expect_error(vars_select(vars, "foo"), "match variable names. Unknown variables:") + expect_warning(vars_select(vars, one_of("bim")), "Unknown variables:") + expect_error(vars_rename(vars, A = "foo"), "Unknown variable `foo`") + expect_error(vars_pull(vars, !! c("a", "b")), "or a variable name") +}) + +test_that("can supply empty inputs", { + empty_vars <- set_names(chr()) + expect_identical(vars_select(letters), empty_vars) + expect_identical(vars_select(letters, NULL), empty_vars) + expect_identical(vars_select(letters, chr()), empty_vars) + + expect_identical(vars_select(letters, a, NULL), c(a = "a")) + expect_identical(vars_select(letters, a, chr()), c(a = "a")) +}) + +test_that("unknown variables errors are ignored if `.strict` is FALSE", { + expect_identical(vars_select(letters, `_foo`, .strict = FALSE), set_names(chr())) + expect_identical(vars_select(letters, a, `_foo`, .strict = FALSE), c(a = "a")) + expect_identical(vars_select(letters, a, "_foo", .strict = FALSE), c(a = "a")) + + expect_identical(vars_select(letters, a, -`_foo`, .strict = FALSE), c(a = "a")) + expect_identical(vars_select(letters, a, -"`_foo`", .strict = FALSE), c(a = "a")) + + expect_identical(vars_select(letters, c(a, `_foo`, c), .strict = FALSE), c(a = "a", c = "c")) + expect_identical(vars_select(letters, c(a, "_foo", c), .strict = FALSE), c(a = "a", c = "c")) +}) + +test_that("`:` handles strings", { + expect_identical(vars_select(letters, "b":"d"), vars_select(letters, b:d)) + expect_error(vars_select(letters, "b":"Z"), "Unknown column `Z`") +}) + +test_that("`-` handles strings", { + expect_identical(vars_select(letters, -"c"), vars_select(letters, -c)) +}) + +test_that("`-` handles positions", { + expect_identical(vars_select(letters, 10 - 7), vars_select(letters, 3)) +}) + +test_that("`-` handles character vectors (#35)", { + expect_identical(vars_select(letters, - (!! letters[1:20])), vars_select(letters, -(1:20))) + expect_error(vars_select(letters, - c("foo", "z", "bar")), "Unknown column `foo`") +}) + +test_that("can select `c` despite overscoped c()", { + expect_identical(vars_select(letters, c), c(c = "c")) +}) diff --git a/tests/testthat/test-vars.R b/tests/testthat/test-vars.R new file mode 100644 index 0000000..33435bd --- /dev/null +++ b/tests/testthat/test-vars.R @@ -0,0 +1,33 @@ +context("vars") + +test_that("scoped_vars() restores previous state", { + vars <- c("a", "b", "c") + scoped_vars(vars) + + fn <- function() { + scoped_vars(c("d", "e", "f")) + starts_with("e") + } + expect_identical(fn(), 2L) + + expect_identical(peek_vars(), vars) +}) + +test_that("with_vars() works", { + vars <- c("a", "b", "c") + scoped_vars(vars) + + fn <- function(expr) { + with_vars(c("rose", "blue", "red"), expr) + } + expect_identical(fn(starts_with("r")), c(1L, 3L)) + + expect_identical(peek_vars(), vars) +}) + +test_that("has_vars() detects variables", { + expect_false(has_vars()) + + scoped_vars(letters) + expect_true(has_vars()) +}) -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-tidyselect.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
