nealrichardson commented on a change in pull request #10327: URL: https://github.com/apache/arrow/pull/10327#discussion_r640171712
########## File path: r/R/type.R ########## @@ -423,6 +423,24 @@ as_type <- function(type, name = "type") { type } +canonical_type_str <- function(type_str) { Review comment: Should `as_type()` also wrap this and accept a string type name, returning a DataType? (I know I wrote the function basically to handle `double()`, but looking at the function name I'd kinda expect it to take the string name too.) If so, you could simplify `nse_funcs$is()`. ########## File path: r/R/type.R ########## @@ -423,6 +423,24 @@ as_type <- function(type, name = "type") { type } +canonical_type_str <- function(type_str) { + # canonicalizes data type strings, converting aliases to match the strings Review comment: In some senses I think this is backwards: IIUC "float64" is canonical, but (for some reason) the C++ library prints it as "double". ########## File path: r/R/dplyr-functions.R ########## @@ -109,6 +126,55 @@ nse_funcs$as.numeric <- function(x) { Expression$create("cast", x, options = cast_options(to_type = float64())) } +# is.* type functions +nse_funcs$is.character <- function(x) { + x$type_id() %in% Type[c("STRING", "LARGE_STRING")] +} +nse_funcs$is.numeric <- function(x) { + x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", + "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE", + "DECIMAL", "DECIMAL256")] +} +nse_funcs$is.double <- function(x) { + x$type_id() == Type["DOUBLE"] +} +nse_funcs$is.integer <- function(x) { + x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", + "UINT64", "INT64")] +} +nse_funcs$is.integer64 <- function(x) { + x$type_id() == Type["INT64"] +} +nse_funcs$is.logical <- function(x) { + x$type_id() == Type["BOOL"] +} +nse_funcs$is.factor <- function(x) { + x$type_id() == Type["DICTIONARY"] +} +nse_funcs$is.list <- function(x) { + x$type_id() %in% Type[c("LIST", "FIXED_SIZE_LIST", "LARGE_LIST")] +} + +# rlang::is_* type functions +nse_funcs$is_character <- function(x, n = NULL) { + nse_funcs$is.character(x) && (is.null(n) || length(x) == n) +} +nse_funcs$is_double <- function(x, n = NULL, finite = NULL) { + nse_funcs$is.double(x) && + (is.null(n) || length(x) == n) && + (is.null(finite) || + (finite && as.vector(!any(is_in(x, c(NA_real_, Inf,-Inf, NaN)))))) Review comment: Likewise we can't do that--`as.vector()` on an Expression in a dataset query isn't going to do what you hope ########## File path: r/R/dplyr-functions.R ########## @@ -109,6 +119,55 @@ nse_funcs$as.numeric <- function(x) { Expression$create("cast", x, options = cast_options(to_type = float64())) } +# is.* type functions +nse_funcs$is.character <- function(x) { + x$type_id() %in% Type[c("STRING", "LARGE_STRING")] +} +nse_funcs$is.numeric <- function(x) { + x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", + "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE", + "DECIMAL", "DECIMAL256")] +} +nse_funcs$is.double <- function(x) { + x$type_id() == Type["DOUBLE"] +} +nse_funcs$is.integer <- function(x) { + x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", + "UINT64", "INT64")] +} +nse_funcs$is.integer64 <- function(x) { + x$type_id() == Type["INT64"] +} +nse_funcs$is.logical <- function(x) { + x$type_id() == Type["BOOL"] +} +nse_funcs$is.factor <- function(x) { + x$type_id() == Type["DICTIONARY"] +} +nse_funcs$is.list <- function(x) { + x$type_id() %in% Type[c("LIST", "FIXED_SIZE_LIST", "LARGE_LIST")] +} + +# rlang::is_* type functions +nse_funcs$is_character <- function(x, n = NULL) { + nse_funcs$is.character(x) && (is.null(n) || length(x) == n) Review comment: This (and the others like it) won't work because `x` is an `Expression` and doesn't have a known `length`. I think we need to error if `!is.null(n)` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org