paleolimbot commented on code in PR #13960:
URL: https://github.com/apache/arrow/pull/13960#discussion_r956387776


##########
r/R/dplyr-eval.R:
##########
@@ -121,3 +128,172 @@ format_expr <- function(x) {
   }
   head(out, 1)
 }
+
+# vector of function names that do not have corresponding bindings, but we
+# shouldn't try to translate
+translation_exceptions <- c(
+  # common R or dplyr functions we do not have bindings for
+  "c",
+  "$",
+  "factor",
+  "~",
+  # "(",
+  "across",
+  ":",
+  "[",
+  "regex",
+  "fixed",
+  "list",
+  "%>%",
+  # all the types functions
+  "int8",
+  "int16",
+  "int32",
+  "int64",
+  "uint8",
+  "uint16",
+  "uint32",
+  "uint64",
+  "float16",
+  "halffloat",
+  "float32",
+  "float",
+  "float64",
+  "boolean",
+  "bool",
+  "utf8",
+  "large_utf8",
+  "binary",
+  "large_binary",
+  "fixed_size_binary",
+  "string",
+  "date32",
+  "date64",
+  "time32",
+  "time64",
+  "duration",
+  "null",
+  "timestamp",
+  "decimal",
+  "decimal128",
+  "decimal256"
+)
+
+#' Register user defined bindings
+#'
+#' The function takes a quosure for the evaluation of which a data mask is
+#' required. If there are corresponding bindings (in the functions registry) for
+#' all the function calls inside the quosure's expression, it does nothing.
+#' If there are any unknown ones, it decomposes and assesses whether there are
+#' matching bindings. It does this recursively.
+#'
+#' It sets a copy of the bindings environment (i.e. the function registry) as
+#' the parent environment for the user-defined functions' environments.
+#'
+#' @param quo quosure for which the data mask is being built
+#' @param .env bindings environment to register against
+#'
+#' @return the function does not return anything, it is used for its side-effects
+#' @keywords internal
+#' @noRd
+register_user_bindings <- function(quo, .env) {
+  # figure out which of the calls we do not have bindings for or we shouldn't
+  # translate
+  unknown_functions_chr <- setdiff(
+    all_funs(quo),
+    union(
+      names(.env),
+      translation_exceptions
+    )
+  )
+
+  if (length(unknown_functions_chr) != 0) {
+    # get the actual functions from the quosure's original environment or, if
+    # the call contains `::`, get the function from the namespace
+    unknown_functions <- purrr::map_if(
+      .x = unknown_functions_chr,
+      .p = ~ !grepl("::", .x),
+      .f = ~ tryCatch(as_function(.x, env = rlang::quo_get_env(quo)), error = 
function(e) NULL),
+      .else = ~ asNamespace(sub(":{+}.*?$", "", .x))[[sub("^.*?:{+}", "", .x)]]
+    )
+
+    # set the original quosure environment as the parent environment for the
+    # functions
+    parent.env(.env) <- rlang::quo_get_env(quo)
+    for (i in seq_along(unknown_functions)) {
+      unknown_fn_name <- unknown_functions_chr[[i]]
+      unknown_fn <- unknown_functions[[i]]
+      if (!is.null(unknown_fn) && registrable(unknown_fn, .env)) {
+        environment(unknown_fn) <- .env
+        function_body <- rlang::fn_body(unknown_fn)
+        body_calls <- all_funs(function_body[[2]])
+
+        # only register a valid bindings if none of the calls in body come back
+        # as NULL, otherwise register NULL
+        if (purrr::none(mget(body_calls, envir = .env), is.null)) {
+          register_binding(
+            unknown_fn_name,
+            unknown_fn,
+            registry = .env,
+            update_cache = TRUE
+          )

Review Comment:
   One thing to make sure of here is that this only affects *this* call (i.e., 
future calls to `mutate()` or `filter()` from some unrelated scope shouldn't be 
able to access the binding you're registering here). I see that you're using 
`.env` but I worry that `update_cache = TRUE` is still adding something to the 
global registry.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to