paleolimbot commented on code in PR #13960:
URL: https://github.com/apache/arrow/pull/13960#discussion_r955297904


##########
r/R/dplyr-eval.R:
##########
@@ -121,3 +128,120 @@ format_expr <- function(x) {
   }
   head(out, 1)
 }
+
+# vector of function names that do not have corresponding bindings, but we
+# shouldn't try to translate
+translation_exceptions <- c(
+  "c",
+  "$",
+  "factor",
+  # "~",
+  # "(",
+  "across",
+  ":",
+  "[",
+  "regex",
+  "fixed",
+  "list",
+  "%>%",
+  # all the types functions
+  "int8",
+  "int16",
+  "int32",
+  "int64",
+  "uint8",
+  "uint16",
+  "uint32",
+  "uint64",
+  "float16",
+  "halffloat",
+  "float32",
+  "float",
+  "float64",
+  "boolean",
+  "bool",
+  "utf8",
+  "large_utf8",
+  "binary",
+  "large_binary",
+  "fixed_size_binary",
+  "string",
+  "date32",
+  "date64",
+  "time32",
+  "time64",
+  "duration",
+  "null",
+  "timestamp",
+  "decimal",
+  "decimal128",
+  "decimal256"
+)
+
+register_user_bindings <- function(quo, .env) {
+  unknown_functions_chr <- setdiff(
+    all_funs(quo),
+    union(
+      names(.env),
+      translation_exceptions
+    )
+  )
+
+  if (length(unknown_functions_chr != 0)) {

Review Comment:
   ```suggestion
     if (length(unknown_functions_chr) != 0) {
   ```



##########
r/R/dplyr-eval.R:
##########
@@ -121,3 +128,120 @@ format_expr <- function(x) {
   }
   head(out, 1)
 }
+
+# vector of function names that do not have corresponding bindings, but we
+# shouldn't try to translate
+translation_exceptions <- c(
+  "c",
+  "$",
+  "factor",
+  # "~",
+  # "(",
+  "across",
+  ":",
+  "[",
+  "regex",
+  "fixed",
+  "list",
+  "%>%",
+  # all the types functions
+  "int8",
+  "int16",
+  "int32",
+  "int64",
+  "uint8",
+  "uint16",
+  "uint32",
+  "uint64",
+  "float16",
+  "halffloat",
+  "float32",
+  "float",
+  "float64",
+  "boolean",
+  "bool",
+  "utf8",
+  "large_utf8",
+  "binary",
+  "large_binary",
+  "fixed_size_binary",
+  "string",
+  "date32",
+  "date64",
+  "time32",
+  "time64",
+  "duration",
+  "null",
+  "timestamp",
+  "decimal",
+  "decimal128",
+  "decimal256"
+)
+
+register_user_bindings <- function(quo, .env) {
+  unknown_functions_chr <- setdiff(
+    all_funs(quo),
+    union(
+      names(.env),
+      translation_exceptions
+    )
+  )
+
+  if (length(unknown_functions_chr != 0)) {
+    # get the actual functions from the quosure's original environment or, if
+    # the call contains `::`, get the function from the namespace
+    unknown_functions <- purrr::map_if(
+      .x = unknown_functions_chr,
+      .p = ~ !grepl("::", .x),
+      .f = ~ tryCatch(as_function(.x, env = rlang::quo_get_env(quo)), error = 
function(e) NULL),
+      .else = ~ asNamespace(sub(":{+}.*?$", "", .x))[[sub("^.*?:{+}", "", .x)]]
+    )
+
+    # set the original quosure environment as the parent environment for the
+    # functions
+    parent.env(.env) <- rlang::quo_get_env(quo)

Review Comment:
   Is `.env` here `nse_funcs`? If so, I think you need to copy it first (or 
else every call to this will modify the parent of the environment in the R 
package?)



##########
r/R/dplyr-eval.R:
##########
@@ -121,3 +128,120 @@ format_expr <- function(x) {
   }
   head(out, 1)
 }
+
+# vector of function names that do not have corresponding bindings, but we
+# shouldn't try to translate
+translation_exceptions <- c(
+  "c",
+  "$",
+  "factor",
+  # "~",
+  # "(",
+  "across",
+  ":",
+  "[",
+  "regex",
+  "fixed",
+  "list",
+  "%>%",
+  # all the types functions
+  "int8",
+  "int16",
+  "int32",
+  "int64",
+  "uint8",
+  "uint16",
+  "uint32",
+  "uint64",
+  "float16",
+  "halffloat",
+  "float32",
+  "float",
+  "float64",
+  "boolean",
+  "bool",
+  "utf8",
+  "large_utf8",
+  "binary",
+  "large_binary",
+  "fixed_size_binary",
+  "string",
+  "date32",
+  "date64",
+  "time32",
+  "time64",
+  "duration",
+  "null",
+  "timestamp",
+  "decimal",
+  "decimal128",
+  "decimal256"
+)
+
+register_user_bindings <- function(quo, .env) {

Review Comment:
   I put a breakpoint here and this is indeed the function that is getting 
called recursively (i.e., there's apparently no way to get through this 
function without calling `register_user_bindings()`). I'm pretty sure it's the 
`length()` thing I saw below.



##########
r/R/dplyr-eval.R:
##########
@@ -121,3 +128,120 @@ format_expr <- function(x) {
   }
   head(out, 1)
 }
+
+# vector of function names that do not have corresponding bindings, but we
+# shouldn't try to translate
+translation_exceptions <- c(
+  "c",
+  "$",
+  "factor",
+  # "~",
+  # "(",
+  "across",
+  ":",
+  "[",
+  "regex",
+  "fixed",
+  "list",
+  "%>%",
+  # all the types functions
+  "int8",
+  "int16",
+  "int32",
+  "int64",
+  "uint8",
+  "uint16",
+  "uint32",
+  "uint64",
+  "float16",
+  "halffloat",
+  "float32",
+  "float",
+  "float64",
+  "boolean",
+  "bool",
+  "utf8",
+  "large_utf8",
+  "binary",
+  "large_binary",
+  "fixed_size_binary",
+  "string",
+  "date32",
+  "date64",
+  "time32",
+  "time64",
+  "duration",
+  "null",
+  "timestamp",
+  "decimal",
+  "decimal128",
+  "decimal256"
+)
+
+register_user_bindings <- function(quo, .env) {
+  unknown_functions_chr <- setdiff(
+    all_funs(quo),
+    union(
+      names(.env),
+      translation_exceptions
+    )
+  )
+
+  if (length(unknown_functions_chr != 0)) {
+    # get the actual functions from the quosure's original environment or, if
+    # the call contains `::`, get the function from the namespace
+    unknown_functions <- purrr::map_if(
+      .x = unknown_functions_chr,
+      .p = ~ !grepl("::", .x),
+      .f = ~ tryCatch(as_function(.x, env = rlang::quo_get_env(quo)), error = 
function(e) NULL),
+      .else = ~ asNamespace(sub(":{+}.*?$", "", .x))[[sub("^.*?:{+}", "", .x)]]
+    )
+
+    # set the original quosure environment as the parent environment for the
+    # functions
+    parent.env(.env) <- rlang::quo_get_env(quo)

Review Comment:
   Also, I'm not sure you want `rlang::quo_get_env(quo)`...you might want the 
original function environment (i.e., `environment(unknown_fn)`)?



##########
r/R/dplyr-eval.R:
##########
@@ -121,3 +128,120 @@ format_expr <- function(x) {
   }
   head(out, 1)
 }
+
+# vector of function names that do not have corresponding bindings, but we
+# shouldn't try to translate
+translation_exceptions <- c(
+  "c",
+  "$",
+  "factor",
+  # "~",
+  # "(",
+  "across",
+  ":",
+  "[",
+  "regex",
+  "fixed",
+  "list",
+  "%>%",
+  # all the types functions
+  "int8",
+  "int16",
+  "int32",
+  "int64",
+  "uint8",
+  "uint16",
+  "uint32",
+  "uint64",
+  "float16",
+  "halffloat",
+  "float32",
+  "float",
+  "float64",
+  "boolean",
+  "bool",
+  "utf8",
+  "large_utf8",
+  "binary",
+  "large_binary",
+  "fixed_size_binary",
+  "string",
+  "date32",
+  "date64",
+  "time32",
+  "time64",
+  "duration",
+  "null",
+  "timestamp",
+  "decimal",
+  "decimal128",
+  "decimal256"
+)
+
+register_user_bindings <- function(quo, .env) {
+  unknown_functions_chr <- setdiff(
+    all_funs(quo),
+    union(
+      names(.env),
+      translation_exceptions
+    )
+  )
+
+  if (length(unknown_functions_chr != 0)) {
+    # get the actual functions from the quosure's original environment or, if
+    # the call contains `::`, get the function from the namespace
+    unknown_functions <- purrr::map_if(
+      .x = unknown_functions_chr,
+      .p = ~ !grepl("::", .x),
+      .f = ~ tryCatch(as_function(.x, env = rlang::quo_get_env(quo)), error = 
function(e) NULL),
+      .else = ~ asNamespace(sub(":{+}.*?$", "", .x))[[sub("^.*?:{+}", "", .x)]]
+    )
+
+    # set the original quosure environment as the parent environment for the
+    # functions
+    parent.env(.env) <- rlang::quo_get_env(quo)
+    for (i in seq_along(unknown_functions)) {
+      unknown_fn_name <- unknown_functions_chr[[i]]
+      unknown_fn <- unknown_functions[[i]]
+      if (!is.null(unknown_fn) && registrable(unknown_fn, .env)) {
+        environment(unknown_fn) <- .env
+        register_binding(
+          unknown_fn_name,
+          unknown_fn,
+          registry = .env,
+          update_cache = TRUE
+        )
+      } else {
+        # if there are call we don't have bindings for, try to register them 
first
+        unknown_function_body <-
+          tryCatch(
+            rlang::fn_body(unknown_fn),
+            error = function(e) NULL
+          )
+        if (!is.null(unknown_function_body)) {
+          new_quo <- rlang::new_quosure(unknown_function_body[[2]], env = 
quo_get_env(quo))

Review Comment:
   `unknown_function_body[[2]]` here probably only works for function bodies 
that are `{ .. }`...it might be worth checking a function defined like 
`function(x) some_fun(x, 1, 2, 3)` (or maybe I'm missing where you handle this)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to