thisisnic commented on code in PR #49536:
URL: https://github.com/apache/arrow/pull/49536#discussion_r3045788121


##########
r/R/dplyr-funcs-conditional.R:
##########
@@ -15,6 +15,169 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#' Parse logical condition formulas
+#'
+#' Converts condition ~ value formulas into Arrow expressions. Unlike
+#' [parse_value_mapping()], the LHS must be a logical expression (not a value
+#' to match against).
+#'
+#' @param formulas A list of two-sided formulas where LHS is a logical 
condition
+#'   and RHS is the value to use when TRUE (e.g., `x > 5 ~ "high"`).
+#' @param mask The data mask for evaluating formula expressions.
+#'
+#' @return A list with `query` (list of logical expressions) and `value`
+#'   (list of replacement expressions).
+#'
+#' @keywords internal
+#' @noRd
+parse_condition_formulas <- function(formulas, mask) {
+  fn <- call_name(rlang::caller_call())
+  # Compact NULL entries (allows conditional formulas like: if (cond) x ~ y)
+  formulas <- compact(formulas)
+  n <- length(formulas)
+  query <- vector("list", n)
+  value <- vector("list", n)
+  # Process each formula: condition ~ value
+  for (i in seq_len(n)) {
+    f <- formulas[[i]]
+    if (!is_formula(f, lhs = TRUE)) {
+      validation_error(paste0("Each argument to ", fn, "() must be a two-sided 
formula"))
+    }
+    # f[[2]] is LHS (logical condition), f[[3]] is RHS (value when TRUE)
+    query[[i]] <- arrow_eval(f[[2]], mask)
+    value[[i]] <- arrow_eval(f[[3]], mask)
+    # Validate LHS is logical (unlike parse_value_mapping which does equality 
matching)
+    if (!call_binding("is.logical", query[[i]])) {
+      validation_error(paste0("Left side of each formula in ", fn, "() must be 
a logical expression"))
+    }
+  }
+  list(query = query, value = value)
+}
+
+#' Create case_when Expression from query/value lists
+#' @param query List of logical Arrow Expressions.
+#' @param value List of value Arrow Expressions.
+#' @return An Arrow Expression representing the case_when.
+#' @keywords internal
+#' @noRd
+build_case_when_expr <- function(query, value) {
+  Expression$create(
+    "case_when",
+    args = c(
+      Expression$create(
+        "make_struct",
+        args = query,
+        options = list(field_names = as.character(seq_along(query)))
+      ),
+      value
+    )
+  )
+}
+
+#' Build a match expression for x against a value (scalar, NA, or vector).
+#' @param x Arrow Expression for the column to match against.
+#' @param match_value Value to match - R scalar, vector, or NA. Expressions
+#'   are compared with equality.
+#' @return Arrow Expression that is TRUE when x matches match_value.
+#' @keywords internal
+#' @noRd
+build_match_expr <- function(x, match_value) {
+  # Expressions or length-1 non-NA: use equality directly
+  if (inherits(match_value, "Expression") || length(match_value) == 1 && 
!is.na(match_value)) {
+    return(x == match_value)
+  }
+
+  # R scalar NA requires is.na() since x == NA returns NA in Arrow
+  if (length(match_value) == 1) {
+    return(call_binding("is.na", x))
+  }
+
+  # R vector: use %in%, handling NA separately if present
+  has_na <- any(is.na(match_value))
+  non_na_values <- match_value[!is.na(match_value)]
+
+  if (length(non_na_values) == 0) {
+    call_binding("is.na", x)
+  } else if (has_na) {
+    call_binding("%in%", x, non_na_values) | call_binding("is.na", x)
+  } else {
+    call_binding("%in%", x, match_value)
+  }
+}
+
+#' Build query/value lists from parallel from/to vectors.
+#' NA values in `from` use is.na() for matching.
+#' @param x Arrow Expression for the column to match against.
+#' @param from Vector of values to match.
+#' @param to Vector of replacement values (recycled to length of `from`).
+#' @return list(query, value) for use with build_case_when_expr().
+#' @keywords internal
+#' @noRd
+parse_from_to_mapping <- function(x, from, to) {
+  n <- length(from)
+  to <- vctrs::vec_recycle(to, n)
+  query <- map(from, ~ build_match_expr(x, .x))
+  value <- map(to, Expression$scalar)
+  list(query = query, value = value)
+}

Review Comment:
   We do not; I like it from an readibility point of view though as it 
contrasts with `parse_formula_mapping()` - I think I factored it out when I was 
reviewing the AI code to make it easier to read.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to