alamb commented on code in PR #8886:
URL: https://github.com/apache/arrow-datafusion/pull/8886#discussion_r1457650837


##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -121,58 +291,220 @@ where
     }
 }
 
+// given an function that maps a `&str`, `&str` to an arrow native type,

Review Comment:
   This is a very cool function. Maybe we can pull it into a shared 
function/utility at some point



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -84,7 +172,96 @@ where
     array.iter().map(|x| x.map(&op).transpose()).collect()
 }
 
-// given an function that maps a `&str` to a arrow native type,
+/// given a function `op` that maps `&str`, `&str` to the first successful 
Result
+/// of an arrow native type, returns a `PrimitiveArray` after the application 
of the
+/// function to `args` and the subsequence application of the `op2` function 
to any
+/// successful result. This function calls the `op` function with the first 
and second
+/// argument and if not successful continues with first and third, first and 
fourth,
+/// etc until the result was successful or no more arguments are present.
+/// # Errors
+/// This function errors iff:
+/// * the number of arguments is not > 1 or
+/// * the array arguments are not castable to a `GenericStringArray` or
+/// * the function `op` errors for all input
+pub(crate) fn strings_to_primitive_function<'a, T, O, F, F2>(
+    args: &'a [ColumnarValue],
+    op: F,
+    op2: F2,
+    name: &str,
+) -> Result<PrimitiveArray<O>>
+where
+    O: ArrowPrimitiveType,
+    T: OffsetSizeTrait,
+    F: Fn(&'a str, &'a str) -> Result<O::Native>,
+    F2: Fn(O::Native) -> O::Native,
+{
+    if args.len() < 2 {
+        return internal_err!(
+            "{:?} args were supplied but {} takes 2 or more arguments",
+            args.len(),
+            name
+        );
+    }
+
+    // this will throw the error if any of the array args are not castable to 
GenericStringArray
+    let data = args
+        .iter()
+        .map(|a| match a {
+            ColumnarValue::Array(a) => {
+                Ok(Either::Left(as_generic_string_array::<T>(a.as_ref())?))
+            }
+            ColumnarValue::Scalar(s) => match s {
+                ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => 
Ok(Either::Right(a)),
+                other => internal_err!(
+                    "Unexpected scalar type encountered '{other}' for function 
'{name}'"
+                ),
+            },
+        })
+        .collect::<Result<Vec<Either<&GenericStringArray<T>, 
&Option<String>>>>>()?;
+
+    let first_arg = &data.first().unwrap().left().unwrap();
+
+    first_arg
+        .iter()
+        .enumerate()
+        .map(|(pos, x)| {
+            let mut val = None;
+
+            if let Some(x) = x {
+                let param_args = data.iter().skip(1);
+
+                // go through the args and find the first successful result. 
Only the last
+                // failure will be returned if no successful result was 
received.
+                for param_arg in param_args {
+                    // param_arg is an array, use the corresponding index into 
the array as the arg
+                    // we're currently parsing
+                    let p = *param_arg;
+                    let r = if p.is_left() {
+                        let p = p.left().unwrap();
+                        op(x, p.value(pos))
+                    }
+                    // args is a scalar, use it directly
+                    else if let Some(p) = p.right().unwrap() {
+                        op(x, p.as_str())
+                    } else {
+                        continue;
+                    };
+
+                    if r.is_ok() {

Review Comment:
   One thing to consider here is that this pattern will allocate a string to 
generate an error message that will get discarded if there are more arguments 
to go. 
   
   The alternate is to pass back an Option and only generate an `Error` when it 
actually needs to be returned
   
   This is a potential optimization for the future, I think, not needed for 
this PR



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -121,58 +291,220 @@ where
     }
 }
 
+// given an function that maps a `&str`, `&str` to an arrow native type,
+// returns a `ColumnarValue` where the function is applied to either a 
`ArrayRef` or `ScalarValue`
+// depending on the `args`'s variant.
+fn handle_multiple<'a, O, F, S, M>(
+    args: &'a [ColumnarValue],
+    op: F,
+    op2: M,
+    name: &str,
+) -> Result<ColumnarValue>
+where
+    O: ArrowPrimitiveType,
+    S: ScalarType<O::Native>,
+    F: Fn(&'a str, &'a str) -> Result<O::Native>,
+    M: Fn(O::Native) -> O::Native,
+{
+    match &args[0] {
+        ColumnarValue::Array(a) => match a.data_type() {
+            DataType::Utf8 | DataType::LargeUtf8 => {
+                // validate the column types
+                for (pos, arg) in args.iter().enumerate() {
+                    match arg {
+                        ColumnarValue::Array(arg) => match arg.data_type() {
+                                DataType::Utf8 | DataType::LargeUtf8 => {
+                                    // all good
+                                },
+                                other => return internal_err!("Unsupported 
data type {other:?} for function {name}, arg # {pos}"),
+                            },
+                        ColumnarValue::Scalar(arg) => { match arg.data_type() {
+                            DataType::Utf8 | DataType::LargeUtf8 => {
+                                // all good
+                            },
+                            other => return internal_err!("Unsupported data 
type {other:?} for function {name}, arg # {pos}"),
+                        }}
+                    }
+                }
+
+                Ok(ColumnarValue::Array(Arc::new(
+                    strings_to_primitive_function::<i32, O, _, _>(args, op, 
op2, name)?,
+                )))
+            }
+            other => {
+                internal_err!("Unsupported data type {other:?} for function 
{name}")
+            }
+        },
+        // if the first argument is a scalar utf8 all arguments are expected 
to be scalar utf8
+        ColumnarValue::Scalar(scalar) => match scalar {
+            ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => {
+                let mut val: Option<Result<ColumnarValue>> = None;
+                let mut err: Option<DataFusionError> = None;
+
+                match a {
+                    Some(a) => {
+                        // enumerate all the values finding the first one that 
returns an Ok result
+                        for (pos, v) in args.iter().enumerate().skip(1) {
+                            if let ColumnarValue::Scalar(s) = v {
+                                if let ScalarValue::Utf8(x) | 
ScalarValue::LargeUtf8(x) =
+                                    s
+                                {
+                                    if let Some(s) = x {
+                                        match op(a.as_str(), s.as_str()) {
+                                            Ok(r) => {
+                                                val = 
Some(Ok(ColumnarValue::Scalar(
+                                                    S::scalar(Some(op2(r))),
+                                                )));
+                                                break;
+                                            }
+                                            Err(e) => {
+                                                err = Some(e);
+                                            }
+                                        }
+                                    }
+                                } else {
+                                    return internal_err!("Unsupported data 
type {s:?} for function {name}, arg # {pos}");

Review Comment:
   We don't have to support invoking this pattern, but I don't think it should 
generate an internal error - maybe we can add a test for this too



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to