alamb commented on code in PR #12203:
URL: https://github.com/apache/datafusion/pull/12203#discussion_r1754936324


##########
datafusion/functions/src/regex/regexpreplace.rs:
##########
@@ -187,104 +188,117 @@ fn regex_replace_posix_groups(replacement: &str) -> 
String {
 /// # Ok(())
 /// # }
 /// ```
-pub fn regexp_replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
+pub fn regexp_replace<'a, T: OffsetSizeTrait, V, B>(
+    string_array: V,
+    pattern_array: B,
+    replacement_array: B,
+    flags: Option<&ArrayRef>,
+) -> Result<ArrayRef>
+where
+    V: ArrayAccessor<Item = &'a str>,
+    B: ArrayAccessor<Item = &'a str>,
+{
     // Default implementation for regexp_replace, assumes all args are arrays
     // and args is a sequence of 3 or 4 elements.
 
     // creating Regex is expensive so create hashmap for memoization
     let mut patterns: HashMap<String, Regex> = HashMap::new();
 
-    match args.len() {
-        3 => {
-            let string_array = as_generic_string_array::<T>(&args[0])?;
-            let pattern_array = as_generic_string_array::<T>(&args[1])?;
-            let replacement_array = as_generic_string_array::<T>(&args[2])?;
-
-            let result = string_array
-            .iter()
-            .zip(pattern_array.iter())
-            .zip(replacement_array.iter())
-            .map(|((string, pattern), replacement)| match (string, pattern, 
replacement) {
-                (Some(string), Some(pattern), Some(replacement)) => {
-                    let replacement = regex_replace_posix_groups(replacement);
-
-                    // if patterns hashmap already has regexp then use else 
create and return
-                    let re = match patterns.get(pattern) {
-                        Some(re) => Ok(re),
-                        None => {
-                            match Regex::new(pattern) {
-                                Ok(re) => {
-                                    patterns.insert(pattern.to_string(), re);
-                                    Ok(patterns.get(pattern).unwrap())
+    let string_array_iter = ArrayIter::new(string_array);
+    let pattern_array_iter = ArrayIter::new(pattern_array);
+    let replacement_array_iter = ArrayIter::new(replacement_array);
+
+    match flags {
+        None => {
+            let result = string_array_iter
+                .zip(pattern_array_iter)
+                .zip(replacement_array_iter)
+                .map(|((string, pattern), replacement)| {
+                    match (string, pattern, replacement) {
+                        (Some(string), Some(pattern), Some(replacement)) => {
+                            let replacement = 
regex_replace_posix_groups(replacement);
+                            // if patterns hashmap already has regexp then use 
else create and return
+                            let re = match patterns.get(pattern) {
+                                Some(re) => Ok(re),
+                                None => match Regex::new(pattern) {
+                                    Ok(re) => {
+                                        patterns.insert(pattern.to_string(), 
re);
+                                        Ok(patterns.get(pattern).unwrap())
+                                    }
+                                    Err(err) => {
+                                        
Err(DataFusionError::External(Box::new(err)))
+                                    }
                                 },
-                                Err(err) => 
Err(DataFusionError::External(Box::new(err))),
-                            }
-                        }
-                    };
+                            };
 
-                    Some(re.map(|re| re.replace(string, 
replacement.as_str()))).transpose()
-                }
-            _ => Ok(None)
-            })
-            .collect::<Result<GenericStringArray<T>>>()?;
+                            Some(re.map(|re| re.replace(string, 
replacement.as_str())))
+                                .transpose()
+                        }
+                        _ => Ok(None),
+                    }
+                })
+                .collect::<Result<GenericStringArray<T>>>()?;
 
             Ok(Arc::new(result) as ArrayRef)
         }
-        4 => {
-            let string_array = as_generic_string_array::<T>(&args[0])?;
-            let pattern_array = as_generic_string_array::<T>(&args[1])?;
-            let replacement_array = as_generic_string_array::<T>(&args[2])?;
-            let flags_array = as_generic_string_array::<T>(&args[3])?;
-
-            let result = string_array
-            .iter()
-            .zip(pattern_array.iter())
-            .zip(replacement_array.iter())
-            .zip(flags_array.iter())
-            .map(|(((string, pattern), replacement), flags)| match (string, 
pattern, replacement, flags) {
-                (Some(string), Some(pattern), Some(replacement), Some(flags)) 
=> {
-                    let replacement = regex_replace_posix_groups(replacement);
-
-                    // format flags into rust pattern
-                    let (pattern, replace_all) = if flags == "g" {
-                        (pattern.to_string(), true)
-                    } else if flags.contains('g') {
-                        (format!("(?{}){}", flags.to_string().replace('g', 
""), pattern), true)
-                    } else {
-                        (format!("(?{flags}){pattern}"), false)
-                    };
-
-                    // if patterns hashmap already has regexp then use else 
create and return
-                    let re = match patterns.get(&pattern) {
-                        Some(re) => Ok(re),
-                        None => {
-                            match Regex::new(pattern.as_str()) {
-                                Ok(re) => {
-                                    patterns.insert(pattern.clone(), re);
-                                    Ok(patterns.get(&pattern).unwrap())
+        Some(flags) => {
+            let flags_array = as_generic_string_array::<T>(flags)?;
+
+            let result = string_array_iter
+                .zip(pattern_array_iter)
+                .zip(replacement_array_iter)
+                .zip(flags_array.iter())
+                .map(|(((string, pattern), replacement), flags)| {
+                    match (string, pattern, replacement, flags) {
+                        (Some(string), Some(pattern), Some(replacement), 
Some(flags)) => {
+                            let replacement = 
regex_replace_posix_groups(replacement);
+
+                            // format flags into rust pattern
+                            let (pattern, replace_all) = if flags == "g" {
+                                (pattern.to_string(), true)
+                            } else if flags.contains('g') {
+                                (
+                                    format!(
+                                        "(?{}){}",
+                                        flags.to_string().replace('g', ""),
+                                        pattern
+                                    ),
+                                    true,
+                                )
+                            } else {
+                                (format!("(?{flags}){pattern}"), false)
+                            };
+
+                            // if patterns hashmap already has regexp then use 
else create and return
+                            let re = match patterns.get(&pattern) {
+                                Some(re) => Ok(re),
+                                None => match Regex::new(pattern.as_str()) {
+                                    Ok(re) => {
+                                        patterns.insert(pattern.clone(), re);
+                                        Ok(patterns.get(&pattern).unwrap())
+                                    }
+                                    Err(err) => {
+                                        
Err(DataFusionError::External(Box::new(err)))
+                                    }
                                 },
-                                Err(err) => 
Err(DataFusionError::External(Box::new(err))),
-                            }
+                            };
+
+                            Some(re.map(|re| {
+                                if replace_all {
+                                    re.replace_all(string, 
replacement.as_str())
+                                } else {
+                                    re.replace(string, replacement.as_str())
+                                }
+                            }))
+                            .transpose()
                         }
-                    };
-
-                    Some(re.map(|re| {
-                        if replace_all {
-                            re.replace_all(string, replacement.as_str())
-                        } else {
-                            re.replace(string, replacement.as_str())
-                        }
-                    })).transpose()
-                }
-            _ => Ok(None)
-            })
-            .collect::<Result<GenericStringArray<T>>>()?;
+                        _ => Ok(None),
+                    }
+                })
+                .collect::<Result<GenericStringArray<T>>>()?;

Review Comment:
   the way this is written it will always return a GenericStringArray (aka 
`StringArray` or `LargeStringArray`)
   
   It might make more sense to check the first argument's data type, and if it 
is `Utf8View` collect into a `StringViewArray` directly 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to