seddonm1 commented on a change in pull request #9243:
URL: https://github.com/apache/arrow/pull/9243#discussion_r567147472



##########
File path: rust/datafusion/src/physical_plan/string_expressions.rs
##########
@@ -34,42 +35,553 @@ macro_rules! downcast_vec {
     }};
 }
 
-/// concatenate string columns together.
-pub fn concatenate(args: &[ArrayRef]) -> Result<StringArray> {
+/// Returns the numeric code of the first character of the argument.
+pub fn ascii<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<Int32Array> {
+    let array = args[0]
+        .as_any()
+        .downcast_ref::<GenericStringArray<T>>()
+        .unwrap();
+    // first map is the iterator, second is for the `Option<_>`
+    Ok(array
+        .iter()
+        .map(|x| {
+            x.map(|x: &str| {
+                let mut chars = x.chars();
+                chars.next().map_or(0, |v| v as i32)
+            })
+        })
+        .collect())
+}
+
+/// Removes the longest string containing only characters in characters (a space by default) from the start and end of string.
+pub fn btrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<StringArray> {
+    match args.len() {
+        0 => Err(DataFusionError::Internal(
+            "btrim was called with 0 arguments. It requires at least 
1.".to_string(),
+        )),
+        1 => {
+            let string_array = args[0]
+                .as_any()
+                .downcast_ref::<GenericStringArray<T>>()
+                .unwrap();
+
+            Ok(string_array
+                .iter()
+                .map(|x| x.map(|x: &str| x.trim()))
+                .collect())
+        }
+        2 => {
+            let string_array = args[0]
+                .as_any()
+                .downcast_ref::<GenericStringArray<T>>()
+                .unwrap();
+
+            let characters_array = args[1]
+                .as_any()
+                .downcast_ref::<GenericStringArray<T>>()
+                .unwrap();
+
+            Ok(string_array
+                .iter()
+                .enumerate()
+                .map(|(i, x)| {
+                    if characters_array.is_null(i) {
+                        None
+                    } else {
+                        x.map(|x: &str| {
+                            let chars: Vec<char> =
+                                characters_array.value(i).chars().collect();
+                            x.trim_start_matches(&chars[..])
+                                .trim_end_matches(&chars[..])
+                        })
+                    }
+                })
+                .collect())
+        }
+        other => Err(DataFusionError::Internal(format!(
+            "btrim was called with {} arguments. It requires at most 2.",
+            other
+        ))),
+    }
+}
+
+/// Returns number of characters in the string.
+pub fn character_length_i32(args: &[ArrayRef]) -> Result<Int32Array> {
+    let array = args[0]
+        .as_any()
+        .downcast_ref::<GenericStringArray<i32>>()
+        .unwrap();
+    // first map is the iterator, second is for the `Option<_>`
+    Ok(array
+        .iter()
+        .map(|x| x.map(|x: &str| x.graphemes(true).count() as i32))
+        .collect())
+}
+
+/// Returns number of characters in the string.
+pub fn character_length_i64(args: &[ArrayRef]) -> Result<Int64Array> {
+    let array = args[0]
+        .as_any()
+        .downcast_ref::<GenericStringArray<i64>>()
+        .unwrap();
+    // first map is the iterator, second is for the `Option<_>`
+    Ok(array
+        .iter()
+        .map(|x| x.map(|x: &str| x.graphemes(true).count() as i64))
+        .collect())
+}
+
+/// Returns the character with the given code.
+pub fn chr(args: &[ArrayRef]) -> Result<StringArray> {
+    let array = args[0].as_any().downcast_ref::<Int64Array>().unwrap();
+    // first map is the iterator, second is for the `Option<_>`
+    Ok(array
+        .iter()
+        .map(|x: Option<i64>| {
+            x.map(|x| {
+                if x == 0 {
+                    Err(DataFusionError::Internal(
+                        "null character not permitted.".to_string(),
+                    ))
+                } else {
+                    match core::char::from_u32(x as u32) {
+                        Some(x) => Ok(x.to_string()),
+                        None => Err(DataFusionError::Internal(
+                            "requested character too large for 
encoding.".to_string(),
+                        )),
+                    }
+                }
+                .unwrap()
+            })
+        })
+        .collect())
+}
+
+/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
+pub fn concat(args: &[ArrayRef]) -> Result<StringArray> {
     // downcast all arguments to strings
     let args = downcast_vec!(args, 
StringArray).collect::<Result<Vec<&StringArray>>>()?;
     // do not accept 0 arguments.
     if args.is_empty() {
         return Err(DataFusionError::Internal(
-            "Concatenate was called with 0 arguments. It requires at least 
one."
-                .to_string(),
+            "concat was called with 0 arguments. It requires at least 
2.".to_string(),
         ));
     }
 
     let mut builder = StringBuilder::new(args.len());
     // for each entry in the array
     for index in 0..args[0].len() {
         let mut owned_string: String = "".to_owned();
-
-        // if any is null, the result is null
-        let mut is_null = false;
         for arg in &args {
-            if arg.is_null(index) {
-                is_null = true;
-                break; // short-circuit as we already know the result
-            } else {
+            if arg.is_valid(index) {
                 owned_string.push_str(&arg.value(index));
             }
         }
-        if is_null {
+        builder.append_value(&owned_string)?;
+    }
+    Ok(builder.finish())
+}
+
+/// Concatenates all but the first argument, with separators. The first argument is used as the separator string, and should not be NULL. Other NULL arguments are ignored.
+pub fn concat_ws(args: &[ArrayRef]) -> Result<StringArray> {
+    // downcast all arguments to strings
+    let args = downcast_vec!(args, 
StringArray).collect::<Result<Vec<&StringArray>>>()?;
+    // do not accept 0 or 1 arguments.
+    if args.len() < 2 {
+        return Err(DataFusionError::Internal(format!(
+            "concat_ws was called with {} arguments. It requires at least 2.",
+            args.len()
+        )));
+    }
+
+    let mut builder = StringBuilder::new(args.len());
+    // for each entry in the array
+    for index in 0..args[0].len() {
+        let mut owned_string: String = "".to_owned();
+        if args[0].is_null(index) {
             builder.append_null()?;
         } else {
+            let sep = args[0].value(index);
+            for arg_index in 1..args.len() {
+                let arg = &args[arg_index];
+                if !arg.is_null(index) {

Review comment:
       Thanks, I will have a look at this today.
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to