seddonm1 commented on a change in pull request #8966:
URL: https://github.com/apache/arrow/pull/8966#discussion_r546302442



##########
File path: rust/datafusion/src/physical_plan/string_expressions.rs
##########
@@ -66,3 +71,73 @@ pub fn concatenate(args: &[ArrayRef]) -> Result<StringArray> 
{
     }
     Ok(builder.finish())
 }
+
+/// character_length returns number of characters in the string
+/// character_length('josé') = 4
+pub fn character_length(args: &[ArrayRef]) -> Result<Int32Array> {
+    let num_rows = args[0].len();
+    let string_args =
+        &args[0]
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .ok_or_else(|| {
+                DataFusionError::Internal(
+                    "could not cast input to StringArray".to_string(),
+                )
+            })?;
+
+    let result = (0..num_rows)
+        .map(|i| {
+            if string_args.is_null(i) {
+                // NB: Since we use the same null bitset as the input,
+                // the output for this value will be ignored, but we
+                // need some value in the array we are building.
+                Ok(0)
+            } else {
+                Ok(string_args.value(i).chars().count() as i32)
+            }
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    let data = ArrayData::new(
+        DataType::Int32,
+        num_rows,
+        Some(string_args.null_count()),
+        string_args.data().null_buffer().cloned(),
+        0,
+        vec![Buffer::from(result.to_byte_slice())],
+        vec![],
+    );
+
+    Ok(Int32Array::from(Arc::new(data)))
+}
+
+macro_rules! string_unary_function {
+    ($NAME:ident, $FUNC:ident) => {
+        /// string function that accepts utf8 and returns utf8
+        pub fn $NAME(args: &[ArrayRef]) -> Result<StringArray> {
+            let string_args = &args[0]
+                .as_any()
+                .downcast_ref::<StringArray>()
+                .ok_or_else(|| {
+                    DataFusionError::Internal(
+                        "could not cast input to StringArray".to_string(),
+                    )
+                })?;
+
+            let mut builder = StringBuilder::new(args.len());
+            for index in 0..args[0].len() {
+                if string_args.is_null(index) {
+                    builder.append_null()?;
+                } else {
+                    builder.append_value(&string_args.value(index).$FUNC())?;
+                }
+            }
+            Ok(builder.finish())

Review comment:
       This worked well, but I have struggled to make it work with code that 
supports both `Utf8` and `LargeUtf8` types, as the code now does. Maybe you 
could help here?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to