seddonm1 commented on a change in pull request #9243:
URL: https://github.com/apache/arrow/pull/9243#discussion_r561653478
##########
File path: rust/datafusion/src/physical_plan/string_expressions.rs
##########
@@ -34,42 +34,446 @@ macro_rules! downcast_vec {
}};
}
-/// concatenate string columns together.
-pub fn concatenate(args: &[ArrayRef]) -> Result<StringArray> {
+/// Returns the numeric code of the first character of the argument.
+pub fn ascii<T: StringOffsetSizeTrait>(args: &[ArrayRef]) ->
Result<Int32Array> {
+ let array = args[0]
+ .as_any()
+ .downcast_ref::<GenericStringArray<T>>()
+ .unwrap();
+ // first map is the iterator, second is for the `Option<_>`
+ Ok(array
+ .iter()
+ .map(|x| {
+ x.map(|x: &str| {
+ let mut chars = x.chars();
+ chars.next().map_or(0, |v| v as i32)
+ })
+ })
+ .collect())
+}
+
+/// Removes the longest string containing only characters in characters (a
space by default) from the start and end of string.
+pub fn btrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) ->
Result<StringArray> {
+ match args.len() {
+ 0 => Err(DataFusionError::Internal(
+ "btrim was called with 0 arguments. It requires at least
one.".to_string(),
+ )),
+ 1 => {
+ let string_array = args[0]
+ .as_any()
+ .downcast_ref::<GenericStringArray<T>>()
+ .unwrap();
+
+ Ok(string_array
+ .iter()
+ .map(|x| x.map(|x: &str| x.trim()))
+ .collect())
+ }
+ 2 => {
+ let string_array = args[0]
+ .as_any()
+ .downcast_ref::<GenericStringArray<T>>()
+ .unwrap();
+
+ let characters_array = args[1]
+ .as_any()
+ .downcast_ref::<GenericStringArray<T>>()
+ .unwrap();
+
+ Ok(string_array
+ .iter()
+ .enumerate()
+ .map(|(i, x)| {
+ if characters_array.is_null(i) {
+ None
+ } else {
+ x.map(|x: &str| {
+ let chars: Vec<char> =
+ characters_array.value(i).chars().collect();
+ x.trim_start_matches(&chars[..])
+ .trim_end_matches(&chars[..])
+ })
+ }
+ })
+ .collect())
+ }
+ other => Err(DataFusionError::Internal(format!(
+ "btrim was called with {} arguments. It requires at most two.",
+ other
+ ))),
+ }
+}
+
+/// Returns the character with the given code.
+pub fn chr(args: &[ArrayRef]) -> Result<StringArray> {
+ let array = args[0].as_any().downcast_ref::<Int64Array>().unwrap();
+ // first map is the iterator, second is for the `Option<_>`
+ Ok(array
+ .iter()
+ .map(|x: Option<i64>| {
+ x.map(|x| {
+ if x == 0 {
+ Err(DataFusionError::Internal(
+ "null character not permitted.".to_string(),
+ ))
+ } else {
+ match core::char::from_u32(x as u32) {
+ Some(x) => Ok(x.to_string()),
+ None => Err(DataFusionError::Internal(
+ "requested character too large for
encoding.".to_string(),
+ )),
+ }
+ }
+ .unwrap()
Review comment:
I'm not sure if we should be panicking if these characters appear.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org