This is an automated email from the ASF dual-hosted git repository. wayne pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
     new e58f86c60a refactor `character_length` impl by unifying null handling logic (#16877)
e58f86c60a is described below

commit e58f86c60a54fa4f27fef9d37d6929059a522067
Author: Ruihang Xia <waynest...@gmail.com>
AuthorDate: Tue Aug 12 16:16:06 2025 -0700

    refactor `character_length` impl by unifying null handling logic (#16877)
---
 .../functions/src/unicode/character_length.rs | 53 +++++++---------------
 1 file changed, 17 insertions(+), 36 deletions(-)

diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs
index 4ee5995f0a..c14a92ffdd 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -136,56 +136,37 @@ where
     // string is ASCII only is relatively cheap.
     // If strings are ASCII only, count bytes instead.
     let is_array_ascii_only = array.is_ascii();
-    let array = if array.null_count() == 0 {
+    let nulls = array.nulls().cloned();
+    let array = {
         if is_array_ascii_only {
             let values: Vec<_> = (0..array.len())
                 .map(|i| {
-                    let value = array.value(i);
+                    // Safety: we are iterating with array.len() so the index is always valid
+                    let value = unsafe { array.value_unchecked(i) };
                     T::Native::usize_as(value.len())
                 })
                 .collect();
-            PrimitiveArray::<T>::new(values.into(), None)
+            PrimitiveArray::<T>::new(values.into(), nulls)
         } else {
             let values: Vec<_> = (0..array.len())
                 .map(|i| {
-                    let value = array.value(i);
-                    if value.is_ascii() {
-                        T::Native::usize_as(value.len())
+                    // Safety: we are iterating with array.len() so the index is always valid
+                    if array.is_null(i) {
+                        T::default_value()
                     } else {
-                        T::Native::usize_as(value.chars().count())
+                        let value = unsafe { array.value_unchecked(i) };
+                        if value.is_empty() {
+                            T::default_value()
+                        } else if value.is_ascii() {
+                            T::Native::usize_as(value.len())
+                        } else {
+                            T::Native::usize_as(value.chars().count())
+                        }
                     }
                 })
                 .collect();
-            PrimitiveArray::<T>::new(values.into(), None)
+            PrimitiveArray::<T>::new(values.into(), nulls)
         }
-    } else if is_array_ascii_only {
-        let values: Vec<_> = (0..array.len())
-            .map(|i| {
-                if array.is_null(i) {
-                    T::default_value()
-                } else {
-                    let value = array.value(i);
-                    T::Native::usize_as(value.len())
-                }
-            })
-            .collect();
-        PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
-    } else {
-        let values: Vec<_> = (0..array.len())
-            .map(|i| {
-                if array.is_null(i) {
-                    T::default_value()
-                } else {
-                    let value = array.value(i);
-                    if value.is_ascii() {
-                        T::Native::usize_as(value.len())
-                    } else {
-                        T::Native::usize_as(value.chars().count())
-                    }
-                }
-            })
-            .collect();
-        PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
     };
 
     Ok(Arc::new(array))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org