This is an automated email from the ASF dual-hosted git repository.

wayne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e58f86c60a refactor `character_length` impl by unifying null handling logic (#16877)
e58f86c60a is described below

commit e58f86c60a54fa4f27fef9d37d6929059a522067
Author: Ruihang Xia <waynest...@gmail.com>
AuthorDate: Tue Aug 12 16:16:06 2025 -0700

    refactor `character_length` impl by unifying null handling logic (#16877)
---
 .../functions/src/unicode/character_length.rs      | 53 +++++++---------------
 1 file changed, 17 insertions(+), 36 deletions(-)

diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs
index 4ee5995f0a..c14a92ffdd 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -136,56 +136,37 @@ where
     // string is ASCII only is relatively cheap.
     // If strings are ASCII only, count bytes instead.
     let is_array_ascii_only = array.is_ascii();
-    let array = if array.null_count() == 0 {
+    let nulls = array.nulls().cloned();
+    let array = {
         if is_array_ascii_only {
             let values: Vec<_> = (0..array.len())
                 .map(|i| {
-                    let value = array.value(i);
+                    // Safety: we are iterating with array.len() so the index is always valid
+                    let value = unsafe { array.value_unchecked(i) };
                     T::Native::usize_as(value.len())
                 })
                 .collect();
-            PrimitiveArray::<T>::new(values.into(), None)
+            PrimitiveArray::<T>::new(values.into(), nulls)
         } else {
             let values: Vec<_> = (0..array.len())
                 .map(|i| {
-                    let value = array.value(i);
-                    if value.is_ascii() {
-                        T::Native::usize_as(value.len())
+                    // Safety: we are iterating with array.len() so the index is always valid
+                    if array.is_null(i) {
+                        T::default_value()
                     } else {
-                        T::Native::usize_as(value.chars().count())
+                        let value = unsafe { array.value_unchecked(i) };
+                        if value.is_empty() {
+                            T::default_value()
+                        } else if value.is_ascii() {
+                            T::Native::usize_as(value.len())
+                        } else {
+                            T::Native::usize_as(value.chars().count())
+                        }
                     }
                 })
                 .collect();
-            PrimitiveArray::<T>::new(values.into(), None)
+            PrimitiveArray::<T>::new(values.into(), nulls)
         }
-    } else if is_array_ascii_only {
-        let values: Vec<_> = (0..array.len())
-            .map(|i| {
-                if array.is_null(i) {
-                    T::default_value()
-                } else {
-                    let value = array.value(i);
-                    T::Native::usize_as(value.len())
-                }
-            })
-            .collect();
-        PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
-    } else {
-        let values: Vec<_> = (0..array.len())
-            .map(|i| {
-                if array.is_null(i) {
-                    T::default_value()
-                } else {
-                    let value = array.value(i);
-                    if value.is_ascii() {
-                        T::Native::usize_as(value.len())
-                    } else {
-                        T::Native::usize_as(value.chars().count())
-                    }
-                }
-            })
-            .collect();
-        PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
     };
 
     Ok(Arc::new(array))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to