goldmedal commented on code in PR #13752:
URL: https://github.com/apache/datafusion/pull/13752#discussion_r1888481983


##########
datafusion/functions/src/unicode/initcap.rs:
##########
@@ -213,7 +243,7 @@ mod tests {
             Ok(Some("Hi Thomas")),
             &str,
             Utf8,
-            StringArray
+            StringViewArray

Review Comment:
   👍 



##########
datafusion/functions/src/unicode/initcap.rs:
##########
@@ -123,24 +124,36 @@ fn get_initcap_doc() -> &'static Documentation {
 fn initcap<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = as_generic_string_array::<T>(&args[0])?;
 
-    // first map is the iterator, second is for the `Option<_>`
-    let result = string_array
-        .iter()
-        .map(initcap_string)
-        .collect::<GenericStringArray<T>>();
+    let mut builder = GenericStringBuilder::<T>::with_capacity(
+        string_array.len(),
+        string_array.value_data().len(),
+    );
 
-    Ok(Arc::new(result) as ArrayRef)
+    string_array.iter().for_each(|str| match str {
+        Some(s) => {
+            let initcap_str = initcap_string(Some(s)).unwrap();
+            builder.append_value(initcap_str);
+        }
+        None => builder.append_null(),
+    });
+
+    Ok(Arc::new(builder.finish()) as ArrayRef)
 }
 
 fn initcap_utf8view(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_view_array = as_string_view_array(&args[0])?;
 
-    let result = string_view_array
-        .iter()
-        .map(initcap_string)
-        .collect::<StringArray>();
+    let mut builder = 
StringViewBuilder::with_capacity(string_view_array.len());
+
+    string_view_array.iter().for_each(|str| match str {
+        Some(s) => {
+            let initcap_str = initcap_string(Some(s)).unwrap();
+            builder.append_value(initcap_str);
+        }
+        None => builder.append_null(),
+    });
 
-    Ok(Arc::new(result) as ArrayRef)
+    Ok(Arc::new(builder.finish()) as ArrayRef)
 }
 
 fn initcap_string(input: Option<&str>) -> Option<String> {

Review Comment:
   ```suggestion
   fn initcap_string(input: &str) -> String {
   ```
   It's only used by L135 and L150 and never be `None`. I think we can remove 
the `Option`.



##########
datafusion/functions/src/unicode/initcap.rs:
##########
@@ -149,13 +162,16 @@ fn initcap_string(input: Option<&str>) -> Option<String> {
         let mut prev_is_alphanumeric = false;
 
         for c in s.chars() {
-            let transformed = if prev_is_alphanumeric {
-                c.to_ascii_lowercase()
+            if prev_is_alphanumeric {
+                for lc in c.to_lowercase() {
+                    result.push(lc);
+                }
             } else {
-                c.to_ascii_uppercase()
-            };
-            result.push(transformed);
-            prev_is_alphanumeric = c.is_ascii_alphanumeric();
+                for uc in c.to_uppercase() {
+                    result.push(uc);
+                }
+            }

Review Comment:
   Does this change slow down the ASCII-only case? If yes, maybe we can 
consider keeping a faster path for ASCII-only cases. (like 
https://github.com/apache/datafusion/issues/12306)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to