martin-g commented on code in PR #20278:
URL: https://github.com/apache/datafusion/pull/20278#discussion_r2817875220


##########
datafusion/functions/src/unicode/rpad.rs:
##########
@@ -273,27 +287,49 @@ where
                                     );
                                 }
                                 let length = if length < 0 { 0 } else { length as usize };
-                                // Reuse buffer by clearing and refilling
-                                graphemes_buf.clear();
-                                graphemes_buf.extend(string.graphemes(true));
-
-                                if length < graphemes_buf.len() {
-                                    builder
-                                        .append_value(graphemes_buf[..length].concat());
-                                } else if fill.is_empty() {
-                                    builder.append_value(string);
+                                if string.is_ascii() && fill.is_ascii() {
+                                    // ASCII fast path: byte length == character length,
+                                    // so we skip expensive grapheme segmentation.
+                                    let str_len = string.len();
+                                    if length < str_len {
+                                        builder.append_value(&string[..length]);
+                                    } else if fill.is_empty() {
+                                        builder.append_value(string);
+                                    } else {
+                                        let pad_len = length - str_len;
+                                        let fill_len = fill.len();
+                                        let full_reps = pad_len / fill_len;
+                                        let remainder = pad_len % fill_len;
+                                        builder.write_str(string)?;
+                                        for _ in 0..full_reps {
+                                            builder.write_str(fill)?;
+                                        }
+                                        builder.append_value(&fill[..remainder]);
+                                    }
                                 } else {
-                                    builder.write_str(string)?;
-                                    // Reuse fill_chars_buf by clearing and refilling
-                                    fill_chars_buf.clear();
-                                    fill_chars_buf.extend(fill.chars());
-                                    for l in 0..length - graphemes_buf.len() {
-                                        let c = *fill_chars_buf
-                                            .get(l % fill_chars_buf.len())
-                                            .unwrap();
-                                        builder.write_char(c)?;
+                                    // Reuse buffer by clearing and refilling
+                                    graphemes_buf.clear();
+                                    graphemes_buf.extend(string.graphemes(true));
+
+                                    if length < graphemes_buf.len() {
+                                        builder.append_value(
+                                            graphemes_buf[..length].concat(),
+                                        );
+                                    } else if fill.is_empty() {
+                                        builder.append_value(string);
+                                    } else {
+                                        builder.write_str(string)?;
+                                        // Reuse fill_chars_buf by clearing and refilling
+                                        fill_chars_buf.clear();
+                                        fill_chars_buf.extend(fill.chars());
+                                        for l in 0..length - graphemes_buf.len() {
+                                            let c = *fill_chars_buf
+                                                .get(l % fill_chars_buf.len())
+                                                .unwrap();
+                                            builder.write_char(c)?;
+                                        }
+                                        builder.append_value("");

Review Comment:
   Yes, sorry! My mistake!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to