neilconway commented on code in PR #20278:
URL: https://github.com/apache/datafusion/pull/20278#discussion_r2818040775
##########
datafusion/functions/src/unicode/rpad.rs:
##########
@@ -273,27 +287,49 @@ where
);
}
let length = if length < 0 { 0 } else { length as usize };
- // Reuse buffer by clearing and refilling
- graphemes_buf.clear();
- graphemes_buf.extend(string.graphemes(true));
-
- if length < graphemes_buf.len() {
- builder
- .append_value(graphemes_buf[..length].concat());
- } else if fill.is_empty() {
- builder.append_value(string);
+ if string.is_ascii() && fill.is_ascii() {
+ // ASCII fast path: byte length == character length,
+ // so we skip expensive grapheme segmentation.
+ let str_len = string.len();
+ if length < str_len {
+ builder.append_value(&string[..length]);
+ } else if fill.is_empty() {
+ builder.append_value(string);
+ } else {
+ let pad_len = length - str_len;
+ let fill_len = fill.len();
+ let full_reps = pad_len / fill_len;
+ let remainder = pad_len % fill_len;
+ builder.write_str(string)?;
+ for _ in 0..full_reps {
+ builder.write_str(fill)?;
+ }
+ builder.append_value(&fill[..remainder]);
Review Comment:
Thanks, done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]