neilconway commented on code in PR #20278:
URL: https://github.com/apache/datafusion/pull/20278#discussion_r2817785948
##########
datafusion/functions/src/unicode/rpad.rs:
##########
@@ -273,27 +287,49 @@ where
);
}
let length = if length < 0 { 0 } else { length as usize };
- // Reuse buffer by clearing and refilling
- graphemes_buf.clear();
- graphemes_buf.extend(string.graphemes(true));
-
- if length < graphemes_buf.len() {
- builder
- .append_value(graphemes_buf[..length].concat());
- } else if fill.is_empty() {
- builder.append_value(string);
+ if string.is_ascii() && fill.is_ascii() {
+ // ASCII fast path: byte length == character length,
+ // so we skip expensive grapheme segmentation.
+ let str_len = string.len();
+ if length < str_len {
+ builder.append_value(&string[..length]);
+ } else if fill.is_empty() {
+ builder.append_value(string);
+ } else {
+ let pad_len = length - str_len;
+ let fill_len = fill.len();
+ let full_reps = pad_len / fill_len;
+ let remainder = pad_len % fill_len;
+ builder.write_str(string)?;
+ for _ in 0..full_reps {
+ builder.write_str(fill)?;
+ }
+ builder.append_value(&fill[..remainder]);
+ }
} else {
- builder.write_str(string)?;
- // Reuse fill_chars_buf by clearing and refilling
- fill_chars_buf.clear();
- fill_chars_buf.extend(fill.chars());
- for l in 0..length - graphemes_buf.len() {
- let c = *fill_chars_buf
- .get(l % fill_chars_buf.len())
- .unwrap();
- builder.write_char(c)?;
+ // Reuse buffer by clearing and refilling
+ graphemes_buf.clear();
+ graphemes_buf.extend(string.graphemes(true));
+
+ if length < graphemes_buf.len() {
+ builder.append_value(
+ graphemes_buf[..length].concat(),
+ );
+ } else if fill.is_empty() {
+ builder.append_value(string);
Review Comment:
`append_value` does not return a value, so I think we don't want to make
this change. (`builder.write_str()` _does_ return a `Result`, but only because
it needs to in order to implement `std::fmt::Write`).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]