Dandandan commented on code in PR #14195:
URL: https://github.com/apache/datafusion/pull/14195#discussion_r1923260496
##########
datafusion/functions/src/unicode/reverse.rs:
##########
@@ -119,12 +119,21 @@ fn reverse_impl<'a, T: OffsetSizeTrait, V:
StringArrayType<'a>>(
) -> Result<ArrayRef> {
let mut builder =
GenericStringBuilder::<T>::with_capacity(string_array.len(), 1024);
- let mut reversed = String::new();
+ let mut string_buf = String::new();
for string in string_array.iter() {
if let Some(s) = string {
- reversed.extend(s.chars().rev());
- builder.append_value(&reversed);
- reversed.clear();
+ if s.is_ascii() {
+ // SAFETY: Since the original string was ASCII, reversing the
bytes still results in valid UTF-8.
+ let reversed = unsafe {
+ // reverse bytes directly since ASCII characters are
single bytes
+
String::from_utf8_unchecked(s.bytes().rev().collect::<Vec<u8>>())
Review Comment:
We could avoid a allocation here by copying into a `Vec<u8>` buffer
```
// slightly faster than using iterator
bytes_buf.extend(s.as_bytes())
bytes_buf.reverse()
...
bytes_buf.clear()
```
##########
datafusion/functions/src/unicode/reverse.rs:
##########
@@ -119,12 +119,21 @@ fn reverse_impl<'a, T: OffsetSizeTrait, V:
StringArrayType<'a>>(
) -> Result<ArrayRef> {
let mut builder =
GenericStringBuilder::<T>::with_capacity(string_array.len(), 1024);
- let mut reversed = String::new();
+ let mut string_buf = String::new();
for string in string_array.iter() {
if let Some(s) = string {
- reversed.extend(s.chars().rev());
- builder.append_value(&reversed);
- reversed.clear();
+ if s.is_ascii() {
+ // SAFETY: Since the original string was ASCII, reversing the
bytes still results in valid UTF-8.
+ let reversed = unsafe {
+ // reverse bytes directly since ASCII characters are
single bytes
+
String::from_utf8_unchecked(s.bytes().rev().collect::<Vec<u8>>())
Review Comment:
We could avoid a allocation here by copying into a `Vec<u8>` buffer
```
// slightly faster than using iterator `bytes`
bytes_buf.extend(s.as_bytes())
bytes_buf.reverse()
...
bytes_buf.clear()
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]