edponce commented on a change in pull request #10317:
URL: https://github.com/apache/arrow/pull/10317#discussion_r633832364
##########
File path: cpp/src/arrow/compute/kernels/scalar_string.cc
##########
@@ -266,6 +271,56 @@ void EnsureLookupTablesFilled() {}
#endif // ARROW_WITH_UTF8PROC
+template <typename Type>
+struct AsciiReverse : StringTransform<Type, AsciiReverse<Type>> {
+ using Base = StringTransform<Type, AsciiReverse<Type>>;
+ using offset_type = typename Base::offset_type;
+
+ bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
+ uint8_t* output, offset_type* output_written) {
+ uint8_t utf8_char_found = 0;
+ for (offset_type i = 0; i < input_string_ncodeunits; i++) {
+ // if a utf8 char is found, report to utf8_char_found
+ utf8_char_found |= input[i] & 0x80;
+ output[input_string_ncodeunits - i - 1] = input[i];
+ }
+ *output_written = input_string_ncodeunits;
+ return utf8_char_found == 0;
+ }
+
+ static Status InvalidStatus() { return Status::Invalid("Non-ascii sequence in input"); }
Review comment:
I do not think you should add `InvalidStatus` methods with specific
messages that do not necessarily generalize for all string errors. Why not
directly invoke `Status::Invalid("Specific message to specific code block")`?
##########
File path: cpp/src/arrow/compute/kernels/scalar_string.cc
##########
@@ -266,6 +271,56 @@ void EnsureLookupTablesFilled() {}
#endif // ARROW_WITH_UTF8PROC
+template <typename Type>
+struct AsciiReverse : StringTransform<Type, AsciiReverse<Type>> {
+ using Base = StringTransform<Type, AsciiReverse<Type>>;
+ using offset_type = typename Base::offset_type;
+
+ bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
+ uint8_t* output, offset_type* output_written) {
+ uint8_t utf8_char_found = 0;
+ for (offset_type i = 0; i < input_string_ncodeunits; i++) {
+ // if a utf8 char is found, report to utf8_char_found
+ utf8_char_found |= input[i] & 0x80;
+ output[input_string_ncodeunits - i - 1] = input[i];
+ }
+ *output_written = input_string_ncodeunits;
+ return utf8_char_found == 0;
+ }
+
+ static Status InvalidStatus() { return Status::Invalid("Non-ascii sequence in input"); }
Review comment:
I do not think you should add `InvalidStatus` methods with specific
messages that do not necessarily generalize for all string errors. Why not
directly invoke `Status::Invalid("Specific message to specific code block")`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]