projjal commented on a change in pull request #12440:
URL: https://github.com/apache/arrow/pull/12440#discussion_r827652674
##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -2209,26 +2215,37 @@ const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text
// initially counts the number of utf8 characters in the defined text
int32_t char_count = utf8_length(context, text, text_len);
+
// char_count is zero if input has invalid utf8 char
if (char_count == 0) {
*out_len = 0;
return "";
}
+ //case where right('abcdef', -6) -> "" and right('abcdef', -7) -> ""
+ if(number < 0 && abs(number) >= char_count) {
+ *out_len = 0;
+ return "";
+ }
+
int32_t start_char_pos; // the char result start position (inclusive)
- int32_t end_char_len; // the char result end position (inclusive)
+ int32_t end_pos; // the char result end position (inclusive)
+
if (number > 0) {
// case where right('abc', 5) ==> 'abc' start_char_pos=1.
start_char_pos = (char_count > number) ? char_count - number : 0;
- end_char_len = char_count - start_char_pos;
} else {
start_char_pos = number * -1;
- end_char_len = char_count - start_char_pos;
}
- // calculate the start byte position and the output length
+ end_pos = char_count;
+
+ // calculate the start byte and end byte position
int32_t start_byte_pos = utf8_byte_pos(context, text, text_len,
start_char_pos);
- *out_len = utf8_byte_pos(context, text, text_len, end_char_len);
+ int32_t end_byte_pos = utf8_byte_pos(context, text, text_len, end_pos);
Review comment:
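Calculating `end_byte_pos` doesn't seem necessary: `end_pos` is always `char_count` here, so the end byte position should simply be `text_len`.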
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]