Copilot commented on code in PR #49660:
URL: https://github.com/apache/arrow/pull/49660#discussion_r3068638626
##########
cpp/src/arrow/vendored/base64.cpp:
##########
@@ -93,38 +89,67 @@ std::string base64_encode(std::string_view
string_to_encode) {
return base64_encode(bytes_to_encode, in_len);
}
-std::string base64_decode(std::string_view encoded_string) {
+Result<std::string> base64_decode(std::string_view encoded_string) {
size_t in_len = encoded_string.size();
int i = 0;
- int j = 0;
int in_ = 0;
Review Comment:
`in_` is used as an index into `encoded_string` and is also reported in
error messages, but it is declared as `int`. For very large inputs this can
overflow and lead to incorrect indexing / undefined behavior. Prefer `size_t`
(or `std::string_view::size_type`) for indices and related counters.
```suggestion
std::string_view::size_type in_ = 0;
```
##########
cpp/src/arrow/flight/flight_test.cc:
##########
@@ -620,7 +620,8 @@ void ParseBasicHeader(const CallHeaders& incoming_headers,
std::string& username
std::string& password) {
std::string encoded_credentials =
FindKeyValPrefixInCallHeaders(incoming_headers, kAuthHeader,
kBasicPrefix);
- std::stringstream
decoded_stream(arrow::util::base64_decode(encoded_credentials));
+ std::string decoded =
arrow::util::base64_decode(encoded_credentials).ValueOrDie();
+ std::stringstream decoded_stream(decoded);
Review Comment:
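`ValueOrDie()` will abort the process on invalid base64 input, which can
turn a test failure into a hard crash. Since this is in test code, prefer
`ASSERT_OK_AND_ASSIGN` (or at least `ASSERT_OK(result.status())`) so failures
are reported cleanly and don’t terminate the entire test run.
`ARROW_ASSIGN_OR_RAISE` is also an option, but only if `ParseBasicHeader` is
changed to return a `Status`: the macro returns the error `Status` from the
enclosing function, and `ParseBasicHeader` currently returns `void`.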
##########
cpp/src/arrow/vendored/base64.cpp:
##########
@@ -93,38 +89,67 @@ std::string base64_encode(std::string_view
string_to_encode) {
return base64_encode(bytes_to_encode, in_len);
}
-std::string base64_decode(std::string_view encoded_string) {
+Result<std::string> base64_decode(std::string_view encoded_string) {
size_t in_len = encoded_string.size();
int i = 0;
- int j = 0;
int in_ = 0;
+ int padding_count = 0;
+ int block_padding = 0;
+ bool padding_started = false;
unsigned char char_array_4[4], char_array_3[3];
std::string ret;
- while (in_len-- && ( encoded_string[in_] != '=') &&
is_base64(encoded_string[in_])) {
- char_array_4[i++] = encoded_string[in_]; in_++;
- if (i ==4) {
- for (i = 0; i <4; i++)
- char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff;
+ if (encoded_string.size() % 4 != 0) {
+ return Status::Invalid("Invalid base64 input: length is not a multiple of
4");
+ }
- char_array_3[0] = ( char_array_4[0] << 2 ) + ((char_array_4[1] &
0x30) >> 4);
- char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] &
0x3c) >> 2);
- char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+ while (in_len--) {
+ unsigned char c = encoded_string[in_];
- for (i = 0; (i < 3); i++)
- ret += char_array_3[i];
- i = 0;
+ if (c == '=') {
+ padding_started = true;
+ padding_count++;
+
+ if (padding_count > 2) {
+ return Status::Invalid("Invalid base64 input: too many padding
characters");
+ }
+
+ char_array_4[i++] = 0;
+ } else {
+ if (padding_started) {
+ return Status::Invalid("Invalid base64 input: padding characters must
be at the end");
+ }
+
+ if (base64_chars.find(c) == std::string::npos) {
+ return Status::Invalid(
+ "Invalid base64 input: contains non-base64 byte at position " +
+ std::to_string(in_));
+ }
Review Comment:
`base64_chars.find()` is called for validation on every input byte and then
again during the decode step, adding extra work per character. Consider
switching to a 256-entry lookup table (byte -> sextet or -1) to validate and
map in one step, keeping strict validation without repeated linear searches.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org