dmitry-chirkov-dremio commented on code in PR #49660:
URL: https://github.com/apache/arrow/pull/49660#discussion_r3074390688
##########
cpp/src/arrow/vendored/base64.cpp:
##########
@@ -93,38 +89,67 @@ std::string base64_encode(std::string_view
string_to_encode) {
return base64_encode(bytes_to_encode, in_len);
}
-std::string base64_decode(std::string_view encoded_string) {
+Result<std::string> base64_decode(std::string_view encoded_string) {
size_t in_len = encoded_string.size();
int i = 0;
- int j = 0;
int in_ = 0;
+ int padding_count = 0;
+ int block_padding = 0;
+ bool padding_started = false;
unsigned char char_array_4[4], char_array_3[3];
std::string ret;
- while (in_len-- && ( encoded_string[in_] != '=') &&
is_base64(encoded_string[in_])) {
- char_array_4[i++] = encoded_string[in_]; in_++;
- if (i ==4) {
- for (i = 0; i <4; i++)
- char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff;
+ if (encoded_string.size() % 4 != 0) {
+ return Status::Invalid("Invalid base64 input: length is not a multiple of
4");
+ }
- char_array_3[0] = ( char_array_4[0] << 2 ) + ((char_array_4[1] &
0x30) >> 4);
- char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] &
0x3c) >> 2);
- char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+ while (in_len--) {
+ unsigned char c = encoded_string[in_];
- for (i = 0; (i < 3); i++)
- ret += char_array_3[i];
- i = 0;
+ if (c == '=') {
+ padding_started = true;
+ padding_count++;
+
+ if (padding_count > 2) {
+ return Status::Invalid("Invalid base64 input: too many padding
characters");
+ }
+
+ char_array_4[i++] = 0;
+ } else {
+ if (padding_started) {
+ return Status::Invalid("Invalid base64 input: padding characters must
be at the end");
+ }
+
+ if (base64_chars.find(c) == std::string::npos) {
+ return Status::Invalid(
+ "Invalid base64 input: contains non-base64 byte at position " +
+ std::to_string(in_));
+ }
Review Comment:
+1 for the optimization. If someone says this "can be addressed in a
follow-up", then a new issue should be filed to track it.
I do believe it should be addressed.
##########
cpp/src/arrow/vendored/base64.cpp:
##########
@@ -30,6 +30,7 @@
*/
#include "arrow/util/base64.h"
+#include "arrow/result.h"
#include <iostream>
Review Comment:
NIT: this include is possibly unused.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]