Copilot commented on code in PR #59607:
URL: https://github.com/apache/doris/pull/59607#discussion_r2667010298
##########
be/src/vec/functions/function_quantile_state.cpp:
##########
@@ -218,10 +221,134 @@ class FunctionQuantileStatePercent : public IFunction {
}
};
+class FunctionQuantileStateFromBase64 : public IFunction {
+public:
+ static constexpr auto name = "quantile_state_from_base64";
+ String get_name() const override { return name; }
+
+ static FunctionPtr create() { return
std::make_shared<FunctionQuantileStateFromBase64>(); }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeQuantileState>());
+ }
+
+ size_t get_number_of_arguments() const override { return 1; }
+
+ bool use_default_implementation_for_nulls() const override { return true; }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) const override
{
+ auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto res_data_column = ColumnQuantileState::create();
+ auto& null_map = res_null_map->get_data();
+ auto& res = res_data_column->get_data();
+
+ auto& argument_column = block.get_by_position(arguments[0]).column;
+ const auto& str_column = static_cast<const
ColumnString&>(*argument_column);
+ const ColumnString::Chars& data = str_column.get_chars();
+ const ColumnString::Offsets& offsets = str_column.get_offsets();
+
+ res.reserve(input_rows_count);
+
+ std::string decode_buff;
+ int last_decode_buff_len = 0;
+ int curr_decode_buff_len = 0;
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ const char* src_str = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
+ int64_t src_size = offsets[i] - offsets[i - 1];
+
+ if (src_size == 0 || 0 != src_size % 4) {
+ res.emplace_back();
+ null_map[i] = 1;
+ continue;
+ }
+
+ curr_decode_buff_len = src_size + 3;
+ if (curr_decode_buff_len > last_decode_buff_len) {
+ decode_buff.resize(curr_decode_buff_len);
+ last_decode_buff_len = curr_decode_buff_len;
+ }
+ auto outlen = base64_decode(src_str, src_size, decode_buff.data());
+ if (outlen < 0) {
+ res.emplace_back();
+ null_map[i] = 1;
+ } else {
+ doris::Slice decoded_slice(decode_buff.data(), outlen);
+ doris::QuantileState quantile_state;
+ if (!quantile_state.deserialize(decoded_slice)) {
+ return Status::RuntimeError(fmt::format(
+ "quantile_state_from_base64 decode failed: base64:
{}", src_str));
Review Comment:
The error message formatting is unsafe: `src_str` is not null-terminated, so
formatting it as a C string could read beyond the intended string bounds.
Similar functions in the codebase (like `bitmap_from_base64` on lines 292-293
of function_bitmap.cpp) use `std::string(src_str, src_size)` to safely create
a bounded string for error messages. The same issue exists in
`hll_from_base64` (line 201 of function_hll.cpp), but that doesn't make it
correct here. This should be changed to use the bounded
`std::string(src_str, src_size)` constructor.
```suggestion
"quantile_state_from_base64 decode failed: base64: {}",
std::string(src_str, src_size)));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]