wumeibanfa commented on code in PR #56648:
URL: https://github.com/apache/doris/pull/56648#discussion_r2425092686
##########
be/src/vec/functions/function_varbinary.cpp:
##########
@@ -143,11 +149,181 @@ class FunctionFromBinary : public IFunction {
}
};
+struct NameVarbinaryLength { // Name tag: carries the function-name string "length".
+ static constexpr auto name = "length";
+};
+
+// Implementation policy for the varbinary length function: for every input row it
+// stores the value reported by that row's StringView::size() as an Int32.
+struct VarbinaryLengthImpl {
+    using ReturnType = DataTypeInt32;
+    using ReturnColumnType = ColumnInt32;
+    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY;
+
+    // Argument signature: exactly one DataTypeVarbinary.
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeVarbinary>()};
+    }
+
+    // Fills `res` with one entry per element of `data`; `res` is resized to match.
+    // Always returns Status::OK().
+    static Status vector(const PaddedPODArray<doris::StringView>& data,
+                         PaddedPODArray<Int32>& res) {
+        res.resize(data.size());
+        size_t row = 0;
+        for (const auto& value : data) {
+            res[row++] = value.size();
+        }
+        return Status::OK();
+    }
+};
+
+using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl,
NameVarbinaryLength>; // Function type for "length": VarbinaryLengthImpl paired with its name tag.
+
+// Implementation policy for "to_base64_binary": Base64-encodes each varbinary row
+// into a string column.
+struct ToBase64BinaryImpl {
+    static constexpr auto name = "to_base64_binary";
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY;
+
+    // Encodes every element of `data` and appends the text to the string column
+    // storage (`dst_data` / `dst_offsets`). Rows whose size is zero are handed to
+    // StringOP::push_empty_string without encoding. Always returns Status::OK().
+    static Status vector(const PaddedPODArray<doris::StringView>& data,
+                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
+        const auto num_rows = data.size();
+        dst_offsets.resize(num_rows);
+
+        // Scratch buffers shared by all rows: a fixed-size stack array for small
+        // outputs, and a vector that is grown only when a row does not fit in it.
+        std::array<char, string_hex::MAX_STACK_CIPHER_LEN> small_buf;
+        std::vector<char> large_buf;
+
+        for (size_t row = 0; row < num_rows; ++row) {
+            auto value = data[row];
+            auto value_len = value.size();
+            if (value_len == 0) {
+                StringOP::push_empty_string(row, dst_data, dst_offsets);
+                continue;
+            }
+
+            // Base64 produces 4 output characters per (possibly partial) 3-byte group.
+            auto encoded_cap = 4 * ((value_len + 2) / 3);
+            char* out = small_buf.data();
+            if (encoded_cap > small_buf.size()) {
+                large_buf.resize(encoded_cap);
+                out = large_buf.data();
+            }
+
+            auto written = doris::base64_encode(
+                    reinterpret_cast<const unsigned char*>(value.data()), value_len,
+                    reinterpret_cast<unsigned char*>(out));
+
+            StringOP::push_value_string(std::string_view(out, written), row, dst_data,
+                                        dst_offsets);
+        }
+
+        return Status::OK();
+    }
+};
+
+using FunctionToBase64Binary = FunctionStringEncode<ToBase64BinaryImpl, false>; // Function type for "to_base64_binary". NOTE(review): the meaning of the `false` argument is defined by FunctionStringEncode, which is not visible here — confirm.
+
+struct FromBase64BinaryImpl {
+ static constexpr auto name = "from_base64_binary";
+ using ReturnType = DataTypeVarbinary;
+ using ColumnType = ColumnVarbinary;
+
+ static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
+ ColumnVarbinary* res, NullMap& null_map) {
+ auto rows_count = offsets.size();
+
+ std::array<char, string_hex::MAX_STACK_CIPHER_LEN> stack_buf;
+ std::vector<char> heap_buf;
+ for (size_t i = 0; i < rows_count; i++) {
+ const auto* source = reinterpret_cast<const char*>(&data[offsets[i
- 1]]);
+ ColumnString::Offset slen = offsets[i] - offsets[i - 1];
+
+ if (slen == 0) {
+ res->insert_default();
+ continue;
+ }
+
+ UInt32 cipher_len = slen;
+ char* dst = nullptr;
Review Comment:
我没理解 `所以可以避免拷贝dst,直接resize res然后写到里面,最后shrink下应该就行了` 这句话是什么意思。
看起来这里我只需要给出一个接近理论值的上限就可以了?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]