wumeibanfa commented on code in PR #56648:
URL: https://github.com/apache/doris/pull/56648#discussion_r2427735034
##########
be/src/vec/functions/function_varbinary.cpp:
##########
@@ -143,11 +149,181 @@ class FunctionFromBinary : public IFunction {
}
};
+struct NameVarbinaryLength {
+ static constexpr auto name = "length";
+};
+
+struct VarbinaryLengthImpl {
+ using ReturnType = DataTypeInt32;
+ using ReturnColumnType = ColumnInt32;
+ static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY;
+
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<DataTypeVarbinary>()};
+ }
+
+ static Status vector(const PaddedPODArray<doris::StringView>& data,
+ PaddedPODArray<Int32>& res) {
+ size_t rows_count = data.size();
+ res.resize(rows_count);
+ for (size_t i = 0; i < rows_count; ++i) {
+ res[i] = data[i].size();
+ }
+ return Status::OK();
+ }
+};
+
+using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl,
NameVarbinaryLength>;
+
+struct ToBase64BinaryImpl {
+ static constexpr auto name = "to_base64_binary";
+ using ReturnType = DataTypeString;
+ using ColumnType = ColumnString;
+ static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY;
+
+ static Status vector(const PaddedPODArray<doris::StringView>& data,
+ ColumnString::Chars& dst_data, ColumnString::Offsets&
dst_offsets) {
+ auto rows_count = data.size();
+ dst_offsets.resize(rows_count);
+
+ std::array<char, string_hex::MAX_STACK_CIPHER_LEN> stack_buf;
+ std::vector<char> heap_buf;
+ for (size_t i = 0; i < rows_count; i++) {
+ auto binary = data[i];
+ auto binlen = binary.size();
+
+ if (binlen == 0) {
+ StringOP::push_empty_string(i, dst_data, dst_offsets);
+ continue;
+ }
+
+ char* dst = nullptr;
+ auto cipher_len = 4 * ((binlen + 2) / 3);
+ if (cipher_len <= stack_buf.size()) {
+ dst = stack_buf.data();
+ } else {
+ heap_buf.resize(cipher_len);
+ dst = heap_buf.data();
+ }
+
+ auto outlen =
+ doris::base64_encode(reinterpret_cast<const unsigned
char*>(binary.data()),
+ binlen, reinterpret_cast<unsigned
char*>(dst));
+
+ StringOP::push_value_string(std::string_view(dst, outlen), i,
dst_data, dst_offsets);
+ }
+
+ return Status::OK();
+ }
+};
+
+using FunctionToBase64Binary = FunctionStringEncode<ToBase64BinaryImpl, false>;
+
+struct FromBase64BinaryImpl {
+ static constexpr auto name = "from_base64_binary";
+ using ReturnType = DataTypeVarbinary;
+ using ColumnType = ColumnVarbinary;
+
+ static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
+ ColumnVarbinary* res, NullMap& null_map) {
+ auto rows_count = offsets.size();
+
+ std::array<char, string_hex::MAX_STACK_CIPHER_LEN> stack_buf;
+ std::vector<char> heap_buf;
+ for (size_t i = 0; i < rows_count; i++) {
+ const auto* source = reinterpret_cast<const char*>(&data[offsets[i
- 1]]);
+ ColumnString::Offset slen = offsets[i] - offsets[i - 1];
+
+ if (slen == 0) {
+ res->insert_default();
+ continue;
+ }
+
+ UInt32 cipher_len = slen;
+ char* dst = nullptr;
+ if (cipher_len <= stack_buf.size()) {
+ dst = stack_buf.data();
+ } else {
+ heap_buf.resize(cipher_len);
+ dst = heap_buf.data();
+ }
+
+ auto outlen = doris::base64_decode(source, slen, dst);
+
+ if (outlen < 0) {
+ null_map[i] = 1;
+ res->insert_default();
+ } else {
+ res->insert_data(dst, outlen);
+ }
+ }
+
+ return Status::OK();
+ }
+};
+
+using FunctionFromBase64Binary =
FunctionStringOperateToNullType<FromBase64BinaryImpl>;
+
+struct SubBinary3Impl {
+ static DataTypes get_variadic_argument_types() {
+ return DataTypes {std::make_shared<DataTypeVarbinary>(),
std::make_shared<DataTypeInt32>(),
+ std::make_shared<DataTypeInt32>()};
+ }
+
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, uint32_t result,
+ size_t input_rows_count) {
+ SubBinaryUtil::sub_binary_execute(block, arguments, result,
input_rows_count);
+ return Status::OK();
+ }
+};
+
+struct SubBinary2Impl {
+ static DataTypes get_variadic_argument_types() {
+ return DataTypes {std::make_shared<DataTypeVarbinary>(),
std::make_shared<DataTypeInt32>()};
+ }
+
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, uint32_t result,
+ size_t input_rows_count) {
+ auto col_len = ColumnInt32::create(input_rows_count);
+ auto& strlen_data = col_len->get_data();
+
+ ColumnPtr binary_col;
+ bool binary_const;
+ std::tie(binary_col, binary_const) =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+
+ const auto& binarys = assert_cast<const
ColumnVarbinary*>(binary_col.get());
+
+ if (binary_const) {
+ std::fill(strlen_data.begin(), strlen_data.end(),
binarys->get_data()[0].size());
+ } else {
+ for (int i = 0; i < input_rows_count; ++i) {
+ strlen_data[i] = binarys->get_data()[i].size();
+ }
+ }
+
+ // we complete the column2(strlen) with the default value - each row's
strlen.
+ block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(),
"strlen"});
Review Comment:
我这里把SubBinary2Impl删了,只留下一个SubBinary3Impl
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]