zhangstar333 commented on code in PR #56648: URL: https://github.com/apache/doris/pull/56648#discussion_r2418401992
########## be/src/vec/functions/function_varbinary.h: ########## @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/status.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_varbinary.h" +#include "vec/core/block.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/data_types/data_type_varbinary.h" +#include "vec/functions/function.h" +#include "vec/utils/varbinaryop_subbinary.h" + +namespace doris::vectorized { +#include "common/compile_check_avoid_begin.h" + +template <typename Impl> +class FunctionBinaryUnary : public IFunction { Review Comment: using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>; using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>; seems could expand those function class to reuse directly ########## be/src/vec/utils/varbinaryop_subbinary.h: ########## @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "vec/columns/column_const.h" +#include "vec/columns/column_varbinary.h" +#include "vec/core/block.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_varbinary.h" + +namespace doris::vectorized { +#include "common/compile_check_avoid_begin.h" + +struct SubBinaryUtil { + static void sub_binary_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + DCHECK_EQ(arguments.size(), 3); + auto res = ColumnVarbinary::create(); + + bool col_const[3]; + ColumnPtr argument_columns[3]; + for (int i = 0; i < 3; ++i) { + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + } + + const auto* specific_binary_column = + assert_cast<const ColumnVarbinary*>(argument_columns[0].get()); + const auto* specific_start_column = + assert_cast<const ColumnInt32*>(argument_columns[1].get()); + const auto* specific_len_column = + assert_cast<const ColumnInt32*>(argument_columns[2].get()); + + std::visit( + [&](auto binary_const, auto start_const, auto len_const) { + vectors<binary_const, start_const, len_const>( + specific_binary_column, specific_start_column, specific_len_column, + res.get(), input_rows_count); + 
}, + vectorized::make_bool_variant(col_const[0]), + vectorized::make_bool_variant(col_const[1]), + vectorized::make_bool_variant(col_const[2])); + block.get_by_position(result).column = std::move(res); + } + +private: + template <bool binary_const, bool start_const, bool len_const> + static void vectors(const ColumnVarbinary* binarys, const ColumnInt32* start, + const ColumnInt32* len, ColumnVarbinary* res, size_t size) { + if constexpr (start_const && len_const) { + if (start->get_data()[0] == 0 || len->get_data()[0] <= 0) { + for (size_t i = 0; i < size; ++i) { + res->insert_default(); + } + return; + } + } + + for (size_t i = 0; i < size; ++i) { + StringRef binary = binarys->get_data_at(index_check_const<binary_const>(i)); Review Comment: better get StringView ########## be/src/vec/functions/function_varbinary.cpp: ########## @@ -32,10 +35,14 @@ #include "vec/data_types/data_type_varbinary.h" #include "vec/functions/function.h" #include "vec/functions/function_helpers.h" +#include "vec/functions/function_totype.h" #include "vec/functions/simple_function_factory.h" #include "vec/functions/string_hex_util.h" namespace doris::vectorized { +#include "common/compile_check_avoid_begin.h" + +static constexpr int MAX_STACK_CIPHER_LEN = 1024 * 64; Review Comment: this maybe could remove ########## be/src/vec/functions/function_varbinary.cpp: ########## @@ -143,11 +150,197 @@ class FunctionFromBinary : public IFunction { } }; +struct NameVarbinaryLength { + static constexpr auto name = "length"; +}; + +struct VarbinaryLengthImpl { + using ReturnType = DataTypeInt32; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; + using ReturnColumnType = ColumnInt32; + + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeVarbinary>()}; + } + + static Status vector(const PaddedPODArray<doris::StringView>& data, + PaddedPODArray<Int32>& res) { + int rows_count = data.size(); + res.resize(rows_count); + for (int i = 0; i < 
rows_count; ++i) { + res[i] = data[i].size(); + } + return Status::OK(); + } +}; + +using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl, NameVarbinaryLength>; + +struct ToBase64BinaryImpl { + static constexpr auto name = "to_base64_binary"; + static constexpr auto is_nullable = false; + + using ReturnType = DataTypeString; + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + auto& col_ptr = block.get_by_position(arguments[0]).column; + if (const auto* col = check_and_get_column<ColumnVarbinary>(col_ptr.get())) { + auto result_column = ColumnString::create(); + result_column->get_offsets().reserve(input_rows_count); + + for (size_t i = 0; i < input_rows_count; i++) { + auto binary = col->get_data_at(i); + + if (binary.size == 0) { + result_column->insert_default(); + continue; + } + + char dst_array[MAX_STACK_CIPHER_LEN]; + char* dst = dst_array; + + int cipher_len = 4 * ((binary.size + 2) / 3); + std::unique_ptr<char[]> dst_uptr; + if (cipher_len > MAX_STACK_CIPHER_LEN) { + dst_uptr.reset(new char[cipher_len]); + dst = dst_uptr.get(); + } + + auto len = doris::base64_encode(reinterpret_cast<const unsigned char*>(binary.data), + binary.size, reinterpret_cast<unsigned char*>(dst)); + + result_column->insert_data(dst, len); + } + block.replace_by_position(result, std::move(result_column)); + } else { + return Status::RuntimeError("Illegal column {} of argument of function {}", + block.get_by_position(arguments[0]).column->get_name(), + ToBase64BinaryImpl::name); + } + + return Status::OK(); + } +}; + +using FunctionToBase64Binary = FunctionBinaryUnary<ToBase64BinaryImpl>; Review Comment: using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>; using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>; seems could expand those function class to reuse directly ########## be/src/vec/functions/function_varbinary.cpp: ########## 
@@ -143,11 +150,197 @@ class FunctionFromBinary : public IFunction { } }; +struct NameVarbinaryLength { + static constexpr auto name = "length"; +}; + +struct VarbinaryLengthImpl { + using ReturnType = DataTypeInt32; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; + using ReturnColumnType = ColumnInt32; + + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeVarbinary>()}; + } + + static Status vector(const PaddedPODArray<doris::StringView>& data, + PaddedPODArray<Int32>& res) { + int rows_count = data.size(); + res.resize(rows_count); + for (int i = 0; i < rows_count; ++i) { + res[i] = data[i].size(); + } + return Status::OK(); + } +}; + +using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl, NameVarbinaryLength>; + +struct ToBase64BinaryImpl { + static constexpr auto name = "to_base64_binary"; + static constexpr auto is_nullable = false; + + using ReturnType = DataTypeString; + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + auto& col_ptr = block.get_by_position(arguments[0]).column; + if (const auto* col = check_and_get_column<ColumnVarbinary>(col_ptr.get())) { + auto result_column = ColumnString::create(); + result_column->get_offsets().reserve(input_rows_count); + + for (size_t i = 0; i < input_rows_count; i++) { + auto binary = col->get_data_at(i); Review Comment: ```suggestion auto binary = col->get_data()[i]; ``` ########## be/src/vec/utils/varbinaryop_subbinary.h: ########## @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "vec/columns/column_const.h" +#include "vec/columns/column_varbinary.h" +#include "vec/core/block.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_varbinary.h" + +namespace doris::vectorized { +#include "common/compile_check_avoid_begin.h" + +struct SubBinaryUtil { + static void sub_binary_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + DCHECK_EQ(arguments.size(), 3); + auto res = ColumnVarbinary::create(); + + bool col_const[3]; + ColumnPtr argument_columns[3]; + for (int i = 0; i < 3; ++i) { + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + } + + const auto* specific_binary_column = + assert_cast<const ColumnVarbinary*>(argument_columns[0].get()); + const auto* specific_start_column = + assert_cast<const ColumnInt32*>(argument_columns[1].get()); + const auto* specific_len_column = + assert_cast<const ColumnInt32*>(argument_columns[2].get()); + + std::visit( + [&](auto binary_const, auto start_const, auto len_const) { + vectors<binary_const, start_const, len_const>( + specific_binary_column, specific_start_column, specific_len_column, + res.get(), input_rows_count); + }, + vectorized::make_bool_variant(col_const[0]), + vectorized::make_bool_variant(col_const[1]), + vectorized::make_bool_variant(col_const[2])); + block.get_by_position(result).column = std::move(res); + } + +private: + template <bool binary_const, bool start_const, bool 
len_const> + static void vectors(const ColumnVarbinary* binarys, const ColumnInt32* start, + const ColumnInt32* len, ColumnVarbinary* res, size_t size) { + if constexpr (start_const && len_const) { + if (start->get_data()[0] == 0 || len->get_data()[0] <= 0) { + for (size_t i = 0; i < size; ++i) { + res->insert_default(); + } + return; + } + } + + for (size_t i = 0; i < size; ++i) { + StringRef binary = binarys->get_data_at(index_check_const<binary_const>(i)); + int binary_size = binary.size; + + int start_value = start->get_data()[index_check_const<start_const>(i)]; + int len_value = len->get_data()[index_check_const<len_const>(i)]; + + if (start_value > binary_size || start_value < -binary_size || binary_size == 0 || Review Comment: The conditions here can be enclosed in parentheses and could add some comments for easier reading. ########## be/src/vec/functions/function_varbinary.cpp: ########## @@ -143,11 +150,197 @@ class FunctionFromBinary : public IFunction { } }; +struct NameVarbinaryLength { + static constexpr auto name = "length"; +}; + +struct VarbinaryLengthImpl { + using ReturnType = DataTypeInt32; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; + using ReturnColumnType = ColumnInt32; + + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeVarbinary>()}; + } + + static Status vector(const PaddedPODArray<doris::StringView>& data, + PaddedPODArray<Int32>& res) { + int rows_count = data.size(); + res.resize(rows_count); + for (int i = 0; i < rows_count; ++i) { + res[i] = data[i].size(); + } + return Status::OK(); + } +}; + +using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl, NameVarbinaryLength>; + +struct ToBase64BinaryImpl { + static constexpr auto name = "to_base64_binary"; + static constexpr auto is_nullable = false; + + using ReturnType = DataTypeString; + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, uint32_t result, + 
size_t input_rows_count) { + auto& col_ptr = block.get_by_position(arguments[0]).column; + if (const auto* col = check_and_get_column<ColumnVarbinary>(col_ptr.get())) { + auto result_column = ColumnString::create(); + result_column->get_offsets().reserve(input_rows_count); + + for (size_t i = 0; i < input_rows_count; i++) { + auto binary = col->get_data_at(i); + + if (binary.size == 0) { + result_column->insert_default(); + continue; + } + + char dst_array[MAX_STACK_CIPHER_LEN]; + char* dst = dst_array; + + int cipher_len = 4 * ((binary.size + 2) / 3); + std::unique_ptr<char[]> dst_uptr; + if (cipher_len > MAX_STACK_CIPHER_LEN) { + dst_uptr.reset(new char[cipher_len]); + dst = dst_uptr.get(); + } + + auto len = doris::base64_encode(reinterpret_cast<const unsigned char*>(binary.data), + binary.size, reinterpret_cast<unsigned char*>(dst)); + + result_column->insert_data(dst, len); + } + block.replace_by_position(result, std::move(result_column)); + } else { + return Status::RuntimeError("Illegal column {} of argument of function {}", + block.get_by_position(arguments[0]).column->get_name(), + ToBase64BinaryImpl::name); + } + + return Status::OK(); + } +}; + +using FunctionToBase64Binary = FunctionBinaryUnary<ToBase64BinaryImpl>; + +struct FromBase64BinaryImpl { + static constexpr auto name = "from_base64_binary"; + static constexpr auto is_nullable = true; + + using ReturnType = DataTypeVarbinary; + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + auto& col_ptr = block.get_by_position(arguments[0]).column; + if (const auto* col = check_and_get_column<ColumnString>(col_ptr.get())) { + auto result_column = ColumnVarbinary::create(); + auto null_map = ColumnUInt8::create(input_rows_count, 0); + + for (size_t i = 0; i < input_rows_count; i++) { + auto base64_string = col->get_data_at(i); + + if (base64_string.size == 0) { + result_column->insert_default(); + continue; + 
} + + char dst_array[MAX_STACK_CIPHER_LEN]; + char* dst = dst_array; Review Comment: ditto ########## be/src/vec/functions/function_varbinary.cpp: ########## @@ -143,11 +150,197 @@ class FunctionFromBinary : public IFunction { } }; +struct NameVarbinaryLength { + static constexpr auto name = "length"; +}; + +struct VarbinaryLengthImpl { + using ReturnType = DataTypeInt32; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; + using ReturnColumnType = ColumnInt32; + + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeVarbinary>()}; + } + + static Status vector(const PaddedPODArray<doris::StringView>& data, + PaddedPODArray<Int32>& res) { + int rows_count = data.size(); + res.resize(rows_count); + for (int i = 0; i < rows_count; ++i) { + res[i] = data[i].size(); + } + return Status::OK(); + } +}; + +using FunctionBinaryLength = FunctionUnaryToType<VarbinaryLengthImpl, NameVarbinaryLength>; + +struct ToBase64BinaryImpl { + static constexpr auto name = "to_base64_binary"; + static constexpr auto is_nullable = false; + + using ReturnType = DataTypeString; + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + auto& col_ptr = block.get_by_position(arguments[0]).column; + if (const auto* col = check_and_get_column<ColumnVarbinary>(col_ptr.get())) { + auto result_column = ColumnString::create(); + result_column->get_offsets().reserve(input_rows_count); + + for (size_t i = 0; i < input_rows_count; i++) { + auto binary = col->get_data_at(i); + + if (binary.size == 0) { + result_column->insert_default(); + continue; + } + + char dst_array[MAX_STACK_CIPHER_LEN]; + char* dst = dst_array; Review Comment: seems this could be done like lines 70-71: move it outside of the for loop -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
