This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit c46c54300dca683156da5e65aa9329d166f848a6 Author: zclllyybb <[email protected]> AuthorDate: Tue May 28 10:35:29 2024 +0800 [Not-Pick-2.1](upgrade) Remove compatibility code to prepare for next major version (#35312) Remove compatibility code to prepare for next major version --- be/src/agent/be_exec_version_manager.h | 21 +- .../aggregate_function_simple_factory.h | 4 +- .../vec/functions/array/function_array_index.cpp | 4 - be/src/vec/functions/array/function_array_index.h | 32 +- .../function_date_or_datetime_computation.cpp | 6 +- .../function_date_or_datetime_computation.h | 35 -- be/src/vec/functions/function_string.cpp | 145 ------ be/src/vec/functions/function_string.h | 490 --------------------- be/src/vec/functions/function_timestamp.cpp | 194 -------- be/src/vec/functions/simple_function_factory.h | 6 +- 10 files changed, 31 insertions(+), 906 deletions(-) diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index 320559035b7..d3b120eda21 100644 --- a/be/src/agent/be_exec_version_manager.h +++ b/be/src/agent/be_exec_version_manager.h @@ -46,22 +46,26 @@ private: static const int min_be_exec_version; }; -/* +/** * When we have some breaking change for execute engine, we should update be_exec_version. + * NOTICE: The change could only be dont in X.Y.0 version. and if you introduced new version number N, + * remember remove version N-1's all REUSEABLE changes in master branch only. REUSEABLE means scalar or agg functions' replacement. + * If not, the old replacement will happens in the new version which is wrong. + * * 0: not contain be_exec_version. - * 1: start from doris 1.2 + * 1: start from doris 1.2.0 * a. remove ColumnString terminating zero. * b. runtime filter use new hash method. - * 2: start from doris 2.0 + * 2: start from doris 2.0.0 * a. function month/day/hour/minute/second's return type is changed to smaller type. * b. 
in order to solve agg of sum/count is not compatibility during the upgrade process * c. change the string hash method in runtime filter * d. elt function return type change to nullable(string) * e. add repeat_max_num in repeat function - * 3: start from doris 2.0 (by some mistakes) + * 3: start from doris 2.0.0 (by some mistakes) * a. aggregation function do not serialize bitmap to string. * b. support window funnel mode. - * 4: start from doris 2.1 + * 4: start from doris 2.1.0 * a. ignore this line, window funnel mode should be enabled from 2.0. * b. array contains/position/countequal function return nullable in less situations. * c. cleared old version of Version 2. @@ -71,9 +75,10 @@ private: * g. do local merge of remote runtime filter * h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS * - * 5: start from doris 2.1.4 - * a. change the impl of percentile -*/ + * 5: start from doris 3.0.0 + * a. change the impl of percentile (need fix) + * b. clear old version of version 3->4 + */ constexpr inline int BeExecVersionManager::max_be_exec_version = 5; constexpr inline int BeExecVersionManager::min_be_exec_version = 0; diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h index b879f829ea9..fbdc06c0503 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h +++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h @@ -59,9 +59,9 @@ private: AggregateFunctions aggregate_functions; AggregateFunctions nullable_aggregate_functions; std::unordered_map<std::string, std::string> function_alias; - /// @TEMPORARY: for be_exec_version=2 + /// @TEMPORARY: for be_exec_version=4 /// in order to solve agg of sum/count is not compatibility during the upgrade process - constexpr static int AGG_FUNCTION_NEW = 2; + constexpr static int AGG_FUNCTION_NEW = 5; /// @TEMPORARY: for be_exec_version < AGG_FUNCTION_NEW. replace function to old version. 
std::unordered_map<std::string, std::string> function_to_replace; diff --git a/be/src/vec/functions/array/function_array_index.cpp b/be/src/vec/functions/array/function_array_index.cpp index 63ee5e85264..c153d6843fb 100644 --- a/be/src/vec/functions/array/function_array_index.cpp +++ b/be/src/vec/functions/array/function_array_index.cpp @@ -25,10 +25,6 @@ void register_function_array_index(SimpleFunctionFactory& factory) { factory.register_function<FunctionArrayIndex<ArrayContainsAction>>(); factory.register_function<FunctionArrayIndex<ArrayPositionAction>>(); factory.register_function<FunctionArrayIndex<ArrayCountEqual>>(); - - factory.register_alternative_function<FunctionArrayIndex<ArrayContainsAction, true>>(); - factory.register_alternative_function<FunctionArrayIndex<ArrayPositionAction, true>>(); - factory.register_alternative_function<FunctionArrayIndex<ArrayCountEqual, true>>(); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index 8abbc8be3a9..7090c8548cd 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -79,7 +79,7 @@ struct ParamValue { Field value; }; -template <typename ConcreteAction, bool OldVersion = false> +template <typename ConcreteAction> class FunctionArrayIndex : public IFunction { public: using ResultType = typename ConcreteAction::ResultType; @@ -157,14 +157,10 @@ public: } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - if constexpr (OldVersion) { + if (arguments[0]->is_nullable()) { return make_nullable(std::make_shared<DataTypeNumber<ResultType>>()); } else { - if (arguments[0]->is_nullable()) { - return make_nullable(std::make_shared<DataTypeNumber<ResultType>>()); - } else { - return std::make_shared<DataTypeNumber<ResultType>>(); - } + return std::make_shared<DataTypeNumber<ResultType>>(); } } @@ -236,14 +232,11 @@ private: } 
dst_data[row] = res; } - if constexpr (OldVersion) { - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } else { - if (outer_null_map == nullptr) { - return dst; - } - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); + + if (outer_null_map == nullptr) { + return dst; } + return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); } template <typename NestedColumnType, typename RightColumnType> @@ -300,14 +293,11 @@ private: } dst_data[row] = res; } - if constexpr (OldVersion) { - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } else { - if (outer_null_map == nullptr) { - return dst; - } - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); + + if (outer_null_map == nullptr) { + return dst; } + return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); } template <typename NestedColumnType> diff --git a/be/src/vec/functions/function_date_or_datetime_computation.cpp b/be/src/vec/functions/function_date_or_datetime_computation.cpp index 2644b2bf3fe..f6bf806ad46 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.cpp +++ b/be/src/vec/functions/function_date_or_datetime_computation.cpp @@ -73,9 +73,6 @@ using FunctionNow = FunctionCurrentDateOrDateTime<CurrentDateTimeImpl<NowFunctio using FunctionNowWithPrecision = FunctionCurrentDateOrDateTime<CurrentDateTimeImpl<NowFunctionName, true>>; -using FunctionNowWithPrecisionOld = - FunctionCurrentDateOrDateTimeOld<CurrentDateTimeImpl<NowFunctionName, true>>; - struct CurDateFunctionName { static constexpr auto name = "curdate"; }; @@ -139,8 +136,7 @@ void register_function_date_time_computation(SimpleFunctionFactory& factory) { factory.register_function<FunctionMilliSecToDateTime>(); factory.register_function<FunctionSecToDateTime>(); - factory.register_alternative_function<FunctionNowWithPrecisionOld>(); - + // alias factory.register_alias("days_add", "date_add"); 
factory.register_alias("days_add", "adddate"); factory.register_alias("months_add", "add_months"); diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index 916e9e1003f..51072ee2aae 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -949,41 +949,6 @@ struct CurrentDateTimeImpl { } }; -template <typename FunctionImpl> -class FunctionCurrentDateOrDateTimeOld : public IFunction { -public: - static constexpr bool has_variadic_argument = - !std::is_void_v<decltype(has_variadic_argument_types(std::declval<FunctionImpl>()))>; - - static constexpr auto name = FunctionImpl::name; - static FunctionPtr create() { return std::make_shared<FunctionCurrentDateOrDateTimeOld>(); } - - String get_name() const override { return name; } - - size_t get_number_of_arguments() const override { return 0; } - - // the only diff in old version is it's ALWAYS_NOT_NULLABLE - bool use_default_implementation_for_nulls() const override { return false; } - - DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { - return std::make_shared<typename FunctionImpl::ReturnType>(); - } - - bool is_variadic() const override { return true; } - - DataTypes get_variadic_argument_types_impl() const override { - if constexpr (has_variadic_argument) { - return FunctionImpl::get_variadic_argument_types(); - } - return {}; - } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - return FunctionImpl::execute(context, block, arguments, result, input_rows_count); - } -}; - template <typename FunctionName, typename DateType, typename NativeType> struct CurrentDateImpl { using ReturnType = DateType; diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 
bfbd57f4747..407c9ffa1ce 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -692,96 +692,6 @@ struct UnHexImpl { } }; -struct UnHexOldImpl { - static constexpr auto name = "unhex"; - using ReturnType = DataTypeString; - using ColumnType = ColumnString; - - static bool check_and_decode_one(char& c, const char src_c, bool flag) { - int k = flag ? 16 : 1; - int value = src_c - '0'; - // 9 = ('9'-'0') - if (value >= 0 && value <= 9) { - c += value * k; - return true; - } - - value = src_c - 'A'; - // 5 = ('F'-'A') - if (value >= 0 && value <= 5) { - c += (value + 10) * k; - return true; - } - - value = src_c - 'a'; - // 5 = ('f'-'a') - if (value >= 0 && value <= 5) { - c += (value + 10) * k; - return true; - } - // not in ( ['0','9'], ['a','f'], ['A','F'] ) - return false; - } - - static int hex_decode(const char* src_str, size_t src_len, char* dst_str) { - // if str length is odd or 0, return empty string like mysql dose. - if ((src_len & 1) != 0 or src_len == 0) { - return 0; - } - //check and decode one character at the same time - // character in ( ['0','9'], ['a','f'], ['A','F'] ), return 'NULL' like mysql dose. 
- for (auto i = 0, dst_index = 0; i < src_len; i += 2, dst_index++) { - char c = 0; - // combine two character into dst_str one character - bool left_4bits_flag = check_and_decode_one(c, *(src_str + i), true); - bool right_4bits_flag = check_and_decode_one(c, *(src_str + i + 1), false); - - if (!left_4bits_flag || !right_4bits_flag) { - return 0; - } - *(dst_str + dst_index) = c; - } - return src_len / 2; - } - - static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets, - NullMap& null_map) { - auto rows_count = offsets.size(); - dst_offsets.resize(rows_count); - - for (int i = 0; i < rows_count; ++i) { - if (null_map[i]) { - StringOP::push_null_string(i, dst_data, dst_offsets, null_map); - continue; - } - const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); - size_t srclen = offsets[i] - offsets[i - 1]; - - if (srclen == 0) { - StringOP::push_empty_string(i, dst_data, dst_offsets); - continue; - } - - char dst_array[MAX_STACK_CIPHER_LEN]; - char* dst = dst_array; - - int cipher_len = srclen / 2; - std::unique_ptr<char[]> dst_uptr; - if (cipher_len > MAX_STACK_CIPHER_LEN) { - dst_uptr.reset(new char[cipher_len]); - dst = dst_uptr.get(); - } - - int outlen = hex_decode(source, srclen, dst); - - StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets); - } - - return Status::OK(); - } -}; - struct NameStringSpace { static constexpr auto name = "space"; }; @@ -850,49 +760,6 @@ struct ToBase64Impl { } }; -struct ToBase64OldImpl { - static constexpr auto name = "to_base64"; - using ReturnType = DataTypeString; - using ColumnType = ColumnString; - - static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets, - NullMap& null_map) { - auto rows_count = offsets.size(); - dst_offsets.resize(rows_count); - - for (int i = 0; i 
< rows_count; ++i) { - if (null_map[i]) { - StringOP::push_null_string(i, dst_data, dst_offsets, null_map); - continue; - } - - const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); - size_t srclen = offsets[i] - offsets[i - 1]; - - if (srclen == 0) { - StringOP::push_empty_string(i, dst_data, dst_offsets); - continue; - } - - char dst_array[MAX_STACK_CIPHER_LEN]; - char* dst = dst_array; - - int cipher_len = (int)(4.0 * ceil((double)srclen / 3.0)); - std::unique_ptr<char[]> dst_uptr; - if (cipher_len > MAX_STACK_CIPHER_LEN) { - dst_uptr.reset(new char[cipher_len]); - dst = dst_uptr.get(); - } - - auto outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst); - - StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets); - } - return Status::OK(); - } -}; - struct FromBase64Impl { static constexpr auto name = "from_base64"; using ReturnType = DataTypeString; @@ -1087,9 +954,6 @@ using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>; using FunctionUnHex = FunctionStringEncode<UnHexImpl>; using FunctionToBase64 = FunctionStringEncode<ToBase64Impl>; - -using FunctionUnHexOld = FunctionStringOperateToNullType<UnHexOldImpl>; -using FunctionToBase64Old = FunctionStringOperateToNullType<ToBase64OldImpl>; using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>; using FunctionStringAppendTrailingCharIfAbsent = @@ -1161,15 +1025,6 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>(); factory.register_function<FunctionStrcmp>(); - /// @TEMPORARY: for be_exec_version=3 - factory.register_alternative_function<FunctionSubstringOld<Substr3ImplOld>>(); - factory.register_alternative_function<FunctionSubstringOld<Substr2ImplOld>>(); - factory.register_alternative_function<FunctionLeftOld>(); - factory.register_alternative_function<FunctionRightOld>(); - 
factory.register_alternative_function<FunctionSubstringIndexOld>(); - factory.register_alternative_function<FunctionUnHexOld>(); - factory.register_alternative_function<FunctionToBase64Old>(); - factory.register_alias(FunctionLeft::name, "strleft"); factory.register_alias(FunctionRight::name, "strright"); factory.register_alias(SubstringUtil::name, "substr"); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 5be0b88de62..31d18c3add3 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -360,173 +360,6 @@ private: } }; -struct SubstringUtilOld { - static constexpr auto name = "substring"; - - static void substring_execute(Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { - DCHECK_EQ(arguments.size(), 3); - auto res = ColumnString::create(); - auto null_map = ColumnUInt8::create(input_rows_count, 0); - - bool col_const[3]; - ColumnPtr argument_columns[3]; - for (int i = 0; i < 3; ++i) { - col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); - } - argument_columns[0] = col_const[0] ? 
static_cast<const ColumnConst&>( - *block.get_by_position(arguments[0]).column) - .convert_to_full_column() - : block.get_by_position(arguments[0]).column; - - default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); - - for (int i = 0; i < 3; i++) { - check_set_nullable(argument_columns[i], null_map, col_const[i]); - } - - const auto* specific_str_column = - assert_cast<const ColumnString*>(argument_columns[0].get()); - const auto* specific_start_column = - assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get()); - const auto* specific_len_column = - assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get()); - - auto vectors = vectors_utf8<false>; - bool is_ascii = simd::VStringFunctions::is_ascii( - {specific_str_column->get_chars().data(), specific_str_column->get_chars().size()}); - if (col_const[1] && col_const[2] && is_ascii) { - vectors = vectors_ascii<true>; - } else if (col_const[1] && col_const[2]) { - vectors = vectors_utf8<true>; - } else if (is_ascii) { - vectors = vectors_ascii<false>; - } - vectors(specific_str_column->get_chars(), specific_str_column->get_offsets(), - specific_start_column->get_data(), specific_len_column->get_data(), - null_map->get_data(), res->get_chars(), res->get_offsets()); - - block.get_by_position(result).column = - ColumnNullable::create(std::move(res), std::move(null_map)); - } - -private: - template <bool is_const> - static void vectors_utf8(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, - const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, - NullMap& null_map, ColumnString::Chars& res_chars, - ColumnString::Offsets& res_offsets) { - size_t size = offsets.size(); - res_offsets.resize(size); - res_chars.reserve(chars.size()); - - std::array<std::byte, 128 * 1024> buf; - PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; - PMR::vector<size_t> index {&pool}; - - if constexpr (is_const) { - if (start[0] == 0 || len[0] 
<= 0) { - for (size_t i = 0; i < size; ++i) { - StringOP::push_empty_string(i, res_chars, res_offsets); - } - return; - } - } - - for (size_t i = 0; i < size; ++i) { - int str_size = offsets[i] - offsets[i - 1]; - const char* str_data = (char*)chars.data() + offsets[i - 1]; - int start_value = is_const ? start[0] : start[i]; - int len_value = is_const ? len[0] : len[i]; - - // return empty string if start > src.length - if (start_value > str_size || str_size == 0 || start_value == 0 || len_value <= 0) { - StringOP::push_empty_string(i, res_chars, res_offsets); - continue; - } - - size_t byte_pos = 0; - index.clear(); - for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { - char_size = get_utf8_byte_length(str_data[j]); - index.push_back(j); - if (start_value > 0 && index.size() > start_value + len_value) { - break; - } - } - - int fixed_pos = start_value; - if (fixed_pos < -(int)index.size()) { - StringOP::push_empty_string(i, res_chars, res_offsets); - continue; - } - if (fixed_pos < 0) { - fixed_pos = index.size() + fixed_pos + 1; - } - if (fixed_pos > index.size()) { - StringOP::push_null_string(i, res_chars, res_offsets, null_map); - continue; - } - - byte_pos = index[fixed_pos - 1]; - size_t fixed_len = str_size - byte_pos; - if (fixed_pos + len_value <= index.size()) { - fixed_len = index[fixed_pos + len_value - 1] - byte_pos; - } - - if (byte_pos <= str_size && fixed_len > 0) { - StringOP::push_value_string_reserved_and_allow_overflow( - {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); - } else { - StringOP::push_empty_string(i, res_chars, res_offsets); - } - } - } - - template <bool is_const> - static void vectors_ascii(const ColumnString::Chars& chars, - const ColumnString::Offsets& offsets, - const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, - NullMap& null_map, ColumnString::Chars& res_chars, - ColumnString::Offsets& res_offsets) { - size_t size = offsets.size(); - res_offsets.resize(size); - - if constexpr 
(is_const) { - if (start[0] == 0 || len[0] <= 0) { - for (size_t i = 0; i < size; ++i) { - StringOP::push_empty_string(i, res_chars, res_offsets); - } - return; - } - res_chars.reserve(std::min(chars.size(), len[0] * size)); - } else { - res_chars.reserve(chars.size()); - } - - for (size_t i = 0; i < size; ++i) { - int str_size = offsets[i] - offsets[i - 1]; - const char* str_data = (char*)chars.data() + offsets[i - 1]; - - int start_value = is_const ? start[0] : start[i]; - int len_value = is_const ? len[0] : len[i]; - - if (start_value > str_size || start_value < -str_size || str_size == 0 || - len_value <= 0) { - StringOP::push_empty_string(i, res_chars, res_offsets); - continue; - } - int fixed_pos = start_value - 1; - if (fixed_pos < 0) { - fixed_pos = str_size + fixed_pos + 1; - } - size_t fixed_len = std::min(str_size - fixed_pos, len_value); - StringOP::push_value_string_reserved_and_allow_overflow( - {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); - } - } -}; - template <typename Impl> class FunctionSubstring : public IFunction { public: @@ -598,81 +431,6 @@ struct Substr2Impl { } }; -template <typename Impl> -class FunctionSubstringOld : public IFunction { -public: - static constexpr auto name = SubstringUtilOld::name; - String get_name() const override { return name; } - static FunctionPtr create() { return std::make_shared<FunctionSubstringOld<Impl>>(); } - - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return make_nullable(std::make_shared<DataTypeString>()); - } - DataTypes get_variadic_argument_types_impl() const override { - return Impl::get_variadic_argument_types(); - } - size_t get_number_of_arguments() const override { - return get_variadic_argument_types_impl().size(); - } - - bool use_default_implementation_for_nulls() const override { return false; } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const 
override { - return Impl::execute_impl(context, block, arguments, result, input_rows_count); - } -}; - -struct Substr3ImplOld { - static DataTypes get_variadic_argument_types() { - return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(), - std::make_shared<DataTypeInt32>()}; - } - - static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { - SubstringUtilOld::substring_execute(block, arguments, result, input_rows_count); - return Status::OK(); - } -}; - -struct Substr2ImplOld { - static DataTypes get_variadic_argument_types() { - return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()}; - } - - static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { - auto col_len = ColumnInt32::create(input_rows_count); - auto& strlen_data = col_len->get_data(); - - ColumnPtr str_col; - bool str_const; - std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column); - if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) { - str_col = nullable->get_nested_column_ptr(); - } - const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets(); - - if (str_const) { - std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]); - } else { - for (int i = 0; i < input_rows_count; ++i) { - strlen_data[i] = str_offset[i] - str_offset[i - 1]; - } - } - - // we complete the column2(strlen) with the default value - each row's strlen. 
- block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"}); - ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1}; - - SubstringUtilOld::substring_execute(block, temp_arguments, result, input_rows_count); - return Status::OK(); - } -}; - template <bool Reverse> class FunctionMaskPartial; @@ -883,33 +641,6 @@ public: } }; -class FunctionLeftOld : public IFunction { -public: - static constexpr auto name = "left"; - static FunctionPtr create() { return std::make_shared<FunctionLeftOld>(); } - String get_name() const override { return name; } - size_t get_number_of_arguments() const override { return 2; } - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return make_nullable(std::make_shared<DataTypeString>()); - } - - bool use_default_implementation_for_nulls() const override { return false; } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - auto int_type = std::make_shared<DataTypeInt32>(); - size_t num_columns_without_result = block.columns(); - block.insert({int_type->create_column_const(input_rows_count, to_field(1)), int_type, - "const 1"}); - ColumnNumbers temp_arguments(3); - temp_arguments[0] = arguments[0]; - temp_arguments[1] = num_columns_without_result; - temp_arguments[2] = arguments[1]; - SubstringUtilOld::substring_execute(block, temp_arguments, result, input_rows_count); - return Status::OK(); - } -}; - class FunctionRight : public IFunction { public: static constexpr auto name = "right"; @@ -959,67 +690,6 @@ public: } }; -class FunctionRightOld : public IFunction { -public: - static constexpr auto name = "right"; - static FunctionPtr create() { return std::make_shared<FunctionRightOld>(); } - String get_name() const override { return name; } - size_t get_number_of_arguments() const override { return 2; } - DataTypePtr get_return_type_impl(const DataTypes& 
arguments) const override { - return make_nullable(std::make_shared<DataTypeString>()); - } - - bool use_default_implementation_for_nulls() const override { return false; } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - auto int_type = std::make_shared<DataTypeInt32>(); - auto params1 = ColumnInt32::create(input_rows_count); - auto params2 = ColumnInt32::create(input_rows_count); - size_t num_columns_without_result = block.columns(); - - // params1 = max(arg[1], -len(arg)) - auto& index_data = params1->get_data(); - auto& strlen_data = params2->get_data(); - - // we don't have to update null_map because FunctionSubstring will - // update it - // getNestedColumnIfNull arg[0] - auto str_col = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) { - str_col = nullable->get_nested_column_ptr(); - } - const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets(); - - // getNestedColumnIfNull arg[1] - auto pos_col = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - if (const auto* nullable = check_and_get_column<const ColumnNullable>(*pos_col)) { - pos_col = nullable->get_nested_column_ptr(); - } - const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data(); - - for (int i = 0; i < input_rows_count; ++i) { - strlen_data[i] = str_offset[i] - str_offset[i - 1]; - } - - for (int i = 0; i < input_rows_count; ++i) { - index_data[i] = std::max(-pos_data[i], -strlen_data[i]); - } - - block.insert({std::move(params1), int_type, "index"}); - block.insert({std::move(params2), int_type, "strlen"}); - - ColumnNumbers temp_arguments(3); - temp_arguments[0] = arguments[0]; - temp_arguments[1] = num_columns_without_result; - temp_arguments[2] = num_columns_without_result + 1; - 
SubstringUtilOld::substring_execute(block, temp_arguments, result, input_rows_count); - return Status::OK(); - } -}; - struct NullOrEmptyImpl { static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; } @@ -2097,166 +1767,6 @@ public: } }; -class FunctionSubstringIndexOld : public IFunction { -public: - static constexpr auto name = "substring_index"; - static FunctionPtr create() { return std::make_shared<FunctionSubstringIndexOld>(); } - String get_name() const override { return name; } - size_t get_number_of_arguments() const override { return 3; } - - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return make_nullable(std::make_shared<DataTypeString>()); - } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - DCHECK_EQ(arguments.size(), 3); - - auto null_map = ColumnUInt8::create(input_rows_count, 0); - // Create a zero column to simply implement - auto res = ColumnString::create(); - - auto& res_offsets = res->get_offsets(); - auto& res_chars = res->get_chars(); - res_offsets.resize(input_rows_count); - ColumnPtr content_column; - bool content_const = false; - std::tie(content_column, content_const) = - unpack_if_const(block.get_by_position(arguments[0]).column); - - if (const auto* nullable = check_and_get_column<const ColumnNullable>(*content_column)) { - // Danger: Here must dispose the null map data first! 
Because - // argument_columns[0]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), nullable->get_null_map_data()); - content_column = nullable->get_nested_column_ptr(); - } - - const auto* str_col = assert_cast<const ColumnString*>(content_column.get()); - - [[maybe_unused]] const auto& [delimiter_col, delimiter_const] = - unpack_if_const(block.get_by_position(arguments[1]).column); - auto delimiter = delimiter_col->get_data_at(0); - int32_t delimiter_size = delimiter.size; - - [[maybe_unused]] const auto& [part_num_col, part_const] = - unpack_if_const(block.get_by_position(arguments[2]).column); - auto part_number = *((int*)part_num_col->get_data_at(0).data); - - if (part_number == 0 || delimiter_size == 0) { - for (size_t i = 0; i < input_rows_count; ++i) { - StringOP::push_empty_string(i, res_chars, res_offsets); - } - } else if (part_number > 0) { - if (delimiter_size == 1) { - // If delimiter is a char, use memchr to split - for (size_t i = 0; i < input_rows_count; ++i) { - auto str = str_col->get_data_at(i); - int32_t offset = -1; - int32_t num = 0; - while (num < part_number) { - size_t n = str.size - offset - 1; - const char* pos = reinterpret_cast<const char*>( - memchr(str.data + offset + 1, delimiter.data[0], n)); - if (pos != nullptr) { - offset = pos - str.data; - num++; - } else { - offset = str.size; - num = (num == 0) ? 
0 : num + 1; - break; - } - } - - if (num == part_number) { - StringOP::push_value_string( - std::string_view {reinterpret_cast<const char*>(str.data), - (size_t)offset}, - i, res_chars, res_offsets); - } else { - StringOP::push_value_string(std::string_view(str.data, str.size), i, - res_chars, res_offsets); - } - } - } else { - StringRef delimiter_ref(delimiter); - StringSearch search(&delimiter_ref); - for (size_t i = 0; i < input_rows_count; ++i) { - auto str = str_col->get_data_at(i); - int32_t offset = -delimiter_size; - int32_t num = 0; - while (num < part_number) { - size_t n = str.size - offset - delimiter_size; - // search first match delimter_ref index from src string among str_offset to end - const char* pos = search.search(str.data + offset + delimiter_size, n); - if (pos < str.data + str.size) { - offset = pos - str.data; - num++; - } else { - offset = str.size; - num = (num == 0) ? 0 : num + 1; - break; - } - } - - if (num == part_number) { - StringOP::push_value_string( - std::string_view {reinterpret_cast<const char*>(str.data), - (size_t)offset}, - i, res_chars, res_offsets); - } else { - StringOP::push_value_string(std::string_view(str.data, str.size), i, - res_chars, res_offsets); - } - } - } - } else { - // if part_number is negative - part_number = -part_number; - for (size_t i = 0; i < input_rows_count; ++i) { - auto str = str_col->get_data_at(i); - auto str_str = str.to_string(); - int32_t offset = str.size; - int32_t pre_offset = offset; - int32_t num = 0; - auto substr = str_str; - while (num <= part_number && offset >= 0) { - offset = (int)substr.rfind(delimiter, offset); - if (offset != -1) { - if (++num == part_number) { - break; - } - pre_offset = offset; - offset = offset - 1; - substr = str_str.substr(0, pre_offset); - } else { - break; - } - } - num = (offset == -1 && num != 0) ? 
num + 1 : num; - - if (num == part_number) { - if (offset == -1) { - StringOP::push_value_string(std::string_view(str.data, str.size), i, - res_chars, res_offsets); - } else { - StringOP::push_value_string( - std::string_view {str.data + offset + delimiter_size, - str.size - offset - delimiter_size}, - i, res_chars, res_offsets); - } - } else { - StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars, - res_offsets); - } - } - } - - block.get_by_position(result).column = - ColumnNullable::create(std::move(res), std::move(null_map)); - return Status::OK(); - } -}; class FunctionSplitByString : public IFunction { public: static constexpr auto name = "split_by_string"; diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index c2285795c71..fe73d8db3df 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -681,136 +681,6 @@ struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl<DateType> { static DataTypes get_variadic_argument_types() { return {std::make_shared<DateType>()}; } }; -template <typename DateType> -struct UnixTimeStampDateImplOld { - static DataTypes get_variadic_argument_types() { return {std::make_shared<DateType>()}; } - - static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { - RETURN_REAL_TYPE_FOR_DATEV2_FUNCTION(DataTypeInt32); - } - - static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { - const ColumnPtr& col_source = block.get_by_position(arguments[0]).column; - auto col_result = ColumnVector<Int32>::create(); - auto null_map = ColumnVector<UInt8>::create(); - auto& col_result_data = col_result->get_data(); - - col_result->resize(input_rows_count); - - if constexpr (std::is_same_v<DateType, DataTypeDate>) { - null_map->resize(input_rows_count); - auto& null_map_data = null_map->get_data(); - - 
for (int i = 0; i < input_rows_count; i++) { - if (col_source->is_null_at(i)) { - null_map_data[i] = true; - continue; - } - - StringRef source = col_source->get_data_at(i); - const VecDateTimeValue& ts_value = - reinterpret_cast<const VecDateTimeValue&>(*source.data); - int64_t timestamp {}; - if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { - null_map_data[i] = true; - } else { - null_map_data[i] = false; - col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp); - } - } - block.replace_by_position( - result, ColumnNullable::create(std::move(col_result), std::move(null_map))); - } else if constexpr (std::is_same_v<DateType, DataTypeDateV2>) { - const auto is_nullable = block.get_by_position(arguments[0]).type->is_nullable(); - if (is_nullable) { - null_map->resize(input_rows_count); - auto& null_map_data = null_map->get_data(); - for (int i = 0; i < input_rows_count; i++) { - if (col_source->is_null_at(i)) { - DCHECK(is_nullable); - null_map_data[i] = true; - continue; - } - - StringRef source = col_source->get_data_at(i); - const DateV2Value<DateV2ValueType>& ts_value = - reinterpret_cast<const DateV2Value<DateV2ValueType>&>(*source.data); - int64_t timestamp {}; - if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { - null_map_data[i] = true; - } else { - null_map_data[i] = false; - col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp); - } - } - block.replace_by_position( - result, ColumnNullable::create(std::move(col_result), std::move(null_map))); - } else { - for (int i = 0; i < input_rows_count; i++) { - DCHECK(!col_source->is_null_at(i)); - StringRef source = col_source->get_data_at(i); - const DateV2Value<DateV2ValueType>& ts_value = - reinterpret_cast<const DateV2Value<DateV2ValueType>&>(*source.data); - int64_t timestamp {}; - const auto valid = - ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); - DCHECK(valid); - col_result_data[i] = 
UnixTimeStampImpl::trim_timestamp(timestamp); - } - block.replace_by_position(result, std::move(col_result)); - } - } else { - const auto is_nullable = block.get_by_position(arguments[0]).type->is_nullable(); - if (is_nullable) { - null_map->resize(input_rows_count); - auto& null_map_data = null_map->get_data(); - for (int i = 0; i < input_rows_count; i++) { - if (col_source->is_null_at(i)) { - DCHECK(is_nullable); - null_map_data[i] = true; - continue; - } - - StringRef source = col_source->get_data_at(i); - const DateV2Value<DateTimeV2ValueType>& ts_value = - reinterpret_cast<const DateV2Value<DateTimeV2ValueType>&>(*source.data); - int64_t timestamp {}; - if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { - null_map_data[i] = true; - } else { - null_map_data[i] = false; - col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp); - } - } - block.replace_by_position( - result, ColumnNullable::create(std::move(col_result), std::move(null_map))); - } else { - for (int i = 0; i < input_rows_count; i++) { - DCHECK(!col_source->is_null_at(i)); - StringRef source = col_source->get_data_at(i); - const DateV2Value<DateTimeV2ValueType>& ts_value = - reinterpret_cast<const DateV2Value<DateTimeV2ValueType>&>(*source.data); - int64_t timestamp {}; - const auto valid = - ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); - DCHECK(valid); - col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp); - } - block.replace_by_position(result, std::move(col_result)); - } - } - - return Status::OK(); - } -}; - -template <typename DateType> -struct UnixTimeStampDatetimeImplOld : public UnixTimeStampDateImplOld<DateType> { - static DataTypes get_variadic_argument_types() { return {std::make_shared<DateType>()}; } -}; - // This impl doesn't use default impl to deal null value. 
struct UnixTimeStampStrImpl { static DataTypes get_variadic_argument_types() { @@ -875,61 +745,6 @@ struct UnixTimeStampStrImpl { } }; -struct UnixTimeStampStrImplOld { - static DataTypes get_variadic_argument_types() { - return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; - } - - static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { - return make_nullable(std::make_shared<DataTypeInt32>()); - } - - static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { - const ColumnPtr col_source = block.get_by_position(arguments[0]).column; - const ColumnPtr col_format = block.get_by_position(arguments[1]).column; - - auto col_result = ColumnVector<Int32>::create(); - auto null_map = ColumnVector<UInt8>::create(); - - col_result->resize(input_rows_count); - null_map->resize(input_rows_count); - - auto& col_result_data = col_result->get_data(); - auto& null_map_data = null_map->get_data(); - - for (int i = 0; i < input_rows_count; i++) { - if (col_source->is_null_at(i) || col_format->is_null_at(i)) { - null_map_data[i] = true; - continue; - } - - StringRef source = col_source->get_data_at(i); - StringRef fmt = col_format->get_data_at(i); - - VecDateTimeValue ts_value; - if (!ts_value.from_date_format_str(fmt.data, fmt.size, source.data, source.size)) { - null_map_data[i] = true; - continue; - } - - int64_t timestamp {}; - if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { - null_map_data[i] = true; - } else { - null_map_data[i] = false; - col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp); - } - } - - block.replace_by_position( - result, ColumnNullable::create(std::move(col_result), std::move(null_map))); - - return Status::OK(); - } -}; - template <typename Impl> class FunctionUnixTimestamp : public IFunction { public: @@ -1404,15 +1219,6 @@ void register_function_timestamp(SimpleFunctionFactory& 
factory) { factory.register_function<DateTimeToTimestamp<MicroSec>>(); factory.register_function<DateTimeToTimestamp<MilliSec>>(); factory.register_function<DateTimeToTimestamp<Sec>>(); - - /// @TEMPORARY: for be_exec_version=3 - factory.register_alternative_function< - FunctionUnixTimestamp<UnixTimeStampDateImplOld<DataTypeDate>>>(); - factory.register_alternative_function< - FunctionUnixTimestamp<UnixTimeStampDateImplOld<DataTypeDateV2>>>(); - factory.register_alternative_function< - FunctionUnixTimestamp<UnixTimeStampDateImplOld<DataTypeDateTimeV2>>>(); - factory.register_alternative_function<FunctionUnixTimestamp<UnixTimeStampStrImplOld>>(); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 889a9743635..3a65daeee2e 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -109,7 +109,9 @@ class SimpleFunctionFactory { using Creator = std::function<FunctionBuilderPtr()>; using FunctionCreators = phmap::flat_hash_map<std::string, Creator>; using FunctionIsVariadic = phmap::flat_hash_set<std::string>; - /// @TEMPORARY: for be_exec_version=5 + /// @TEMPORARY: for be_exec_version=5. + /// whenever change this, please make sure old functions was all cleared. otherwise the version now-1 will think it should do replacement + /// which actually should be done by now-2 version. constexpr static int NEWEST_VERSION_FUNCTION_SUBSTITUTE = 5; public: @@ -146,7 +148,7 @@ public: function_alias[alias] = name; } - /// @TEMPORARY: for be_exec_version=3 + /// @TEMPORARY: for be_exec_version=4 template <class Function> void register_alternative_function() { static std::string suffix {"_old_for_version_before_5_0"}; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
