This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch vectorized in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit ffdc9fc9be28ae94671916f06e4fd5f219707e93 Author: Pxl <[email protected]> AuthorDate: Wed Jan 12 17:08:09 2022 +0800 [Vectorized][Feature] support function conv (#7693) * support function conv() * add document --- be/src/exprs/math_functions.h | 14 +- be/src/vec/CMakeLists.txt | 1 + be/src/vec/data_types/data_type_bitmap.h | 2 + be/src/vec/data_types/data_type_date.h | 2 +- be/src/vec/data_types/data_type_date_time.h | 40 ++--- be/src/vec/data_types/data_type_decimal.h | 2 +- be/src/vec/data_types/data_type_number_base.h | 5 +- be/src/vec/data_types/data_type_string.h | 5 +- be/src/vec/functions/function_conv.cpp | 163 +++++++++++++++++++++ be/src/vec/functions/simple_function_factory.h | 2 + docs/.vuepress/sidebar/en.js | 5 + docs/.vuepress/sidebar/zh-CN.js | 5 + .../sql-functions/math-functions/conv.md | 60 ++++++++ .../sql-functions/math-functions/conv.md | 60 ++++++++ gensrc/script/doris_builtins_functions.py | 6 +- 15 files changed, 339 insertions(+), 33 deletions(-) diff --git a/be/src/exprs/math_functions.h b/be/src/exprs/math_functions.h index 15d8749..9d55ed6 100644 --- a/be/src/exprs/math_functions.h +++ b/be/src/exprs/math_functions.h @@ -50,7 +50,8 @@ public: static doris_udf::IntVal abs(doris_udf::FunctionContext*, const doris_udf::SmallIntVal&); static doris_udf::SmallIntVal abs(doris_udf::FunctionContext*, const doris_udf::TinyIntVal&); - static doris_udf::TinyIntVal sign(doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v); + static doris_udf::TinyIntVal sign(doris_udf::FunctionContext* ctx, + const doris_udf::DoubleVal& v); static doris_udf::DoubleVal sin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); static doris_udf::DoubleVal asin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); @@ -182,11 +183,6 @@ public: static double my_double_round(double value, int64_t dec, bool dec_unsigned, bool truncate); -private: - static const int32_t MIN_BASE = 2; - static const int32_t MAX_BASE = 36; - static const char* 
_s_alphanumeric_chars; - // Converts src_num in decimal to dest_base, // and fills expr_val.string_val with the result. static doris_udf::StringVal decimal_to_base(doris_udf::FunctionContext* ctx, int64_t src_num, @@ -207,6 +203,12 @@ private: // Returns false otherwise, indicating some other error condition. static bool handle_parse_result(int8_t dest_base, int64_t* num, StringParser::ParseResult parse_res); + + static const int32_t MIN_BASE = 2; + static const int32_t MAX_BASE = 36; + +private: + static const char* _s_alphanumeric_chars; }; } // namespace doris diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 01c69eb..aa302ce 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -108,6 +108,7 @@ set(VEC_FILES functions/functions_logical.cpp functions/function_case.cpp functions/function_cast.cpp + functions/function_conv.cpp functions/function_string.cpp functions/function_timestamp.cpp functions/function_utility.cpp diff --git a/be/src/vec/data_types/data_type_bitmap.h b/be/src/vec/data_types/data_type_bitmap.h index 692d6fc..69f5540 100644 --- a/be/src/vec/data_types/data_type_bitmap.h +++ b/be/src/vec/data_types/data_type_bitmap.h @@ -18,6 +18,7 @@ #pragma once #include "util/bitmap_value.h" #include "vec/columns/column.h" +#include "vec/columns/column_complex.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" @@ -27,6 +28,7 @@ public: DataTypeBitMap() = default; ~DataTypeBitMap() override = default; + using ColumnType = ColumnBitmap; using FieldType = BitmapValue; std::string do_get_name() const override { return get_family_name(); } diff --git a/be/src/vec/data_types/data_type_date.h b/be/src/vec/data_types/data_type_date.h index b3aa90c..b5d148b 100644 --- a/be/src/vec/data_types/data_type_date.h +++ b/be/src/vec/data_types/data_type_date.h @@ -34,7 +34,7 @@ public: bool equals(const IDataType& rhs) const override; std::string to_string(const IColumn& column, size_t row_num) const; - void 
to_string(const IColumn &column, size_t row_num, BufferWritable &ostr) const override; + void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; static void cast_to_date(Int64& x); }; diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h index e2c6bc5..6b6af04 100644 --- a/be/src/vec/data_types/data_type_date_time.h +++ b/be/src/vec/data_types/data_type_date_time.h @@ -28,25 +28,25 @@ class DateLUTImpl; namespace doris::vectorized { /** DateTime stores time as unix timestamp. - * The value itself is independent of time zone. - * - * In binary format it is represented as unix timestamp. - * In text format it is serialized to and parsed from YYYY-MM-DD hh:mm:ss format. - * The text format is dependent of time zone. - * - * To convert from/to text format, time zone may be specified explicitly or implicit time zone may be used. - * - * Time zone may be specified explicitly as type parameter, example: DateTime('Europe/Moscow'). - * As it does not affect the internal representation of values, - * all types with different time zones are equivalent and may be used interchangingly. - * Time zone only affects parsing and displaying in text formats. - * - * If time zone is not specified (example: DateTime without parameter), then default time zone is used. - * Default time zone is server time zone, if server is doing transformations - * and if client is doing transformations, unless 'use_client_time_zone' setting is passed to client; - * Server time zone is the time zone specified in 'timezone' parameter in configuration file, - * or system time zone at the moment of server startup. - */ + * The value itself is independent of time zone. + * + * In binary format it is represented as unix timestamp. + * In text format it is serialized to and parsed from YYYY-MM-DD hh:mm:ss format. + * The text format is dependent of time zone. 
+ * + * To convert from/to text format, time zone may be specified explicitly or implicit time zone may be used. + * + * Time zone may be specified explicitly as type parameter, example: DateTime('Europe/Moscow'). + * As it does not affect the internal representation of values, + * all types with different time zones are equivalent and may be used interchangingly. + * Time zone only affects parsing and displaying in text formats. + * + * If time zone is not specified (example: DateTime without parameter), then default time zone is used. + * Default time zone is server time zone, if server is doing transformations + * and if client is doing transformations, unless 'use_client_time_zone' setting is passed to client; + * Server time zone is the time zone specified in 'timezone' parameter in configuration file, + * or system time zone at the moment of server startup. + */ class DataTypeDateTime final : public DataTypeNumberBase<Int64> { public: DataTypeDateTime(); @@ -62,7 +62,7 @@ public: std::string to_string(const IColumn& column, size_t row_num) const; - void to_string(const IColumn &column, size_t row_num, BufferWritable &ostr) const override; + void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; static void cast_to_date_time(Int64& x); }; diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index 3404eb3..5b6ab86 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ b/be/src/vec/data_types/data_type_decimal.h @@ -88,8 +88,8 @@ class DataTypeDecimal final : public IDataType { static_assert(IsDecimalNumber<T>); public: - using FieldType = T; using ColumnType = ColumnDecimal<T>; + using FieldType = T; static constexpr bool is_parametric = true; diff --git a/be/src/vec/data_types/data_type_number_base.h b/be/src/vec/data_types/data_type_number_base.h index df7d236..e19383e 100644 --- a/be/src/vec/data_types/data_type_number_base.h +++ 
b/be/src/vec/data_types/data_type_number_base.h @@ -35,6 +35,7 @@ class DataTypeNumberBase : public IDataType { public: static constexpr bool is_parametric = false; + using ColumnType = ColumnVector<T>; using FieldType = T; const char* get_family_name() const override { return TypeName<T>::get(); } @@ -53,7 +54,9 @@ public: bool is_value_represented_by_number() const override { return true; } bool is_value_represented_by_integer() const override; bool is_value_represented_by_unsigned_integer() const override; - bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { return true; } + bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { + return true; + } bool have_maximum_size_of_value() const override { return true; } size_t get_size_of_value_in_memory() const override { return sizeof(T); } bool is_categorial() const override { return is_value_represented_by_integer(); } diff --git a/be/src/vec/data_types/data_type_string.h b/be/src/vec/data_types/data_type_string.h index 0c4ef1e..9d5b21b 100644 --- a/be/src/vec/data_types/data_type_string.h +++ b/be/src/vec/data_types/data_type_string.h @@ -28,6 +28,7 @@ namespace doris::vectorized { class DataTypeString final : public IDataType { public: + using ColumnType = ColumnString; using FieldType = String; static constexpr bool is_parametric = false; @@ -47,7 +48,9 @@ public: bool have_subtypes() const override { return false; } bool is_comparable() const override { return true; } bool can_be_compared_with_collation() const override { return true; } - bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { return true; } + bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { + return true; + } bool is_categorial() const override { return true; } bool can_be_inside_nullable() const override { return true; } bool can_be_inside_low_cardinality() const override { return true; } diff --git 
a/be/src/vec/functions/function_conv.cpp b/be/src/vec/functions/function_conv.cpp new file mode 100644 index 0000000..6f755ff --- /dev/null +++ b/be/src/vec/functions/function_conv.cpp @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exprs/math_functions.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" + +namespace doris::vectorized { + +template <typename Impl> +class FunctionConv : public IFunction { +public: + static constexpr auto name = "conv"; + String get_name() const override { return name; } + static FunctionPtr create() { return std::make_shared<FunctionConv<Impl>>(); } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + DataTypes get_variadic_argument_types_impl() const override { + return {std::make_shared<typename Impl::DataType>(), std::make_shared<DataTypeInt8>(), + std::make_shared<DataTypeInt8>()}; + } + size_t get_number_of_arguments() const override { + return get_variadic_argument_types_impl().size(); + } + + bool use_default_implementation_for_nulls() const override { return false; } + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto result_column = ColumnString::create(); + auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); + + ColumnPtr argument_columns[3]; + + for (int i = 0; i < 3; ++i) { + argument_columns[i] = block.get_by_position(arguments[i]).column; + if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) { + argument_columns[i] = nullable->get_nested_column_ptr(); + VectorizedUtils::update_null_map(result_null_map_column->get_data(), + nullable->get_null_map_data()); + } + } + + execute_straight( + context, + assert_cast<const typename Impl::DataType::ColumnType*>(argument_columns[0].get()), + assert_cast<const ColumnInt8*>(argument_columns[1].get()), + assert_cast<const 
ColumnInt8*>(argument_columns[2].get()), + assert_cast<ColumnString*>(result_column.get()), + assert_cast<ColumnUInt8*>(result_null_map_column.get())->get_data(), + input_rows_count); + + block.get_by_position(result).column = + ColumnNullable::create(std::move(result_column), std::move(result_null_map_column)); + return Status::OK(); + } + +private: + void execute_straight(FunctionContext* context, + const typename Impl::DataType::ColumnType* data_column, + const ColumnInt8* src_base_column, const ColumnInt8* dst_base_column, + ColumnString* result_column, NullMap& result_null_map, + size_t input_rows_count) { + for (size_t i = 0; i < input_rows_count; i++) { + if (result_null_map[i]) { + result_column->insert_default(); + continue; + } + + Int8 src_base = src_base_column->get_element(i); + Int8 dst_base = dst_base_column->get_element(i); + if (std::abs(src_base) < MathFunctions::MIN_BASE || + std::abs(src_base) > MathFunctions::MAX_BASE || + std::abs(dst_base) < MathFunctions::MIN_BASE || + std::abs(dst_base) > MathFunctions::MAX_BASE) { + result_null_map[i] = true; + result_column->insert_default(); + continue; + } + + Impl::calculate_cell(context, data_column, src_base, dst_base, result_column, + result_null_map, i); + } + } +}; + +struct ConvInt64Impl { + using DataType = DataTypeInt64; + + static void calculate_cell(FunctionContext* context, const DataType::ColumnType* data_column, + const Int8 src_base, const Int8 dst_base, + ColumnString* result_column, NullMap& result_null_map, + size_t index) { + Int64 num = data_column->get_element(index); + if (src_base < 0 && num >= 0) { + result_null_map[index] = true; + result_column->insert_default(); + return; + } + + int64_t decimal_num = num; + if (src_base != 10) { + if (!MathFunctions::decimal_in_base_to_decimal(num, src_base, &decimal_num)) { + MathFunctions::handle_parse_result(dst_base, &decimal_num, + StringParser::PARSE_OVERFLOW); + } + } + StringVal str = MathFunctions::decimal_to_base(context, 
decimal_num, dst_base); + result_column->insert_data(reinterpret_cast<const char*>(str.ptr), str.len); + } +}; + +struct ConvStringImpl { + using DataType = DataTypeString; + + static void calculate_cell(FunctionContext* context, const DataType::ColumnType* data_column, + const Int8 src_base, const Int8 dst_base, + ColumnString* result_column, NullMap& result_null_map, + size_t index) { + StringRef str = data_column->get_data_at(index); + StringParser::ParseResult parse_res; + int64_t decimal_num = + StringParser::string_to_int<int64_t>(str.data, str.size, src_base, &parse_res); + if (src_base < 0 && decimal_num >= 0) { + result_null_map[index] = true; + result_column->insert_default(); + return; + } + + if (!MathFunctions::handle_parse_result(dst_base, &decimal_num, parse_res)) { + result_column->insert_data("0", 1); + } else { + StringVal str = MathFunctions::decimal_to_base(context, decimal_num, dst_base); + result_column->insert_data(reinterpret_cast<const char*>(str.ptr), str.len); + } + } +}; + +void register_function_conv(SimpleFunctionFactory& factory) { + factory.register_function<FunctionConv<ConvInt64Impl>>(); + factory.register_function<FunctionConv<ConvStringImpl>>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index d757920..0718930 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -36,6 +36,7 @@ void register_function_hll_hash(SimpleFunctionFactory& factory); void register_function_logical(SimpleFunctionFactory& factory); void register_function_case(SimpleFunctionFactory& factory); void register_function_cast(SimpleFunctionFactory& factory); +void register_function_conv(SimpleFunctionFactory& factory); void register_function_plus(SimpleFunctionFactory& factory); void register_function_minus(SimpleFunctionFactory& factory); void register_function_multiply(SimpleFunctionFactory& 
factory); @@ -150,6 +151,7 @@ public: register_function_logical(instance); register_function_case(instance); register_function_cast(instance); + register_function_conv(instance); register_function_plus(instance); register_function_minus(instance); register_function_math(instance); diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index 4adaa79..022b26d 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -452,6 +452,11 @@ module.exports = [ children: ["murmur_hash3_32"], }, { + title: "Math Functions", + directoryPath: "math-functions/", + children: ["conv"], + }, + { title: "table functions", directoryPath: "table-functions/", children: [ diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index fdd2e01..7ec172a 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -444,6 +444,11 @@ module.exports = [ children: ["murmur_hash3_32"], }, { + title: "数学函数", + directoryPath: "math-functions/", + children: ["conv"], + }, + { title: "加密和信息摘要函数", directoryPath: "encrypt-digest-functions/", children: [ diff --git a/docs/en/sql-reference/sql-functions/math-functions/conv.md b/docs/en/sql-reference/sql-functions/math-functions/conv.md new file mode 100644 index 0000000..4ce1e18 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/math-functions/conv.md @@ -0,0 +1,60 @@ +--- +{ + "title": "conv", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# conv + +## description +### Syntax + +`VARCHAR CONV(VARCHAR input, TINYINT from_base, TINYINT to_base)` +`VARCHAR CONV(BIGINT input, TINYINT from_base, TINYINT to_base)` +Convert the input number to the target base. The input base range should be within `[2,36]`. + +## example + +``` +MySQL [test]> SELECT CONV(15,10,2); ++-----------------+ +| conv(15, 10, 2) | ++-----------------+ +| 1111 | ++-----------------+ + +MySQL [test]> SELECT CONV('ff',16,10); ++--------------------+ +| conv('ff', 16, 10) | ++--------------------+ +| 255 | ++--------------------+ + +MySQL [test]> SELECT CONV(230,10,16); ++-------------------+ +| conv(230, 10, 16) | ++-------------------+ +| E6 | ++-------------------+ +``` + +## keyword + CONV diff --git a/docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md b/docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md new file mode 100644 index 0000000..280228e --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md @@ -0,0 +1,60 @@ +--- +{ + "title": "conv", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# conv + +## description +### Syntax + +`VARCHAR CONV(VARCHAR input, TINYINT from_base, TINYINT to_base)` +`VARCHAR CONV(BIGINT input, TINYINT from_base, TINYINT to_base)` +对输入的数字进行进制转换,输入的进制范围应该在`[2,36]`以内。 + +## example + +``` +MySQL [test]> SELECT CONV(15,10,2); ++-----------------+ +| conv(15, 10, 2) | ++-----------------+ +| 1111 | ++-----------------+ + +MySQL [test]> SELECT CONV('ff',16,10); ++--------------------+ +| conv('ff', 16, 10) | ++--------------------+ +| 255 | ++--------------------+ + +MySQL [test]> SELECT CONV(230,10,16); ++-------------------+ +| conv(230, 10, 16) | ++-------------------+ +| E6 | ++-------------------+ +``` + +## keyword + CONV diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 2fe936a..8d3b6d9 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -627,13 +627,13 @@ visible_functions = [ [['conv'], 'VARCHAR', ['BIGINT', 'TINYINT', 'TINYINT'], '_ZN5doris13MathFunctions8conv_intEPN9doris_udf' - '15FunctionContextERKNS1_9BigIntValERKNS1_10TinyIntValES9_', '', '', '', ''], + '15FunctionContextERKNS1_9BigIntValERKNS1_10TinyIntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['conv'], 'VARCHAR', ['VARCHAR', 'TINYINT', 'TINYINT'], '_ZN5doris13MathFunctions11conv_stringEPN9doris_udf' - '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', '', ''], + '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['conv'], 'VARCHAR', ['STRING', 'TINYINT', 'TINYINT'], 
'_ZN5doris13MathFunctions11conv_stringEPN9doris_udf' - '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', '', ''], + '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['pmod'], 'BIGINT', ['BIGINT', 'BIGINT'], '_ZN5doris13MathFunctions11pmod_bigintEPN9doris_udf' --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
