This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7a08a799e9 [Vectorized](function) support order by convert_to function
(#14555)
7a08a799e9 is described below
commit 7a08a799e90af3e38f0dce03cd05f751bd5b46ce
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Nov 29 15:22:27 2022 +0800
[Vectorized](function) support order by convert_to function (#14555)
---
be/src/vec/functions/function_string.cpp | 1 +
be/src/vec/functions/function_string.h | 92 ++++++++++++++++++++++
.../sql-functions/string-functions/convert_to.md | 73 +++++++++++++++++
docs/sidebars.json | 1 +
.../sql-functions/string-functions/convert_to.md | 73 +++++++++++++++++
fe/fe-core/src/main/cup/sql_parser.cup | 7 ++
.../apache/doris/analysis/FunctionCallExpr.java | 9 ++-
gensrc/script/doris_builtins_functions.py | 1 +
8 files changed, 255 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/functions/function_string.cpp
b/be/src/vec/functions/function_string.cpp
index 7e762dfe8c..5d5d45409b 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -666,6 +666,7 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionLTrim>();
factory.register_function<FunctionRTrim>();
factory.register_function<FunctionTrim>();
+ factory.register_function<FunctionConvertTo>();
factory.register_function<FunctionSubstring<Substr3Impl>>();
factory.register_function<FunctionSubstring<Substr2Impl>>();
factory.register_function<FunctionLeft>();
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index c3676656be..4d95b07a49 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -17,6 +17,13 @@
#pragma once
+#include <iconv.h>
+#include <stddef.h>
+
+#include <memory>
+
+#include "util/string_util.h"
+#include "vec/columns/column.h"
#ifndef USE_LIBCPP
#include <memory_resource>
#define PMR std::pmr
@@ -1950,4 +1957,89 @@ struct SubReplaceFourImpl {
}
};
+/// SQL function `convert_to(str, charset)`.
+///
+/// Re-encodes every string of the first argument with iconv so that a byte-wise comparison of
+/// the result follows the ordering of the target character set. The second argument must be a
+/// constant; only "gbk" (matched via doris::iequal) is accepted at the moment.
+///
+/// The iconv descriptor is created once per thread in prepare(), stored as THREAD_LOCAL
+/// function state, used by execute_impl() and released in close().
+class FunctionConvertTo : public IFunction {
+public:
+    static constexpr auto name = "convert_to";
+
+    static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    /// The converted value is always returned as a string column.
+    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    /// Validates the character-set argument and opens the iconv descriptor.
+    ///
+    /// Only acts for the THREAD_LOCAL scope. Returns InvalidArgument when the second argument
+    /// is not constant, and RuntimeError when the character set is unsupported or iconv_open
+    /// fails.
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        if (scope != FunctionContext::THREAD_LOCAL) {
+            return Status::OK();
+        }
+        if (!context->is_col_constant(1)) {
+            return Status::InvalidArgument(
+                    "character argument to convert function must be constant.");
+        }
+        const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
+        if (!doris::iequal(character_data.to_string(), "gbk")) {
+            return Status::RuntimeError(
+                    "Illegal second argument column of function convert. now only support "
+                    "convert to character set of gbk");
+        }
+
+        // NOTE(review): the descriptor targets "gb2312" although the SQL-level name is "gbk";
+        // presumably intentional, confirm whether characters outside GB2312 must be supported.
+        iconv_t cd = iconv_open("gb2312", "utf-8");
+        // iconv_open signals failure with (iconv_t)-1, not with a null handle.
+        if (cd == reinterpret_cast<iconv_t>(-1)) {
+            return Status::RuntimeError("function {} is convert to gbk failed in iconv_open",
+                                        get_name());
+        }
+        context->set_function_state(scope, cd);
+        return Status::OK();
+    }
+
+    /// Converts each row of `arguments[0]` and stores the new string column at `result`.
+    ///
+    /// Returns RuntimeError as soon as iconv rejects a row.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        ColumnPtr argument_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
+        const auto& str_offset = str_col->get_offsets();
+        const auto& str_chars = str_col->get_chars();
+        auto col_res = ColumnString::create();
+        auto& res_offset = col_res->get_offsets();
+        auto& res_chars = col_res->get_chars();
+        res_offset.resize(input_rows_count);
+        iconv_t cd = reinterpret_cast<iconv_t>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+        DCHECK(cd != nullptr);
+
+        // Start positions of the current row in the input and output byte buffers. Tracking
+        // them explicitly avoids indexing the offset arrays at position -1 for the first row.
+        size_t in_begin = 0;
+        size_t out_begin = 0;
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            size_t in_len = str_offset[i] - in_begin;
+            // The output is given as many bytes as the input occupies.
+            // NOTE(review): assumes the target encoding never needs more bytes than UTF-8;
+            // otherwise iconv fails with E2BIG and the row is reported as an error.
+            size_t out_len = in_len;
+            res_chars.resize(out_begin + out_len);
+            char* in = const_cast<char*>(reinterpret_cast<const char*>(&str_chars[in_begin]));
+            char* out = reinterpret_cast<char*>(&res_chars[out_begin]);
+            if (iconv(cd, &in, &in_len, &out, &out_len) == static_cast<size_t>(-1)) {
+                return Status::RuntimeError("function {} is convert to gbk failed in iconv",
+                                            get_name());
+            }
+            // iconv leaves the number of unused output bytes in out_len; drop them so the
+            // row does not end with stale bytes that would take part in comparisons.
+            out_begin = res_chars.size() - out_len;
+            res_chars.resize(out_begin);
+            res_offset[i] = out_begin;
+            in_begin = str_offset[i];
+        }
+        block.replace_by_position(result, std::move(col_res));
+        return Status::OK();
+    }
+
+    /// Releases the iconv descriptor created by prepare() for the THREAD_LOCAL scope.
+    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL) {
+            iconv_t cd = reinterpret_cast<iconv_t>(
+                    context->get_function_state(FunctionContext::THREAD_LOCAL));
+            // prepare() may have failed before storing a descriptor; never close a null handle.
+            if (cd != nullptr) {
+                iconv_close(cd);
+                context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
+            }
+        }
+        return Status::OK();
+    }
+};
} // namespace doris::vectorized
diff --git
a/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md
b/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md
new file mode 100644
index 0000000000..aa071f3bd5
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md
@@ -0,0 +1,73 @@
+---
+{
+ "title": "convert_to",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<version since="1.2">
+
+## convert_to
+### description
+#### Syntax
+
+` convert_to(VARCHAR column, VARCHAR character)`
+
+It is used in the ORDER BY clause, e.g. `order by convert(column using gbk)`. Currently the only supported target character set is 'gbk'.
+When the ORDER BY column contains Chinese text, the default ordering does not follow Pinyin order.
+After the character encoding of the column is converted to gbk, the rows can be sorted in Pinyin order.
+
+</version>
+
+### example
+
+```
+mysql> select * from class_test order by class_name;
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+| 6 | asd | [6] |
+| 7 | qwe | [7] |
+| 8 | z | [8] |
+| 2 | 哈 | [2] |
+| 3 | 哦 | [3] |
+| 1 | 啊 | [1] |
+| 4 | 张 | [4] |
+| 5 | 我 | [5] |
++----------+------------+-------------+
+
+mysql> select * from class_test order by convert(class_name using gbk);
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+| 6 | asd | [6] |
+| 7 | qwe | [7] |
+| 8 | z | [8] |
+| 1 | 啊 | [1] |
+| 2 | 哈 | [2] |
+| 3 | 哦 | [3] |
+| 5 | 我 | [5] |
+| 4 | 张 | [4] |
++----------+------------+-------------+
+```
+### keywords
+ convert_to
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 2378985fbf..66eac63629 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -412,6 +412,7 @@
"sql-manual/sql-functions/string-functions/split_part",
"sql-manual/sql-functions/string-functions/money_format",
"sql-manual/sql-functions/string-functions/parse_url",
+
"sql-manual/sql-functions/string-functions/convert_to",
"sql-manual/sql-functions/string-functions/extract_url_parameter",
"sql-manual/sql-functions/string-functions/uuid",
"sql-manual/sql-functions/string-functions/space",
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md
b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md
new file mode 100644
index 0000000000..7750023633
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md
@@ -0,0 +1,73 @@
+---
+{
+ "title": "convert_to",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<version since="1.2">
+
+## convert_to
+### description
+#### Syntax
+
+` convert_to(VARCHAR column, VARCHAR character)`
+在order by子句中使用,例如order by convert(column using gbk), 现在仅支持character转为'gbk'.
+因为当order by column中包含中文时,其排列不是按照汉语拼音的顺序.
+将column的字符编码转为gbk后,可实现按拼音的排列的效果.
+
+</version>
+
+### example
+
+```
+mysql> select * from class_test order by class_name;
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+| 6 | asd | [6] |
+| 7 | qwe | [7] |
+| 8 | z | [8] |
+| 2 | 哈 | [2] |
+| 3 | 哦 | [3] |
+| 1 | 啊 | [1] |
+| 4 | 张 | [4] |
+| 5 | 我 | [5] |
++----------+------------+-------------+
+
+mysql> select * from class_test order by convert(class_name using gbk);
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+| 6 | asd | [6] |
+| 7 | qwe | [7] |
+| 8 | z | [8] |
+| 1 | 啊 | [1] |
+| 2 | 哈 | [2] |
+| 3 | 哦 | [3] |
+| 5 | 我 | [5] |
+| 4 | 张 | [4] |
++----------+------------+-------------+
+
+```
+### keywords
+ convert_to
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index 4026b18d26..3bc6034a63 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -5861,6 +5861,13 @@ non_pred_expr ::=
{: RESULT = new CastExpr(targetType, e); :}
| KW_KEY encryptkey_name:name
{: RESULT = new EncryptKeyRef(name); :}
+ | KW_CONVERT LPAREN expr:e KW_USING ident:character RPAREN
+ {:
+ ArrayList<Expr> exprs = new ArrayList<>();
+ exprs.add(e);
+ exprs.add(new StringLiteral(character));
+ RESULT = new FunctionCallExpr("convert_to", new FunctionParams(exprs));
+ :}
;
expr_pipe_list ::=
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index ed0bea7362..29f76a471c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -606,7 +606,6 @@ public class FunctionCallExpr extends Expr {
}
return;
}
-
if (fnName.getFunction().equalsIgnoreCase("group_concat")) {
if (children.size() - orderByElements.size() > 2 ||
children.isEmpty()) {
throw new AnalysisException(
@@ -1229,7 +1228,13 @@ public class FunctionCallExpr extends Expr {
}
}
}
-
+ if (fnName.getFunction().equalsIgnoreCase("convert_to")) {
+ if (children.size() < 2 || !getChild(1).isConstant()) {
+ throw new AnalysisException(
+ fnName.getFunction() + " needs two params, and the
second is must be a constant: " + this
+ .toSql());
+ }
+ }
if (fn.getFunctionName().getFunction().equals("timediff")) {
fn.getReturnType().getPrimitiveType().setTimeType();
}
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index e369f79ffb..d34b0ba34b 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2483,6 +2483,7 @@ visible_functions = [
'', '', 'vec', 'ALWAYS_NULLABLE'],
# Utility functions
+ [['convert_to'], 'VARCHAR', ['VARCHAR','VARCHAR'], '','', '', 'vec', ''],
[['sleep'], 'BOOLEAN', ['INT'],
'_ZN5doris16UtilityFunctions5sleepEPN9doris_udf15FunctionContextERKNS1_6IntValE',
'', '', 'vec', ''],
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]