This is an automated email from the ASF dual-hosted git repository.
yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new ad949c2 Optimize Hex and add related Doc (#6697)
ad949c2 is described below
commit ad949c2f657d6626b97edd81a7947a605ce066ff
Author: zhoubintao <[email protected]>
AuthorDate: Wed Oct 13 11:36:14 2021 +0800
Optimize Hex and add related Doc (#6697)
I tested hex in a 1000w times for loop with random numbers,
old hex avg time cost is 4.92 s,optimize hex avg time cost is 0.46 s which
faster nearly 10x.
---
be/src/exprs/math_functions.cpp | 27 +++++--
be/test/exprs/math_functions_test.cpp | 94 ++++++++++++++++++++++
docs/.vuepress/sidebar/en.js | 2 +
docs/.vuepress/sidebar/zh-CN.js | 2 +
.../sql-functions/string-functions/hex.md | 85 +++++++++++++++++++
.../sql-functions/string-functions/unhex.md | 63 +++++++++++++++
.../sql-functions/string-functions/hex.md | 85 +++++++++++++++++++
.../sql-functions/string-functions/unhex.md | 63 +++++++++++++++
8 files changed, 415 insertions(+), 6 deletions(-)
diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp
index 40bb47a..50712d5 100644
--- a/be/src/exprs/math_functions.cpp
+++ b/be/src/exprs/math_functions.cpp
@@ -323,10 +323,25 @@ StringVal MathFunctions::hex_int(FunctionContext* ctx,
const BigIntVal& v) {
if (v.is_null) {
return StringVal::null();
}
- // TODO: this is probably unreasonably slow
- std::stringstream ss;
- ss << std::hex << std::uppercase << v.val;
- return AnyValUtil::from_string_temp(ctx, ss.str());
+
+ uint64_t num = v.val;
+ char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
'C', 'D', 'E', 'F'};
+ // uint64_t max value 0xFFFFFFFFFFFFFFFF , 16 'F'
+ // need 1 more space for '\0'
+ char ans[17];
+ int i = 0;
+ while (num) {
+ ans[i++] = hex[num & 15];
+ num = num >> 4;
+ }
+ ans[i] = '\0';
+ // reverse
+ for (int k = 0, j = i - 1; k <= j; k++, j--) {
+ char tmp = ans[j];
+ ans[j] = ans[k];
+ ans[k] = tmp;
+ }
+ return AnyValUtil::from_string_temp(ctx, ans);
}
StringVal MathFunctions::hex_string(FunctionContext* ctx, const StringVal& s) {
@@ -347,8 +362,8 @@ StringVal MathFunctions::unhex(FunctionContext* ctx, const
StringVal& s) {
if (s.is_null) {
return StringVal::null();
}
- // For uneven number of chars return empty string like Hive does.
- if (s.len % 2 != 0) {
+ // For odd number of chars return empty string like Hive does.
+ if (s.len & 1) {
return StringVal();
}
diff --git a/be/test/exprs/math_functions_test.cpp
b/be/test/exprs/math_functions_test.cpp
index 0e41318..8c397c8 100644
--- a/be/test/exprs/math_functions_test.cpp
+++ b/be/test/exprs/math_functions_test.cpp
@@ -24,6 +24,7 @@
#include "exprs/anyval_util.h"
#include "exprs/expr_context.h"
+#include "test_util/test_util.h"
#include "testutil/function_utils.h"
#include "util/logging.h"
@@ -168,6 +169,99 @@ TEST_F(MathFunctionsTest, rand) {
ASSERT_NE(dv3.val, dv4.val);
}
+TEST_F(MathFunctionsTest, hex_int) {
+ doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+ ASSERT_EQ(StringVal::null(),
+ MathFunctions::hex_string(context, StringVal::null()));
+
+ ASSERT_EQ(StringVal("7FFFFFFFFFFFFFFF"),
+ MathFunctions::hex_int(context,
BigIntVal(9223372036854775807))); //BigIntVal max_value
+
+ ASSERT_EQ(StringVal("FFE5853AB393E6C0"),
+ MathFunctions::hex_int(context, BigIntVal(-7453337203775808)));
+
+ ASSERT_EQ(StringVal(""),
+ MathFunctions::hex_int(context, BigIntVal(0)));
+
+ ASSERT_EQ(StringVal("C"),
+ MathFunctions::hex_int(context, BigIntVal(12)));
+
+ ASSERT_EQ(StringVal("90"),
+ MathFunctions::hex_int(context, BigIntVal(144)));
+
+ ASSERT_EQ(StringVal("FFFFFFFFFFFFFFFF"),
+ MathFunctions::hex_int(context, BigIntVal(-1)));
+
+ ASSERT_EQ(StringVal("FFFFFFFFFFFFFFFE"),
+ MathFunctions::hex_int(context, BigIntVal(-2)));
+
+ ASSERT_EQ(StringVal("24EC1"),
+ MathFunctions::hex_int(context, BigIntVal(151233)));
+
+ delete context;
+}
+
+TEST_F(MathFunctionsTest, hex_string) {
+ doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+ ASSERT_EQ(StringVal::null(),
+ MathFunctions::hex_string(context, StringVal::null()));
+
+ ASSERT_EQ(StringVal("30"),
+ MathFunctions::hex_string(context, StringVal("0")));
+
+ ASSERT_EQ(StringVal("31"),
+ MathFunctions::hex_string(context, StringVal("1")));
+
+ ASSERT_EQ(StringVal("313233"),
+ MathFunctions::hex_string(context, StringVal("123")));
+
+ ASSERT_EQ(StringVal("41"),
+ MathFunctions::hex_string(context, StringVal("A")));
+
+ ASSERT_EQ(StringVal("61"),
+ MathFunctions::hex_string(context, StringVal("a")));
+
+ ASSERT_EQ(StringVal("E68891"),
+ MathFunctions::hex_string(context, StringVal("我")));
+
+ ASSERT_EQ(StringVal("3F"),
+ MathFunctions::hex_string(context, StringVal("?")));
+
+ delete context;
+}
+
+TEST_F(MathFunctionsTest, unhex) {
+ doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+ ASSERT_EQ(StringVal::null(),
+ MathFunctions::unhex(context, StringVal::null()));
+
+ ASSERT_EQ(StringVal("123"),
+ MathFunctions::unhex(context, StringVal("313233")));
+
+ ASSERT_EQ(StringVal(""),
+ MathFunctions::unhex(context, StringVal("@!#")));
+
+ ASSERT_EQ(StringVal(""),
+ MathFunctions::unhex(context, StringVal("@@")));
+
+ ASSERT_EQ(StringVal("a"),
+ MathFunctions::unhex(context, StringVal("61")));
+
+ ASSERT_EQ(StringVal("123"),
+ MathFunctions::unhex(context, StringVal("313233")));
+
+ ASSERT_EQ(StringVal(""),
+ MathFunctions::unhex(context, StringVal("我")));
+
+ ASSERT_EQ(StringVal("?"),
+ MathFunctions::unhex(context, StringVal("EFBC9F")));
+
+ delete context;
+}
+
} // namespace doris
int main(int argc, char** argv) {
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index ad9adc2..4f7cdbb 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -338,6 +338,7 @@ module.exports = [
"get_json_int",
"get_json_string",
"group_concat",
+ "hex",
"instr",
"lcase",
"left",
@@ -357,6 +358,7 @@ module.exports = [
"starts_with",
"strleft",
"strright",
+ "unhex",
{
title: "fuzzy match",
directoryPath: "like/",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index 05a9eee..c9d297d 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -342,6 +342,7 @@ module.exports = [
"get_json_int",
"get_json_string",
"group_concat",
+ "hex",
"instr",
"lcase",
"left",
@@ -361,6 +362,7 @@ module.exports = [
"starts_with",
"strleft",
"strright",
+ "unhex",
{
title: "模糊匹配",
directoryPath: "like/",
diff --git a/docs/en/sql-reference/sql-functions/string-functions/hex.md
b/docs/en/sql-reference/sql-functions/string-functions/hex.md
new file mode 100644
index 0000000..cb58c0d
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/string-functions/hex.md
@@ -0,0 +1,85 @@
+---
+{
+ "title": "hex",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# hex
+## description
+### Syntax
+
+`VARCHAR hex(VARCHAR str)`
+
+`VARCHAR hex(BIGINT num)`
+
+If the input parameter is a number, the string representation of the
hexadecimal value is returned;
+
+If the input parameter is a string, each character will be converted into two
hexadecimal characters, and all the characters after the conversion will be
spliced into a string for output
+
+
+## example
+
+```
+input string
+
+mysql> select hex('1');
++----------+
+| hex('1') |
++----------+
+| 31 |
++----------+
+
+mysql> select hex('@');
++----------+
+| hex('@') |
++----------+
+| 40 |
++----------+
+
+mysql> select hex('12');
++-----------+
+| hex('12') |
++-----------+
+| 3132 |
++-----------+
+```
+
+```
+intput num
+
+mysql> select hex(12);
++---------+
+| hex(12) |
++---------+
+| C |
++---------+
+
+mysql> select hex(-1);
++------------------+
+| hex(-1) |
++------------------+
+| FFFFFFFFFFFFFFFF |
++------------------+
+```
+## keyword
+HEX
diff --git a/docs/en/sql-reference/sql-functions/string-functions/unhex.md
b/docs/en/sql-reference/sql-functions/string-functions/unhex.md
new file mode 100644
index 0000000..eba746e
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/string-functions/unhex.md
@@ -0,0 +1,63 @@
+---
+{
+ "title": "unhex",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# unhex
+## description
+### Syntax
+
+`VARCHAR unhex(VARCHAR str)`
+
+Enter a string, if the length of the string is 0 or an odd number, an empty
string is returned;
+If the string contains characters other than `[0-9], [a-z], [A-Z]`, an empty
string is returned;
+In other cases, every two characters are a group of characters converted into
hexadecimal, and then spliced into a string for output.
+
+
+## example
+
+```
+mysql> select unhex('@');
++------------+
+| unhex('@') |
++------------+
+| |
++------------+
+
+mysql> select unhex('41');
++-------------+
+| unhex('41') |
++-------------+
+| A |
++-------------+
+
+mysql> select unhex('4142');
++---------------+
+| unhex('4142') |
++---------------+
+| AB |
++---------------+
+```
+## keyword
+UNHEX
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md
new file mode 100644
index 0000000..51d861f
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md
@@ -0,0 +1,85 @@
+---
+{
+ "title": "hex",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# hex
+## description
+### Syntax
+
+`VARCHAR hex(VARCHAR str)`
+
+`VARCHAR hex(BIGINT num)`
+
+如果输入参数是数字,返回十六进制值的字符串表示形式;
+
+如果输入参数是字符串,则将每个字符转化为两个十六进制的字符,将转化后的所有字符拼接为字符串输出
+
+
+## example
+
+```
+输入字符串
+
+mysql> select hex('1');
++----------+
+| hex('1') |
++----------+
+| 31 |
++----------+
+
+mysql> select hex('@');
++----------+
+| hex('@') |
++----------+
+| 40 |
++----------+
+
+mysql> select hex('12');
++-----------+
+| hex('12') |
++-----------+
+| 3132 |
++-----------+
+```
+
+```
+输入数字
+
+mysql> select hex(12);
++---------+
+| hex(12) |
++---------+
+| C |
++---------+
+
+mysql> select hex(-1);
++------------------+
+| hex(-1) |
++------------------+
+| FFFFFFFFFFFFFFFF |
++------------------+
+```
+## keyword
+HEX
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md
new file mode 100644
index 0000000..f33e75d
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md
@@ -0,0 +1,63 @@
+---
+{
+ "title": "unhex",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# unhex
+## description
+### Syntax
+
+`VARCHAR unhex(VARCHAR str)`
+
+输入字符串,如果字符串长度为0或者为奇数,返回空串;
+如果字符串中包含`[0-9]、[a-z]、[A-Z]`之外的字符,返回空串;
+其他情况每两个字符为一组转化为16进制后的字符,然后拼接成字符串输出
+
+
+## example
+
+```
+mysql> select unhex('@');
++------------+
+| unhex('@') |
++------------+
+| |
++------------+
+
+mysql> select unhex('41');
++-------------+
+| unhex('41') |
++-------------+
+| A |
++-------------+
+
+mysql> select unhex('4142');
++---------------+
+| unhex('4142') |
++---------------+
+| AB |
++---------------+
+```
+## keyword
+UNHEX
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]