This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ad949c2  Optimize Hex and add related Doc (#6697)
ad949c2 is described below

commit ad949c2f657d6626b97edd81a7947a605ce066ff
Author: zhoubintao <[email protected]>
AuthorDate: Wed Oct 13 11:36:14 2021 +0800

    Optimize Hex and add related Doc (#6697)
    
    I tested hex in a 1000w times for loop with random numbers,
    old hex avg time cost is 4.92 s,optimize hex avg time cost is 0.46 s which 
faster nearly 10x.
---
 be/src/exprs/math_functions.cpp                    | 27 +++++--
 be/test/exprs/math_functions_test.cpp              | 94 ++++++++++++++++++++++
 docs/.vuepress/sidebar/en.js                       |  2 +
 docs/.vuepress/sidebar/zh-CN.js                    |  2 +
 .../sql-functions/string-functions/hex.md          | 85 +++++++++++++++++++
 .../sql-functions/string-functions/unhex.md        | 63 +++++++++++++++
 .../sql-functions/string-functions/hex.md          | 85 +++++++++++++++++++
 .../sql-functions/string-functions/unhex.md        | 63 +++++++++++++++
 8 files changed, 415 insertions(+), 6 deletions(-)

diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp
index 40bb47a..50712d5 100644
--- a/be/src/exprs/math_functions.cpp
+++ b/be/src/exprs/math_functions.cpp
@@ -323,10 +323,25 @@ StringVal MathFunctions::hex_int(FunctionContext* ctx, 
const BigIntVal& v) {
     if (v.is_null) {
         return StringVal::null();
     }
-    // TODO: this is probably unreasonably slow
-    std::stringstream ss;
-    ss << std::hex << std::uppercase << v.val;
-    return AnyValUtil::from_string_temp(ctx, ss.str());
+
+    uint64_t num = v.val;
+    char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 
'C', 'D', 'E', 'F'};
+    // uint64_t max value 0xFFFFFFFFFFFFFFFF , 16 'F'
+    // need 1 more space for '\0'
+    char ans[17];
+    int i = 0;
+    while (num) {
+        ans[i++] = hex[num & 15];
+        num = num >> 4;
+    }
+    ans[i] = '\0';
+    // reverse
+    for (int k = 0, j = i - 1; k <= j; k++, j--) {
+        char tmp = ans[j];
+        ans[j] = ans[k];
+        ans[k] = tmp;
+    }
+    return AnyValUtil::from_string_temp(ctx, ans);
 }
 
 StringVal MathFunctions::hex_string(FunctionContext* ctx, const StringVal& s) {
@@ -347,8 +362,8 @@ StringVal MathFunctions::unhex(FunctionContext* ctx, const 
StringVal& s) {
     if (s.is_null) {
         return StringVal::null();
     }
-    // For uneven number of chars return empty string like Hive does.
-    if (s.len % 2 != 0) {
+    // For odd number of chars return empty string like Hive does.
+    if (s.len & 1) {
         return StringVal();
     }
 
diff --git a/be/test/exprs/math_functions_test.cpp 
b/be/test/exprs/math_functions_test.cpp
index 0e41318..8c397c8 100644
--- a/be/test/exprs/math_functions_test.cpp
+++ b/be/test/exprs/math_functions_test.cpp
@@ -24,6 +24,7 @@
 
 #include "exprs/anyval_util.h"
 #include "exprs/expr_context.h"
+#include "test_util/test_util.h"
 #include "testutil/function_utils.h"
 #include "util/logging.h"
 
@@ -168,6 +169,99 @@ TEST_F(MathFunctionsTest, rand) {
     ASSERT_NE(dv3.val, dv4.val);
 }
 
+TEST_F(MathFunctionsTest, hex_int) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+    
+    ASSERT_EQ(StringVal::null(),
+              MathFunctions::hex_string(context, StringVal::null()));
+    
+    ASSERT_EQ(StringVal("7FFFFFFFFFFFFFFF"),
+              MathFunctions::hex_int(context, 
BigIntVal(9223372036854775807))); //BigIntVal max_value
+    
+    ASSERT_EQ(StringVal("FFE5853AB393E6C0"),
+              MathFunctions::hex_int(context, BigIntVal(-7453337203775808)));
+    
+    ASSERT_EQ(StringVal(""),
+              MathFunctions::hex_int(context, BigIntVal(0)));
+    
+    ASSERT_EQ(StringVal("C"),
+              MathFunctions::hex_int(context, BigIntVal(12)));
+    
+    ASSERT_EQ(StringVal("90"),
+              MathFunctions::hex_int(context, BigIntVal(144)));
+    
+    ASSERT_EQ(StringVal("FFFFFFFFFFFFFFFF"),
+              MathFunctions::hex_int(context, BigIntVal(-1)));
+    
+    ASSERT_EQ(StringVal("FFFFFFFFFFFFFFFE"),
+              MathFunctions::hex_int(context, BigIntVal(-2)));
+    
+    ASSERT_EQ(StringVal("24EC1"),
+              MathFunctions::hex_int(context, BigIntVal(151233)));
+    
+    delete context;
+}
+
+TEST_F(MathFunctionsTest, hex_string) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+    
+    ASSERT_EQ(StringVal::null(),
+              MathFunctions::hex_string(context, StringVal::null()));
+    
+    ASSERT_EQ(StringVal("30"),
+     MathFunctions::hex_string(context, StringVal("0")));
+
+    ASSERT_EQ(StringVal("31"),
+     MathFunctions::hex_string(context, StringVal("1")));
+
+    ASSERT_EQ(StringVal("313233"),
+     MathFunctions::hex_string(context, StringVal("123")));
+
+    ASSERT_EQ(StringVal("41"),
+     MathFunctions::hex_string(context, StringVal("A")));
+
+    ASSERT_EQ(StringVal("61"),
+     MathFunctions::hex_string(context, StringVal("a")));
+
+    ASSERT_EQ(StringVal("E68891"),
+     MathFunctions::hex_string(context, StringVal("我")));
+
+    ASSERT_EQ(StringVal("3F"),
+              MathFunctions::hex_string(context, StringVal("?")));
+    
+    delete context;
+}
+
+TEST_F(MathFunctionsTest, unhex) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+    
+    ASSERT_EQ(StringVal::null(),
+     MathFunctions::unhex(context, StringVal::null()));
+    
+    ASSERT_EQ(StringVal("123"),
+              MathFunctions::unhex(context, StringVal("313233")));
+
+    ASSERT_EQ(StringVal(""),
+              MathFunctions::unhex(context, StringVal("@!#")));
+    
+    ASSERT_EQ(StringVal(""),
+              MathFunctions::unhex(context, StringVal("@@")));
+    
+    ASSERT_EQ(StringVal("a"),
+              MathFunctions::unhex(context, StringVal("61")));
+    
+    ASSERT_EQ(StringVal("123"),
+              MathFunctions::unhex(context, StringVal("313233")));
+    
+    ASSERT_EQ(StringVal(""),
+              MathFunctions::unhex(context, StringVal("我")));
+    
+    ASSERT_EQ(StringVal("?"),
+              MathFunctions::unhex(context, StringVal("EFBC9F")));
+    
+    delete context;
+}
+
 } // namespace doris
 
 int main(int argc, char** argv) {
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index ad9adc2..4f7cdbb 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -338,6 +338,7 @@ module.exports = [
               "get_json_int",
               "get_json_string",
               "group_concat",
+              "hex",
               "instr",
               "lcase",
               "left",
@@ -357,6 +358,7 @@ module.exports = [
               "starts_with",
               "strleft",
               "strright",
+              "unhex",
               {
                 title: "fuzzy match",
                 directoryPath: "like/",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index 05a9eee..c9d297d 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -342,6 +342,7 @@ module.exports = [
               "get_json_int",
               "get_json_string",
               "group_concat",
+              "hex",
               "instr",
               "lcase",
               "left",
@@ -361,6 +362,7 @@ module.exports = [
               "starts_with",
               "strleft",
               "strright",
+              "unhex",
               {
                 title: "模糊匹配",
                 directoryPath: "like/",
diff --git a/docs/en/sql-reference/sql-functions/string-functions/hex.md 
b/docs/en/sql-reference/sql-functions/string-functions/hex.md
new file mode 100644
index 0000000..cb58c0d
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/string-functions/hex.md
@@ -0,0 +1,85 @@
+---
+{
+    "title": "hex",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# hex
+## description
+### Syntax
+
+`VARCHAR hex(VARCHAR str)`
+
+`VARCHAR hex(BIGINT num)`
+
+If the input parameter is a number, the string representation of the 
hexadecimal value is returned;
+
+If the input parameter is a string, each character will be converted into two 
hexadecimal characters, and all the characters after the conversion will be 
spliced into a string for output
+
+
+## example
+
+```
+input string
+
+mysql> select hex('1');
++----------+
+| hex('1') |
++----------+
+| 31       |
++----------+
+
+mysql> select hex('@');
++----------+
+| hex('@') |
++----------+
+| 40       |
++----------+
+
+mysql> select hex('12');
++-----------+
+| hex('12') |
++-----------+
+| 3132      |
++-----------+
+```
+
+```
+intput num
+
+mysql> select hex(12);
++---------+
+| hex(12) |
++---------+
+| C       |
++---------+
+
+mysql> select hex(-1);
++------------------+
+| hex(-1)          |
++------------------+
+| FFFFFFFFFFFFFFFF |
++------------------+
+```
+## keyword
+HEX
diff --git a/docs/en/sql-reference/sql-functions/string-functions/unhex.md 
b/docs/en/sql-reference/sql-functions/string-functions/unhex.md
new file mode 100644
index 0000000..eba746e
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/string-functions/unhex.md
@@ -0,0 +1,63 @@
+---
+{
+    "title": "unhex",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# unhex
+## description
+### Syntax
+
+`VARCHAR unhex(VARCHAR str)`
+
+Enter a string, if the length of the string is 0 or an odd number, an empty 
string is returned;
+If the string contains characters other than `[0-9], [a-z], [A-Z]`, an empty 
string is returned;
+In other cases, every two characters are a group of characters converted into 
hexadecimal, and then spliced into a string for output.
+
+
+## example
+
+```
+mysql> select unhex('@');
++------------+
+| unhex('@') |
++------------+
+|            |
++------------+
+
+mysql> select unhex('41');
++-------------+
+| unhex('41') |
++-------------+
+| A           |
++-------------+
+
+mysql> select unhex('4142');
++---------------+
+| unhex('4142') |
++---------------+
+| AB            |
++---------------+
+```
+## keyword
+UNHEX
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md
new file mode 100644
index 0000000..51d861f
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md
@@ -0,0 +1,85 @@
+---
+{
+    "title": "hex",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# hex
+## description
+### Syntax
+
+`VARCHAR hex(VARCHAR str)`
+
+`VARCHAR hex(BIGINT num)`
+
+如果输入参数是数字,返回十六进制值的字符串表示形式;
+
+如果输入参数是字符串,则将每个字符转化为两个十六进制的字符,将转化后的所有字符拼接为字符串输出
+
+
+## example
+
+```
+输入字符串
+
+mysql> select hex('1');
++----------+
+| hex('1') |
++----------+
+| 31       |
++----------+
+
+mysql> select hex('@');
++----------+
+| hex('@') |
++----------+
+| 40       |
++----------+
+
+mysql> select hex('12');
++-----------+
+| hex('12') |
++-----------+
+| 3132      |
++-----------+
+```
+
+```
+输入数字
+
+mysql> select hex(12);
++---------+
+| hex(12) |
++---------+
+| C       |
++---------+
+
+mysql> select hex(-1);
++------------------+
+| hex(-1)          |
++------------------+
+| FFFFFFFFFFFFFFFF |
++------------------+
+```
+## keyword
+HEX
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md
new file mode 100644
index 0000000..f33e75d
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md
@@ -0,0 +1,63 @@
+---
+{
+    "title": "unhex",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# unhex
+## description
+### Syntax
+
+`VARCHAR unhex(VARCHAR str)`
+
+输入字符串,如果字符串长度为0或者为奇数,返回空串;
+如果字符串中包含`[0-9]、[a-z]、[A-Z]`之外的字符,返回空串;
+其他情况每两个字符为一组转化为16进制后的字符,然后拼接成字符串输出
+
+
+## example
+
+```
+mysql> select unhex('@');
++------------+
+| unhex('@') |
++------------+
+|            |
++------------+
+
+mysql> select unhex('41');
++-------------+
+| unhex('41') |
++-------------+
+| A           |
++-------------+
+
+mysql> select unhex('4142');
++---------------+
+| unhex('4142') |
++---------------+
+| AB            |
++---------------+
+```
+## keyword
+UNHEX

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to