This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 94b3a2b  [Bug] Fix string functions not support multibyte string 
(#3345)
94b3a2b is described below

commit 94b3a2bd501aa6c6e1f29f3c3ee5dafcf1256e3c
Author: yangzhg <[email protected]>
AuthorDate: Fri May 8 12:52:46 2020 +0800

    [Bug] Fix string functions not support multibyte string (#3345)
    
    Let string functions support utf8 encoding
---
 be/src/exprs/string_functions.cpp                  | 106 ++++++++++++++++-----
 be/src/exprs/string_functions.h                    |   2 +
 be/test/exprs/string_functions_test.cpp            |  80 ++++++++++++++++
 .../string-functions/{length.md => char_length.md} |  39 ++++----
 .../sql-functions/string-functions/left.md         |   2 +-
 .../sql-functions/string-functions/length.md       |   2 +-
 .../sql-functions/string-functions/reverse.md}     |  36 +++----
 .../sql-functions/string-functions/right.md        |   2 +-
 .../sql-functions/string-functions/strleft.md      |   2 +-
 .../sql-functions/string-functions/strright.md     |   2 +-
 .../string-functions/{length.md => char_length.md} |  36 +++----
 .../sql-functions/string-functions/left.md         |   2 +-
 .../sql-functions/string-functions/length.md       |   2 +-
 .../string-functions/{length.md => reverse.md}     |  34 +++----
 .../sql-functions/string-functions/right.md        |   2 +-
 .../sql-functions/string-functions/strleft.md      |   2 +-
 .../sql-functions/string-functions/strright.md     |   2 +-
 gensrc/script/doris_builtins_functions.py          |   4 +-
 18 files changed, 254 insertions(+), 103 deletions(-)

diff --git a/be/src/exprs/string_functions.cpp 
b/be/src/exprs/string_functions.cpp
index e64b745..06895ce 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -19,12 +19,12 @@
 
 #include <re2/re2.h>
 
-#include "exprs/expr.h"
 #include "exprs/anyval_util.h"
+#include "exprs/expr.h"
+#include "math_functions.h"
 #include "runtime/string_value.hpp"
 #include "runtime/tuple_row.h"
 #include "util/url_parser.h"
-#include "math_functions.h"
 
 // NOTE: be careful not to use string::append.  It is not performant.
 namespace doris {
@@ -32,24 +32,68 @@ namespace doris {
 void StringFunctions::init() {
 }
 
+size_t get_utf8_byte_length(unsigned char byte) {
+    size_t char_size = 0;
+    if (byte >= 0xFC) {
+        char_size = 6;
+    } else if (byte >= 0xF8) {
+        char_size = 5;
+    } else if (byte >= 0xF0) {
+        char_size = 4;
+    } else if (byte >= 0xE0) {
+        char_size = 3;
+    } else if (byte >= 0xC0) {
+        char_size = 2;
+    } else {
+        char_size = 1;
+    }
+    return char_size;
+}
+
 // This behaves identically to the mysql implementation, namely:
 //  - 1-indexed positions
 //  - supported negative positions (count from the end of the string)
 //  - [optional] len.  No len indicates longest substr possible
 StringVal StringFunctions::substring(
-        FunctionContext* context, const StringVal& str, 
+        FunctionContext* context, const StringVal& str,
         const IntVal& pos, const IntVal& len) {
-    if (str.is_null || pos.is_null || len.is_null) {
+    if (str.is_null || pos.is_null || len.is_null || pos.val > str.len) {
         return StringVal::null();
     }
+    if (len.val <= 0 || str.len == 0) {
+        return StringVal();
+    }
+
+    // create index indicate every char start byte
+    // e.g.  "hello word 你好" => [0,1,2,3,4,5,6,7,8,9,10,11,14] 你 and 好 are 3 
bytes
+    // why use a vector as index? It is unnecessary if there is no negative 
pos val,
+    // but if pos is negative it is not easy to determine where to start, 
so we need an
+    // index that saves every character's start byte
+    size_t byte_pos = 0;
+    std::vector<size_t> index;
+    for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+        char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+        index.push_back(byte_pos);
+        byte_pos += char_size;
+        if (pos.val > 0 && index.size() > pos.val + len.val) {
+            break;
+        }
+    }
+
     int fixed_pos = pos.val;
     if (fixed_pos < 0) {
-        fixed_pos = str.len + fixed_pos + 1;
+        fixed_pos = index.size() + fixed_pos + 1;
+    }
+    if (fixed_pos > index.size()) {
+        return StringVal::null();
     }
-    int max_len = str.len - fixed_pos + 1;
-    int fixed_len = std::min(static_cast<int>(len.val), max_len);
-    if (fixed_pos > 0 && fixed_pos <= str.len && fixed_len > 0) {
-        return StringVal(str.ptr + fixed_pos - 1, fixed_len);
+    byte_pos = index[fixed_pos - 1];
+    int fixed_len = str.len - byte_pos;
+    if (fixed_pos + len.val <= index.size()) {
+        fixed_len = index[fixed_pos + len.val - 1] - byte_pos;
+    }
+    if (byte_pos <= str.len && fixed_len > 0) {
+        return StringVal(str.ptr + byte_pos, fixed_len);
     } else {
         return StringVal();
     }
@@ -118,7 +162,7 @@ StringVal StringFunctions::space(FunctionContext* context, 
const IntVal& len) {
     int32_t space_size = std::min(len.val, 65535);
     // TODO pengyubing
     // StringVal result = StringVal::create_temp_string_val(context, 
space_size);
-    StringVal result(context, space_size);  
+    StringVal result(context, space_size);
     memset(result.ptr, ' ', space_size);
     return result;
 }
@@ -147,7 +191,7 @@ StringVal StringFunctions::repeat(
 }
 
 StringVal StringFunctions::lpad(
-        FunctionContext* context, const StringVal& str, 
+        FunctionContext* context, const StringVal& str,
         const IntVal& len, const StringVal& pad) {
     if (str.is_null || len.is_null || pad.is_null || len.val < 0) {
         return StringVal::null();
@@ -223,6 +267,23 @@ IntVal StringFunctions::length(FunctionContext* context, 
const StringVal& str) {
     return IntVal(str.len);
 }
 
+// Implementation of CHAR_LENGTH
+//   int char_utf8_length(string input)
+// Returns the length of characters of input. If input == NULL, returns
+// NULL per MySQL
+IntVal StringFunctions::char_utf8_length(FunctionContext* context, const 
StringVal& str) {
+    if (str.is_null) {
+        return IntVal::null();
+    }
+    size_t char_len = 0;
+    std::vector<size_t> index;
+    for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+        char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+        ++char_len;
+    }
+    return IntVal(char_len);
+}
+
 StringVal StringFunctions::lower(FunctionContext* context, const StringVal& 
str) {
     if (str.is_null) {
         return StringVal::null();
@@ -260,13 +321,16 @@ StringVal StringFunctions::reverse(FunctionContext* 
context, const StringVal& st
         return StringVal::null();
     }
 
-    // TODO pengyubing
-    // StringVal result = StringVal::create_temp_string_val(context, str.len);
     StringVal result(context, str.len);
     if (UNLIKELY(result.is_null)) {
         return result;
     }
-    std::reverse_copy(str.ptr, str.ptr + str.len, result.ptr);
+
+    for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+        char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+        std::copy(str.ptr + i, str.ptr + i + char_size, result.ptr + 
result.len - i - char_size);
+    }
+
     return result;
 }
 
@@ -370,7 +434,7 @@ IntVal StringFunctions::locate_pos(
 // This function sets options in the RE2 library before pattern matching.
 bool StringFunctions::set_re2_options(
         const StringVal& match_parameter,
-        std::string* error_str, 
+        std::string* error_str,
         re2::RE2::Options* opts) {
     for (int i = 0; i < match_parameter.len; i++) {
         char match = match_parameter.ptr[i];
@@ -401,7 +465,7 @@ bool StringFunctions::set_re2_options(
 
 // The caller owns the returned regex. Returns NULL if the pattern could not 
be compiled.
 static re2::RE2* compile_regex(
-        const StringVal& pattern, 
+        const StringVal& pattern,
         std::string* error_str,
         const StringVal& match_parameter) {
     re2::StringPiece pattern_sp(reinterpret_cast<char*>(pattern.ptr), 
pattern.len);
@@ -418,7 +482,7 @@ static re2::RE2* compile_regex(
     re2::RE2* re = new re2::RE2(pattern_sp, options);
     if (!re->ok()) {
         std::stringstream ss;
-        ss << "Could not compile regexp pattern: " << 
AnyValUtil::to_string(pattern) 
+        ss << "Could not compile regexp pattern: " << 
AnyValUtil::to_string(pattern)
             << std::endl << "Error: " << re->error();
         *error_str = ss.str();
         delete re;
@@ -558,7 +622,7 @@ StringVal StringFunctions::concat(
 }
 
 StringVal StringFunctions::concat_ws(
-        FunctionContext* context, const StringVal& sep, 
+        FunctionContext* context, const StringVal& sep,
         int num_children, const StringVal* strs) {
     DCHECK_GE(num_children, 1);
     if (sep.is_null) {
@@ -633,9 +697,9 @@ IntVal StringFunctions::find_in_set(
 }
 
 void StringFunctions::parse_url_prepare(
-        FunctionContext* ctx, 
+        FunctionContext* ctx,
         FunctionContext::FunctionStateScope scope) {
-    if (scope != FunctionContext::FRAGMENT_LOCAL) { 
+    if (scope != FunctionContext::FRAGMENT_LOCAL) {
         return;
     }
     if (!ctx->is_arg_constant(1)) {
@@ -818,7 +882,6 @@ static int index_of(const uint8_t* source, int 
source_offset, int source_count,
     return -1;
 }
 
-
 StringVal StringFunctions::split_part(FunctionContext* context, const 
StringVal& content,
                                       const StringVal& delimiter, const 
IntVal& field) {
     if (content.is_null || delimiter.is_null || field.is_null || field.val <= 
0) {
@@ -847,5 +910,4 @@ StringVal StringFunctions::split_part(FunctionContext* 
context, const StringVal&
     int len = (find[field.val - 1] == -1 ? content.len : find[field.val - 1]) 
- start_pos;
     return StringVal(content.ptr + start_pos, len);
 }
-
 }
diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h
index 31139d3..3c395c9 100644
--- a/be/src/exprs/string_functions.h
+++ b/be/src/exprs/string_functions.h
@@ -70,6 +70,8 @@ public:
         const doris_udf::IntVal& len, const doris_udf::StringVal& pad); 
     static doris_udf::IntVal length(
         doris_udf::FunctionContext* context, const doris_udf::StringVal& str);
+    static doris_udf::IntVal char_utf8_length(
+        doris_udf::FunctionContext* context, const doris_udf::StringVal& str);
     static doris_udf::StringVal lower(
         doris_udf::FunctionContext* context, const doris_udf::StringVal& str);
     static doris_udf::StringVal upper(
diff --git a/be/test/exprs/string_functions_test.cpp 
b/be/test/exprs/string_functions_test.cpp
index a9b76e6..3b257b9 100644
--- a/be/test/exprs/string_functions_test.cpp
+++ b/be/test/exprs/string_functions_test.cpp
@@ -17,6 +17,7 @@
 
 #include "exprs/string_functions.h"
 #include "util/logging.h"
+#include "testutil/function_utils.h"
 #include "exprs/anyval_util.h"
 #include <iostream>
 #include <string>
@@ -252,9 +253,88 @@ TEST_F(StringFunctionsTest, null_or_empty) {
     delete context;
 }
 
+TEST_F(StringFunctionsTest, substring) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("hello")),
+            StringFunctions::substring(context, StringVal("hello word"), 1, 
5));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("word")),
+            StringFunctions::substring(context, StringVal("hello word"), 7, 
4));
+
+    ASSERT_EQ(StringVal::null(),
+            StringFunctions::substring(context, StringVal::null(), 1, 0));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")),
+            StringFunctions::substring(context, StringVal("hello word"), 1, 
0));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string(" word")),
+            StringFunctions::substring(context, StringVal("hello word"), -5, 
5));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("hello word 
你")),
+            StringFunctions::substring(context, StringVal("hello word 你好"), 1, 
12));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("好")),
+            StringFunctions::substring(context, StringVal("hello word 你好"), 
13, 1));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")),
+            StringFunctions::substring(context, StringVal("hello word 你好"), 1, 
0));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("rd 你好")),
+            StringFunctions::substring(context, StringVal("hello word 你好"), 
-5, 5));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("h")),
+            StringFunctions::substring(context, StringVal("hello word 你好"), 1, 
1));
+}
+
+TEST_F(StringFunctionsTest, reverse) {
+    FunctionUtils fu;
+    doris_udf::FunctionContext* context = fu.get_fn_ctx();
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("olleh")),
+            StringFunctions::reverse(context, StringVal("hello")));
+    ASSERT_EQ(StringVal::null(),
+            StringFunctions::reverse(context, StringVal::null()));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")),
+            StringFunctions::reverse(context, StringVal("")));
+
+    ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("好你olleh")),
+            StringFunctions::reverse(context, StringVal("hello你好")));
+}
+
+TEST_F(StringFunctionsTest, length) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+    ASSERT_EQ(IntVal(5),
+            StringFunctions::length(context, StringVal("hello")));
+    ASSERT_EQ(IntVal(5),
+            StringFunctions::char_utf8_length(context, StringVal("hello")));
+    ASSERT_EQ(IntVal::null(),
+            StringFunctions::length(context, StringVal::null()));
+    ASSERT_EQ(IntVal::null(),
+            StringFunctions::char_utf8_length(context, StringVal::null()));
+
+    ASSERT_EQ(IntVal(0),
+            StringFunctions::length(context, StringVal("")));
+    ASSERT_EQ(IntVal(0),
+            StringFunctions::char_utf8_length(context, StringVal("")));
+            
+    ASSERT_EQ(IntVal(11),
+            StringFunctions::length(context, StringVal("hello你好")));
+            
+    ASSERT_EQ(IntVal(7),
+            StringFunctions::char_utf8_length(context, StringVal("hello你好")));
+}
+
 }
 
 int main(int argc, char** argv) {
+    std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
+    if (!doris::config::init(conffile.c_str(), false)) {
+        fprintf(stderr, "error read config file. \n");
+        return -1;
+    }
     ::testing::InitGoogleTest(&argc, argv);
     return RUN_ALL_TESTS();
 }
diff --git a/docs/en/sql-reference/sql-functions/string-functions/length.md 
b/docs/en/sql-reference/sql-functions/string-functions/char_length.md
similarity index 66%
copy from docs/en/sql-reference/sql-functions/string-functions/length.md
copy to docs/en/sql-reference/sql-functions/string-functions/char_length.md
index ff2c961..e747889 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/char_length.md
@@ -1,6 +1,6 @@
 ---
 {
-    "title": "length",
+    "title": "CHAR_LENGTH",
     "language": "en"
 }
 ---
@@ -24,31 +24,32 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# length
+# char_length
 ## Description
 ### Syntax
 
-'INT length (VARCHAR str)'
+'INT char_length (VARCHAR str)'
 
 
-Returns the length of the string and the number of characters returned for 
multi-byte characters. For example, five two-byte width words return a length 
of 10.
+Returns the length of the string in characters; a multi-byte character is 
counted as one character. For example, five two-byte characters return a length 
of 5. Only UTF-8 encoding is supported in the current version.
 
 ## example
 
+
 ```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-|             3 |
-+---------------+
-
-mysql> select length("中国");
-+------------------+
-| length('中国')   |
-+------------------+
-|                6 |
-+------------------+
+mysql> select char_length("abc");
++--------------------+
+| char_length('abc') |
++--------------------+
+|                  3 |
++--------------------+
+
+mysql> select char_length("中国");
++-----------------------+
+| char_length('中国')   |
++-----------------------+
+|                     2 |
++-----------------------+
 ```
-##keyword
-LENGTH
+## keyword
+CHAR_LENGTH
diff --git a/docs/en/sql-reference/sql-functions/string-functions/left.md 
b/docs/en/sql-reference/sql-functions/string-functions/left.md
index 074bc34..b570a5b 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/left.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/left.md
@@ -31,7 +31,7 @@ under the License.
 'VARCHAR left (VARCHAR str)'
 
 
-It returns the left part of a string of specified length
+It returns the left part of a string of the specified length; the length is 
counted in characters, not bytes.
 
 ## example
 
diff --git a/docs/en/sql-reference/sql-functions/string-functions/length.md 
b/docs/en/sql-reference/sql-functions/string-functions/length.md
index ff2c961..6f345de 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/length.md
@@ -31,7 +31,7 @@ under the License.
 'INT length (VARCHAR str)'
 
 
-Returns the length of the string and the number of characters returned for 
multi-byte characters. For example, five two-byte width words return a length 
of 10.
+Returns the length of the string in byte size.
 
 ## example
 
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md 
b/docs/en/sql-reference/sql-functions/string-functions/reverse.md
similarity index 67%
copy from docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
copy to docs/en/sql-reference/sql-functions/string-functions/reverse.md
index f86e3ee..7118e74 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/reverse.md
@@ -1,7 +1,7 @@
 ---
 {
-    "title": "length",
-    "language": "zh-CN"
+    "title": "REVERSE",
+    "language": "en"
 }
 ---
 
@@ -24,31 +24,33 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# length
+# reverse
 ## description
 ### Syntax
 
-`INT length(VARCHAR str)`
+`VARCHAR reverse(VARCHAR str)`
 
 
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+The REVERSE() function reverses a string and returns the result.
 
 ## example
 
 ```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-|             3 |
-+---------------+
-
-mysql> select length("中国");
+mysql> SELECT REVERSE('hello');
 +------------------+
-| length('中国')   |
+| REVERSE('hello') |
 +------------------+
-|                6 |
+| olleh            |
 +------------------+
+1 row in set (0.00 sec)
+
+mysql> SELECT REVERSE('你好');
++------------------+
+| REVERSE('你好')   |
++------------------+
+| 好你              |
++------------------+
+1 row in set (0.00 sec)
 ```
-##keyword
-LENGTH
+## keyword
+REVERSE
diff --git a/docs/en/sql-reference/sql-functions/string-functions/right.md 
b/docs/en/sql-reference/sql-functions/string-functions/right.md
index d8e356a..e67a5ac 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/right.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/right.md
@@ -31,7 +31,7 @@ under the License.
 'VARCHAR RIGHT (VARCHAR STR)'
 
 
-It returns the right part of a string of specified length
+It returns the right part of a string of the specified length; the length is 
counted in characters, not bytes.
 
 ## example
 
diff --git a/docs/en/sql-reference/sql-functions/string-functions/strleft.md 
b/docs/en/sql-reference/sql-functions/string-functions/strleft.md
index 81d6ee8..d1b1c30 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/strleft.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/strleft.md
@@ -31,7 +31,7 @@ under the License.
 'VARCHAR STRAIGHT (VARCHAR STR)'
 
 
-It returns the left part of a string of specified length
+It returns the left part of a string of the specified length; the length is 
counted in characters, not bytes.
 
 ## example
 
diff --git a/docs/en/sql-reference/sql-functions/string-functions/strright.md 
b/docs/en/sql-reference/sql-functions/string-functions/strright.md
index 25707cd..42a43ec 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/strright.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/strright.md
@@ -32,7 +32,7 @@ under the License.
 'VARCHAR strright (VARCHAR str)'
 
 
-It returns the right part of a string of specified length
+It returns the right part of a string of the specified length; the length is 
counted in characters, not bytes.
 
 ## example
 
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md
similarity index 63%
copy from docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
copy to docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md
index f86e3ee..9bd3c1f 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md
@@ -1,6 +1,6 @@
 ---
 {
-    "title": "length",
+    "title": "CHAR_LENGTH",
     "language": "zh-CN"
 }
 ---
@@ -24,31 +24,31 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# length
+# char_length
 ## description
 ### Syntax
 
-`INT length(VARCHAR str)`
+`INT char_length(VARCHAR str)`
 
 
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+返回字符串的长度,对于多字节字符,返回字符数, 目前仅支持utf8 编码。
 
 ## example
 
 ```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-|             3 |
-+---------------+
-
-mysql> select length("中国");
-+------------------+
-| length('中国')   |
-+------------------+
-|                6 |
-+------------------+
+mysql> select char_length("abc");
++--------------------+
+| char_length('abc') |
++--------------------+
+|                  3 |
++--------------------+
+
+mysql> select char_length("中国");
++-----------------------+
+| char_length('中国')   |
++-----------------------+
+|                     2 |
++-----------------------+
 ```
 ##keyword
-LENGTH
+CHAR_LENGTH
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md
index 99a9b83..2c73243 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md
@@ -31,7 +31,7 @@ under the License.
 `VARCHAR left(VARCHAR str)`
 
 
-它返回具有指定长度的字符串的左边部分
+它返回具有指定长度的字符串的左边部分, 长度的单位为utf8字符
 
 ## example
 
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
index f86e3ee..53fa1f9 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
@@ -31,7 +31,7 @@ under the License.
 `INT length(VARCHAR str)`
 
 
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+返回字符串的字节数。
 
 ## example
 
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md
similarity index 67%
copy from docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
copy to docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md
index f86e3ee..1daf780 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md
@@ -1,6 +1,6 @@
 ---
 {
-    "title": "length",
+    "title": "REVERSE",
     "language": "zh-CN"
 }
 ---
@@ -24,31 +24,33 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# length
+# reverse
 ## description
 ### Syntax
 
-`INT length(VARCHAR str)`
+`VARCHAR reverse(VARCHAR str)`
 
 
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+将字符串反转,返回的字符串的顺序和源字符串的顺序相反。
 
 ## example
 
 ```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-|             3 |
-+---------------+
-
-mysql> select length("中国");
+mysql> SELECT REVERSE('hello');
 +------------------+
-| length('中国')   |
+| REVERSE('hello') |
 +------------------+
-|                6 |
+| olleh            |
 +------------------+
+1 row in set (0.00 sec)
+
+mysql> SELECT REVERSE('你好');
++------------------+
+| REVERSE('你好')   |
++------------------+
+| 好你              |
++------------------+
+1 row in set (0.00 sec)
 ```
-##keyword
-LENGTH
+## keyword
+REVERSE
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md
index 133c92b..0e0839f 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md
@@ -31,7 +31,7 @@ under the License.
 `VARCHAR right(VARCHAR str)`
 
 
-它返回具有指定长度的字符串的右边部分
+它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符
 
 ## example
 
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md
index c4b5d32..a2eae3a 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md
@@ -31,7 +31,7 @@ under the License.
 `VARCHAR strleft(VARCHAR str)`
 
 
-它返回具有指定长度的字符串的左边部分
+它返回具有指定长度的字符串的左边部分,长度的单位为utf8字符
 
 ## example
 
diff --git 
a/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md 
b/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md
index b6e8657..f3510fd 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md
@@ -31,7 +31,7 @@ under the License.
 `VARCHAR strright(VARCHAR str)`
 
 
-它返回具有指定长度的字符串的右边部分
+它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符
 
 ## example
 
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index 2dfb76b..a2d9727 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -533,7 +533,9 @@ visible_functions = [
             '15FunctionContextERKNS1_9StringValERKNS1_6IntValES6_'],
     [['length'], 'INT', ['VARCHAR'],
             
'_ZN5doris15StringFunctions6lengthEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
-    [['lower', 'lcase'], 'VARCHAR', ['VARCHAR'],
+    [['char_length', 'character_length'], 'INT', ['VARCHAR'],
+            
'_ZN5doris15StringFunctions16char_utf8_lengthEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
+   [['lower', 'lcase'], 'VARCHAR', ['VARCHAR'],
             
'_ZN5doris15StringFunctions5lowerEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
     [['upper', 'ucase'], 'VARCHAR', ['VARCHAR'],
             
'_ZN5doris15StringFunctions5upperEPN9doris_udf15FunctionContextERKNS1_9StringValE'],


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to