This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 94b3a2b [Bug] Fix string functions not support multibyte string
(#3345)
94b3a2b is described below
commit 94b3a2bd501aa6c6e1f29f3c3ee5dafcf1256e3c
Author: yangzhg <[email protected]>
AuthorDate: Fri May 8 12:52:46 2020 +0800
[Bug] Fix string functions not support multibyte string (#3345)
Let string functions support utf8 encoding
---
be/src/exprs/string_functions.cpp | 106 ++++++++++++++++-----
be/src/exprs/string_functions.h | 2 +
be/test/exprs/string_functions_test.cpp | 80 ++++++++++++++++
.../string-functions/{length.md => char_length.md} | 39 ++++----
.../sql-functions/string-functions/left.md | 2 +-
.../sql-functions/string-functions/length.md | 2 +-
.../sql-functions/string-functions/reverse.md} | 36 +++----
.../sql-functions/string-functions/right.md | 2 +-
.../sql-functions/string-functions/strleft.md | 2 +-
.../sql-functions/string-functions/strright.md | 2 +-
.../string-functions/{length.md => char_length.md} | 36 +++----
.../sql-functions/string-functions/left.md | 2 +-
.../sql-functions/string-functions/length.md | 2 +-
.../string-functions/{length.md => reverse.md} | 34 +++----
.../sql-functions/string-functions/right.md | 2 +-
.../sql-functions/string-functions/strleft.md | 2 +-
.../sql-functions/string-functions/strright.md | 2 +-
gensrc/script/doris_builtins_functions.py | 4 +-
18 files changed, 254 insertions(+), 103 deletions(-)
diff --git a/be/src/exprs/string_functions.cpp
b/be/src/exprs/string_functions.cpp
index e64b745..06895ce 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -19,12 +19,12 @@
#include <re2/re2.h>
-#include "exprs/expr.h"
#include "exprs/anyval_util.h"
+#include "exprs/expr.h"
+#include "math_functions.h"
#include "runtime/string_value.hpp"
#include "runtime/tuple_row.h"
#include "util/url_parser.h"
-#include "math_functions.h"
// NOTE: be careful not to use string::append. It is not performant.
namespace doris {
@@ -32,24 +32,68 @@ namespace doris {
void StringFunctions::init() {
}
+size_t get_utf8_byte_length(unsigned char byte) {
+ size_t char_size = 0;
+ if (byte >= 0xFC) {
+ char_size = 6;
+ } else if (byte >= 0xF8) {
+ char_size = 5;
+ } else if (byte >= 0xF0) {
+ char_size = 4;
+ } else if (byte >= 0xE0) {
+ char_size = 3;
+ } else if (byte >= 0xC0) {
+ char_size = 2;
+ } else {
+ char_size = 1;
+ }
+ return char_size;
+}
+
// This behaves identically to the mysql implementation, namely:
// - 1-indexed positions
// - supported negative positions (count from the end of the string)
// - [optional] len. No len indicates longest substr possible
StringVal StringFunctions::substring(
- FunctionContext* context, const StringVal& str,
+ FunctionContext* context, const StringVal& str,
const IntVal& pos, const IntVal& len) {
- if (str.is_null || pos.is_null || len.is_null) {
+ if (str.is_null || pos.is_null || len.is_null || pos.val > str.len) {
return StringVal::null();
}
+ if (len.val <= 0 || str.len == 0) {
+ return StringVal();
+ }
+
+ // Build an index of the starting byte offset of every character.
+ // e.g. "hello word 你好" => [0,1,2,3,4,5,6,7,8,9,10,11,14]; 你 and 好 are 3
bytes each.
+ // Why use a vector as an index? It is unnecessary when pos is not negative,
+ // but when pos is negative it is not easy to determine where to start, so
+ // an index recording where every character starts is needed.
+ size_t byte_pos = 0;
+ std::vector<size_t> index;
+ for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+ char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+ index.push_back(byte_pos);
+ byte_pos += char_size;
+ if (pos.val > 0 && index.size() > pos.val + len.val) {
+ break;
+ }
+ }
+
int fixed_pos = pos.val;
if (fixed_pos < 0) {
- fixed_pos = str.len + fixed_pos + 1;
+ fixed_pos = index.size() + fixed_pos + 1;
+ }
+ if (fixed_pos > index.size()) {
+ return StringVal::null();
}
- int max_len = str.len - fixed_pos + 1;
- int fixed_len = std::min(static_cast<int>(len.val), max_len);
- if (fixed_pos > 0 && fixed_pos <= str.len && fixed_len > 0) {
- return StringVal(str.ptr + fixed_pos - 1, fixed_len);
+ byte_pos = index[fixed_pos - 1];
+ int fixed_len = str.len - byte_pos;
+ if (fixed_pos + len.val <= index.size()) {
+ fixed_len = index[fixed_pos + len.val - 1] - byte_pos;
+ }
+ if (byte_pos <= str.len && fixed_len > 0) {
+ return StringVal(str.ptr + byte_pos, fixed_len);
} else {
return StringVal();
}
@@ -118,7 +162,7 @@ StringVal StringFunctions::space(FunctionContext* context,
const IntVal& len) {
int32_t space_size = std::min(len.val, 65535);
// TODO pengyubing
// StringVal result = StringVal::create_temp_string_val(context,
space_size);
- StringVal result(context, space_size);
+ StringVal result(context, space_size);
memset(result.ptr, ' ', space_size);
return result;
}
@@ -147,7 +191,7 @@ StringVal StringFunctions::repeat(
}
StringVal StringFunctions::lpad(
- FunctionContext* context, const StringVal& str,
+ FunctionContext* context, const StringVal& str,
const IntVal& len, const StringVal& pad) {
if (str.is_null || len.is_null || pad.is_null || len.val < 0) {
return StringVal::null();
@@ -223,6 +267,23 @@ IntVal StringFunctions::length(FunctionContext* context,
const StringVal& str) {
return IntVal(str.len);
}
+// Implementation of CHAR_LENGTH
+// int char_utf8_length(string input)
+// Returns the length of characters of input. If input == NULL, returns
+// NULL per MySQL
+IntVal StringFunctions::char_utf8_length(FunctionContext* context, const
StringVal& str) {
+ if (str.is_null) {
+ return IntVal::null();
+ }
+ size_t char_len = 0;
+ std::vector<size_t> index;
+ for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+ char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+ ++char_len;
+ }
+ return IntVal(char_len);
+}
+
StringVal StringFunctions::lower(FunctionContext* context, const StringVal&
str) {
if (str.is_null) {
return StringVal::null();
@@ -260,13 +321,16 @@ StringVal StringFunctions::reverse(FunctionContext*
context, const StringVal& st
return StringVal::null();
}
- // TODO pengyubing
- // StringVal result = StringVal::create_temp_string_val(context, str.len);
StringVal result(context, str.len);
if (UNLIKELY(result.is_null)) {
return result;
}
- std::reverse_copy(str.ptr, str.ptr + str.len, result.ptr);
+
+ for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+ char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]);
+ std::copy(str.ptr + i, str.ptr + i + char_size, result.ptr +
result.len - i - char_size);
+ }
+
return result;
}
@@ -370,7 +434,7 @@ IntVal StringFunctions::locate_pos(
// This function sets options in the RE2 library before pattern matching.
bool StringFunctions::set_re2_options(
const StringVal& match_parameter,
- std::string* error_str,
+ std::string* error_str,
re2::RE2::Options* opts) {
for (int i = 0; i < match_parameter.len; i++) {
char match = match_parameter.ptr[i];
@@ -401,7 +465,7 @@ bool StringFunctions::set_re2_options(
// The caller owns the returned regex. Returns NULL if the pattern could not
be compiled.
static re2::RE2* compile_regex(
- const StringVal& pattern,
+ const StringVal& pattern,
std::string* error_str,
const StringVal& match_parameter) {
re2::StringPiece pattern_sp(reinterpret_cast<char*>(pattern.ptr),
pattern.len);
@@ -418,7 +482,7 @@ static re2::RE2* compile_regex(
re2::RE2* re = new re2::RE2(pattern_sp, options);
if (!re->ok()) {
std::stringstream ss;
- ss << "Could not compile regexp pattern: " <<
AnyValUtil::to_string(pattern)
+ ss << "Could not compile regexp pattern: " <<
AnyValUtil::to_string(pattern)
<< std::endl << "Error: " << re->error();
*error_str = ss.str();
delete re;
@@ -558,7 +622,7 @@ StringVal StringFunctions::concat(
}
StringVal StringFunctions::concat_ws(
- FunctionContext* context, const StringVal& sep,
+ FunctionContext* context, const StringVal& sep,
int num_children, const StringVal* strs) {
DCHECK_GE(num_children, 1);
if (sep.is_null) {
@@ -633,9 +697,9 @@ IntVal StringFunctions::find_in_set(
}
void StringFunctions::parse_url_prepare(
- FunctionContext* ctx,
+ FunctionContext* ctx,
FunctionContext::FunctionStateScope scope) {
- if (scope != FunctionContext::FRAGMENT_LOCAL) {
+ if (scope != FunctionContext::FRAGMENT_LOCAL) {
return;
}
if (!ctx->is_arg_constant(1)) {
@@ -818,7 +882,6 @@ static int index_of(const uint8_t* source, int
source_offset, int source_count,
return -1;
}
-
StringVal StringFunctions::split_part(FunctionContext* context, const
StringVal& content,
const StringVal& delimiter, const
IntVal& field) {
if (content.is_null || delimiter.is_null || field.is_null || field.val <=
0) {
@@ -847,5 +910,4 @@ StringVal StringFunctions::split_part(FunctionContext*
context, const StringVal&
int len = (find[field.val - 1] == -1 ? content.len : find[field.val - 1])
- start_pos;
return StringVal(content.ptr + start_pos, len);
}
-
}
diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h
index 31139d3..3c395c9 100644
--- a/be/src/exprs/string_functions.h
+++ b/be/src/exprs/string_functions.h
@@ -70,6 +70,8 @@ public:
const doris_udf::IntVal& len, const doris_udf::StringVal& pad);
static doris_udf::IntVal length(
doris_udf::FunctionContext* context, const doris_udf::StringVal& str);
+ static doris_udf::IntVal char_utf8_length(
+ doris_udf::FunctionContext* context, const doris_udf::StringVal& str);
static doris_udf::StringVal lower(
doris_udf::FunctionContext* context, const doris_udf::StringVal& str);
static doris_udf::StringVal upper(
diff --git a/be/test/exprs/string_functions_test.cpp
b/be/test/exprs/string_functions_test.cpp
index a9b76e6..3b257b9 100644
--- a/be/test/exprs/string_functions_test.cpp
+++ b/be/test/exprs/string_functions_test.cpp
@@ -17,6 +17,7 @@
#include "exprs/string_functions.h"
#include "util/logging.h"
+#include "testutil/function_utils.h"
#include "exprs/anyval_util.h"
#include <iostream>
#include <string>
@@ -252,9 +253,88 @@ TEST_F(StringFunctionsTest, null_or_empty) {
delete context;
}
+TEST_F(StringFunctionsTest, substring) {
+ doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("hello")),
+ StringFunctions::substring(context, StringVal("hello word"), 1,
5));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("word")),
+ StringFunctions::substring(context, StringVal("hello word"), 7,
4));
+
+ ASSERT_EQ(StringVal::null(),
+ StringFunctions::substring(context, StringVal::null(), 1, 0));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")),
+ StringFunctions::substring(context, StringVal("hello word"), 1,
0));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string(" word")),
+ StringFunctions::substring(context, StringVal("hello word"), -5,
5));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("hello word
你")),
+ StringFunctions::substring(context, StringVal("hello word 你好"), 1,
12));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("好")),
+ StringFunctions::substring(context, StringVal("hello word 你好"),
13, 1));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")),
+ StringFunctions::substring(context, StringVal("hello word 你好"), 1,
0));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("rd 你好")),
+ StringFunctions::substring(context, StringVal("hello word 你好"),
-5, 5));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("h")),
+ StringFunctions::substring(context, StringVal("hello word 你好"), 1,
1));
+}
+
+TEST_F(StringFunctionsTest, reverse) {
+ FunctionUtils fu;
+ doris_udf::FunctionContext* context = fu.get_fn_ctx();
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("olleh")),
+ StringFunctions::reverse(context, StringVal("hello")));
+ ASSERT_EQ(StringVal::null(),
+ StringFunctions::reverse(context, StringVal::null()));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")),
+ StringFunctions::reverse(context, StringVal("")));
+
+ ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("好你olleh")),
+ StringFunctions::reverse(context, StringVal("hello你好")));
+}
+
+TEST_F(StringFunctionsTest, length) {
+ doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+ ASSERT_EQ(IntVal(5),
+ StringFunctions::length(context, StringVal("hello")));
+ ASSERT_EQ(IntVal(5),
+ StringFunctions::char_utf8_length(context, StringVal("hello")));
+ ASSERT_EQ(IntVal::null(),
+ StringFunctions::length(context, StringVal::null()));
+ ASSERT_EQ(IntVal::null(),
+ StringFunctions::char_utf8_length(context, StringVal::null()));
+
+ ASSERT_EQ(IntVal(0),
+ StringFunctions::length(context, StringVal("")));
+ ASSERT_EQ(IntVal(0),
+ StringFunctions::char_utf8_length(context, StringVal("")));
+
+ ASSERT_EQ(IntVal(11),
+ StringFunctions::length(context, StringVal("hello你好")));
+
+ ASSERT_EQ(IntVal(7),
+ StringFunctions::char_utf8_length(context, StringVal("hello你好")));
+}
+
}
int main(int argc, char** argv) {
+ std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
+ if (!doris::config::init(conffile.c_str(), false)) {
+ fprintf(stderr, "error read config file. \n");
+ return -1;
+ }
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
diff --git a/docs/en/sql-reference/sql-functions/string-functions/length.md
b/docs/en/sql-reference/sql-functions/string-functions/char_length.md
similarity index 66%
copy from docs/en/sql-reference/sql-functions/string-functions/length.md
copy to docs/en/sql-reference/sql-functions/string-functions/char_length.md
index ff2c961..e747889 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/char_length.md
@@ -1,6 +1,6 @@
---
{
- "title": "length",
+ "title": "CHAR_LENGTH",
"language": "en"
}
---
@@ -24,31 +24,32 @@ specific language governing permissions and limitations
under the License.
-->
-# length
+# char_length
## Description
### Syntax
-'INT length (VARCHAR str)'
+'INT char_length (VARCHAR str)'
-Returns the length of the string and the number of characters returned for
multi-byte characters. For example, five two-byte width words return a length
of 10.
+Returns the length of the string in characters; each multi-byte character
counts as one character. For example, five two-byte characters return a length
of 5. Only UTF-8 encoding is supported in the current version.
## example
+
```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-| 3 |
-+---------------+
-
-mysql> select length("中国");
-+------------------+
-| length('中国') |
-+------------------+
-| 6 |
-+------------------+
+mysql> select char_length("abc");
++--------------------+
+| char_length('abc') |
++--------------------+
+| 3 |
++--------------------+
+
+mysql> select char_length("中国");
++-----------------------+
+| char_length('中国') |
++-----------------------+
+| 2 |
++-----------------------+
```
-##keyword
-LENGTH
+## keyword
+CHAR_LENGTH
diff --git a/docs/en/sql-reference/sql-functions/string-functions/left.md
b/docs/en/sql-reference/sql-functions/string-functions/left.md
index 074bc34..b570a5b 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/left.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/left.md
@@ -31,7 +31,7 @@ under the License.
'VARCHAR left (VARCHAR str)'
-It returns the left part of a string of specified length
+It returns the left part of a string with the specified length; the length is
measured in characters, not bytes.
## example
diff --git a/docs/en/sql-reference/sql-functions/string-functions/length.md
b/docs/en/sql-reference/sql-functions/string-functions/length.md
index ff2c961..6f345de 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/length.md
@@ -31,7 +31,7 @@ under the License.
'INT length (VARCHAR str)'
-Returns the length of the string and the number of characters returned for
multi-byte characters. For example, five two-byte width words return a length
of 10.
+Returns the length of the string in bytes.
## example
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
b/docs/en/sql-reference/sql-functions/string-functions/reverse.md
similarity index 67%
copy from docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
copy to docs/en/sql-reference/sql-functions/string-functions/reverse.md
index f86e3ee..7118e74 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/reverse.md
@@ -1,7 +1,7 @@
---
{
- "title": "length",
- "language": "zh-CN"
+ "title": "REVERSE",
+ "language": "en"
}
---
@@ -24,31 +24,33 @@ specific language governing permissions and limitations
under the License.
-->
-# length
+# reverse
## description
### Syntax
-`INT length(VARCHAR str)`
+`VARCHAR reverse(VARCHAR str)`
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+The REVERSE() function reverses a string and returns the result.
## example
```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-| 3 |
-+---------------+
-
-mysql> select length("中国");
+mysql> SELECT REVERSE('hello');
+------------------+
-| length('中国') |
+| REVERSE('hello') |
+------------------+
-| 6 |
+| olleh |
+------------------+
+1 row in set (0.00 sec)
+
+mysql> SELECT REVERSE('你好');
++------------------+
+| REVERSE('你好') |
++------------------+
+| 好你 |
++------------------+
+1 row in set (0.00 sec)
```
-##keyword
-LENGTH
+## keyword
+REVERSE
diff --git a/docs/en/sql-reference/sql-functions/string-functions/right.md
b/docs/en/sql-reference/sql-functions/string-functions/right.md
index d8e356a..e67a5ac 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/right.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/right.md
@@ -31,7 +31,7 @@ under the License.
'VARCHAR RIGHT (VARCHAR STR)'
-It returns the right part of a string of specified length
+It returns the right part of a string with the specified length; the length is
measured in characters, not bytes.
## example
diff --git a/docs/en/sql-reference/sql-functions/string-functions/strleft.md
b/docs/en/sql-reference/sql-functions/string-functions/strleft.md
index 81d6ee8..d1b1c30 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/strleft.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/strleft.md
@@ -31,7 +31,7 @@ under the License.
'VARCHAR STRAIGHT (VARCHAR STR)'
-It returns the left part of a string of specified length
+It returns the left part of a string with the specified length; the length is
measured in characters, not bytes.
## example
diff --git a/docs/en/sql-reference/sql-functions/string-functions/strright.md
b/docs/en/sql-reference/sql-functions/string-functions/strright.md
index 25707cd..42a43ec 100644
--- a/docs/en/sql-reference/sql-functions/string-functions/strright.md
+++ b/docs/en/sql-reference/sql-functions/string-functions/strright.md
@@ -32,7 +32,7 @@ under the License.
'VARCHAR strright (VARCHAR str)'
-It returns the right part of a string of specified length
+It returns the right part of a string with the specified length; the length is
measured in characters, not bytes.
## example
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md
similarity index 63%
copy from docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
copy to docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md
index f86e3ee..9bd3c1f 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md
@@ -1,6 +1,6 @@
---
{
- "title": "length",
+ "title": "CHAR_LENGTH",
"language": "zh-CN"
}
---
@@ -24,31 +24,31 @@ specific language governing permissions and limitations
under the License.
-->
-# length
+# char_length
## description
### Syntax
-`INT length(VARCHAR str)`
+`INT char_length(VARCHAR str)`
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+返回字符串的长度,对于多字节字符,返回字符数, 目前仅支持utf8 编码。
## example
```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-| 3 |
-+---------------+
-
-mysql> select length("中国");
-+------------------+
-| length('中国') |
-+------------------+
-| 6 |
-+------------------+
+mysql> select char_length("abc");
++--------------------+
+| char_length('abc') |
++--------------------+
+| 3 |
++--------------------+
+
+mysql> select char_length("中国");
++-----------------------+
+| char_length('中国') |
++-----------------------+
+| 2 |
++-----------------------+
```
##keyword
-LENGTH
+CHAR_LENGTH
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md
index 99a9b83..2c73243 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md
@@ -31,7 +31,7 @@ under the License.
`VARCHAR left(VARCHAR str)`
-它返回具有指定长度的字符串的左边部分
+它返回具有指定长度的字符串的左边部分, 长度的单位为utf8字符
## example
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
index f86e3ee..53fa1f9 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
@@ -31,7 +31,7 @@ under the License.
`INT length(VARCHAR str)`
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+返回字符串的字节数。
## example
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md
similarity index 67%
copy from docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
copy to docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md
index f86e3ee..1daf780 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md
@@ -1,6 +1,6 @@
---
{
- "title": "length",
+ "title": "REVERSE",
"language": "zh-CN"
}
---
@@ -24,31 +24,33 @@ specific language governing permissions and limitations
under the License.
-->
-# length
+# reverse
## description
### Syntax
-`INT length(VARCHAR str)`
+`VARCHAR reverse(VARCHAR str)`
-返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。
+将字符串反转,返回的字符串的顺序和源字符串的顺序相反。
## example
```
-mysql> select length("abc");
-+---------------+
-| length('abc') |
-+---------------+
-| 3 |
-+---------------+
-
-mysql> select length("中国");
+mysql> SELECT REVERSE('hello');
+------------------+
-| length('中国') |
+| REVERSE('hello') |
+------------------+
-| 6 |
+| olleh |
+------------------+
+1 row in set (0.00 sec)
+
+mysql> SELECT REVERSE('你好');
++------------------+
+| REVERSE('你好') |
++------------------+
+| 好你 |
++------------------+
+1 row in set (0.00 sec)
```
-##keyword
-LENGTH
+## keyword
+REVERSE
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md
index 133c92b..0e0839f 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md
@@ -31,7 +31,7 @@ under the License.
`VARCHAR right(VARCHAR str)`
-它返回具有指定长度的字符串的右边部分
+它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符
## example
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md
index c4b5d32..a2eae3a 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md
@@ -31,7 +31,7 @@ under the License.
`VARCHAR strleft(VARCHAR str)`
-它返回具有指定长度的字符串的左边部分
+它返回具有指定长度的字符串的左边部分,长度的单位为utf8字符
## example
diff --git
a/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md
b/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md
index b6e8657..f3510fd 100644
--- a/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md
@@ -31,7 +31,7 @@ under the License.
`VARCHAR strright(VARCHAR str)`
-它返回具有指定长度的字符串的右边部分
+它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符
## example
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 2dfb76b..a2d9727 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -533,7 +533,9 @@ visible_functions = [
'15FunctionContextERKNS1_9StringValERKNS1_6IntValES6_'],
[['length'], 'INT', ['VARCHAR'],
'_ZN5doris15StringFunctions6lengthEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
- [['lower', 'lcase'], 'VARCHAR', ['VARCHAR'],
+ [['char_length', 'character_length'], 'INT', ['VARCHAR'],
+
'_ZN5doris15StringFunctions16char_utf8_lengthEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
+ [['lower', 'lcase'], 'VARCHAR', ['VARCHAR'],
'_ZN5doris15StringFunctions5lowerEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
[['upper', 'ucase'], 'VARCHAR', ['VARCHAR'],
'_ZN5doris15StringFunctions5upperEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]