This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new ca9ca07e515 [Fix](Nereids) Fix datatype length wrong when string 
contains chinese (#29885) (#30814)
ca9ca07e515 is described below

commit ca9ca07e5157b40ef57e37c47302eeeb96e1c606
Author: seawinde <[email protected]>
AuthorDate: Sun Feb 4 20:20:55 2024 +0800

    [Fix](Nereids) Fix datatype length wrong when string contains chinese 
(#29885) (#30814)
    
    When a varchar literal contains Chinese, the length of the varchar type should not be 
the number of characters in the literal; it should be
    the actual length of the literal in bytes.
    Chinese is represented in Unicode, and a Chinese char occupies at most 4 bytes (in UTF-8). 
So if we meet Chinese in a varchar literal, we
    set the length to 4 * length.
    
    For example:
    >        CREATE MATERIALIZED VIEW test_varchar_literal_mv
    >             BUILD IMMEDIATE REFRESH AUTO ON MANUAL
    >             DISTRIBUTED BY RANDOM BUCKETS 2
    >             PROPERTIES ('replication_num' = '1')
    >             AS
    >             select case when l_orderkey > 1 then "一二三四" else "五六七八" end 
as field_1 from lineitem;
    
    mysql> desc test_varchar_literal_mv;
    The definition of the materialized view is as follows:
    +---------+-------------+------+-------+---------+-------+
    | Field   | Type        | Null | Key   | Default | Extra |
    +---------+-------------+------+-------+---------+-------+
    | field_1 | VARCHAR(16) | No   | false | NULL    | NONE  |
    +---------+-------------+------+-------+---------+-------+
---
 .../doris/nereids/parser/LogicalPlanBuilder.java   | 10 ++++++-
 .../expressions/literal/StringLikeLiteral.java     |  5 +++-
 .../java/org/apache/doris/nereids/util/Utils.java  | 12 ++++++++
 .../org/apache/doris/nereids/util/UtilsTest.java   | 35 ++++++++++++++++++++++
 4 files changed, 60 insertions(+), 2 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index df42620d116..40427e038a0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -19,6 +19,7 @@ package org.apache.doris.nereids.parser;
 
 import org.apache.doris.analysis.ArithmeticExpr.Operator;
 import org.apache.doris.analysis.UserIdentity;
+import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.DorisParser;
@@ -217,6 +218,8 @@ import 
org.apache.doris.nereids.trees.expressions.literal.LargeIntLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.SmallIntLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
 import org.apache.doris.nereids.trees.plans.JoinHint;
@@ -255,6 +258,7 @@ import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.types.VarcharType;
 import org.apache.doris.nereids.types.coercion.CharacterType;
 import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.nereids.util.Utils;
 import org.apache.doris.policy.FilterType;
 import org.apache.doris.policy.PolicyTypeEnum;
 import org.apache.doris.qe.ConnectContext;
@@ -1380,7 +1384,11 @@ public class LogicalPlanBuilder extends 
DorisParserBaseVisitor<Object> {
         if (!SqlModeHelper.hasNoBackSlashEscapes()) {
             s = escapeBackSlash(s);
         }
-        return new VarcharLiteral(s);
+        int strLength = Utils.containChinese(s) ? s.length() * 
StringLikeLiteral.CHINESE_CHAR_BYTE_LENGTH : s.length();
+        if (strLength > ScalarType.MAX_VARCHAR_LENGTH) {
+            return new StringLiteral(s);
+        }
+        return new VarcharLiteral(s, strLength);
     }
 
     private String escapeBackSlash(String str) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
index 5adf9f8623a..5b437021f53 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
@@ -21,8 +21,11 @@ import org.apache.doris.nereids.types.DataType;
 
 import java.util.Objects;
 
-/** StringLikeLiteral. */
+/**
+ * StringLikeLiteral.
+ */
 public abstract class StringLikeLiteral extends Literal {
+    public static final int CHINESE_CHAR_BYTE_LENGTH = 4;
     public final String value;
 
     public StringLikeLiteral(String value, DataType dataType) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
index f995ab138cc..6ac9ffd9513 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
@@ -256,4 +256,16 @@ public class Utils {
     public static <T> List<T> copyRequiredList(List<T> list) {
         return ImmutableList.copyOf(Objects.requireNonNull(list, "non-null 
list is required"));
     }
+
+    /**
+     * Check the content if contains chinese or not, if true when contains 
chinese or false
+     */
+    public static boolean containChinese(String text) {
+        for (char textChar : text.toCharArray()) {
+            if (Character.UnicodeScript.of(textChar) == 
Character.UnicodeScript.HAN) {
+                return true;
+            }
+        }
+        return false;
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/UtilsTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/UtilsTest.java
new file mode 100644
index 00000000000..0c7d903311f
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/UtilsTest.java
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.util;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * The tests for utils
+ */
+public class UtilsTest {
+    @Test
+    public void containChinese() {
+        String chinese = "123数据库";
+        Assertions.assertTrue(Utils.containChinese(chinese));
+
+        String en = "database123";
+        Assertions.assertFalse(Utils.containChinese(en));
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to