This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new cc387f362fb branch-2.1: [opt](identifier) make unicode format a
superset of latin format #48078 (#53264)
cc387f362fb is described below
commit cc387f362fb3b93e6b735ebfb5490308875162d4
Author: morrySnow <[email protected]>
AuthorDate: Wed Jul 16 19:58:58 2025 +0800
branch-2.1: [opt](identifier) make unicode format a superset of latin
format #48078 (#53264)
cherry-picked from #48078
---
.../java/org/apache/doris/common/FeNameFormat.java | 6 +-
.../org/apache/doris/common/FeNameFormatTest.java | 109 ++++++++++++++++++---
2 files changed, 97 insertions(+), 18 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
index 9a99412ab1d..03600cc3e3b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
@@ -35,16 +35,16 @@ public class FeNameFormat {
private static final String UNDERSCORE_COMMON_NAME_REGEX =
"^[_a-zA-Z][a-zA-Z0-9-_]{0,63}$";
private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
- private static final String COLUMN_NAME_REGEX =
"^[_a-zA-Z@0-9\\s/][.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,255}$";
+ private static final String COLUMN_NAME_REGEX =
"^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{1,256}$";
private static final String REPOSITORY_NAME_REGEX =
"^[a-zA-Z][a-zA-Z0-9-_]{0,255}$";
- private static final String UNICODE_LABEL_REGEX =
"^[-_A-Za-z0-9:\\p{L}]{1,128}$";
+ private static final String UNICODE_LABEL_REGEX =
"^[-_A-Za-z0-9:\\p{L}]{1," + Config.label_regex_length + "}$";
private static final String UNICODE_COMMON_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_UNDERSCORE_COMMON_NAME_REGEX =
"^[_a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_TABLE_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
private static final String UNICODE_USER_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
private static final String UNICODE_COLUMN_NAME_REGEX
- = "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/?@#$%^&*\\p{L}]{0,255}$";
+ = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:\\p{L}]{1,256}$";
private static final String UNICODE_REPOSITORY_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,255}$";
public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
index 3edb6a33e96..32e2a553b94 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
@@ -17,26 +17,24 @@
package org.apache.doris.common;
-import org.junit.Test;
+import org.apache.doris.qe.VariableMgr;
+
+import com.google.common.collect.Lists;
+import org.apache.ivy.util.StringUtils;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
public class FeNameFormatTest {
@Test
- public void testCheckColumnName() {
+ void testLabelName() {
// check label use correct regex, begin with '-' is different from
others
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkLabel("-lable"));
+ }
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("_id"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("__id"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("___id"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("___id_"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("@timestamp"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("@timestamp#"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("timestamp*"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("timestamp.1"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("timestamp.#"));
- ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkColumnName("?id_"));
- ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkColumnName("#id_"));
+ @Test
+ void testTableName() {
// length 64
String tblName =
"test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkTableName(tblName));
@@ -45,19 +43,100 @@ public class FeNameFormatTest {
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkTableName(largeTblName));
// check table name use correct regex, not begin with '-'
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkTableName("-" + tblName));
+ }
+
+ @Test
+ void testCheckColumnName() {
+ List<String> alwaysValid = Lists.newArrayList(
+ "_id",
+ "_id",
+ "_ id",
+ " _id",
+ "__id",
+ "___id",
+ "___id_",
+ "@timestamp",
+ "@timestamp#",
+ "timestamp*",
+ "timestamp.1",
+ "timestamp.#",
+ "?id_",
+ "#id_",
+ "$id_",
+ "a-zA-Z0-9.+-/?@#$%^&*\" ,:"
+ );
+
+ List<String> alwaysInvalid = Lists.newArrayList(
+ // inner column prefix
+ "mv_",
+ "mva_",
+ "__doris_shadow_",
+
+ // invalid
+ "",
+ "\\",
+ "column\\",
+ StringUtils.repeat("a", 257)
+ );
+
+ List<String> unicodeValid = Lists.newArrayList(
+ "中文",
+ "語言",
+ "язык",
+ "언어",
+ "لغة",
+ "ภาษา",
+ "שפה",
+ "γλώσσα",
+ "ენა",
+ "げんご"
+ );
+ boolean defaultUnicode =
VariableMgr.getDefaultSessionVariable().enableUnicodeNameSupport;
+ List<Boolean> enableUnicode = Lists.newArrayList(false, true);
+ try {
+ for (Boolean unicode : enableUnicode) {
+
VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(unicode);
+ for (String s : alwaysValid) {
+ ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName(s));
+ }
+ for (String s : alwaysInvalid) {
+ ExceptionChecker.expectThrows(AnalysisException.class, ()
-> FeNameFormat.checkColumnName(s));
+ }
+ for (String s : unicodeValid) {
+ if (unicode) {
+ ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName(s));
+ } else {
+ ExceptionChecker.expectThrows(AnalysisException.class,
() -> FeNameFormat.checkColumnName(s));
+ }
+ }
+ }
+ } finally {
+
VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(defaultUnicode);
+ }
+ }
+
+ @Test
+ void testUserName() {
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkUserName("a.b"));
// check user name use correct regex, not begin with '.'
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkUserName(".a.b"));
+ }
+
+ @Test
+ void testCommonName() {
+ String commonName =
"test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";
// check common name use correct regex, length 65
- ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkCommonName("fakeType", tblName + "t"));
+ ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkCommonName("fakeType", commonName + "t"));
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkCommonName("fakeType", "_commonName"));
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkCommonName("fakeType", "common-Name"));
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkCommonName("fakeType", "commonName-"));
+ }
+ @Test
+ void testOutfileName() {
// check success file name prefix
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkOutfileSuccessFileName("fakeType", "_success"));
}
-
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]