This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new b1305002039 [fix](Nereids) initcap constant folding should upper first
character in all words (#49061) (#49342)
b1305002039 is described below
commit b1305002039faf34ee0f2077c3628ea0926450e3
Author: LiBinfeng <[email protected]>
AuthorDate: Fri Mar 21 18:03:28 2025 +0800
[fix](Nereids) initcap constant folding should upper first character in all
words (#49061) (#49342)
---
.../functions/executable/StringArithmetic.java | 11 +-
.../fold_constant_string_arithmatic.groovy | 138 +++++++++++++++++++++
2 files changed, 142 insertions(+), 7 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index 027f75bbc86..532ec04d8aa 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -412,12 +412,9 @@ public class StringArithmetic {
return new IntegerLiteral(first.getValue().length());
}
- private static boolean isSeparator(char c) {
- if (".$|()[{^?*+\\".indexOf(c) == -1) {
- return false;
- } else {
- return true;
- }
+ private static boolean isAlphabetic(char c) {
+ Pattern pattern = Pattern.compile("\\p{Alnum}");
+ return pattern.matcher(String.valueOf(c)).find();
}
/**
@@ -429,7 +426,7 @@ public class StringArithmetic {
boolean capitalizeNext = true;
for (char c : first.getValue().toCharArray()) {
- if (Character.isWhitespace(c) || isSeparator(c)) {
+ if (Character.isWhitespace(c) || !isAlphabetic(c)) {
result.append(c);
capitalizeNext = true; // Next character should be capitalized
} else if (capitalizeNext) {
diff --git
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index 08f9fca9801..8d440289073 100644
---
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -213,6 +213,144 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select initcap(' hello world')")
testFoldConst("select initcap('こんにちは')")
testFoldConst("select initcap('上海天津北京杭州')")
+ testFoldConst("select initcap('ab')")
+ testFoldConst("select initcap('aBc')")
+ testFoldConst("select initcap('a,b,c')")
+ testFoldConst("select initcap('a;b;c')")
+ testFoldConst("select initcap(null)")
+ testFoldConst("select initcap('')")
+ testFoldConst("select initcap(123)")
+ testFoldConst("select initcap(0)")
+ testFoldConst("select initcap(true)")
+ testFoldConst("select initcap(' a ')")
+ testFoldConst("select initcap('中文字')")
+ testFoldConst("select initcap('🐼abc')")
+ testFoldConst("select initcap('2023-01-01')")
+ testFoldConst("select initcap('aBcDeF')")
+ testFoldConst("select initcap('hello world!')")
+ testFoldConst("select initcap('123abcDEF')")
+ testFoldConst("select initcap(' ')")
+ testFoldConst("select initcap('null')")
+ testFoldConst("select initcap('ärger')")
+ testFoldConst("select initcap('über')")
+ testFoldConst("select initcap('a1!b2@c3#')")
+ testFoldConst("select initcap('john o''connor')")
+ testFoldConst("select initcap('mcdonald''s')")
+ testFoldConst("select initcap('abc-def')")
+ testFoldConst("select initcap('foo_bar')")
+ testFoldConst("select initcap(' test ')")
+ testFoldConst("select initcap('xyz,zyx')")
+ testFoldConst("select initcap('123 456')")
+ testFoldConst("select initcap('.,abc')")
+ testFoldConst("select initcap('[]test')")
+ testFoldConst("select initcap('🐼🐻')")
+ testFoldConst("select initcap('aaAAaa')")
+ testFoldConst("select initcap(substring('abcd', 2))")
+ testFoldConst("select initcap(concat('a', '-test'))")
+ testFoldConst("select initcap('hello world')")
+ testFoldConst("select initcap('mixedCASE')")
+ testFoldConst("select initcap('UPPERCASE')")
+ testFoldConst("select initcap('lowercase')")
+ testFoldConst("select initcap('multiple spaces')")
+ testFoldConst("select initcap('hyphenated-word')")
+ testFoldConst("select initcap('under_score')")
+ testFoldConst("select initcap('dot.test')")
+ testFoldConst("select initcap('colon:test')")
+ testFoldConst("select initcap('semi;test')")
+ testFoldConst("select initcap('quote''test')")
+ testFoldConst("select initcap('slash/test')")
+ testFoldConst("select initcap('back\slash')")
+ testFoldConst("select initcap('emoji🐼test')")
+ testFoldConst("select initcap('数字123test')")
+ testFoldConst("select initcap(' leading space')")
+ testFoldConst("select initcap('trailing space ')")
+ testFoldConst("select initcap(' multiple ')")
+ testFoldConst("select initcap('a.b.c.d')")
+ testFoldConst("select initcap('test-123-test')")
+ testFoldConst("select initcap('mixed_separators-here')")
+ testFoldConst("select initcap('ÄÖÜäöü')")
+ testFoldConst("select initcap('àçèñ')")
+ testFoldConst("select initcap('')")
+ testFoldConst("select initcap(' ')")
+ testFoldConst("select initcap('9am')")
+ testFoldConst("select initcap('sign')")
+ testFoldConst("select initcap('hash#tag')")
+ testFoldConst("select initcap('at@sign')")
+ testFoldConst("select initcap('caret^test')")
+ testFoldConst("select initcap('amp&test')")
+ testFoldConst("select initcap('star*test')")
+ testFoldConst("select initcap('plus+test')")
+ testFoldConst("select initcap('minus-test')")
+ testFoldConst("select initcap('equals=test')")
+ testFoldConst("select initcap('tilde~test')")
+ testFoldConst("select initcap('backtick`test')")
+ testFoldConst("select initcap('pipe|test')")
+ testFoldConst("select initcap('brace{test')")
+ testFoldConst("select initcap('bracket[test')")
+ testFoldConst("select initcap('less<test')")
+ testFoldConst("select initcap('greater>test')")
+ testFoldConst("select initcap('slash/test')")
+ testFoldConst("select initcap('question?test')")
+ testFoldConst("select initcap('space test')")
+ testFoldConst("select initcap('emoji🐼mix')")
+ testFoldConst("select initcap('unicodeñtest')")
+ testFoldConst("select initcap('ÆØÅtest')")
+ testFoldConst("select initcap('çédîñ')")
+ testFoldConst("select initcap('русский')")
+ testFoldConst("select initcap('日本語')")
+ testFoldConst("select initcap('한글')")
+ testFoldConst("select initcap('ﺎﻠﻋﺮﺒﻳﺓ')")
+ testFoldConst("select initcap('😊test')")
+ testFoldConst("select initcap('𝄞music')")
+ testFoldConst("select initcap('🅱button')")
+ testFoldConst("select initcap('🇺🇸flag')")
+ testFoldConst("select initcap('👨👩👧👦family')")
+ testFoldConst("select initcap('🔥fire')")
+ testFoldConst("select initcap('🚀rocket')")
+ testFoldConst("select initcap('📅2023')")
+ testFoldConst("select initcap('√square')")
+ testFoldConst("select initcap('∞infinity')")
+ testFoldConst("select initcap('µmicro')")
+ testFoldConst("select initcap('¶pilcrow')")
+ testFoldConst("select initcap('©copyright')")
+ testFoldConst("select initcap('®registered')")
+ testFoldConst("select initcap('™trademark')")
+ testFoldConst("select initcap('§section')")
+ testFoldConst("select initcap('°degree')")
+ testFoldConst("select initcap('±plusminus')")
+ testFoldConst("select initcap('×multiply')")
+ testFoldConst("select initcap('÷divide')")
+ testFoldConst("select initcap('¹superscript')")
+ testFoldConst("select initcap('₂subscript')")
+ testFoldConst("select initcap('Ωomega')")
+ testFoldConst("select initcap('∆delta')")
+ testFoldConst("select initcap('∑sum')")
+ testFoldConst("select initcap('∏product')")
+ testFoldConst("select initcap('∫integral')")
+ testFoldConst("select initcap('⌘command')")
+ testFoldConst("select initcap('⌥option')")
+ testFoldConst("select initcap('⇧shift')")
+ testFoldConst("select initcap('⌃control')")
+ testFoldConst("select initcap('⌦delete')")
+ testFoldConst("select initcap('⇨arrow')")
+ testFoldConst("select initcap('★star')")
+ testFoldConst("select initcap('☀sun')")
+ testFoldConst("select initcap('☔ umbrella')")
+ testFoldConst("select initcap('☎phone')")
+ testFoldConst("select initcap('✉email')")
+ testFoldConst("select initcap('✓check')")
+ testFoldConst("select initcap('✗cross')")
+ testFoldConst("select initcap('⚠warning')")
+ testFoldConst("select initcap('⏰ clock')")
+ testFoldConst("select initcap('🎂cake')")
+ testFoldConst("select initcap('🎉party')")
+ testFoldConst("select initcap('⚡ bolt')")
+ testFoldConst("select initcap('⛔ forbidden')")
+ testFoldConst("select initcap('✅ check')")
+ testFoldConst("select initcap('✈plane')")
+ testFoldConst("select initcap('❤heart')")
+ testFoldConst("select initcap('⏩ fast')")
+ testFoldConst("select initcap('🔑key')")
// instr
testFoldConst("select instr('上海天津北京杭州', '北京')")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]