morrySnow commented on code in PR #49087:
URL: https://github.com/apache/doris/pull/49087#discussion_r2000177596
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -303,17 +308,19 @@ public static Expression left(StringLikeLiteral first,
IntegerLiteral second) {
*/
@ExecFunction(name = "right")
public static Expression right(StringLikeLiteral first, IntegerLiteral
second) {
- if (second.getValue() < (- first.getValue().length()) ||
Math.abs(second.getValue()) == 0) {
+ int inputLength = first.getValue().offsetByCodePoints(0,
first.getValue().length());
+ if (second.getValue() < (- inputLength) || Math.abs(second.getValue())
== 0) {
return castStringLikeLiteral(first, "");
- } else if (second.getValue() > first.getValue().length()) {
+ } else if (second.getValue() > inputLength) {
return first;
} else {
+ int index = first.getValue().offsetByCodePoints(0,
second.getValue());
Review Comment:
This throws an exception when `second.getValue() < 0`. When
`second.getValue() < 0`, it should instead call
`int index = first.getValue().offsetByCodePoints(inputLength,
second.getValue());`
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
Review Comment:
All `offsetByCodePoints` calls should be replaced by `codePointCount`.
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -303,17 +308,19 @@ public static Expression left(StringLikeLiteral first,
IntegerLiteral second) {
*/
@ExecFunction(name = "right")
public static Expression right(StringLikeLiteral first, IntegerLiteral
second) {
- if (second.getValue() < (- first.getValue().length()) ||
Math.abs(second.getValue()) == 0) {
+ int inputLength = first.getValue().offsetByCodePoints(0,
first.getValue().length());
Review Comment:
```suggestion
int inputLength = first.getValue().codePointCount(0,
first.getValue().length());
```
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
Review Comment:
We'd better add FE unit tests too, to ensure these functions work as expected
rather than throwing an exception.
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) {
return new VarcharLiteral(sb.toString());
}
+ /**
+ * split by char by empty string considering emoji
+ * @param first input string to be split
+ * @return ArrayLiteral
+ */
+ public static Expression splitByGrapheme(StringLikeLiteral first) {
+ List<String> result = new ArrayList<>();
+ int length = first.getValue().length();
+ for (int i = 0; i < length; ) {
+ int codePoint = first.getValue().codePointAt(i);
+ int charCount = Character.charCount(codePoint);
+ result.add(first.getValue().substring(i, i + charCount));
+ i += charCount;
+ }
+ List<Literal> items = new ArrayList<>();
+ for (String s : result) {
+ items.add((Literal) castStringLikeLiteral(first, s));
+ }
Review Comment:
then remove this for loop
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -289,12 +292,14 @@ public static Expression replace(StringLikeLiteral first,
StringLikeLiteral seco
*/
@ExecFunction(name = "left")
public static Expression left(StringLikeLiteral first, IntegerLiteral
second) {
+ int inputLength = first.getValue().offsetByCodePoints(0,
first.getValue().length());
if (second.getValue() <= 0) {
return castStringLikeLiteral(first, "");
- } else if (second.getValue() < first.getValue().length()) {
- return castStringLikeLiteral(first, first.getValue().substring(0,
second.getValue()));
- } else {
+ } else if (second.getValue() > inputLength) {
Review Comment:
```suggestion
} else if (second.getValue() >= inputLength) {
```
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) {
return new VarcharLiteral(sb.toString());
}
+ /**
+ * split by char by empty string considering emoji
+ * @param first input string to be split
+ * @return ArrayLiteral
+ */
+ public static Expression splitByGrapheme(StringLikeLiteral first) {
Review Comment:
```suggestion
public static Expression splitByGrapheme(StringLikeLiteral str) {
```
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -303,17 +308,19 @@ public static Expression left(StringLikeLiteral first,
IntegerLiteral second) {
*/
@ExecFunction(name = "right")
public static Expression right(StringLikeLiteral first, IntegerLiteral
second) {
- if (second.getValue() < (- first.getValue().length()) ||
Math.abs(second.getValue()) == 0) {
+ int inputLength = first.getValue().offsetByCodePoints(0,
first.getValue().length());
+ if (second.getValue() < (- inputLength) || Math.abs(second.getValue())
== 0) {
return castStringLikeLiteral(first, "");
- } else if (second.getValue() > first.getValue().length()) {
+ } else if (second.getValue() > inputLength) {
Review Comment:
```suggestion
} else if (second.getValue() >= inputLength) {
```
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -333,7 +340,7 @@ public static Expression locate(StringLikeLiteral first,
StringLikeLiteral secon
public static Expression locate(StringLikeLiteral first, StringLikeLiteral
second, IntegerLiteral third) {
int result = second.getValue().indexOf(first.getValue()) + 1;
if (third.getValue() <= 0 || !substringImpl(second.getValue(),
third.getValue(),
- second.getValue().length()).contains(first.getValue())) {
+ second.getValue().offsetByCodePoints(0,
second.getValue().length())).contains(first.getValue())) {
Review Comment:
Please add a comment explaining which situation you are handling when you call
`!substringImpl(second.getValue(), third.getValue(),
second.getValue().offsetByCodePoints(0,
second.getValue().length())).contains(first.getValue())`
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) {
return new VarcharLiteral(sb.toString());
}
+ /**
+ * split by char by empty string considering emoji
+ * @param first input string to be split
+ * @return ArrayLiteral
+ */
+ public static Expression splitByGrapheme(StringLikeLiteral first) {
+ List<String> result = new ArrayList<>();
Review Comment:
List<Literal> result =
Lists.newArrayListWithExpectedSize(first.getValue().length());
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java:
##########
@@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) {
return new VarcharLiteral(sb.toString());
}
+ /**
+ * split by char by empty string considering emoji
+ * @param first input string to be split
+ * @return ArrayLiteral
+ */
+ public static Expression splitByGrapheme(StringLikeLiteral first) {
+ List<String> result = new ArrayList<>();
+ int length = first.getValue().length();
+ for (int i = 0; i < length; ) {
+ int codePoint = first.getValue().codePointAt(i);
+ int charCount = Character.charCount(codePoint);
+ result.add(first.getValue().substring(i, i + charCount));
Review Comment:
maybe a better way
```suggestion
result.add(castStringLikeLiteral(first, new String(new
int[]{codePoint}, 0, 1)));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]