This is an automated email from the ASF dual-hosted git repository.
mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new e352e93305 [CALCITE-7153] Mixed wildcards of _ and % need to be
simplified in LIKE operator
e352e93305 is described below
commit e352e933059ae4d895d8e943604ef40fb2a43e40
Author: xuzifu666 <[email protected]>
AuthorDate: Wed Sep 3 11:56:55 2025 +0800
[CALCITE-7153] Mixed wildcards of _ and % need to be simplified in LIKE
operator
---
.../java/org/apache/calcite/rex/RexSimplify.java | 63 +++++++++++++++++++++-
.../org/apache/calcite/rex/RexProgramTest.java | 26 ++++++++-
2 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
index 410ce4fd12..c9c8081861 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
@@ -65,6 +65,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.calcite.linq4j.Nullness.castNonNull;
@@ -529,6 +530,66 @@ private RexNode simplifyLike(RexCall e, RexUnknownAs
unknownAs) {
return simplifyGenericNode(e);
}
+ // string 'AA%%__%%AA' simplify to 'AA__%AA'
+ // string with even escapes 'AA\\\\%%__%%AA' simplify to 'AA\\__%AA'
+ // string with odd escapes 'AA\\\\\\%%__%%AA' simplify to 'AA\\\\\\%__%AA'
+ private String simplifyMixedWildcards(String str, char escape) {
+ Pattern pattern = Pattern.compile("[_%]+");
+ Matcher matcher = pattern.matcher(str);
+ StringBuilder builder = new StringBuilder();
+ int from = 0;
+ while (matcher.find()) {
+ int start = matcher.start();
+ String group = requireNonNull(matcher.group(0));
+ if (start > 0
+ && str.charAt(start - 1) == escape
+ && consecutiveSameCharCountBefore(str, start - 1, escape) % 2 == 1) {
+ builder.append(str.substring(from, start + 1));
+ builder.append(simplifyPercentAndUnderline(group.substring(1)));
+ } else {
+ builder.append(str.substring(from, start));
+ builder.append(simplifyPercentAndUnderline(group));
+ }
+ from = matcher.end();
+ }
+ if (from < str.length()) {
+ builder.append(str.substring(from));
+ }
+ return builder.toString();
+ }
+
+ // Tool method: count the number of consecutive identical characters before
index
+ private int consecutiveSameCharCountBefore(String str, int index, char
escape) {
+ int count = 0;
+ while (index >= 0) {
+ if (str.charAt(index) != escape) {
+ break;
+ }
+ count++;
+ index--;
+ }
+ return count;
+ }
+
+ // Tool method: simplified string mixed with '%' and '_'
+ private String simplifyPercentAndUnderline(String str) {
+ StringBuilder builder = new StringBuilder();
+ boolean containsPercent = false;
+ for (int index = 0; index < str.length(); index++) {
+ if (str.charAt(index) == '%') {
+ containsPercent = true;
+ continue;
+ }
+ if (str.charAt(index) == '_') {
+ builder.append('_');
+ }
+ }
+ if (containsPercent) {
+ builder.append('%');
+ }
+ return builder.toString();
+ }
+
/**
* Simplifies like string with escape.
* A like '%%#%%A%%' escape '#' should simplify to A like '%#%%A%' escape
'#'.
@@ -559,7 +620,7 @@ private String simplifyLikeString(String content, char
escape, char wildcard) {
escapeCount = 0;
wildcardCount = 0;
}
- return builder.toString();
+ return simplifyMixedWildcards(builder.toString(), escape);
}
// e must be a comparison (=, >, >=, <, <=, !=)
diff --git a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
index 8c583c8a66..db9a427fea 100644
--- a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
+++ b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
@@ -3940,7 +3940,9 @@ private void checkSarg(String message, Sarg sarg,
* <a
href="https://issues.apache.org/jira/browse/CALCITE-4094">[CALCITE-4094]
* RexSimplify should simplify more always true OR expressions</a>,
* <a
href="https://issues.apache.org/jira/browse/CALCITE-7088">[CALCITE-7088]
- * Multiple consecutive '%' in the string matched by LIKE should simplify to
a single '%'</a>.
+ * Multiple consecutive '%' in the string matched by LIKE should simplify to
a single '%'</a>,
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-7153">[CALCITE-7153]
+ * Mixed wildcards of _ and % need to be simplified in LIKE operator</a>.
* */
@Test void testSimplifyLike() {
final RexNode ref = input(tVarchar(true, 10), 0);
@@ -3963,12 +3965,32 @@ private void checkSarg(String message, Sarg sarg,
"true");
checkSimplifyUnchanged(like(ref, literal("%A")));
checkSimplify(like(ref, literal("%%A")), "LIKE($0, '%A')");
- checkSimplify(like(ref, literal("%%%_A%%B%%")), "LIKE($0, '%_A%B%')");
+ checkSimplify(like(ref, literal("%%%_A%%B%%")), "LIKE($0, '_%A%B%')");
checkSimplify(like(ref, literal("%%A%%%")), "LIKE($0, '%A%')");
checkSimplify(like(ref, literal("%%\\%%A\\%%%%%")), "LIKE($0,
'%\\%%A\\%%')");
checkSimplify(like(ref, literal("%%A"), literal("#")), "LIKE($0, '%A',
'#')");
checkSimplify(like(ref, literal("%%#%%A%%"), literal("#")),
"LIKE($0, '%#%%A%', '#')");
+ checkSimplify(like(ref, literal("AA%__%BB%_CC")),
+ "LIKE($0, 'AA__%BB_%CC')");
+ checkSimplify(like(ref, literal("%%__%AA%_BB")),
+ "LIKE($0, '__%AA_%BB')");
+ checkSimplify(like(ref, literal("AA\\%%___%BB%%%___%CC")),
+ "LIKE($0, 'AA\\%___%BB___%CC')");
+ checkSimplify(like(ref, literal("AA#%%___%BB%%%___%CC")),
+ "LIKE($0, 'AA#___%BB___%CC')");
+ checkSimplify(like(ref, literal("AA#%%___%BB%%%___%CC"), literal("#")),
+ "LIKE($0, 'AA#%___%BB___%CC', '#')");
+ // odd number of '#', the next % would not be simplified
+ checkSimplify(like(ref, literal("AA###%%___%BB%%%___%CC"), literal("#")),
+ "LIKE($0, 'AA###%___%BB___%CC', '#')");
+ checkSimplify(like(ref, literal("AA\\\\\\%%___%BB%%%___%CC")),
+ "LIKE($0, 'AA\\\\\\%___%BB___%CC')");
+ // even number of '#', the next % would be simplified
+ checkSimplify(like(ref, literal("AA##%%___%BB%%%___%CC"), literal("#")),
+ "LIKE($0, 'AA##___%BB___%CC', '#')");
+ checkSimplify(like(ref, literal("AA\\\\\\\\%%___%BB%%%___%CC")),
+ "LIKE($0, 'AA\\\\\\\\___%BB___%CC')");
checkSimplify(like(ref, literal("%%#%#%A%%"), literal("#")),
"LIKE($0, '%#%#%A%', '#')");
checkSimplify(like(ref, literal("###%%#%#%A%%##%%%"), literal("#")),