This is an automated email from the ASF dual-hosted git repository.
mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 98afe461ca [CALCITE-7088] Multiple consecutive '%' in the string
matched by LIKE should simplify to a single '%'
98afe461ca is described below
commit 98afe461cafbc5297923e8359fc2ec5cebdd17ce
Author: xuzifu666 <[email protected]>
AuthorDate: Wed Jun 25 13:50:28 2025 +0800
[CALCITE-7088] Multiple consecutive '%' in the string matched by LIKE
should simplify to a single '%'
---
.../java/org/apache/calcite/rex/RexSimplify.java | 55 +++++++++++++++++++++-
.../org/apache/calcite/rex/RexProgramTest.java | 23 ++++++++-
2 files changed, 75 insertions(+), 3 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
index 4710bb1f24..410ce4fd12 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
@@ -65,6 +65,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.regex.Pattern;
import static org.apache.calcite.linq4j.Nullness.castNonNull;
import static org.apache.calcite.rex.RexUnknownAs.FALSE;
@@ -500,17 +501,67 @@ private RexNode simplifyDivide(RexCall e) {
private RexNode simplifyLike(RexCall e, RexUnknownAs unknownAs) {
if (e.operands.get(1) instanceof RexLiteral) {
final RexLiteral literal = (RexLiteral) e.operands.get(1);
- if ("%".equals(literal.getValueAs(String.class))) {
- // "x LIKE '%'" simplifies to "x = x"
+ String likeStr = requireNonNull(literal.getValueAs(String.class));
+ Pattern pattern = Pattern.compile("%+");
+ String value = pattern.matcher(likeStr).replaceAll("%");
+ if ("%".equals(value)) {
+ // "x LIKE '%'" or "x LIKE '%...'" simplifies to "x = x"
final RexNode x = e.operands.get(0);
return simplify(
rexBuilder.makeCall(
e.getParserPosition(), SqlStdOperatorTable.EQUALS, x, x),
unknownAs);
}
+ // simplify "x LIKE '%%\%%a%%%'" to "x LIKE '%\%%a%'", default escape is
'\'
+ if (e.operands.size() == 2) {
+ e = (RexCall) rexBuilder
+ .makeCall(e.getParserPosition(), e.getOperator(),
e.operands.get(0),
+ rexBuilder.makeLiteral(simplifyLikeString(likeStr, '\\',
'%')));
+ }
+ if (e.operands.size() == 3 && e.operands.get(2) instanceof RexLiteral) {
+ final RexLiteral escapeLiteral = (RexLiteral) e.operands.get(2);
+ Character escape =
requireNonNull(escapeLiteral.getValueAs(Character.class));
+ e = (RexCall) rexBuilder
+ .makeCall(e.getParserPosition(), e.getOperator(),
e.operands.get(0),
+ rexBuilder.makeLiteral(simplifyLikeString(likeStr, escape,
'%')),
+ escapeLiteral);
+ }
}
return simplifyGenericNode(e);
}
+  /**
+   * Collapses every run of consecutive unescaped wildcards in a LIKE pattern
+   * into a single wildcard.
+   *
+   * <p>A wildcard that directly follows an odd number of escape characters is
+   * escaped; it is always kept and never merged with its neighbours. For
+   * example, {@code '%%#%%A%%'} with escape {@code '#'} becomes
+   * {@code '%#%%A%'}.
+   *
+   * @param content  LIKE pattern to rewrite
+   * @param escape   escape character of the pattern
+   * @param wildcard wildcard character whose runs are collapsed
+   * @return the pattern with redundant wildcards removed
+   */
+  private String simplifyLikeString(String content, char escape,
+      char wildcard) {
+    final StringBuilder result = new StringBuilder(content.length());
+    // True while an odd number of escape characters immediately precedes the
+    // current position, i.e. the next character is escaped.
+    boolean escapePending = false;
+    // True once an unescaped wildcard of the current run has been emitted.
+    boolean wildcardEmitted = false;
+    for (char current : content.toCharArray()) {
+      if (current == escape) {
+        result.append(current);
+        escapePending = !escapePending;
+        wildcardEmitted = false;
+      } else if (current == wildcard) {
+        if (escapePending) {
+          // Escaped wildcard: a literal character, always kept.
+          result.append(current);
+        } else if (!wildcardEmitted) {
+          // First unescaped wildcard of a run; the rest of the run is dropped.
+          result.append(current);
+          wildcardEmitted = true;
+        }
+        escapePending = false;
+      } else {
+        result.append(current);
+        escapePending = false;
+        wildcardEmitted = false;
+      }
+    }
+    return result.toString();
+  }
+
// e must be a comparison (=, >, >=, <, <=, !=)
private RexNode simplifyComparison(RexCall e, RexUnknownAs unknownAs) {
//noinspection unchecked
diff --git a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
index ea44d22e9b..8c583c8a66 100644
--- a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
+++ b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
@@ -3938,11 +3938,16 @@ private void checkSarg(String message, Sarg sarg,
/** Tests
* <a
href="https://issues.apache.org/jira/browse/CALCITE-4094">[CALCITE-4094]
- * RexSimplify should simplify more always true OR expressions</a>. */
+ * RexSimplify should simplify more always true OR expressions</a>,
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-7088">[CALCITE-7088]
+ * Multiple consecutive '%' in the string matched by LIKE should simplify to
a single '%'</a>.
+ * */
@Test void testSimplifyLike() {
final RexNode ref = input(tVarchar(true, 10), 0);
checkSimplify3(like(ref, literal("%")),
"OR(null, IS NOT NULL($0))", "IS NOT NULL($0)", "true");
+ checkSimplify3(like(ref, literal("%%%")),
+ "OR(null, IS NOT NULL($0))", "IS NOT NULL($0)", "true");
checkSimplify3(like(ref, literal("%"), literal("#")),
"OR(null, IS NOT NULL($0))", "IS NOT NULL($0)", "true");
checkSimplify3(
@@ -3952,9 +3957,25 @@ private void checkSarg(String message, Sarg sarg,
"OR(IS NOT NULL($0), LIKE($0, '% %'))", "true");
checkSimplify(or(isNull(ref), like(ref, literal("%"))),
"true");
+ checkSimplify(or(isNull(ref), like(ref, literal("%%"))),
+ "true");
checkSimplify(or(isNull(ref), like(ref, literal("%"), literal("#"))),
"true");
checkSimplifyUnchanged(like(ref, literal("%A")));
+ checkSimplify(like(ref, literal("%%A")), "LIKE($0, '%A')");
+ checkSimplify(like(ref, literal("%%%_A%%B%%")), "LIKE($0, '%_A%B%')");
+ checkSimplify(like(ref, literal("%%A%%%")), "LIKE($0, '%A%')");
+ checkSimplify(like(ref, literal("%%\\%%A\\%%%%%")), "LIKE($0,
'%\\%%A\\%%')");
+ checkSimplify(like(ref, literal("%%A"), literal("#")), "LIKE($0, '%A',
'#')");
+ checkSimplify(like(ref, literal("%%#%%A%%"), literal("#")),
+ "LIKE($0, '%#%%A%', '#')");
+ checkSimplify(like(ref, literal("%%#%#%A%%"), literal("#")),
+ "LIKE($0, '%#%#%A%', '#')");
+ checkSimplify(like(ref, literal("###%%#%#%A%%##%%%"), literal("#")),
+ "LIKE($0, '###%%#%#%A%##%', '#')");
+ checkSimplify(like(ref, literal("###%%#%#%A#%%%#%A%%###%%%"),
literal("#")),
+ "LIKE($0, '###%%#%#%A#%%#%A%###%%', '#')");
+ checkSimplifyUnchanged(like(ref, literal("A"), literal("#")));
checkSimplifyUnchanged(like(ref, literal("%A"), literal("#")));
// As above, but ref is NOT NULL