This is an automated email from the ASF dual-hosted git repository.
jhyde pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new d815dc1 [CALCITE-3574] Add RLIKE operator (similar to LIKE, but uses
Java regex, and Hive- and Spark-specific) (Shradha Ambekar)
d815dc1 is described below
commit d815dc1209c3cc0728941e9fc81fce7d9e44c79b
Author: Shradha <[email protected]>
AuthorDate: Thu Feb 4 00:08:40 2021 -0800
[CALCITE-3574] Add RLIKE operator (similar to LIKE, but uses Java regex,
and Hive- and Spark-specific) (Shradha Ambekar)
Close apache/calcite#2341
---
babel/src/main/codegen/config.fmpp | 1 +
core/src/main/codegen/default_config.fmpp | 1 +
core/src/main/codegen/templates/Parser.jj | 5 +++
.../calcite/adapter/enumerable/RexImpTable.java | 19 +++++------
.../org/apache/calcite/runtime/SqlFunctions.java | 5 +++
.../main/java/org/apache/calcite/sql/SqlKind.java | 3 ++
.../calcite/sql/fun/SqlLibraryOperators.java | 10 ++++++
.../apache/calcite/sql/fun/SqlLikeOperator.java | 4 +++
.../calcite/sql2rel/StandardConvertletTable.java | 7 ++++
.../org/apache/calcite/util/BuiltInMethod.java | 1 +
.../calcite/rel/rel2sql/RelToSqlConverterTest.java | 23 ++++++++++++++
.../apache/calcite/sql/parser/SqlParserTest.java | 19 ++++++++++-
.../apache/calcite/sql/test/SqlAdvisorTest.java | 1 +
.../calcite/sql/test/SqlOperatorBaseTest.java | 37 ++++++++++++++++++++++
.../org/apache/calcite/test/SqlValidatorTest.java | 19 +++++++++++
site/_docs/reference.md | 3 ++
16 files changed, 148 insertions(+), 10 deletions(-)
diff --git a/babel/src/main/codegen/config.fmpp
b/babel/src/main/codegen/config.fmpp
index 20cb864..772a394 100644
--- a/babel/src/main/codegen/config.fmpp
+++ b/babel/src/main/codegen/config.fmpp
@@ -402,6 +402,7 @@ data: {
"RETURNS"
"REVOKE"
# "RIGHT"
+ "RLIKE"
"ROLE"
"ROLLBACK"
# "ROLLUP"
diff --git a/core/src/main/codegen/default_config.fmpp
b/core/src/main/codegen/default_config.fmpp
index d39bf7b..8e9cdc0 100644
--- a/core/src/main/codegen/default_config.fmpp
+++ b/core/src/main/codegen/default_config.fmpp
@@ -225,6 +225,7 @@ parser: {
"RETURNED_OCTET_LENGTH"
"RETURNED_SQLSTATE"
"RETURNING"
+ "RLIKE"
"ROLE"
"ROUTINE"
"ROUTINE_CATALOG"
diff --git a/core/src/main/codegen/templates/Parser.jj
b/core/src/main/codegen/templates/Parser.jj
index 03abe02..bab7a64 100644
--- a/core/src/main/codegen/templates/Parser.jj
+++ b/core/src/main/codegen/templates/Parser.jj
@@ -3591,6 +3591,8 @@ List<Object> Expression2(ExprContext exprContext) :
|
<ILIKE> { op = SqlLibraryOperators.NOT_ILIKE; }
|
+ <RLIKE> { op = SqlLibraryOperators.NOT_RLIKE; }
+ |
<SIMILAR> <TO> { op =
SqlStdOperatorTable.NOT_SIMILAR_TO; }
)
|
@@ -3598,6 +3600,8 @@ List<Object> Expression2(ExprContext exprContext) :
|
<ILIKE> { op = SqlLibraryOperators.ILIKE; }
|
+ <RLIKE> { op = SqlLibraryOperators.RLIKE; }
+ |
<SIMILAR> <TO> { op = SqlStdOperatorTable.SIMILAR_TO; }
)
<#if
(parser.includePosixOperators!default.parser.includePosixOperators)>
@@ -7705,6 +7709,7 @@ SqlPostfixOperator PostfixRowOperator() :
| < RETURNS: "RETURNS" >
| < REVOKE: "REVOKE" >
| < RIGHT: "RIGHT" >
+| < RLIKE: "RLIKE" >
| < ROLE: "ROLE" >
| < ROLLBACK: "ROLLBACK" >
| < ROLLUP: "ROLLUP" >
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 865f942..8f9af95 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -138,6 +138,7 @@ import static
org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REPEAT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REVERSE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.RIGHT;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.RLIKE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SHA1;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SINH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX;
@@ -473,18 +474,18 @@ public class RexImpTable {
map.put(IS_NOT_FALSE, new IsNotFalseImplementor());
// LIKE, ILIKE and SIMILAR
- final MethodImplementor likeImplementor =
+ map.put(LIKE,
new MethodImplementor(BuiltInMethod.LIKE.method, NullPolicy.STRICT,
- false);
- map.put(LIKE, likeImplementor);
- final MethodImplementor ilikeImplementor =
+ false));
+ map.put(ILIKE,
new MethodImplementor(BuiltInMethod.ILIKE.method, NullPolicy.STRICT,
- false);
- map.put(ILIKE, ilikeImplementor);
- final MethodImplementor similarImplementor =
+ false));
+ map.put(RLIKE,
+ new MethodImplementor(BuiltInMethod.RLIKE.method, NullPolicy.STRICT,
+ false));
+ map.put(SIMILAR_TO,
new MethodImplementor(BuiltInMethod.SIMILAR.method, NullPolicy.STRICT,
- false);
- map.put(SIMILAR_TO, similarImplementor);
+ false));
// POSIX REGEX
final MethodImplementor posixRegexImplementorCaseSensitive =
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index ec3feda..9009feb 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -631,6 +631,11 @@ public class SqlFunctions {
return Pattern.compile(regex,
Pattern.CASE_INSENSITIVE).matcher(s).matches();
}
+ /** SQL {@code RLIKE} function. */
+ public static boolean rlike(String s, String pattern) {
+ return Pattern.compile(pattern).matcher(s).find();
+ }
+
/** SQL {@code SIMILAR} function. */
public static boolean similar(String s, String pattern) {
final String regex = Like.sqlToRegexSimilar(pattern, null);
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
index 73681cb..77feb0e 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
@@ -355,6 +355,9 @@ public enum SqlKind {
/** {@code LIKE} operator. */
LIKE,
+ /** {@code RLIKE} operator. */
+ RLIKE,
+
/** {@code SIMILAR} operator. */
SIMILAR,
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 98e4b22..7879564 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -489,6 +489,16 @@ public abstract class SqlLibraryOperators {
public static final SqlSpecialOperator NOT_ILIKE =
new SqlLikeOperator("NOT ILIKE", SqlKind.LIKE, true, false);
+ /** The regex variant of the LIKE operator. */
+ @LibraryOperator(libraries = {SPARK, HIVE})
+ public static final SqlSpecialOperator RLIKE =
+ new SqlLikeOperator("RLIKE", SqlKind.RLIKE, false, true);
+
+ /** The regex variant of the NOT LIKE operator. */
+ @LibraryOperator(libraries = {SPARK, HIVE})
+ public static final SqlSpecialOperator NOT_RLIKE =
+ new SqlLikeOperator("NOT RLIKE", SqlKind.RLIKE, true, true);
+
/** The "CONCAT(arg, ...)" function that concatenates strings.
* For example, "CONCAT('a', 'bc', 'd')" returns "abcd". */
@LibraryOperator(libraries = {MYSQL, POSTGRESQL})
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLikeOperator.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLikeOperator.java
index ce5cf82..b577afd 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLikeOperator.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLikeOperator.java
@@ -127,6 +127,10 @@ public class SqlLikeOperator extends SqlSpecialOperator {
return negated
? SqlStdOperatorTable.NOT_SIMILAR_TO
: SqlStdOperatorTable.SIMILAR_TO;
+ case RLIKE:
+ return negated
+ ? SqlLibraryOperators.NOT_RLIKE
+ : SqlLibraryOperators.RLIKE;
case LIKE:
if (caseSensitive) {
return negated
diff --git
a/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java
b/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java
index 6d062bd..dc8d53a 100644
--- a/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java
+++ b/core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java
@@ -179,6 +179,13 @@ public class StandardConvertletTable extends
ReflectiveConvertletTable {
SqlLibraryOperators.ILIKE.createCall(SqlParserPos.ZERO,
call.getOperandList()))));
+ // Expand "x NOT RLIKE y" into "NOT (x RLIKE y)"
+ registerOp(SqlLibraryOperators.NOT_RLIKE,
+ (cx, call) -> cx.convertExpression(
+ SqlStdOperatorTable.NOT.createCall(SqlParserPos.ZERO,
+ SqlLibraryOperators.RLIKE.createCall(SqlParserPos.ZERO,
+ call.getOperandList()))));
+
// Expand "x NOT SIMILAR y" into "NOT (x SIMILAR y)"
registerOp(SqlStdOperatorTable.NOT_SIMILAR_TO,
(cx, call) -> cx.convertExpression(
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index e7bf559..60b251c 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -423,6 +423,7 @@ public enum BuiltInMethod {
RTRIM(SqlFunctions.class, "rtrim", String.class),
LIKE(SqlFunctions.class, "like", String.class, String.class),
ILIKE(SqlFunctions.class, "ilike", String.class, String.class),
+ RLIKE(SqlFunctions.class, "rlike", String.class, String.class),
SIMILAR(SqlFunctions.class, "similar", String.class, String.class),
POSIX_REGEX(SqlFunctions.class, "posixRegex", String.class, String.class,
boolean.class),
REGEXP_REPLACE3(SqlFunctions.class, "regexpReplace", String.class,
diff --git
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
index 402c1c3..bb7447d 100644
---
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
+++
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
@@ -3665,6 +3665,29 @@ class RelToSqlConverterTest {
sql(query).withLibrary(SqlLibrary.POSTGRESQL).ok(expected);
}
+ @Test void testRlike() {
+ String query = "select \"product_name\" from \"product\" a "
+ + "where \"product_name\" rlike '.+@.+\\\\..+'";
+ String expectedSpark = "SELECT \"product_name\"\n"
+ + "FROM \"foodmart\".\"product\"\n"
+ + "WHERE \"product_name\" RLIKE '.+@.+\\\\..+'";
+ String expectedHive = "SELECT \"product_name\"\n"
+ + "FROM \"foodmart\".\"product\"\n"
+ + "WHERE \"product_name\" RLIKE '.+@.+\\\\..+'";
+ sql(query)
+ .withLibrary(SqlLibrary.SPARK).ok(expectedSpark)
+ .withLibrary(SqlLibrary.HIVE).ok(expectedHive);
+ }
+
+ @Test void testNotRlike() {
+ String query = "select \"product_name\" from \"product\" a "
+ + "where \"product_name\" not rlike '.+@.+\\\\..+'";
+ String expected = "SELECT \"product_name\"\n"
+ + "FROM \"foodmart\".\"product\"\n"
+ + "WHERE \"product_name\" NOT RLIKE '.+@.+\\\\..+'";
+ sql(query).withLibrary(SqlLibrary.SPARK).ok(expected);
+ }
+
@Test void testNotIlike() {
String query = "select \"product_name\" from \"product\" a "
+ "where \"product_name\" not ilike 'abC'";
diff --git
a/core/src/test/java/org/apache/calcite/sql/parser/SqlParserTest.java
b/core/src/test/java/org/apache/calcite/sql/parser/SqlParserTest.java
index 1662f5c..1737908 100644
--- a/core/src/test/java/org/apache/calcite/sql/parser/SqlParserTest.java
+++ b/core/src/test/java/org/apache/calcite/sql/parser/SqlParserTest.java
@@ -286,7 +286,7 @@ public class SqlParserTest {
"HOURS", "2011",
"IDENTITY", "92", "99", "2003", "2011", "2014", "c",
"IF", "92", "99", "2003",
- "ILIKE",
+ "ILIKE", // PostgreSQL
"IMMEDIATE", "92", "99", "2003",
"IMMEDIATELY",
"IMPORT", "c",
@@ -452,6 +452,7 @@ public class SqlParserTest {
"RETURNS", "92", "99", "2003", "2011", "2014", "c",
"REVOKE", "92", "99", "2003", "2011", "2014", "c",
"RIGHT", "92", "99", "2003", "2011", "2014", "c",
+ "RLIKE", // Hive and Spark
"ROLE", "99",
"ROLLBACK", "92", "99", "2003", "2011", "2014", "c",
"ROLLUP", "99", "2003", "2011", "2014", "c",
@@ -1824,6 +1825,22 @@ public class SqlParserTest {
sql(sql1).ok(expected1);
}
+ @Test void testRlike() {
+ // The RLIKE operator is valid when the HIVE or SPARK function library is
+ // enabled ('fun=spark' or 'fun=hive'). But the parser can always parse it.
+ final String expected = "SELECT `COLA`\n"
+ + "FROM `T`\n"
+ + "WHERE (MAX(`EMAIL`) RLIKE '.+@.+\\\\..+')";
+ final String sql = "select cola from t where max(email) rlike
'.+@.+\\\\..+'";
+ sql(sql).ok(expected);
+
+ final String expected1 = "SELECT `COLA`\n"
+ + "FROM `T`\n"
+ + "WHERE (MAX(`EMAIL`) NOT RLIKE '.+@.+\\\\..+')";
+ final String sql1 = "select cola from t where max(email) not rlike
'.+@.+\\\\..+'";
+ sql(sql1).ok(expected1);
+ }
+
@Test void testArithmeticOperators() {
expr("1-2+3*4/5/6-7")
.ok("(((1 - 2) + (((3 * 4) / 5) / 6)) - 7)");
diff --git a/core/src/test/java/org/apache/calcite/sql/test/SqlAdvisorTest.java
b/core/src/test/java/org/apache/calcite/sql/test/SqlAdvisorTest.java
index 50583f7..c278aeb 100644
--- a/core/src/test/java/org/apache/calcite/sql/test/SqlAdvisorTest.java
+++ b/core/src/test/java/org/apache/calcite/sql/test/SqlAdvisorTest.java
@@ -299,6 +299,7 @@ class SqlAdvisorTest extends SqlValidatorTestCase {
"KEYWORD(EQUALS)",
"KEYWORD(FORMAT)",
"KEYWORD(ILIKE)",
+ "KEYWORD(RLIKE)",
"KEYWORD(IMMEDIATELY)",
"KEYWORD(IN)",
"KEYWORD(IS)",
diff --git
a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
index d504449..25c6584 100644
--- a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
+++ b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
@@ -3852,6 +3852,43 @@ public abstract class SqlOperatorBaseTest {
tester.checkBoolean("'ab\ncd\nef' not like '%cde%'", Boolean.TRUE);
}
+ @Test void testRlikeOperator() {
+ checkRlike(SqlLibrary.SPARK);
+ checkRlike(SqlLibrary.HIVE);
+ checkRlikeFails(SqlLibrary.MYSQL);
+ checkRlikeFails(SqlLibrary.ORACLE);
+ }
+
+ void checkRlike(SqlLibrary library) {
+ final SqlTester tester1 = libraryTester(library);
+ tester1.setFor(SqlLibraryOperators.RLIKE, VM_EXPAND);
+ tester1.checkBoolean("'[email protected]' rlike '.+@*\\.com'",
Boolean.TRUE);
+ tester1.checkBoolean("'[email protected]' rlike '.com$'", Boolean.TRUE);
+ tester1.checkBoolean("'acbd' rlike '^ac+'", Boolean.TRUE);
+ tester1.checkBoolean("'acb' rlike 'acb|efg'", Boolean.TRUE);
+ tester1.checkBoolean("'acb|efg' rlike 'acb\\|efg'", Boolean.TRUE);
+ tester1.checkBoolean("'Acbd' rlike '^ac+'", Boolean.FALSE);
+ tester1.checkBoolean("'[email protected]' rlike 'Merrisa_'",
Boolean.FALSE);
+ tester1.checkBoolean("'abcdef' rlike '%cd%'", Boolean.FALSE);
+
+ tester1.setFor(SqlLibraryOperators.NOT_RLIKE, VM_EXPAND);
+ tester1.checkBoolean("'Merrisagmail' not rlike '.+@*\\.com'",
Boolean.TRUE);
+ tester1.checkBoolean("'acbd' not rlike '^ac+'", Boolean.FALSE);
+ tester1.checkBoolean("'acb|efg' not rlike 'acb\\|efg'", Boolean.FALSE);
+ tester1.checkBoolean("'[email protected]' not rlike 'Merrisa_'",
Boolean.TRUE);
+ }
+
+ void checkRlikeFails(SqlLibrary library) {
+ final SqlTester tester1 = libraryTester(library);
+ tester1.setFor(SqlLibraryOperators.RLIKE, VM_EXPAND);
+ final String noRlike = "(?s).*No match found for function signature RLIKE";
+ tester1.checkFails("^'[email protected]' rlike '.+@*\\.com'^", noRlike,
false);
+ tester1.checkFails("^'acb' rlike 'acb|efg'^", noRlike, false);
+ final String noNotRlike = "(?s).*No match found for function signature NOT
RLIKE";
+ tester1.checkFails("^'abcdef' not rlike '%cd%'^", noNotRlike, false);
+ tester1.checkFails("^'[email protected]' not rlike 'Merrisa_'^",
noNotRlike, false);
+ }
+
@Test void testLikeEscape() {
tester.setFor(SqlStdOperatorTable.LIKE);
tester.checkBoolean("'a_c' like 'a#_c' escape '#'", Boolean.TRUE);
diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
index a8ce5a2..679464b 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
@@ -1018,6 +1018,25 @@ public class SqlValidatorTest extends
SqlValidatorTestCase {
.fails("No match found for function signature ILIKE");
}
+ @Test void testRlike() {
+ // RLIKE is supported for SPARK
+ final Sql s = sql("?")
+ .withOperatorTable(operatorTableFor(SqlLibrary.SPARK));
+ s.expr("'first_name' rlike '%Ted%'").columnType("BOOLEAN NOT NULL");
+ s.expr("'first_name' rlike '^M+'").columnType("BOOLEAN NOT NULL");
+
+ // RLIKE is only supported for Spark and Hive
+ String noMatch = "(?s).*No match found for function signature RLIKE";
+ expr("^'b' rlike '.+@.+\\\\..+'^")
+ .fails(noMatch)
+ .withOperatorTable(operatorTableFor(SqlLibrary.POSTGRESQL))
+ .fails(noMatch)
+ .withOperatorTable(operatorTableFor(SqlLibrary.SPARK))
+ .columnType("BOOLEAN NOT NULL")
+ .withOperatorTable(operatorTableFor(SqlLibrary.HIVE))
+ .columnType("BOOLEAN NOT NULL");
+ }
+
public void _testLikeAndSimilarFails() {
expr("'a' like _UTF16'b' escape 'c'")
.fails("(?s).*Operands _ISO-8859-1.a. COLLATE
ISO-8859-1.en_US.primary,"
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 79b23bb..bd39d16 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -859,6 +859,7 @@ RETURNING,
**RETURNS**,
**REVOKE**,
**RIGHT**,
+RLIKE,
ROLE,
**ROLLBACK**,
**ROLLUP**,
@@ -2533,6 +2534,8 @@ semantics.
| m p | REPEAT(string, integer) | Returns a string
consisting of *string* repeated of *integer* times; returns an empty string if
*integer* is less than 1
| m | REVERSE(string) | Returns *string* with
the order of the characters reversed
| m p | RIGHT(string, length) | Returns the rightmost
*length* characters from the *string*
+| h s | string1 RLIKE string2 | Whether *string1*
matches regex pattern *string2* (similar to `LIKE`, but uses Java regex)
+| h s | string1 NOT RLIKE string2 | Whether *string1* does
not match regex pattern *string2* (similar to `NOT LIKE`, but uses Java regex)
| o | RTRIM(string) | Returns *string* with
all blanks removed from the end
| m p | SHA1(string) | Calculates a SHA-1 hash
value of *string* and returns it as a hex string
| o | SINH(numeric) | Returns the hyperbolic
sine of *numeric*