This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b10263b [SPARK-30724][SQL] Support 'LIKE ANY' and 'LIKE ALL' operators
b10263b is described below
commit b10263b8e5106409467e0115968bbaf0b9141cd1
Author: Yuming Wang <[email protected]>
AuthorDate: Fri Apr 24 22:20:32 2020 +0900
[SPARK-30724][SQL] Support 'LIKE ANY' and 'LIKE ALL' operators
### What changes were proposed in this pull request?
`LIKE ANY/SOME` and `LIKE ALL` operators are mostly used when we are
matching a text field with numbers of patterns. For example:
Teradata / Hive 3.0 / Snowflake:
```sql
--like any
select 'foo' LIKE ANY ('%foo%','%bar%');
--like all
select 'foo' LIKE ALL ('%foo%','%bar%');
```
PostgreSQL:
```sql
-- like any
select 'foo' LIKE ANY (array['%foo%','%bar%']);
-- like all
select 'foo' LIKE ALL (array['%foo%','%bar%']);
```
This PR add support these two operators.
More details:
https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/4~AyrPNmDN0Xk4SALLo6aQ
https://issues.apache.org/jira/browse/HIVE-15229
https://docs.snowflake.net/manuals/sql-reference/functions/like_any.html
### Why are the changes needed?
To smoothly migrate SQLs to Spark SQL.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
Unit test.
Closes #27477 from wangyum/SPARK-30724.
Authored-by: Yuming Wang <[email protected]>
Signed-off-by: Takeshi Yamamuro <[email protected]>
---
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 1 +
.../spark/sql/catalyst/parser/AstBuilder.scala | 33 +++--
.../catalyst/parser/ExpressionParserSuite.scala | 17 ++-
.../test/resources/sql-tests/inputs/like-all.sql | 39 ++++++
.../test/resources/sql-tests/inputs/like-any.sql | 39 ++++++
.../resources/sql-tests/results/like-all.sql.out | 140 ++++++++++++++++++++
.../resources/sql-tests/results/like-any.sql.out | 146 +++++++++++++++++++++
7 files changed, 405 insertions(+), 10 deletions(-)
diff --git
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index d78f584..e49bc07 100644
---
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -766,6 +766,7 @@ predicate
| NOT? kind=IN '(' expression (',' expression)* ')'
| NOT? kind=IN '(' query ')'
| NOT? kind=RLIKE pattern=valueExpression
+ | NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression
(',' expression)* ')')
| NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)?
| IS NOT? kind=NULL
| IS NOT? kind=(TRUE | FALSE | UNKNOWN)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ff362e7..e51b8f3 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1373,7 +1373,7 @@ class AstBuilder(conf: SQLConf) extends
SqlBaseBaseVisitor[AnyRef] with Logging
* Add a predicate to the given expression. Supported expressions are:
* - (NOT) BETWEEN
* - (NOT) IN
- * - (NOT) LIKE
+ * - (NOT) LIKE (ANY | SOME | ALL)
* - (NOT) RLIKE
* - IS (NOT) NULL.
* - IS (NOT) (TRUE | FALSE | UNKNOWN)
@@ -1391,6 +1391,14 @@ class AstBuilder(conf: SQLConf) extends
SqlBaseBaseVisitor[AnyRef] with Logging
case other => Seq(other)
}
+ def getLikeQuantifierExprs(expressions:
java.util.List[ExpressionContext]): Seq[Expression] = {
+ if (expressions.isEmpty) {
+ throw new ParseException("Expected something between '(' and ')'.",
ctx)
+ } else {
+ expressions.asScala.map(expression).map(p => invertIfNotDefined(new
Like(e, p)))
+ }
+ }
+
// Create the predicate.
ctx.kind.getType match {
case SqlBaseParser.BETWEEN =>
@@ -1403,14 +1411,21 @@ class AstBuilder(conf: SQLConf) extends
SqlBaseBaseVisitor[AnyRef] with Logging
case SqlBaseParser.IN =>
invertIfNotDefined(In(e, ctx.expression.asScala.map(expression)))
case SqlBaseParser.LIKE =>
- val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
- if (str.length != 1) {
- throw new ParseException("Invalid escape string." +
- "Escape string must contains only one character.", ctx)
- }
- str.charAt(0)
- }.getOrElse('\\')
- invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
+ Option(ctx.quantifier).map(_.getType) match {
+ case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) =>
+ getLikeQuantifierExprs(ctx.expression).reduceLeft(Or)
+ case Some(SqlBaseParser.ALL) =>
+ getLikeQuantifierExprs(ctx.expression).reduceLeft(And)
+ case _ =>
+ val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
+ if (str.length != 1) {
+ throw new ParseException("Invalid escape string." +
+ "Escape string must contain only one character.", ctx)
+ }
+ str.charAt(0)
+ }.getOrElse('\\')
+ invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
+ }
case SqlBaseParser.RLIKE =>
invertIfNotDefined(RLike(e, expression(ctx.pattern)))
case SqlBaseParser.NULL if ctx.NOT != null =>
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 522d49e..b583e1b 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -188,7 +188,7 @@ class ExpressionParserSuite extends AnalysisTest {
}
test("like escape expressions") {
- val message = "Escape string must contains only one character."
+ val message = "Escape string must contain only one character."
assertEqual("a like 'pattern%' escape '#'", 'a.like("pattern%", '#'))
assertEqual("a like 'pattern%' escape '\"'", 'a.like("pattern%", '\"'))
intercept("a like 'pattern%' escape '##'", message)
@@ -208,6 +208,21 @@ class ExpressionParserSuite extends AnalysisTest {
assertEqual("a rlike 'pattern\\t\\n'", 'a rlike "pattern\\t\\n", parser)
}
+ test("(NOT) LIKE (ANY | SOME | ALL) expressions") {
+ Seq("any", "some").foreach { quantifier =>
+ assertEqual(s"a like $quantifier ('foo%', 'b%')", ('a like "foo%") ||
('a like "b%"))
+ assertEqual(s"a not like $quantifier ('foo%', 'b%')", !('a like "foo%")
|| !('a like "b%"))
+ assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !(('a like
"foo%") || ('a like "b%")))
+ }
+ assertEqual("a like all ('foo%', 'b%')", ('a like "foo%") && ('a like
"b%"))
+ assertEqual("a not like all ('foo%', 'b%')", !('a like "foo%") && !('a
like "b%"))
+ assertEqual("not (a like all ('foo%', 'b%'))", !(('a like "foo%") && ('a
like "b%")))
+
+ Seq("any", "some", "all").foreach { quantifier =>
+ intercept(s"a like $quantifier()", "Expected something between '(' and
')'")
+ }
+ }
+
test("is null expressions") {
assertEqual("a is null", 'a.isNull)
assertEqual("a is not null", 'a.isNotNull)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql
b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql
new file mode 100644
index 0000000..a084dbe
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql
@@ -0,0 +1,39 @@
+CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
+ ('google', '%oo%'),
+ ('facebook', '%oo%'),
+ ('linkedin', '%in'))
+ as t1(company, pat);
+
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%');
+
+SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft',
'%yoo%');
+
+SELECT
+ company,
+ CASE
+ WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
+ ELSE 'N'
+ END AS is_available,
+ CASE
+ WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in',
'ms%') THEN 'Y'
+ ELSE 'N'
+ END AS mix
+FROM like_all_table ;
+
+-- Mix test with constant pattern and column value
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat);
+
+-- not like all test
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in',
'fa%');
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft',
'%yoo%');
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%');
+SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%');
+
+-- null test
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL);
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL);
+SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL);
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL);
+
+-- negative case
+SELECT company FROM like_any_table WHERE company LIKE ALL ();
diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-any.sql
b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql
new file mode 100644
index 0000000..5758a2a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql
@@ -0,0 +1,39 @@
+CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES
+ ('google', '%oo%'),
+ ('facebook', '%oo%'),
+ ('linkedin', '%in'))
+ as t1(company, pat);
+
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in',
'fa%');
+
+SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft',
'%yoo%');
+
+select
+ company,
+ CASE
+ WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y'
+ ELSE 'N'
+ END AS is_available,
+ CASE
+ WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in',
'ms%') THEN 'Y'
+ ELSE 'N'
+ END AS mix
+FROM like_any_table;
+
+-- Mix test with constant pattern and column value
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat);
+
+-- not like any test
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in',
'fa%');
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft',
'%yoo%');
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', 'fa%');
+SELECT company FROM like_any_table WHERE NOT company LIKE ANY ('%oo%', 'fa%');
+
+-- null test
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL);
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL);
+SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL);
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL);
+
+-- negative case
+SELECT company FROM like_any_table WHERE company LIKE ANY ();
diff --git a/sql/core/src/test/resources/sql-tests/results/like-all.sql.out
b/sql/core/src/test/resources/sql-tests/results/like-all.sql.out
new file mode 100644
index 0000000..b4bb69c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/like-all.sql.out
@@ -0,0 +1,140 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 14
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
+ ('google', '%oo%'),
+ ('facebook', '%oo%'),
+ ('linkedin', '%in'))
+ as t1(company, pat)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%')
+-- !query schema
+struct<company:string>
+-- !query output
+google
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft',
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT
+ company,
+ CASE
+ WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
+ ELSE 'N'
+ END AS is_available,
+ CASE
+ WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in',
'ms%') THEN 'Y'
+ ELSE 'N'
+ END AS mix
+FROM like_all_table
+-- !query schema
+struct<company:string,is_available:string,mix:string>
+-- !query output
+facebook N N
+google Y Y
+linkedin N N
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat)
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in',
'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft',
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+linkedin
+
+
+-- !query
+SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ALL ()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Expected something between '(' and ')'.(line 1, pos 49)
+
+== SQL ==
+SELECT company FROM like_any_table WHERE company LIKE ALL ()
+-------------------------------------------------^^^
diff --git a/sql/core/src/test/resources/sql-tests/results/like-any.sql.out
b/sql/core/src/test/resources/sql-tests/results/like-any.sql.out
new file mode 100644
index 0000000..e46ac6d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/like-any.sql.out
@@ -0,0 +1,146 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 14
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES
+ ('google', '%oo%'),
+ ('facebook', '%oo%'),
+ ('linkedin', '%in'))
+ as t1(company, pat)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in',
'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft',
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+select
+ company,
+ CASE
+ WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y'
+ ELSE 'N'
+ END AS is_available,
+ CASE
+ WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in',
'ms%') THEN 'Y'
+ ELSE 'N'
+ END AS mix
+FROM like_any_table
+-- !query schema
+struct<company:string,is_available:string,mix:string>
+-- !query output
+facebook Y Y
+google Y Y
+linkedin Y Y
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat)
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in',
'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft',
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE NOT company LIKE ANY ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Expected something between '(' and ')'.(line 1, pos 49)
+
+== SQL ==
+SELECT company FROM like_any_table WHERE company LIKE ANY ()
+-------------------------------------------------^^^
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]