This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b10263b  [SPARK-30724][SQL] Support 'LIKE ANY' and 'LIKE ALL' operators
b10263b is described below

commit b10263b8e5106409467e0115968bbaf0b9141cd1
Author: Yuming Wang <yumw...@ebay.com>
AuthorDate: Fri Apr 24 22:20:32 2020 +0900

    [SPARK-30724][SQL] Support 'LIKE ANY' and 'LIKE ALL' operators
    
    ### What changes were proposed in this pull request?
    
    `LIKE ANY/SOME` and `LIKE ALL` operators are mostly used when we are 
matching a text field with numbers of patterns. For example:
    
    Teradata / Hive 3.0 / Snowflake:
    ```sql
    --like any
    select 'foo' LIKE ANY ('%foo%','%bar%');
    
    --like all
    select 'foo' LIKE ALL ('%foo%','%bar%');
    ```
    PostgreSQL:
    ```sql
    -- like any
    select 'foo' LIKE ANY (array['%foo%','%bar%']);
    
    -- like all
    select 'foo' LIKE ALL (array['%foo%','%bar%']);
    ```
    
    This PR add support these two operators.
    
    More details:
    
https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/4~AyrPNmDN0Xk4SALLo6aQ
    https://issues.apache.org/jira/browse/HIVE-15229
    https://docs.snowflake.net/manuals/sql-reference/functions/like_any.html
    
    ### Why are the changes needed?
    
    To smoothly migrate SQLs to Spark SQL.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    Unit test.
    
    Closes #27477 from wangyum/SPARK-30724.
    
    Authored-by: Yuming Wang <yumw...@ebay.com>
    Signed-off-by: Takeshi Yamamuro <yamam...@apache.org>
---
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    |   1 +
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  33 +++--
 .../catalyst/parser/ExpressionParserSuite.scala    |  17 ++-
 .../test/resources/sql-tests/inputs/like-all.sql   |  39 ++++++
 .../test/resources/sql-tests/inputs/like-any.sql   |  39 ++++++
 .../resources/sql-tests/results/like-all.sql.out   | 140 ++++++++++++++++++++
 .../resources/sql-tests/results/like-any.sql.out   | 146 +++++++++++++++++++++
 7 files changed, 405 insertions(+), 10 deletions(-)

diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index d78f584..e49bc07 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -766,6 +766,7 @@ predicate
     | NOT? kind=IN '(' expression (',' expression)* ')'
     | NOT? kind=IN '(' query ')'
     | NOT? kind=RLIKE pattern=valueExpression
+    | NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression 
(',' expression)* ')')
     | NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)?
     | IS NOT? kind=NULL
     | IS NOT? kind=(TRUE | FALSE | UNKNOWN)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ff362e7..e51b8f3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1373,7 +1373,7 @@ class AstBuilder(conf: SQLConf) extends 
SqlBaseBaseVisitor[AnyRef] with Logging
    * Add a predicate to the given expression. Supported expressions are:
    * - (NOT) BETWEEN
    * - (NOT) IN
-   * - (NOT) LIKE
+   * - (NOT) LIKE (ANY | SOME | ALL)
    * - (NOT) RLIKE
    * - IS (NOT) NULL.
    * - IS (NOT) (TRUE | FALSE | UNKNOWN)
@@ -1391,6 +1391,14 @@ class AstBuilder(conf: SQLConf) extends 
SqlBaseBaseVisitor[AnyRef] with Logging
       case other => Seq(other)
     }
 
+    def getLikeQuantifierExprs(expressions: 
java.util.List[ExpressionContext]): Seq[Expression] = {
+      if (expressions.isEmpty) {
+        throw new ParseException("Expected something between '(' and ')'.", 
ctx)
+      } else {
+        expressions.asScala.map(expression).map(p => invertIfNotDefined(new 
Like(e, p)))
+      }
+    }
+
     // Create the predicate.
     ctx.kind.getType match {
       case SqlBaseParser.BETWEEN =>
@@ -1403,14 +1411,21 @@ class AstBuilder(conf: SQLConf) extends 
SqlBaseBaseVisitor[AnyRef] with Logging
       case SqlBaseParser.IN =>
         invertIfNotDefined(In(e, ctx.expression.asScala.map(expression)))
       case SqlBaseParser.LIKE =>
-        val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
-          if (str.length != 1) {
-            throw new ParseException("Invalid escape string." +
-              "Escape string must contains only one character.", ctx)
-          }
-          str.charAt(0)
-        }.getOrElse('\\')
-        invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
+        Option(ctx.quantifier).map(_.getType) match {
+          case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) =>
+            getLikeQuantifierExprs(ctx.expression).reduceLeft(Or)
+          case Some(SqlBaseParser.ALL) =>
+            getLikeQuantifierExprs(ctx.expression).reduceLeft(And)
+          case _ =>
+            val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
+              if (str.length != 1) {
+                throw new ParseException("Invalid escape string." +
+                  "Escape string must contain only one character.", ctx)
+              }
+              str.charAt(0)
+            }.getOrElse('\\')
+            invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
+        }
       case SqlBaseParser.RLIKE =>
         invertIfNotDefined(RLike(e, expression(ctx.pattern)))
       case SqlBaseParser.NULL if ctx.NOT != null =>
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 522d49e..b583e1b 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -188,7 +188,7 @@ class ExpressionParserSuite extends AnalysisTest {
   }
 
   test("like escape expressions") {
-    val message = "Escape string must contains only one character."
+    val message = "Escape string must contain only one character."
     assertEqual("a like 'pattern%' escape '#'", 'a.like("pattern%", '#'))
     assertEqual("a like 'pattern%' escape '\"'", 'a.like("pattern%", '\"'))
     intercept("a like 'pattern%' escape '##'", message)
@@ -208,6 +208,21 @@ class ExpressionParserSuite extends AnalysisTest {
     assertEqual("a rlike 'pattern\\t\\n'", 'a rlike "pattern\\t\\n", parser)
   }
 
+  test("(NOT) LIKE (ANY | SOME | ALL) expressions") {
+    Seq("any", "some").foreach { quantifier =>
+      assertEqual(s"a like $quantifier ('foo%', 'b%')", ('a like "foo%") || 
('a like "b%"))
+      assertEqual(s"a not like $quantifier ('foo%', 'b%')", !('a like "foo%") 
|| !('a like "b%"))
+      assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !(('a like 
"foo%") || ('a like "b%")))
+    }
+    assertEqual("a like all ('foo%', 'b%')", ('a like "foo%") && ('a like 
"b%"))
+    assertEqual("a not like all ('foo%', 'b%')", !('a like "foo%") && !('a 
like "b%"))
+    assertEqual("not (a like all ('foo%', 'b%'))", !(('a like "foo%") && ('a 
like "b%")))
+
+    Seq("any", "some", "all").foreach { quantifier =>
+      intercept(s"a like $quantifier()", "Expected something between '(' and 
')'")
+    }
+  }
+
   test("is null expressions") {
     assertEqual("a is null", 'a.isNull)
     assertEqual("a is not null", 'a.isNotNull)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql 
b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql
new file mode 100644
index 0000000..a084dbe
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql
@@ -0,0 +1,39 @@
+CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
+  ('google', '%oo%'),
+  ('facebook', '%oo%'),
+  ('linkedin', '%in'))
+  as t1(company, pat);
+
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%');
+
+SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', 
'%yoo%');
+
+SELECT 
+    company,
+    CASE
+        WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
+        ELSE 'N'
+    END AS is_available,
+    CASE
+        WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 
'ms%') THEN 'Y'
+        ELSE 'N'
+    END AS mix
+FROM like_all_table ;
+
+-- Mix test with constant pattern and column value
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat);
+
+-- not like all test
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 
'fa%');
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', 
'%yoo%');
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%');
+SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%');
+
+-- null test
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL);
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL);
+SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL);
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL);
+
+-- negative case
+SELECT company FROM like_any_table WHERE company LIKE ALL ();
diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-any.sql 
b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql
new file mode 100644
index 0000000..5758a2a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql
@@ -0,0 +1,39 @@
+CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES
+  ('google', '%oo%'),
+  ('facebook', '%oo%'),
+  ('linkedin', '%in'))
+  as t1(company, pat);
+
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in', 
'fa%');
+
+SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft', 
'%yoo%');
+
+select
+    company,
+    CASE
+        WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y'
+        ELSE 'N'
+    END AS is_available,
+    CASE
+        WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in', 
'ms%') THEN 'Y'
+        ELSE 'N'
+    END AS mix
+FROM like_any_table;
+
+-- Mix test with constant pattern and column value
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat);
+
+-- not like any test
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in', 
'fa%');
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft', 
'%yoo%');
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', 'fa%');
+SELECT company FROM like_any_table WHERE NOT company LIKE ANY ('%oo%', 'fa%');
+
+-- null test
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL);
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL);
+SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL);
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL);
+
+-- negative case
+SELECT company FROM like_any_table WHERE company LIKE ANY ();
diff --git a/sql/core/src/test/resources/sql-tests/results/like-all.sql.out 
b/sql/core/src/test/resources/sql-tests/results/like-all.sql.out
new file mode 100644
index 0000000..b4bb69c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/like-all.sql.out
@@ -0,0 +1,140 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 14
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
+  ('google', '%oo%'),
+  ('facebook', '%oo%'),
+  ('linkedin', '%in'))
+  as t1(company, pat)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%')
+-- !query schema
+struct<company:string>
+-- !query output
+google
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', 
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT 
+    company,
+    CASE
+        WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
+        ELSE 'N'
+    END AS is_available,
+    CASE
+        WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 
'ms%') THEN 'Y'
+        ELSE 'N'
+    END AS mix
+FROM like_all_table
+-- !query schema
+struct<company:string,is_available:string,mix:string>
+-- !query output
+facebook       N       N
+google Y       Y
+linkedin       N       N
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat)
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 
'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', 
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+linkedin
+
+
+-- !query
+SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ALL ()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Expected something between '(' and ')'.(line 1, pos 49)
+
+== SQL ==
+SELECT company FROM like_any_table WHERE company LIKE ALL ()
+-------------------------------------------------^^^
diff --git a/sql/core/src/test/resources/sql-tests/results/like-any.sql.out 
b/sql/core/src/test/resources/sql-tests/results/like-any.sql.out
new file mode 100644
index 0000000..e46ac6d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/like-any.sql.out
@@ -0,0 +1,146 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 14
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES
+  ('google', '%oo%'),
+  ('facebook', '%oo%'),
+  ('linkedin', '%in'))
+  as t1(company, pat)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in', 
'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft', 
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+select
+    company,
+    CASE
+        WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y'
+        ELSE 'N'
+    END AS is_available,
+    CASE
+        WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in', 
'ms%') THEN 'Y'
+        ELSE 'N'
+    END AS mix
+FROM like_any_table
+-- !query schema
+struct<company:string,is_available:string,mix:string>
+-- !query output
+facebook       Y       Y
+google Y       Y
+linkedin       Y       Y
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat)
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in', 
'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft', 
'%yoo%')
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+google
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE NOT company LIKE ANY ('%oo%', 'fa%')
+-- !query schema
+struct<company:string>
+-- !query output
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+facebook
+google
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+linkedin
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL)
+-- !query schema
+struct<company:string>
+-- !query output
+
+
+
+-- !query
+SELECT company FROM like_any_table WHERE company LIKE ANY ()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Expected something between '(' and ')'.(line 1, pos 49)
+
+== SQL ==
+SELECT company FROM like_any_table WHERE company LIKE ANY ()
+-------------------------------------------------^^^


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to