maropu commented on a change in pull request #23531: [SPARK-24497][SQL] Support 
recursive SQL query
URL: https://github.com/apache/spark/pull/23531#discussion_r307090004
 
 

 ##########
 File path: sql/core/src/test/resources/sql-tests/inputs/cte.sql
 ##########
 @@ -155,6 +155,419 @@ SELECT (
   )
 );
 
+-- fails due to recursion isn't allowed with RECURSIVE keyword
+WITH r(level) AS (
+  VALUES (0)
+  UNION ALL
+  SELECT level + 1 FROM r WHERE level < 10
+)
+SELECT * FROM r;
+
+-- very basic recursion
+WITH RECURSIVE r(level) AS (
+  VALUES (0)
+  UNION ALL
+  SELECT level + 1 FROM r WHERE level < 10
+)
+SELECT * FROM r;
+
+-- unlimited recursion fails at spark.sql.cte.recursion.level.limits level
+WITH RECURSIVE r(level) AS (
+  VALUES (0)
+  UNION ALL
+  SELECT level + 1 FROM r
+)
+SELECT * FROM r;
+
+-- terminate recursion with LIMIT
+WITH RECURSIVE r(level) AS (
+  VALUES (0)
+  UNION ALL
+  SELECT level + 1 FROM r
+)
+SELECT * FROM r LIMIT 10;
+
+-- terminate projected recursion with LIMIT
+WITH RECURSIVE r(level) AS (
+  VALUES (0)
+  UNION ALL
+  SELECT level + 1 FROM r
+)
+SELECT level, level FROM r LIMIT 10;
+
+-- fails because using LIMIT to terminate recursion only works where Limit can 
be pushed through
+-- recursion
+WITH RECURSIVE r(level) AS (
+  VALUES (0)
+  UNION ALL
+  SELECT level + 1 FROM r
+)
+SELECT level, level FROM r ORDER BY level LIMIT 10;
+
+-- using string column in recursion
+WITH RECURSIVE r(c) AS (
+  SELECT 'a'
+  UNION ALL
+  SELECT c || ' b' FROM r WHERE LENGTH(c) < 10
+)
+SELECT * FROM r;
+
+-- recursion works regardless the order of anchor and recursive terms
+WITH RECURSIVE r(level) AS (
+  SELECT level + 1 FROM r WHERE level < 10
+  UNION ALL
+  VALUES (0)
+)
+SELECT * FROM r;
+
+-- multiple anchor terms are supported
+WITH RECURSIVE r(level, data) AS (
+  VALUES (0, 'A')
+  UNION ALL
+  VALUES (0, 'B')
+  UNION ALL
+  SELECT level + 1, data || 'C' FROM r WHERE level < 3
+)
+SELECT * FROM r;
+
+-- multiple recursive terms are supported
+WITH RECURSIVE r(level, data) AS (
+  VALUES (0, 'A')
+  UNION ALL
+  SELECT level + 1, data || 'B' FROM r WHERE level < 2
+  UNION ALL
+  SELECT level + 1, data || 'C' FROM r WHERE level < 3
+)
+SELECT * FROM r;
+
+-- multiple anchor and recursive terms are supported
+WITH RECURSIVE r(level, data) AS (
+  VALUES (0, 'A')
+  UNION ALL
+  VALUES (0, 'B')
+  UNION ALL
+  SELECT level + 1, data || 'C' FROM r WHERE level < 2
+  UNION ALL
+  SELECT level + 1, data || 'D' FROM r WHERE level < 3
+)
+SELECT * FROM r;
+
+-- recursion without an anchor term fails
+WITH RECURSIVE r(level) AS (
+  SELECT level + 1 FROM r WHERE level < 3
+)
+SELECT * FROM r;
+
+-- UNION combinator supported to eliminate duplicates and stop recursion
+WITH RECURSIVE r(level) AS (
+  VALUES (0), (0)
+  UNION
+  SELECT (level + 1) % 10 FROM r
+)
+SELECT * FROM r;
+
+-- fails because a recursive query should contain UNION ALL or UNION combinator
+WITH RECURSIVE r(level) AS (
+  VALUES (0)
+  INTERSECT
+  SELECT level + 1 FROM r WHERE level < 10
+)
+SELECT * FROM r;
+
+-- recursive reference is not allowed in a subquery
+WITH RECURSIVE r(level) AS (
+  VALUES (0)
+  UNION ALL
+  SELECT level + 1 FROM r WHERE (SELECT SUM(level) FROM r) < 10
+)
+SELECT * FROM r;
+
+-- recursive reference can't be used multiple times in a recursive term
+WITH RECURSIVE r(level, data) AS (
+  VALUES (0, 'A')
+  UNION ALL
+  SELECT r1.level + 1, r1.data
+  FROM r AS r1
+  JOIN r AS r2 ON r2.data = r1.data
+  WHERE r1.level < 10
+)
+SELECT * FROM r;
+
+-- recursive reference is not allowed on right side of a left outer join
+WITH RECURSIVE r(level, data) AS (
+  VALUES (0, 'A')
+  UNION ALL
+  SELECT level + 1, r.data
+  FROM (
+    SELECT 'B' AS data
+  ) AS o
+  LEFT JOIN r ON r.data = o.data
+)
+SELECT * FROM r;
+
+-- recursive reference is not allowed on left side of a right outer join
+WITH RECURSIVE r(level, data) AS (
+  VALUES (0, 'A')
+  UNION ALL
+  SELECT level + 1, r.data
+  FROM r
+  RIGHT JOIN (
+    SELECT 'B' AS data
+  ) AS o ON o.data = r.data
+)
+SELECT * FROM r;
+
+-- aggregate is supported in the anchor term
+WITH RECURSIVE r(level, data) AS (
+  SELECT MAX(level) AS level, SUM(data) AS data FROM VALUES (0, 1), (0, 2)
+  UNION ALL
+  SELECT level + 1, data FROM r WHERE level < 10
+)
+SELECT * FROM r ORDER BY level;
+
+-- recursive reference is not allowed in an aggregate in a recursive term
+WITH RECURSIVE r(group, data) AS (
 
 Review comment:
   nit: `group` is a reserved keyword, so you'd be better to avoid to use this 
word here.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to