mgaido91 commented on a change in pull request #23531: [SPARK-24497][SQL]
Support recursive SQL query
URL: https://github.com/apache/spark/pull/23531#discussion_r252147035
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1548,6 +1663,69 @@ class Analyzer(
}
}
+ /**
+ * Recursion according to SQL standard comes with several limitations due to
the fact that only
+ * those operations are allowed where the new set of rows should be computed
from the result of
+ * the previous iteration only (the so far cumulated results are not
required).
+ * This implies that a recursive reference can't be used in some kinds of
joins, aggregation,
+ * distinct computation and subqueries.
+ * A further constraint is that a recursive term can contain one recursive
reference only.
+ *
+ * This rules checks that these restrictions are not violated and returns
the original plan.
+ */
+ object CheckRecursionConstraints extends Rule[LogicalPlan] {
+ def apply(plan: LogicalPlan): LogicalPlan = {
+ traverse(plan)
+
+ plan
+ }
+
+ private def mergeMap(a: Map[String, Int], b: Map[String, Int]):
Map[String, Int] =
+ a ++ b.map { case (k, v) => k -> (v + a.getOrElse(k, 0)) }
+
+ private def traverse(
+ plan: LogicalPlan,
+ allowedRecursiveReferences: Set[String] = Set.empty): Map[String, Int]
= plan match {
+ case RecursiveTable(name, anchorTerms, recursiveTerms, _) =>
+ if (allowedRecursiveReferences.contains(name)) {
+ throw new AnalysisException(s"Recursive CTE definition $name is
already in use.")
+ }
+ (anchorTerms.map(traverse(_, allowedRecursiveReferences)) ++
+ recursiveTerms.map { recursiveTerm =>
+ val m = traverse(recursiveTerm, allowedRecursiveReferences + name)
+ if (m.get(name).getOrElse(0) > 1) {
+ throw new AnalysisException(s"Recursive reference $name cannot be
used multiple " +
+ "times in a recursive term")
+ }
+ m - name
+ }).foldLeft(Map.empty[String, Int])(mergeMap)
Review comment:
please use mutable Map for better perf
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]