hvanhovell commented on a change in pull request #32060:
URL: https://github.com/apache/spark/pull/32060#discussion_r608486643



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.rules
+
+import scala.collection.mutable
+
+// A collection of rules that use rule ids to prune tree traversals.
+object RuleIdCollection {
+
+  // The rules listed here need a rule id. Typically, rules that are in a fixed point batch or
+  // invoked multiple times by Analyzer/Optimizer/Planner need a rule id to prune unnecessary
+  // tree traversals in the transform function family. Note that those rules should not depend on
+  // a changing, external state. Rules here are in alphabetical order.
+  private val rulesNeedingIds: Seq[String] = {
+      // Catalyst Optimizer rules
+      "org.apache.spark.sql.catalyst.optimizer.CostBasedJoinReorder" ::
+      "org.apache.spark.sql.catalyst.optimizer.EliminateOuterJoin" ::
+      "org.apache.spark.sql.catalyst.optimizer.OptimizeIn" ::
+      "org.apache.spark.sql.catalyst.optimizer.PushDownLeftSemiAntiJoin" ::
+      "org.apache.spark.sql.catalyst.optimizer.PushExtraPredicateThroughJoin" 
::
+      
"org.apache.spark.sql.catalyst.optimizer.PushLeftSemiLeftAntiThroughJoin" ::
+      "org.apache.spark.sql.catalyst.optimizer.ReorderJoin" :: Nil
+  }
+
+  // Maps rule names to ids. Rule ids are continuous natural numbers starting from 0.
+  private val ruleToId = new mutable.HashMap[String, Int]
+
+  // Maps rule ids to names. Rule ids are continuous natural numbers starting from 0.
+  private val ruleIdToName = new mutable.HashMap[Int, String]
+
+  // Unknown rule id which does not prune tree traversal. It is used as the default rule id for
+  // tree transformation functions.
+  val UnknownId: Int = -1
+
+  // The total number of rules with ids.
+  val NumRules: Int = {
+    var ruleId = 0
+    rulesNeedingIds.foreach(ruleName => {
+      ruleToId.put(ruleName, ruleId)
+      ruleIdToName.put(ruleId, ruleName)
+      ruleId = ruleId + 1
+    })
+    // Currently, there are more than 128 but less than 192 rules needing an id. However, the
+    // assertion can be relaxed when we have more such rules. Note that increasing the max id can
+    // result in increased memory consumption from every TreeNode.
+    assert(ruleId < 192)
+    ruleId
+  }
+
+  // Return the rule Id for a rule name.
+  def getRuleId(ruleName: String): Int = {
+    val ruleIdOpt = ruleToId.get(ruleName)
+    // Please add the rule name to `rulesNeedingIds` if this assert fails.
+    assert(ruleIdOpt.isDefined, s"add $ruleName into `rulesNeedingIds`")
+    ruleIdOpt.get
+  }
+
+  // Return the rule name from its id. It is for debugging purposes.
+  def getRuleName(ruleId: Int): String = {

Review comment:
       Where is this used?




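The file in the diff only registers ids; the traversal pruning those ids enable lives in the tree transform functions that consume them. As a rough illustration of that mechanism, here is a minimal, self-contained Scala sketch. All names in it (`Node`, `RuleIds`, `transformWithPruning`, `PruningDemo`) are hypothetical stand-ins rather than Spark's real `TreeNode` API: each node keeps a bitset of rule ids proven ineffective on its subtree, and a transform call carrying a known id can skip that subtree outright.

```scala
import java.util.BitSet

// Hypothetical sketch of the pruning mechanism RuleIdCollection enables.
// `Node`, `RuleIds`, and `transformWithPruning` are illustrative stand-ins,
// not Spark's actual TreeNode API.
object RuleIds {
  // Sentinel id that never prunes, mirroring RuleIdCollection.UnknownId.
  val UnknownId: Int = -1
}

case class Node(value: Int, children: Seq[Node] = Nil) {
  // Bit i is set once rule i has been observed to be a no-op on this subtree.
  // Sized to the 192-rule budget enforced by the assert in the diff above.
  private val ineffective = new BitSet(192)

  def isIneffective(ruleId: Int): Boolean =
    ruleId != RuleIds.UnknownId && ineffective.get(ruleId)

  // Apply `rule` top-down, skipping any subtree where `ruleId` is already
  // known to change nothing.
  def transformWithPruning(ruleId: Int)(rule: PartialFunction[Node, Node]): Node = {
    if (isIneffective(ruleId)) return this
    val afterRule = rule.applyOrElse(this, (n: Node) => n)
    val rewritten =
      afterRule.copy(children = afterRule.children.map(_.transformWithPruning(ruleId)(rule)))
    if (rewritten == this) {
      // The rule changed nothing in this subtree. Record that (case-class
      // equality ignores the private BitSet) and keep `this` so the bit sticks.
      if (ruleId != RuleIds.UnknownId) ineffective.set(ruleId)
      this
    } else rewritten
  }
}

object PruningDemo extends App {
  // A toy rule, registered here under id 0, that bumps even values by one.
  val bumpEvens: PartialFunction[Node, Node] = {
    case n @ Node(v, _) if v % 2 == 0 => n.copy(value = v + 1)
  }
  val tree = Node(1, Seq(Node(2), Node(7)))
  println(tree.transformWithPruning(0)(bumpEvens))
  // Node(7)'s subtree is now marked for rule 0, so a later pass skips it.
}
```

In this sketch the bitset is sized to 192 bits simply to match the `assert(ruleId < 192)` in the diff; that per-node cost is presumably also why the diff warns that raising the max id increases memory consumption from every TreeNode.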