Github user abehm commented on a diff in the pull request:
https://github.com/apache/spark/pull/23096#discussion_r235135213
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala
---
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.util.BoundedPriorityQueue
+
+
+/**
+ * A simple utility for tracking runtime and associated stats in query
planning.
+ *
+ * There are two separate concepts we track:
+ *
+ * 1. Phases: These are broad scope phases in query planning, as listed
below, i.e. analysis,
+ * optimization, and physical planning (just planning).
+ *
+ * 2. Rules: These are the individual Catalyst rules that we track. In
addition to time, we also
+ * track the number of invocations and effective invocations.
+ */
+object QueryPlanningTracker {
+
+ // Define a list of common phases here.
+ val PARSING = "parsing"
+ val ANALYSIS = "analysis"
+ val OPTIMIZATION = "optimization"
+ val PLANNING = "planning"
+
+ class RuleSummary(
+ var totalTimeNs: Long, var numInvocations: Long, var
numEffectiveInvocations: Long) {
+
+ def this() = this(totalTimeNs = 0, numInvocations = 0,
numEffectiveInvocations = 0)
+
+ override def toString: String = {
+ s"RuleSummary($totalTimeNs, $numInvocations,
$numEffectiveInvocations)"
+ }
+ }
+}
+
+
+class QueryPlanningTracker {
+
+ import QueryPlanningTracker._
+
+ // Mapping from the name of a rule to a rule's summary.
+ // Use a Java HashMap for less overhead.
+ private val rulesMap = new java.util.HashMap[String, RuleSummary]
+
+ // From a phase to time in ns.
+ private val phaseToTimeNs = new java.util.HashMap[String, Long]
+
+ /** Measure the runtime of function f, and add it to the time for the
specified phase. */
+ def measureTime[T](phase: String)(f: => T): T = {
+ val startTime = System.nanoTime()
+ val ret = f
+ val timeTaken = System.nanoTime() - startTime
+ phaseToTimeNs.put(phase, phaseToTimeNs.getOrDefault(phase, 0) +
timeTaken)
+ ret
+ }
+
+ /**
+ * Reecord a specific invocation of a rule.
--- End diff --
Typo: "Reecord" should be "Record".
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]