This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git
The following commit(s) were added to refs/heads/master by this push:
new aeafecc Add new TimeTracker for more advanced performance analysis
aeafecc is described below
commit aeafeccb4482a9c39ab7a0026060f8c6b2d7ab92
Author: Steve Lawrence <[email protected]>
AuthorDate: Fri Jan 31 09:05:35 2020 -0500
Add new TimeTracker for more advanced performance analysis
Because of our nested parsers, it can sometimes be difficult to
determine how much time each processor takes on average to complete it's
parse/unparse, even when using a profiler. This adds a new timer that
makes it much easier to determine how much total time particular parsers
take and on average how long they take. This can help to prioritize
performance optimizations.
The intended use is to modify the Parser.scala parse1() function to look
like this:
try {
TimeTracker.track(parserName) {
parse(pstate)
}
}
The resulting output gives a break down of all the parsers and how
performant they are. Once slow processors are found, it can then be
useful to wrap subsections of the parse() method in a call to
TimeTracker.track() to determine which sections are slow. This incurs
less overhead than traditional profilers and give more control over
which sections are actually profiled.
DAFFODIL-1883
---
.../scala/org/apache/daffodil/util/Timer.scala | 95 ++++++++++++++++++++++
.../org/apache/daffodil/util/TimerMacros.scala | 67 +++++++++++++++
.../daffodil/processors/parsers/Parser.scala | 2 +-
3 files changed, 163 insertions(+), 1 deletion(-)
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Timer.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Timer.scala
index 2aef3b0..f9150ad 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Timer.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Timer.scala
@@ -17,6 +17,8 @@
package org.apache.daffodil.util
+import collection.JavaConverters._
+
object Timer extends Logging {
def printTime(message: String, nanos: Long, units: String) {
@@ -166,3 +168,96 @@ object TakTimer {
}
}
+
+
+object TimeTracker extends Logging {
+
+ case class SectionTime(var time: Long, var count: Int)
+
+ /**
+ * A mapping of each section of code tracked
+ */
+ val sectionTimes = new java.util.HashMap[String,SectionTime]()
+
+ /**
+ * Used to track the time of child tracked sections so they can be excluded
+ * from the parents tracked time. When a tracked section begins, we will push
+ * a zero onto the top of the stack. As each child section finishes and
+ * calculates their time, they will add their time to the value at the top of
+ * the stack. This effectively keeps track of how long all child tracked
+ * sections take. Then when the parent tracked section ends it can pop and
+ * subtract the value on the top of the stack to determine how much time just
+ * that section took, excluding the nested child track sections. This is
+ * necessary since we often want to track the time each parser takes to
+ * complete, but our parsers are nested making that difficult with standard
+ * profilers. This makes that much easier.
+ */
+ val childrenTimeStack = scala.collection.mutable.Stack[Long]()
+
+ /**
+ * Used to measure a section of code that might get called multiple times.
+ * This keeps track of the total amount of time spent in that section and how
+ * many times that section was run. A name is provided by the caller to
+ * identify the section. Additionally, if a section that is being tracked
+ * calls another section that is being tracked (i.e. nested TimeTracker.track
+ * calls) the time spent in the inner section is not included in the outer
+ * section. Note that because of this feature, the use of the track function
+ * is not thread safe.
+ *
+ * For example,
+ *
+ * TimeTracker.track("code section") {
+ * // code to track goes here
+ * }
+ *
+ * Once processing is complete, a call to logTimes will dispaly the stats
+ * about the tracked sections.
+ */
+ def track[A](name: String)(body: => A): A = macro
TimeTrackerMacros.trackMacro
+
+ /**
+ * Output the results of the tracked sections in sorted columnar format.
+ */
+ def logTimes(logLevel: LogLevel.Type): Unit = {
+ val stats = sectionTimes.asScala.toSeq.map { case (name,
SectionTime(timeNS, count)) => {
+ val average = timeNS / count
+ (name, timeNS / 1000000000.0, average, count)
+ }}.sortBy(_._2).reverse
+
+ val totalTime = stats.map { _._2 }.sum
+
+ val stringStats = stats.map { case (name, time, average, count) =>
+ (
+ name,
+ "%.3f".format(time),
+ "%.2f%%".format(time * 100 / totalTime),
+ average.toString,
+ count.toString
+ )
+ }
+
+ val nameLen = stringStats.map(_._1.length).max
+ val timeLen = stringStats.map(_._2.length).max
+ val percentLen = stringStats.map(_._3.length).max
+ val averageLen = stringStats.map(_._4.length).max
+ val countLen = stringStats.map(_._5.length).max
+
+ val formatString =
+ "%-" + nameLen + "s " +
+ "%" + timeLen + "s " +
+ "%" + percentLen + "s " +
+ "%" + averageLen + "s " +
+ "%" + countLen + "s"
+
+ log(logLevel, formatString, "Name", "Time", "Pct", "Average", "Count")
+ stringStats.foreach { stats =>
+ log(logLevel, formatString, stats.productIterator.toList: _*)
+ }
+ log(logLevel, "Total Time: %.3f", totalTime)
+ }
+
+ def clear(): Unit = {
+ sectionTimes.clear()
+ childrenTimeStack.clear()
+ }
+}
diff --git
a/daffodil-macro-lib/src/main/scala/org/apache/daffodil/util/TimerMacros.scala
b/daffodil-macro-lib/src/main/scala/org/apache/daffodil/util/TimerMacros.scala
new file mode 100644
index 0000000..6c4ee1d
--- /dev/null
+++
b/daffodil-macro-lib/src/main/scala/org/apache/daffodil/util/TimerMacros.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.util
+
+import scala.reflect.macros.blackbox.Context
+
+object TimeTrackerMacros {
+
+ def trackMacro(c: Context)(name: c.Tree)(body: c.Tree) = {
+ import c.universe._
+
+ val startTime = TermName(c.freshName)
+ val endTime = TermName(c.freshName)
+ val childrenTime = TermName(c.freshName)
+ val timeTaken = TermName(c.freshName)
+ val selfTime = TermName(c.freshName)
+ val sectionTime = TermName(c.freshName)
+ val result = TermName(c.freshName)
+ val key = TermName(c.freshName)
+
+ q"""
+ {
+ val $startTime = System.nanoTime
+ TimeTracker.childrenTimeStack.push(0)
+
+ val $result = try {
+ $body
+ } finally {
+ val $endTime = System.nanoTime
+ val $timeTaken = $endTime - $startTime
+ val $childrenTime = TimeTracker.childrenTimeStack.pop()
+ val $selfTime = $timeTaken - $childrenTime
+
+ val $key = $name
+ val $sectionTime = TimeTracker.sectionTimes.get($key)
+ if ($sectionTime == null) {
+ TimeTracker.sectionTimes.put($key, new
TimeTracker.SectionTime($selfTime, 1))
+ } else {
+ $sectionTime.time += $selfTime
+ $sectionTime.count += 1
+ }
+
+ if (!TimeTracker.childrenTimeStack.isEmpty) {
+ TimeTracker.childrenTimeStack.push(TimeTracker.childrenTimeStack.pop
+ $timeTaken)
+ }
+ }
+
+ $result
+ }
+ """
+ }
+}
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/Parser.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/Parser.scala
index 3dc594a..5f5d86a 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/Parser.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/Parser.scala
@@ -54,7 +54,7 @@ sealed trait Parser
def isEmpty = false // override in NadaParser
- protected def parserName = Misc.getNameFromClass(this)
+ protected lazy val parserName = Misc.getNameFromClass(this)
def PE(pstate: PState, s: String, args: Any*) = {
pstate.setFailed(new ParseError(One(context.schemaFileLocation),
One(pstate.currentLocation), s, args: _*))