Revision: 13168
Author:   [email protected]
Date:     Fri Dec  7 04:51:33 2012
Log:      llprof improvements

Review URL: https://codereview.chromium.org/11444031
http://code.google.com/p/v8/source/detail?r=13168

Added:
 /branches/bleeding_edge/tools/run-llprof.sh
Modified:
 /branches/bleeding_edge/tools/ll_prof.py

=======================================
--- /dev/null
+++ /branches/bleeding_edge/tools/run-llprof.sh Fri Dec  7 04:51:33 2012
@@ -0,0 +1,69 @@
+#!/bin/sh
+#
+# Copyright 2012 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+########## Global variable definitions
+
+# Ensure that <your CPU clock> / $SAMPLE_EVERY_N_CYCLES < $MAXIMUM_SAMPLE_RATE.
+MAXIMUM_SAMPLE_RATE=10000000
+SAMPLE_EVERY_N_CYCLES=10000
+SAMPLE_RATE_CONFIG_FILE="/proc/sys/kernel/perf_event_max_sample_rate"
+KERNEL_MAP_CONFIG_FILE="/proc/sys/kernel/kptr_restrict"
+
+########## Usage
+
+usage() {
+cat << EOF
+usage: $0 <benchmark_command>
+
+Executes <benchmark_command> under observation by the kernel's "perf" \
+framework, then calls the low level tick processor to analyze the results.
+EOF
+}
+
+if [ $# -eq 0 ] || [ "$1" == "-h" ]  || [ "$1" == "--help" ] ; then
+  usage
+  exit 1
+fi
+
+########## Actual script execution
+
+ACTUAL_SAMPLE_RATE=$(cat $SAMPLE_RATE_CONFIG_FILE)
+if [ "$ACTUAL_SAMPLE_RATE" -lt "$MAXIMUM_SAMPLE_RATE" ] ; then
+  echo "Setting appropriate maximum sample rate..."
+  echo $MAXIMUM_SAMPLE_RATE | sudo tee $SAMPLE_RATE_CONFIG_FILE
+fi
+
+ACTUAL_KERNEL_MAP_RESTRICTION=$(cat $KERNEL_MAP_CONFIG_FILE)
+if [ "$ACTUAL_KERNEL_MAP_RESTRICTION" -ne "0" ] ; then
+  echo "Disabling kernel address map restriction..."
+  echo 0 | sudo tee $KERNEL_MAP_CONFIG_FILE
+fi
+
+echo "Running benchmark..."
+perf record -R -e cycles -c $SAMPLE_EVERY_N_CYCLES -f -i $@ --ll-prof
=======================================
--- /branches/bleeding_edge/tools/ll_prof.py    Thu Sep  6 08:01:20 2012
+++ /branches/bleeding_edge/tools/ll_prof.py    Fri Dec  7 04:51:33 2012
@@ -45,7 +45,7 @@
 Analyses V8 and perf logs to produce profiles.

 Perf logs can be collected using a command like:
-  $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
+  $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
   # -R: collect all data
   # -e cycles: use cpu-cycles event (run "perf list" for details)
   # -c 10000: write a sample after each 10000 events
@@ -54,6 +54,16 @@
   # --ll-prof shell flag enables the right V8 logs
 This will produce a binary trace file (perf.data) that %prog can analyse.

+IMPORTANT:
+  The kernel has an internal maximum for events per second, it is 100K by
+  default. That's not enough for "-c 10000". Set it to some higher value:
+  $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
+  You can also make the warning about kernel address maps go away:
+  $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
+
+We have a convenience script that handles all of the above for you:
+  $ tools/run-llprof.sh ./d8 bench.js
+
 Examples:
   # Print flat profile with annotated disassembly for the 10 top
   # symbols. Use default log names and include the snapshot log.
@@ -75,6 +85,10 @@
   """Code object."""

   _id = 0
+  UNKNOWN = 0
+  V8INTERNAL = 1
+  FULL_CODEGEN = 2
+  OPTIMIZED = 3

def __init__(self, name, start_address, end_address, origin, origin_offset):
     self.id = Code._id
@@ -88,6 +102,14 @@
     self.self_ticks = 0
     self.self_ticks_map = None
     self.callee_ticks = None
+    if name.startswith("LazyCompile:*"):
+      self.codetype = Code.OPTIMIZED
+    elif name.startswith("LazyCompile:"):
+      self.codetype = Code.FULL_CODEGEN
+    elif name.startswith("v8::internal::"):
+      self.codetype = Code.V8INTERNAL
+    else:
+      self.codetype = Code.UNKNOWN

   def AddName(self, name):
     assert self.name != name
@@ -185,7 +207,7 @@
 class CodePage(object):
   """Group of adjacent code objects."""

-  SHIFT = 12  # 4K pages
+  SHIFT = 20  # 1M pages
   SIZE = (1 << SHIFT)
   MASK = ~(SIZE - 1)

@@ -507,6 +529,7 @@
 # for the gory details.


+# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
 TRACE_HEADER_DESC = Descriptor([
   ("magic", "u64"),
   ("size", "u64"),
@@ -520,6 +543,7 @@
 ])


+# Reference: /usr/include/linux/perf_event.h
 PERF_EVENT_ATTR_DESC = Descriptor([
   ("type", "u32"),
   ("size", "u32"),
@@ -529,12 +553,13 @@
   ("read_format", "u64"),
   ("flags", "u64"),
   ("wakeup_events_or_watermark", "u32"),
-  ("bt_type", "u32"),
+  ("bp_type", "u32"),
   ("bp_addr", "u64"),
-  ("bp_len", "u64"),
+  ("bp_len", "u64")
 ])


+# Reference: /usr/include/linux/perf_event.h
 PERF_EVENT_HEADER_DESC = Descriptor([
   ("type", "u32"),
   ("misc", "u16"),
@@ -542,6 +567,7 @@
 ])


+# Reference: kernel/events/core.c
 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
   ("pid", "u32"),
   ("tid", "u32"),
@@ -566,6 +592,7 @@
 PERF_SAMPLE_RAW = 1 << 10


+# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
 PERF_SAMPLE_EVENT_BODY_FIELDS = [
   ("ip", "u64", PERF_SAMPLE_IP),
   ("pid", "u32", PERF_SAMPLE_TID),
@@ -702,8 +729,12 @@
     # Unfortunately, section headers span two lines, so we have to
     # keep the just seen section name (from the first line in each
     # section header) in the after_section variable.
+    if mmap_info.filename.endswith(".ko"):
+      dynamic_symbols = ""
+    else:
+      dynamic_symbols = "-T"
     process = subprocess.Popen(
-      "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
+ "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     pipe = process.stdout
     after_section = None
@@ -795,7 +826,7 @@
       code.PrintAnnotated(arch, options)
   print
   print "Ticks per library:"
-  mmap_infos = [m for m in library_repo.infos]
+  mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
   mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
   for mmap_info in mmap_infos:
     mmap_ticks = mmap_info.ticks
@@ -885,6 +916,9 @@
   ticks = 0
   missed_ticks = 0
   really_missed_ticks = 0
+  optimized_ticks = 0
+  generated_ticks = 0
+  v8_internal_ticks = 0
   mmap_time = 0
   sample_time = 0

@@ -928,6 +962,12 @@
       code = code_map.Find(sample.ip)
       if code:
         code.Tick(sample.ip)
+        if code.codetype == Code.OPTIMIZED:
+          optimized_ticks += 1
+        elif code.codetype == Code.FULL_CODEGEN:
+          generated_ticks += 1
+        elif code.codetype == Code.V8INTERNAL:
+          v8_internal_ticks += 1
       else:
         missed_ticks += 1
       if not library_repo.Tick(sample.ip) and not code:
@@ -947,12 +987,21 @@
     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)

     if not options.quiet:
+      def PrintTicks(number, total, description):
+        print("%10d %5.1f%% ticks in %s" %
+              (number, 100.0*number/total, description))
       print
       print "Stats:"
       print "%10d total trace events" % events
       print "%10d total ticks" % ticks
       print "%10d ticks not in symbols" % missed_ticks
-      print "%10d unaccounted ticks" % really_missed_ticks
+      unaccounted = "unaccounted ticks"
+      if really_missed_ticks > 0:
+        unaccounted += " (probably in the kernel, try --kernel)"
+      PrintTicks(really_missed_ticks, ticks, unaccounted)
+      PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
+ PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
+      PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
       print "%10d total symbols" % len([c for c in code_map.AllCode()])
       print "%10d used symbols" % len([c for c in code_map.UsedCode()])
       print "%9.2fs library processing time" % mmap_time

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to