Author: edelsohn
Branch: ppc-jit-backend
Changeset: r56840:626689d0e745
Date: 2012-08-24 11:05 -0400
http://bitbucket.org/pypy/pypy/changeset/626689d0e745/
Log: Upgrade to x86 version of viewcode.py adjusted for PPC.
diff --git a/pypy/jit/backend/ppc/tool/viewcode.py b/pypy/jit/backend/ppc/tool/viewcode.py
old mode 100644
new mode 100755
--- a/pypy/jit/backend/ppc/tool/viewcode.py
+++ b/pypy/jit/backend/ppc/tool/viewcode.py
@@ -1,18 +1,44 @@
+#! /usr/bin/env python
+"""
+Viewer for the output of compiled programs generating code.
+Use on the log files created with 'PYPYLOG=jit-backend-dump:log'.
-#!/usr/bin/env python
+Try:
+ ./viewcode.py --text log # text only disassembly
+ ./viewcode.py log # also includes a pygame viewer
"""
-Try:
- ./viewcode.py file.asm
- ./viewcode.py --decode dumpfile
-"""
-import os, sys, py
+
+import autopath
+import new
+import operator
+import py
+import re
+import sys
import subprocess
+from bisect import bisect_left
+
+# don't use pypy.tool.udir here to avoid removing old usessions which
+# might still contain interesting executables
+udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
+tmpfile = str(udir.join('dump.tmp'))
+
+# hack hack
+import pypy.tool
+mod = new.module('pypy.tool.udir')
+mod.udir = udir
+sys.modules['pypy.tool.udir'] = mod
+pypy.tool.udir = mod
+
+# ____________________________________________________________
+# Some support code from Psyco. There is more over there,
+# I am porting it in a lazy fashion... See py-utils/xam.py
+
+if sys.platform == "win32":
+ pass # lots more in Psyco
def machine_code_dump(data, originaddr, backend_name, label_list=None):
- assert backend_name in ["ppc", "ppc_32", "ppc_64"]
- tmpfile = get_tmp_file()
- objdump = "objdump -EB -D --target=binary --adjust-vma=%(origin)d "
- objdump += "--architecture=powerpc %(file)s"
+ objdump = ('objdump -EB --target=binary --architecture=powerpc:common64 '
+ '--adjust-vma=%(origin)d -D %(file)s')
#
f = open(tmpfile, 'wb')
f.write(data)
@@ -52,35 +78,353 @@
for line in itlines:
yield line
-def objdump(input):
- os.system("objdump -EB -D --target=binary --architecture=powerpc %s" % input)
+def load_symbols(filename):
+ # the program that lists symbols, and the output it gives
+ symbollister = 'nm %s'
+ re_symbolentry = re.compile(r'([0-9a-fA-F]+)\s\w\s(.*)')
+ #
+ print 'loading symbols from %s...' % (filename,)
+ symbols = {}
+ p = subprocess.Popen(symbollister % filename, shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ assert not p.returncode, ('Encountered an error running nm: %s' %
+ stderr)
+ for line in stdout.splitlines(True):
+ match = re_symbolentry.match(line)
+ if match:
+ addr = long(match.group(1), 16)
+ name = match.group(2)
+ if name.startswith('pypy_g_'):
+ name = '\xb7' + name[7:]
+ symbols[addr] = name
+ print '%d symbols found' % (len(symbols),)
+ return symbols
+re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]+)')
+re_lineaddr = re.compile(r'\s*0?x?([0-9a-fA-F]+)')
-def get_tmp_file():
- # don't use pypy.tool.udir here to avoid removing old usessions which
- # might still contain interesting executables
- udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
- tmpfile = str(udir.join('dump.tmp'))
- return tmpfile
+def lineaddresses(line):
+ result = []
+ i = 0
+ while 1:
+ match = re_addr.search(line, i)
+ if not match:
+ break
+ i = match.end()
+ addr = long(match.group(1), 16)
+ result.append(addr)
+ return result
-def decode(source):
- with open(source, 'r') as f:
- data = f.read().strip()
- data = data.decode('hex')
+# ____________________________________________________________
- target = get_tmp_file()
- with open(target, 'wb') as f:
- f.write(data)
- return target
+class CodeRange(object):
+ fallthrough = False
+ def __init__(self, world, addr, data):
+ self.world = world
+ self.addr = addr
+ self.data = data
+
+ def __repr__(self):
+ return '<CodeRange %s length %d>' % (hex(self.addr), len(self.data))
+
+ def touches(self, other):
+ return (self .addr < other.addr + len(other.data) and
+ other.addr < self .addr + len(self.data))
+
+ def update_from_old(self, other):
+ if other.addr < self.addr:
+ delta = self.addr - other.addr
+ assert delta <= len(other.data)
+ self.addr -= delta
+ self.data = other.data[:delta] + self.data
+ self_end = self .addr + len(self .data)
+ other_end = other.addr + len(other.data)
+ if other_end > self_end:
+ extra = other_end - self_end
+ assert extra <= len(other.data)
+ self.data += other.data[-extra:]
+
+ def cmpop(op):
+ def _cmp(self, other):
+ if not isinstance(other, CodeRange):
+ return NotImplemented
+ return op((self.addr, self.data), (other.addr, other.data))
+ return _cmp
+ __lt__ = cmpop(operator.lt)
+ __le__ = cmpop(operator.le)
+ __eq__ = cmpop(operator.eq)
+ __ne__ = cmpop(operator.ne)
+ __gt__ = cmpop(operator.gt)
+ __ge__ = cmpop(operator.ge)
+ del cmpop
+
+ def disassemble(self):
+ if not hasattr(self, 'text'):
+ lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
+ lines = list(lines)
+ # instead of adding symbol names in the dumps we could
+ # also make the 0xNNNNNNNN addresses be red and show the
+ # symbol name when the mouse is over them
+ logentries = self.world.logentries
+ symbols = self.world.symbols
+ for i, line in enumerate(lines):
+ match = re_lineaddr.match(line)
+ if match:
+ addr = long(match.group(1), 16)
+ logentry = logentries.get(addr)
+ if logentry:
+ lines[i] = '\n%s\n%s' % (logentry, lines[i])
+ for addr in lineaddresses(line):
+ sym = symbols.get(addr)
+ if sym:
+ lines[i] = '%s\t%s\n' % (lines[i].rstrip(), sym)
+ self.text = ''.join(lines)
+ return self.text
+
+ def findjumps(self):
+ text = self.disassemble()
+ lines = text.splitlines()
+ line = ''
+ for i, line in enumerate(lines):
+ if '\tj' not in line: # poor heuristic to recognize lines that
+ continue # could be jump instructions
+ addrs = list(lineaddresses(line))
+ if not addrs:
+ continue
+ addr = addrs[-1]
+ final = '\tjmp' in line
+ yield i, addr, final
+ if self.fallthrough and '\tret' not in line:
+ yield len(lines), self.addr + len(self.data), True
+
+
+class World(object):
+
+ def __init__(self):
+ self.ranges = []
+ self.labeltargets = {}
+ self.jumps = {}
+ self.symbols = {}
+ self.logentries = {}
+ self.backend_name = None
+ self.executable_name = None
+
+ def parse(self, f, textonly=True):
+ for line in f:
+ if line.startswith('BACKEND '):
+ self.backend_name = line.split(' ')[1].strip()
+ elif line.startswith('CODE_DUMP '):
+ pieces = line.split()
+ assert pieces[1].startswith('@')
+ assert pieces[2].startswith('+')
+ if len(pieces) == 3:
+ continue # empty line
+ baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
+ offset = int(pieces[2][1:])
+ addr = baseaddr + offset
+ data = pieces[3].replace(':', '').decode('hex')
+ coderange = CodeRange(self, addr, data)
+ i = bisect_left(self.ranges, coderange)
+ j = i
+ while i>0 and coderange.touches(self.ranges[i-1]):
+ coderange.update_from_old(self.ranges[i-1])
+ i -= 1
+ while j<len(self.ranges) and coderange.touches(self.ranges[j]):
+ coderange.update_from_old(self.ranges[j])
+ j += 1
+ self.ranges[i:j] = [coderange]
+ elif line.startswith('LOG '):
+ pieces = line.split(None, 3)
+ assert pieces[1].startswith('@')
+ assert pieces[2].startswith('+')
+ baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
+ offset = int(pieces[2][1:])
+ addr = baseaddr + offset
+ self.logentries[addr] = pieces[3]
+ elif line.startswith('SYS_EXECUTABLE '):
+ filename = line[len('SYS_EXECUTABLE '):].strip()
+ if filename != self.executable_name and filename != '??':
+ self.symbols.update(load_symbols(filename))
+ self.executable_name = filename
+
+ def find_cross_references(self):
+ # find cross-references between blocks
+ fnext = 0.1
+ for i, r in enumerate(self.ranges):
+ for lineno, targetaddr, _ in r.findjumps():
+ self.labeltargets[targetaddr] = True
+ if i % 100 == 99:
+ f = float(i) / len(self.ranges)
+ if f >= fnext:
+ sys.stderr.write("%d%%" % int(f*100.0))
+ fnext += 0.1
+ sys.stderr.write(".")
+ sys.stderr.write("100%")
+ # split blocks at labeltargets
+ t = self.labeltargets
+ #print t
+ for r in self.ranges:
+ #print r.addr, r.addr + len(r.data)
+ for i in range(r.addr + 1, r.addr + len(r.data)):
+ if i in t:
+ #print i
+ ofs = i - r.addr
+ self.ranges.append(CodeRange(self, i, r.data[ofs:]))
+ r.data = r.data[:ofs]
+ r.fallthrough = True
+ try:
+ del r.text
+ except AttributeError:
+ pass
+ break
+ # hack hack hacked
+ sys.stderr.write("\n")
+
+ def show(self, showtext=True, showgraph=True):
+ if showgraph:
+ g1 = Graph('codedump')
+ self.ranges.sort()
+ for r in self.ranges:
+ disassembled = r.disassemble()
+ if showtext:
+ print disassembled
+ if showgraph:
+ text, width = tab2columns(disassembled)
+ text = '0x%x\n\n%s' % (r.addr, text)
+ g1.emit_node('N_%x' % r.addr, shape="box", label=text,
+ width=str(width*0.1125))
+ for lineno, targetaddr, final in r.findjumps():
+ if final:
+ color = "black"
+ else:
+ color = "red"
+ g1.emit_edge('N_%x' % r.addr, 'N_%x' % targetaddr,
+ color=color)
+ sys.stdout.flush()
+ if showgraph:
+ g1.display()
+
+ def showtextonly(self):
+ self.ranges.sort()
+ for r in self.ranges:
+ disassembled = r.disassemble()
+ print disassembled
+ del r.text
+
+
+def tab2columns(text):
+ lines = text.split('\n')
+ columnwidth = []
+ for line in lines:
+ columns = line.split('\t')[:-1]
+ while len(columnwidth) < len(columns):
+ columnwidth.append(0)
+ for i, s in enumerate(columns):
+ width = len(s.strip())
+ if not s.endswith(':'):
+ width += 2
+ columnwidth[i] = max(columnwidth[i], width)
+ columnwidth.append(1)
+ result = []
+ for line in lines:
+ columns = line.split('\t')
+ text = []
+ for width, s in zip(columnwidth, columns):
+ text.append(s.strip().ljust(width))
+ result.append(' '.join(text))
+ lengths = [len(line) for line in result]
+ lengths.append(1)
+ totalwidth = max(lengths)
+ return '\\l'.join(result), totalwidth
+
+# ____________________________________________________________
+# XXX pasted from
+# http://codespeak.net/svn/user/arigo/hack/misc/graphlib.py
+# but needs to be a bit more subtle later
+
+from pypy.translator.tool.make_dot import DotGen
+from dotviewer.graphclient import display_page
+
+class Graph(DotGen):
+
+ def highlight(self, word, text, linked_to=None):
+ if not hasattr(self, '_links'):
+ self._links = {}
+ self._links_to = {}
+ self._links[word] = text
+ if linked_to:
+ self._links_to[word] = linked_to
+
+ def display(self):
+ "Display a graph page locally."
+ display_page(_Page(self))
+
+
+class NoGraph(Exception):
+ pass
+
+class _Page:
+ def __init__(self, graph_builder):
+ if callable(graph_builder):
+ graph = graph_builder()
+ else:
+ graph = graph_builder
+ if graph is None:
+ raise NoGraph
+ self.graph_builder = graph_builder
+
+ def content(self):
+ return _PageContent(self.graph_builder)
+
+class _PageContent:
+ fixedfont = True
+
+ def __init__(self, graph_builder):
+ if callable(graph_builder):
+ graph = graph_builder()
+ else:
+ graph = graph_builder
+ assert graph is not None
+ self.graph_builder = graph_builder
+ self.graph = graph
+ self.links = getattr(graph, '_links', {})
+ if not hasattr(graph, '_source'):
+ graph._source = graph.generate(target=None)
+ self.source = graph._source
+
+ def followlink(self, link):
+ try:
+ return _Page(self.graph._links_to[link])
+ except NoGraph:
+ return _Page(self.graph_builder)
+
+# ____________________________________________________________
if __name__ == '__main__':
- if len(sys.argv) == 2:
- objdump(sys.argv[1])
- elif len(sys.argv) == 3:
- assert sys.argv[1] == '--decode'
- f = decode(sys.argv[2])
- objdump(f)
+ if '--text' in sys.argv:
+ sys.argv.remove('--text')
+ showgraph = False
else:
+ showgraph = True
+ if len(sys.argv) != 2:
print >> sys.stderr, __doc__
sys.exit(2)
+ #
+ import cStringIO
+ from pypy.tool import logparser
+ log1 = logparser.parse_log_file(sys.argv[1])
+ text1 = logparser.extract_category(log1, catprefix='jit-backend-dump')
+ f = cStringIO.StringIO()
+ f.writelines(text1)
+ f.seek(0)
+ del log1, text1
+ #
+ world = World()
+ world.parse(f)
+ if showgraph:
+ world.find_cross_references()
+ world.show(showtext=True)
+ else:
+ world.showtextonly()
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit