[pypy-commit] extradoc extradoc: add code to build backend data tables about machine code and guard data sizes and add the latest results

bivab Wed, 25 Jul 2012 03:21:12 -0700

Author: David Schneider <[email protected]>
Branch: extradoc
Changeset: r4357:d35d75773797
Date: 2012-07-25 12:20 +0200
http://bitbucket.org/pypy/extradoc/changeset/d35d75773797/


Log:    add code to build backend data tables about machine code and guard
        data sizes and add the latest results

diff --git a/talk/vmil2012/Makefile b/talk/vmil2012/Makefile
--- a/talk/vmil2012/Makefile
+++ b/talk/vmil2012/Makefile
@@ -1,5 +1,5 @@
 
-jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex 
figures/benchmarks_table.tex
+jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex 
figures/benchmarks_table.tex figures/backend_table.tex
        pdflatex paper
        bibtex paper
        pdflatex paper
@@ -18,15 +18,18 @@
 %.tex: %.py
        pygmentize -l python -o $@ $<
 
-figures/benchmarks_table.tex: tool/build_tables.py logs/summary.csv 
tool/table_template.tex
+figures/%_table.tex: tool/build_tables.py logs/backend_summary.csv 
logs/summary.csv tool/table_template.tex
        tool/setup.sh
-       paper_env/bin/python tool/build_tables.py logs/summary.csv 
tool/table_template.tex figures/benchmarks_table.tex
+       paper_env/bin/python tool/build_tables.py $@
 
 logs/logbench*:;
 
 logs/summary.csv: logs/logbench* tool/difflogs.py
        @if ls logs/logbench* &> /dev/null; then python tool/difflogs.py 
--diffall logs; fi
 
+logs/backend_summary.csv: logs/logbench* tool/backenddata.py
+       @if ls logs/logbench* &> /dev/null; then python tool/backenddata.py 
logs; fi
+
 logs::
        tool/run_benchmarks.sh
 
diff --git a/talk/vmil2012/logs/backend_summary.csv 
b/talk/vmil2012/logs/backend_summary.csv
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/logs/backend_summary.csv
@@ -0,0 +1,12 @@
+exe,bench,asm size,guard map size
+pypy-c,chaos,154,24
+pypy-c,crypto_pyaes,167,24
+pypy-c,django,220,47
+pypy-c,go,4802,874
+pypy-c,pyflate-fast,719,150
+pypy-c,raytrace-simple,486,75
+pypy-c,richards,153,17
+pypy-c,spambayes,2502,337
+pypy-c,sympy_expand,918,211
+pypy-c,telco,506,77
+pypy-c,twisted_names,1604,211
diff --git a/talk/vmil2012/paper.tex b/talk/vmil2012/paper.tex
--- a/talk/vmil2012/paper.tex
+++ b/talk/vmil2012/paper.tex
@@ -456,6 +456,7 @@
 \label{sec:evaluation}
 
 \include{figures/benchmarks_table}
+\include{figures/backend_table}
 
 * Evaluation
    * Measure guard memory consumption and machine code size
diff --git a/talk/vmil2012/tool/backenddata.py 
b/talk/vmil2012/tool/backenddata.py
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/tool/backenddata.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+"""
+Parse and summarize the traces produced by pypy-c-jit when PYPYLOG is set.
+Only works for logs when unrolling is disabled.
+"""
+
+import csv
+import optparse
+import os
+import re
+import sys
+from pypy.jit.metainterp.history import ConstInt
+from pypy.jit.tool.oparser import parse
+from pypy.rpython.lltypesystem import llmemory, lltype
+from pypy.tool import logparser
+
+
+def collect_logfiles(path):
+    if not os.path.isdir(path):
+        logs = [os.path.basename(path)]
+    else:
+        logs = os.listdir(path)
+    all = []
+    for log in logs:
+        parts = log.split(".")
+        if len(parts) != 3:
+            continue
+        l, exe, bench = parts
+        if l != "logbench":
+            continue
+        all.append((exe, bench, log))
+    all.sort()
+    return all
+
+
+def collect_guard_data(log):
+    """Calculate the total size in bytes of the location maps for all guards
+    in a logfile"""
+    guards = logparser.extract_category(log, 'jit-backend-guard-size')
+    return sum(int(x[6:]) for x in guards if x.startswith('chars'))
+
+
+def collect_asm_size(log, guard_size=0):
+    """Calculate the size of the machine code pieces of a logfile. If
+    guard_size is passed it is subtracted from result under the assumption
+    that the guard location maps are encoded in the instruction stream"""
+    asm = logparser.extract_category(log, 'jit-backend-dump')
+    asmlen = 0
+    for block in asm:
+        expr = re.compile("CODE_DUMP @\w+ \+\d+\s+(.*$)")
+        match = expr.search(block)
+        assert match is not None  # no match found
+        code = match.group(1)
+        asmlen += len(code)
+    return asmlen - guard_size
+
+
+def collect_data(dirname, logs):
+    for exe, name, log in logs:
+        path = os.path.join(dirname, log)
+        logfile = logparser.parse_log_file(path)
+        guard_size = collect_guard_data(logfile)
+        asm_size = collect_asm_size(logfile, guard_size)
+        yield (exe, name, log, asm_size, guard_size)
+
+
+def main(path):
+    logs = collect_logfiles(path)
+    if os.path.isdir(path):
+        dirname = path
+    else:
+        dirname = os.path.dirname(path)
+    results = collect_data(dirname, logs)
+
+    with file("logs/backend_summary.csv", "w") as f:
+        csv_writer = csv.writer(f)
+        row = ["exe", "bench", "asm size", "guard map size"]
+        csv_writer.writerow(row)
+        print row
+        for exe, bench, log, asm_size, guard_size in results:
+            row = [exe, bench, asm_size / 1024, guard_size / 1024]
+            csv_writer.writerow(row)
+            print row
+
+if __name__ == '__main__':
+    parser = optparse.OptionParser(usage="%prog logdir_or_file")
+
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.print_help()
+        sys.exit(2)
+    else:
+        main(args[0])
diff --git a/talk/vmil2012/tool/build_tables.py 
b/talk/vmil2012/tool/build_tables.py
--- a/talk/vmil2012/tool/build_tables.py
+++ b/talk/vmil2012/tool/build_tables.py
@@ -2,17 +2,21 @@
 import csv
 import django
 from django.template import Template, Context
-import optparse
-from os import path
+import os
 import sys
 
-#
+# This line is required for Django configuration
+django.conf.settings.configure()
 
 
-def main(csvfile, template, texfile):
+def getlines(csvfile):
     with open(csvfile, 'rb') as f:
         reader = csv.DictReader(f, delimiter=',')
-        lines = [l for l in reader]
+        return [l for l in reader]
+
+
+def build_ops_count_table(csvfile, texfile, template):
+    lines = getlines(csvfile)
 
     head = ['Benchmark',
             'ops b/o',
@@ -20,7 +24,7 @@
             'ops a/o',
             '\\% guards a/o',
             'opt. rate',
-            'guard opt. rate',]
+            'guard opt. rate']
 
     table = []
     # collect data
@@ -33,22 +37,43 @@
         res = [
                 bench['bench'].replace('_', '\\_'),
                 ops_bo,
-                "%.2f (%s)" % (guards_bo / ops_bo * 100, bench['guard 
before']),
+                "%.2f (%s)" % (guards_bo / ops_bo * 100,
+                                 bench['guard before']),
                 ops_ao,
-                "%.2f (%s)" % (guards_ao / ops_ao * 100, bench['guard after']),
-                "%.2f" % ((1 - ops_ao/ops_bo) * 100,),
-                "%.2f" % ((1 - guards_ao/guards_bo) * 100,),
+                "%.2f (%s)" % (guards_ao / ops_ao * 100,
+                                  bench['guard after']),
+                "%.2f" % ((1 - ops_ao / ops_bo) * 100,),
+                "%.2f" % ((1 - guards_ao / guards_bo) * 100,),
               ]
         table.append(res)
     output = render_table(template, head, sorted(table))
+    write_table(output, texfile)
+
+
+def build_backend_count_table(csvfile, texfile, template):
+    lines = getlines(csvfile)
+
+    head = ['Benchmark',
+            'Machine code size (kB)',
+            'll resume data (kB)']
+
+    table = []
+    # collect data
+    for bench in lines:
+        bench['bench'] = bench['bench'].replace('_', '\\_')
+        keys = ['bench', 'asm size', 'guard map size']
+        table.append([bench[k] for k in keys])
+    output = render_table(template, head, sorted(table))
+    write_table(output, texfile)
+
+
+def write_table(output, texfile):
     # Write the output to a file
     with open(texfile, 'w') as out_f:
         out_f.write(output)
 
 
 def render_table(ttempl, head, table):
-    # This line is required for Django configuration
-    django.conf.settings.configure()
     # open and read template
     with open(ttempl) as f:
         t = Template(f.read())
@@ -56,12 +81,25 @@
     return t.render(c)
 
 
+tables = {
+        'benchmarks_table.tex':
+            ('summary.csv', build_ops_count_table),
+        'backend_table.tex':
+            ('backend_summary.csv', build_backend_count_table)
+        }
+
+
+def main(table):
+    tablename = os.path.basename(table)
+    if tablename not in tables:
+        raise AssertionError('unsupported table')
+    data, builder = tables[tablename]
+    csvfile = os.path.join('logs', data)
+    texfile = os.path.join('figures', tablename)
+    template = os.path.join('tool', 'table_template.tex')
+    builder(csvfile, texfile, template)
+
+
 if __name__ == '__main__':
-    parser = optparse.OptionParser(usage="%prog csvfile template.tex 
output.tex")
-    options, args = parser.parse_args()
-    if len(args) < 3:
-        parser.print_help()
-        sys.exit(2)
-    else:
-        main(args[0], args[1], args[2])
-
+    assert len(sys.argv) > 1
+    main(sys.argv[1])
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] extradoc extradoc: add code to build backend data tables about machine code and guard data sizes and add the latest results

Reply via email to