Author: Carl Friedrich Bolz <cfb...@gmx.de> Branch: extradoc Changeset: r4371:7706cb52355c Date: 2012-07-26 11:13 +0200 http://bitbucket.org/pypy/extradoc/changeset/7706cb52355c/
Log: commit diff --git a/talk/vmil2012/Makefile b/talk/vmil2012/Makefile --- a/talk/vmil2012/Makefile +++ b/talk/vmil2012/Makefile @@ -1,5 +1,5 @@ -jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex figures/benchmarks_table.tex +jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex figures/benchmarks_table.tex figures/backend_table.tex pdflatex paper bibtex paper pdflatex paper @@ -18,12 +18,18 @@ %.tex: %.py pygmentize -l python -o $@ $< -figures/benchmarks_table.tex: tool/build_tables.py logs/summary.csv tool/table_template.tex +figures/%_table.tex: tool/build_tables.py logs/backend_summary.csv logs/summary.csv tool/table_template.tex tool/setup.sh - paper_env/bin/python tool/build_tables.py logs/summary.csv tool/table_template.tex figures/benchmarks_table.tex + paper_env/bin/python tool/build_tables.py $@ + +logs/logbench*:; logs/summary.csv: logs/logbench* tool/difflogs.py - python tool/difflogs.py --diffall logs + @if ls logs/logbench* &> /dev/null; then python tool/difflogs.py --diffall logs; fi + +logs/backend_summary.csv: logs/logbench* tool/backenddata.py + @if ls logs/logbench* &> /dev/null; then python tool/backenddata.py logs; fi logs:: tool/run_benchmarks.sh + diff --git a/talk/vmil2012/logs/backend_summary.csv b/talk/vmil2012/logs/backend_summary.csv new file mode 100644 --- /dev/null +++ b/talk/vmil2012/logs/backend_summary.csv @@ -0,0 +1,12 @@ +exe,bench,asm size,guard map size +pypy-c,chaos,154,24 +pypy-c,crypto_pyaes,167,24 +pypy-c,django,220,47 +pypy-c,go,4802,874 +pypy-c,pyflate-fast,719,150 +pypy-c,raytrace-simple,486,75 +pypy-c,richards,153,17 +pypy-c,spambayes,2502,337 +pypy-c,sympy_expand,918,211 +pypy-c,telco,506,77 +pypy-c,twisted_names,1604,211 diff --git a/talk/vmil2012/logs/summary.csv b/talk/vmil2012/logs/summary.csv --- a/talk/vmil2012/logs/summary.csv +++ b/talk/vmil2012/logs/summary.csv @@ -1,12 +1,12 @@ exe,bench,number of loops,new before,new after,get before,get after,set before,set 
after,guard before,guard after,numeric before,numeric after,rest before,rest after -pypy,chaos,32,1810,186,1874,928,8996,684,598,242,1024,417,7603,2711 -pypy,crypto_pyaes,35,1385,234,1066,641,9660,873,373,110,1333,735,5976,3435 -pypy,django,39,1328,184,2711,1125,8251,803,884,275,623,231,7847,2831 -pypy,go,870,59577,4874,93474,32476,373715,22356,21449,7742,20792,7191,217142,78327 -pypy,pyflate-fast,147,5797,781,7654,3346,38540,2394,1977,1031,3805,1990,28135,12097 -pypy,raytrace-simple,115,7001,629,6283,2664,43793,2788,2078,861,2263,1353,28079,9234 -pypy,richards,51,1933,84,2614,1009,15947,569,634,268,700,192,10633,3430 -pypy,spambayes,477,16535,2861,29399,13143,114323,17032,6620,2318,13209,5387,75324,32570 -pypy,sympy_expand,174,6485,1067,10328,4131,36197,4078,2981,956,2493,1133,34017,11162 -pypy,telco,93,7289,464,9825,2244,40435,2559,2063,473,2833,964,35278,8996 -pypy,twisted_names,260,15575,2246,28618,10050,94792,9744,7838,1792,9127,2978,78420,25893 +pypy-c,chaos,32,1810,186,1874,928,8996,684,598,242,1024,417,7603,2711 +pypy-c,crypto_pyaes,35,1385,234,1066,641,9660,873,373,110,1333,735,5976,3435 +pypy-c,django,39,1328,184,2711,1125,8251,803,884,275,623,231,7847,2831 +pypy-c,go,870,59577,4874,93474,32476,373715,22356,21449,7742,20792,7191,217142,78327 +pypy-c,pyflate-fast,147,5797,781,7654,3346,38540,2394,1977,1031,3805,1990,28135,12097 +pypy-c,raytrace-simple,115,7001,629,6283,2664,43793,2788,2078,861,2263,1353,28079,9234 +pypy-c,richards,51,1933,84,2614,1009,15947,569,634,268,700,192,10633,3430 +pypy-c,spambayes,472,16117,2832,28469,12885,110877,16673,6419,2280,12936,5293,73480,31978 +pypy-c,sympy_expand,174,6485,1067,10328,4131,36197,4078,2981,956,2493,1133,34017,11162 +pypy-c,telco,93,7289,464,9825,2244,40435,2559,2063,473,2833,964,35278,8996 +pypy-c,twisted_names,235,14357,2012,26042,9251,88092,8553,7125,1656,8216,2649,71912,23881 diff --git a/talk/vmil2012/paper.tex b/talk/vmil2012/paper.tex --- a/talk/vmil2012/paper.tex +++ b/talk/vmil2012/paper.tex @@ 
-354,9 +354,9 @@ \noindent \centering \begin{minipage}{1\columnwidth} - \begin{lstlisting} - i8 = int_eq(i6, 1) - guard_false(i8) [i6, i1, i0] + \begin{lstlisting}[mathescape] +$b_1$ = int_eq($i_2$, 1) +guard_false($b_1$) \end{lstlisting} \end{minipage} \begin{minipage}{.40\columnwidth} @@ -455,7 +455,54 @@ \section{Evaluation} \label{sec:evaluation} -\include{figures/benchmarks_table} +The following analysis is based on a selection of benchmarks taken from the set +of benchmarks used to measure the performance of PyPy as can be seen +on\footnote{http://speed.pypy.org/}. The selection is based on the following +criteria \bivab{??}. The benchmarks were taken from the PyPy benchmarks +repository using revision +\texttt{ff7b35837d0f}\footnote{https://bitbucket.org/pypy/benchmarks/src/ff7b35837d0f}. +The benchmarks were run on a version of PyPy based on the +tag~\texttt{release-1.9} and patched to collect additional data about the +guards in the machine code +backends\footnote{https://bitbucket.org/pypy/pypy/src/release-1.9}. All +benchmark data was collected on a 64-bit MacBook Pro running Mac OS X +10.7.4 \bivab{do we need more data for this kind of benchmarks} with the loop +unrolling optimization disabled\bivab{rationale?}. + +Figure~\ref{fig:ops_count} shows the total number of operations that are +recorded during tracing for each of the benchmarks and what percentage of these +are guards. Figure~\ref{fig:ops_count} also shows the number of operations left +after performing the different trace optimizations done by the trace optimizer, +such as xxx. The last columns show the overall optimization rate and the +optimization rate specific for guard operations, showing what percentage of the +operations were removed during the optimization phase. 
+ +\begin{figure*} + \include{figures/benchmarks_table} + \caption{Benchmark Results} + \label{fig:ops_count} +\end{figure*} + +\bivab{should we rather count the trampolines as part of the guard data instead +of counting it as part of the instructions} + +Figure~\ref{fig:backend_data} shows +the total memory consumption of the code and of the data generated by the machine code +backend for the different benchmarks mentioned above. That is, the operations +left after optimization take the space shown in Figure~\ref{fig:backend_data} +after being compiled. The figure also shows the size of the additional data stored for the guards, which is used +in case of a bailout and when attaching a bridge. +\begin{figure*} + \include{figures/backend_table} + \caption{Total size of generated machine code and guard data} + \label{fig:backend_data} +\end{figure*} + +Neither figure takes garbage collection into account. Pieces of machine +code can be globally invalidated or just become cold again. In both cases the +generated machine code and the related data are garbage collected. The figures +show the total number of operations that are evaluated by the JIT and the +total amount of code and data that is generated from the optimized traces. * Evaluation * Measure guard memory consumption and machine code size diff --git a/talk/vmil2012/tool/backenddata.py b/talk/vmil2012/tool/backenddata.py new file mode 100644 --- /dev/null +++ b/talk/vmil2012/tool/backenddata.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +""" +Parse and summarize the traces produced by pypy-c-jit when PYPYLOG is set. 
+only works for logs when unrolling is disabled +""" + +import csv +import optparse +import os +import re +import sys +from pypy.jit.metainterp.history import ConstInt +from pypy.jit.tool.oparser import parse +from pypy.rpython.lltypesystem import llmemory, lltype +from pypy.tool import logparser + + +def collect_logfiles(path): + if not os.path.isdir(path): + logs = [os.path.basename(path)] + else: + logs = os.listdir(path) + all = [] + for log in logs: + parts = log.split(".") + if len(parts) != 3: + continue + l, exe, bench = parts + if l != "logbench": + continue + all.append((exe, bench, log)) + all.sort() + return all + + +def collect_guard_data(log): + """Calculate the total size in bytes of the locations maps for all guards + in a logfile""" + guards = logparser.extract_category(log, 'jit-backend-guard-size') + return sum(int(x[6:]) for x in guards if x.startswith('chars')) + + +def collect_asm_size(log, guard_size=0): + """Calculate the size of the machine code pieces of a logfile. 
If + guard_size is passed it is substracted from result under the assumption + that the guard location maps are encoded in the instruction stream""" + asm = logparser.extract_category(log, 'jit-backend-dump') + asmlen = 0 + for block in asm: + expr = re.compile("CODE_DUMP @\w+ \+\d+\s+(.*$)") + match = expr.search(block) + assert match is not None # no match found + code = match.group(1) + asmlen += len(code) + return asmlen - guard_size + + +def collect_data(dirname, logs): + for exe, name, log in logs: + path = os.path.join(dirname, log) + logfile = logparser.parse_log_file(path) + guard_size = collect_guard_data(logfile) + asm_size = collect_asm_size(logfile, guard_size) + yield (exe, name, log, asm_size, guard_size) + + +def main(path): + logs = collect_logfiles(path) + if os.path.isdir(path): + dirname = path + else: + dirname = os.path.dirname(path) + results = collect_data(dirname, logs) + + with file("logs/backend_summary.csv", "w") as f: + csv_writer = csv.writer(f) + row = ["exe", "bench", "asm size", "guard map size"] + csv_writer.writerow(row) + print row + for exe, bench, log, asm_size, guard_size in results: + row = [exe, bench, asm_size / 1024, guard_size / 1024] + csv_writer.writerow(row) + print row + +if __name__ == '__main__': + parser = optparse.OptionParser(usage="%prog logdir_or_file") + + options, args = parser.parse_args() + if len(args) != 1: + parser.print_help() + sys.exit(2) + else: + main(args[0]) diff --git a/talk/vmil2012/tool/build_tables.py b/talk/vmil2012/tool/build_tables.py --- a/talk/vmil2012/tool/build_tables.py +++ b/talk/vmil2012/tool/build_tables.py @@ -2,25 +2,29 @@ import csv import django from django.template import Template, Context -import optparse -from os import path +import os import sys -# +# This line is required for Django configuration +django.conf.settings.configure() -def main(csvfile, template, texfile): +def getlines(csvfile): with open(csvfile, 'rb') as f: reader = csv.DictReader(f, delimiter=',') - lines = 
[l for l in reader] + return [l for l in reader] + + +def build_ops_count_table(csvfile, texfile, template): + lines = getlines(csvfile) head = ['Benchmark', 'ops b/o', '\\% guards b/o', 'ops a/o', '\\% guards a/o', - 'opt. rate', - 'guard opt. rate',] + 'opt. rate in \\%', + 'guard opt. rate in \\%'] table = [] # collect data @@ -33,22 +37,45 @@ res = [ bench['bench'].replace('_', '\\_'), ops_bo, - "%.2f (%s)" % (guards_bo / ops_bo * 100, bench['guard before']), + "%.2f" % (guards_bo / ops_bo * 100,), ops_ao, - "%.2f (%s)" % (guards_ao / ops_ao * 100, bench['guard after']), - "%.2f" % ((1 - ops_ao/ops_bo) * 100,), - "%.2f" % ((1 - guards_ao/guards_bo) * 100,), + "%.2f" % (guards_ao / ops_ao * 100,), + "%.2f" % ((1 - ops_ao / ops_bo) * 100,), + "%.2f" % ((1 - guards_ao / guards_bo) * 100,), ] table.append(res) - output = render_table(template, head, table) + output = render_table(template, head, sorted(table)) + write_table(output, texfile) + + +def build_backend_count_table(csvfile, texfile, template): + lines = getlines(csvfile) + + head = ['Benchmark', + 'Machine code size (kB)', + 'll resume data (kB)', + '\\% of machine code size'] + + table = [] + # collect data + for bench in lines: + bench['bench'] = bench['bench'].replace('_', '\\_') + keys = ['bench', 'asm size', 'guard map size'] + gmsize = int(bench['guard map size']) + asmsize = int(bench['asm size']) + rel = "%.2f" % (gmsize / asmsize * 100,) + table.append([bench[k] for k in keys] + [rel]) + output = render_table(template, head, sorted(table)) + write_table(output, texfile) + + +def write_table(output, texfile): # Write the output to a file with open(texfile, 'w') as out_f: out_f.write(output) def render_table(ttempl, head, table): - # This line is required for Django configuration - django.conf.settings.configure() # open and read template with open(ttempl) as f: t = Template(f.read()) @@ -56,12 +83,25 @@ return t.render(c) +tables = { + 'benchmarks_table.tex': + ('summary.csv', 
build_ops_count_table), + 'backend_table.tex': + ('backend_summary.csv', build_backend_count_table) + } + + +def main(table): + tablename = os.path.basename(table) + if tablename not in tables: + raise AssertionError('unsupported table') + data, builder = tables[tablename] + csvfile = os.path.join('logs', data) + texfile = os.path.join('figures', tablename) + template = os.path.join('tool', 'table_template.tex') + builder(csvfile, texfile, template) + + if __name__ == '__main__': - parser = optparse.OptionParser(usage="%prog csvfile template.tex output.tex") - options, args = parser.parse_args() - if len(args) < 3: - parser.print_help() - sys.exit(2) - else: - main(args[0], args[1], args[2]) - + assert len(sys.argv) > 1 + main(sys.argv[1]) diff --git a/talk/vmil2012/tool/ll_resume_data_count.patch b/talk/vmil2012/tool/ll_resume_data_count.patch new file mode 100644 --- /dev/null +++ b/talk/vmil2012/tool/ll_resume_data_count.patch @@ -0,0 +1,37 @@ +diff -r eec77c3e87d6 pypy/jit/backend/x86/assembler.py +--- a/pypy/jit/backend/x86/assembler.py Tue Jul 24 11:06:31 2012 +0200 ++++ b/pypy/jit/backend/x86/assembler.py Tue Jul 24 14:29:36 2012 +0200 +@@ -1849,6 +1849,7 @@ + CODE_INPUTARG = 8 | DESCR_SPECIAL + + def write_failure_recovery_description(self, mc, failargs, locs): ++ char_count = 0 + for i in range(len(failargs)): + arg = failargs[i] + if arg is not None: +@@ -1865,6 +1866,7 @@ + pos = loc.position + if pos < 0: + mc.writechar(chr(self.CODE_INPUTARG)) ++ char_count += 1 + pos = ~pos + n = self.CODE_FROMSTACK//4 + pos + else: +@@ -1873,11 +1875,17 @@ + n = kind + 4*n + while n > 0x7F: + mc.writechar(chr((n & 0x7F) | 0x80)) ++ char_count += 1 + n >>= 7 + else: + n = self.CODE_HOLE + mc.writechar(chr(n)) ++ char_count += 1 + mc.writechar(chr(self.CODE_STOP)) ++ char_count += 1 ++ debug_start('jit-backend-guard-size') ++ debug_print("chars %s" % char_count) ++ debug_stop('jit-backend-guard-size') + # assert that the fail_boxes lists are big enough + assert 
len(failargs) <= self.fail_boxes_int.SIZE + diff --git a/talk/vmil2012/tool/run_benchmarks.sh b/talk/vmil2012/tool/run_benchmarks.sh --- a/talk/vmil2012/tool/run_benchmarks.sh +++ b/talk/vmil2012/tool/run_benchmarks.sh @@ -4,9 +4,32 @@ bench_list="${base}/logs/benchs.txt" benchmarks="${base}/pypy-benchmarks" REV="ff7b35837d0f" -pypy=$(which pypy) +pypy_co="${base}/pypy" +PYPYREV='release-1.9' +pypy="${pypy_co}/pypy-c" pypy_opts=",--jit enable_opts=intbounds:rewrite:virtualize:string:pure:heap:ffi" baseline=$(which true) +logopts='jit-backend-dump,jit-backend-guard-size,jit-log-opt,jit-log-noopt' +# checkout and build a pypy-c version +if [ ! -d "${pypy_co}" ]; then + echo "Cloning pypy repository to ${pypy_co}" + hg clone https://bi...@bitbucket.org/pypy/pypy "${pypy_co}" +fi +# +cd "${pypy_co}" +echo "updating pypy to fixed revision ${PYPYREV}" +hg update "${PYPYREV}" +echo "Patching pypy" +patch -p1 -N < "$base/tool/ll_resume_data_count.patch" +# +echo "Checking for an existing pypy-c" +if [ ! -x "${pypy-c}" ] +then + pypy/bin/rpython -Ojit pypy/translator/goal/targetpypystandalone.py +else + echo "found!" +fi + # setup a checkout of the pypy benchmarks and update to a fixed revision if [ ! 
-d "${benchmarks}" ]; then @@ -16,7 +39,7 @@ echo "updating benchmarks to fixed revision ${REV}" hg update "${REV}" echo "Patching benchmarks to pass PYPYLOG to benchmarks" - patch -p1 < "$base/tool/env.patch" + patch -p1 < "$base/tool/env.patch" else cd "${benchmarks}" echo "Clone of pypy/benchmarks already present, reverting changes in the checkout" @@ -24,13 +47,13 @@ echo "updating benchmarks to fixed revision ${REV}" hg update "${REV}" echo "Patching benchmarks to pass PYPYLOG to benchmarks" - patch -p1 < "$base/tool/env.patch" + patch -p1 < "$base/tool/env.patch" fi # run each benchmark defined on $bench_list while read line do logname="${base}/logs/logbench.$(basename "${pypy}").${line}" - export PYPYLOG="jit:$logname" + export PYPYLOG="${logopts}:$logname" bash -c "./runner.py --changed=\"${pypy}\" --args=\"${pypy_opts}\" --benchmarks=${line}" done < $bench_list diff --git a/talk/vmil2012/tool/table_template.tex b/talk/vmil2012/tool/table_template.tex --- a/talk/vmil2012/tool/table_template.tex +++ b/talk/vmil2012/tool/table_template.tex @@ -1,5 +1,5 @@ -\begin{table} - \centering +\begin{center} +{\smaller \begin{tabular}{ {%for c in head %} |l| {% endfor %} } \hline {% for col in head %} @@ -21,6 +21,5 @@ {% endfor %} \hline \end{tabular} - \caption{'fff'} - \label{'fff'} -\end{table} +} +\end{center} _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit