Author: Maciej Fijalkowski <fij...@gmail.com> Branch: extradoc Changeset: r3717:029b8e343f97 Date: 2011-06-16 21:48 +0200 http://bitbucket.org/pypy/extradoc/changeset/029b8e343f97/
Log: start describing benchmarks diff --git a/talk/iwtc11/benchmarks/benchmark.sh b/talk/iwtc11/benchmarks/benchmark.sh --- a/talk/iwtc11/benchmarks/benchmark.sh +++ b/talk/iwtc11/benchmarks/benchmark.sh @@ -5,7 +5,7 @@ if [ "$1" == "gcc" ]; then ./runner.py -n 5 -c "$*" sqrt/sqrt_double.c ./runner.py -n 5 -c "$*" sqrt/sqrt_long.c - #./runner.py -n 5 -c "$*" sqrt/sqrt_fix16.c + ./runner.py -n 5 -c "$*" sqrt/sqrt_fix16.c #./runner.py -n 5 -c "$* -lm" convolution/conv3.c 1 #./runner.py -n 5 -c "$* -lm" convolution/conv5.c 1 ./runner.py -n 5 -c "$* -lm" convolution/conv3.c 100 @@ -20,7 +20,7 @@ else $* ./runner.py -n 10 sqrt/sqrt.py main int $* ./runner.py -n 10 sqrt/sqrt.py main float - #$* ./runner.py -n 10 sqrt/sqrt.py main Fix16 + $* ./runner.py -n 10 sqrt/sqrt.py main Fix16 #$* ./runner.py -n 10 convolution/convolution.py conv3 1 #$* ./runner.py -n 10 convolution/convolution.py conv5 1 $* ./runner.py -n 10 convolution/convolution.py conv3 100 diff --git a/talk/iwtc11/benchmarks/new_result.txt b/talk/iwtc11/benchmarks/new_result.txt --- a/talk/iwtc11/benchmarks/new_result.txt +++ b/talk/iwtc11/benchmarks/new_result.txt @@ -1,63 +1,37 @@ -pypy --jit enable_opts=intbounds:rewrite:virtualize:heap:unroll -sqrt(int): 1.79892385006 +- 0.00194840037512 -sqrt(float): 0.983013772964 +- 0.00221919586293 -conv3(1e6): 0.766417503357 +- 0.00866699505143 -conv5(1e6): 0.996688437462 +- 0.012036835877 -conv3(1e5): 0.730618429184 +- 0.00375146136701 -conv5(1e5): 1.03531208038 +- 0.0111413026874 -conv3x3(3): 0.069846701622 +- 0.000501920798166 -conv3x3(1000): 0.0522719621658 +- 0.0357056076979 -dilate3x3(1000): 0.38942694664 +- 0.00619291977785 -NoBorderImagePadded: 1.89698078632 +- 0.0208055951105 -NoBorderImagePadded(iter): 0.519681739807 +- 0.0200662890046 -NoBorderImagePadded(range): 0.450081467628 +- 0.00105444417894 -NoBorderImage: 2.13951308727 +- 0.00576674378529 -NoBorderImage(iter): 1.46965010166 +- 0.00394661836239 -NoBorderImage(range): 1.35105161667 +- 0.00249887289286 
-sobel(NoBorderImagePadded): 0.45955350399 +- 0.00145458444751 -sobel_uint8(NoBorderImagePadded): 0.498426914215 +- 0.00665320862997 +pypy +sqrt(int): 1.81218411922 +- 0.0217793211373 +sqrt(float): 0.987752747536 +- 0.010412866907 +sqrt(Fix16): 2.21716473103 +- 0.00636762886973 +conv3(1e6): 0.756063270569 +- 0.0225483799804 +conv5(1e6): 1.07853357792 +- 0.00813703177389 +conv3(1e5): 0.731228137016 +- 0.0161735983974 +conv5(1e5): 1.05882668495 +- 0.0341698164686 +conv3x3(3): 0.0685305118561 +- 0.000278350915493 +conv3x3(1000): 0.0520143270493 +- 0.0342524928471 +dilate3x3(1000): 0.415372800827 +- 0.044525202511 +NoBorderImagePadded: 1.89855155945 +- 0.00727766698755 +NoBorderImagePadded(iter): 0.477786374092 +- 0.00111654321125 +NoBorderImagePadded(range): 0.451609492302 +- 0.00325743719387 +NoBorderImage: 2.22490911484 +- 0.0605514739511 +NoBorderImage(iter): 1.46266727448 +- 0.00415733919764 +NoBorderImage(range): 1.37843291759 +- 0.0630244441411 +sobel(NoBorderImagePadded): 0.463502717018 +- 0.00257025834276 +sobel_uint8(NoBorderImagePadded): 0.52193570137 +- 0.0198069947724 pypy --jit enable_opts=intbounds:rewrite:virtualize:heap -sqrt(int): 2.27739796638 +- 0.0271040580427 -sqrt(float): 1.364168787 +- 0.0235396053333 -conv3(1e6): 1.72038755417 +- 0.0280206343663 -conv5(1e6): 1.93043384552 +- 0.0302489061093 -conv3(1e5): 1.6559261322 +- 0.0364074757582 -conv5(1e5): 1.85165474415 +- 0.032410582414 -conv3x3(3): 0.107097601891 +- 0.00457118866065 -conv3x3(1000): 0.0721160173416 +- 0.00365968876656 -dilate3x3(1000): 0.43175163269 +- 0.0720869033105 -NoBorderImagePadded: 2.00819942951 +- 0.0260239930765 -NoBorderImagePadded(iter): 1.22523207664 +- 0.026102105011 -NoBorderImagePadded(range): 1.113205266 +- 0.0381177388909 -NoBorderImage: 2.21718068123 +- 0.0503771001922 -NoBorderImage(iter): 1.39955751896 +- 0.0034236237913 -NoBorderImage(range): 1.34794125557 +- 0.0379578329049 -sobel(NoBorderImagePadded): 1.00590751171 +- 0.0175536088063 
-sobel_uint8(NoBorderImagePadded): 1.03622698784 +- 0.00533611100064 - - -gcc -O2 -sqrt(float): 0.98 +- 0.00707106781187 -sqrt(int): 0.792 +- 0.004472135955 -conv3(1e6): 0.77 +- 0.0141421356237 -conv5(1e6): 1.026 +- 0.00894427191 -conv3(1e5): 0.686 +- 0.00894427191 -conv5(1e5): 0.976 +- 0.00547722557505 -conv3x3(3): 0.282 +- 0.00836660026534 -conv3x3(1000): 0.244 +- 0.00894427191 -dilate3x3(1000): 0.252 +- 0.004472135955 -sobel_magnitude: 0.18 +- 0.0 - -gcc -O3 -march=native -fno-tree-vectorize -sqrt(float): 0.978 +- 0.004472135955 -sqrt(int): 0.79 +- 0.0 -conv3(1e6): 0.74 +- 0.00707106781187 -conv5(1e6): 0.768 +- 0.01788854382 -conv3(1e5): 0.576 +- 0.00547722557505 -conv5(1e5): 0.652 +- 0.00836660026534 -conv3x3(3): 0.27 +- 0.0 -conv3x3(1000): 0.242 +- 0.004472135955 -dilate3x3(1000): 0.25 +- 0.0 -sobel_magnitude: 0.196 +- 0.00894427191 +sqrt(int): 2.25307536125 +- 0.00499253786735 +sqrt(float): 1.35373134613 +- 0.0033224112985 +sqrt(Fix16): 3.76334681511 +- 0.0499187749691 +conv3(1e6): 1.68589990139 +- 0.00487515973416 +conv5(1e6): 1.89262311459 +- 0.00528534075689 +conv3(1e5): 1.65629634857 +- 0.0281428596263 +conv5(1e5): 1.87937986851 +- 0.0451342539426 +conv3x3(3): 0.100474405289 +- 0.00342971298052 +conv3x3(1000): 0.0688207149506 +- 0.00054547298268 +dilate3x3(1000): 0.415153169632 +- 0.0341885675395 +NoBorderImagePadded: 1.9624298811 +- 0.0123395556581 +NoBorderImagePadded(iter): 1.1778367281 +- 0.00914555357417 +NoBorderImagePadded(range): 1.13030288219 +- 0.00631341257207 +NoBorderImage: 2.17767724991 +- 0.0252138424223 +NoBorderImage(iter): 1.45121192932 +- 0.0274908528137 diff --git a/talk/iwtc11/benchmarks/runall.sh b/talk/iwtc11/benchmarks/runall.sh --- a/talk/iwtc11/benchmarks/runall.sh +++ b/talk/iwtc11/benchmarks/runall.sh @@ -1,10 +1,10 @@ -#!/bin/sh +#!/bin/bash -#./benchmark.sh pypy -./benchmark.sh pypy --jit enable_opts=intbounds:rewrite:virtualize:heap:unroll +./benchmark.sh pypy +#./benchmark.sh pypy --jit 
enable_opts=intbounds:rewrite:virtualize:heap:unroll ./benchmark.sh pypy --jit enable_opts=intbounds:rewrite:virtualize:heap #./benchmark.sh gcc ./benchmark.sh gcc -O2 ./benchmark.sh gcc -O3 -march=native -fno-tree-vectorize -#./benchmark.sh python2.7 - +./benchmark.sh python2.7 +./benchmark.sh python2.6 psyco-wrapper.py diff --git a/talk/iwtc11/paper.tex b/talk/iwtc11/paper.tex --- a/talk/iwtc11/paper.tex +++ b/talk/iwtc11/paper.tex @@ -889,6 +889,25 @@ under a nice interface without losing performance. \end{itemize} +Benchmarks were run on an Intel i7 M620 @2.67GHz with 4M cache and 8G of RAM in +32bit mode. +The machine was otherwise unoccupied. We ran the following software +for benchmarks: + +\begin{itemize} +\item PyPy 1.5 +\item CPython 2.7.2 +\item Psyco 1.6 with CPython 2.6.6 +\item GCC 4.4.5 shipped with Ubuntu 11.04 +\end{itemize} + +We ran GCC both with -O2 optimization and -O3 -march=native, disabling the +automatic loop vectorization. In all cases, SSE2 instructions were used for +floating point operations, except for Psyco, which used x87 FPU instructions. +We also ran PyPy both with and without the loop peeling optimization (the two +configurations were otherwise identical). + +XXX discuss some more, explain why gcc is faster XXX we need Psyco numbers \subsection{Numpy} _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit