Author: Maciej Fijalkowski <[email protected]>
Branch: extradoc
Changeset: r3727:130c4b72e410
Date: 2011-06-17 11:48 +0200
http://bitbucket.org/pypy/extradoc/changeset/130c4b72e410/

Log:    small improvements, write para about benchmarks

diff --git a/talk/iwtc11/benchmarks/benchmark.sh b/talk/iwtc11/benchmarks/benchmark.sh
--- a/talk/iwtc11/benchmarks/benchmark.sh
+++ b/talk/iwtc11/benchmarks/benchmark.sh
@@ -18,24 +18,30 @@
     ./runner.py -n 5 -c "$* -lstdc++" image/sobel.cc 1002 1002
     rm a.out
 else
-    $* ./runner.py -n 10 sqrt/sqrt.py main int
-    $* ./runner.py -n 10 sqrt/sqrt.py main float
-    $* ./runner.py -n 10 sqrt/sqrt.py main Fix16
-    #$* ./runner.py -n 10 convolution/convolution.py conv3 1
-    #$* ./runner.py -n 10 convolution/convolution.py conv5 1
-    $* ./runner.py -n 10 convolution/convolution.py conv3 100
-    $* ./runner.py -n 10 convolution/convolution.py conv5 100
-    $* ./runner.py -n 10 convolution/convolution.py conv3 1000
-    $* ./runner.py -n 10 convolution/convolution.py conv5 1000
-    $* ./runner.py -n 10 convolution/convolution.py conv3x3 1000000 3
-    $* ./runner.py -n 10 convolution/convolution.py conv3x3 1000 1000
-    $* ./runner.py -n 10 convolution/convolution.py dilate3x3 1000 1000
-    $* ./runner.py -n 10 image/noborder.py main NoBorderImagePadded
-    $* ./runner.py -n 10 image/noborder.py main NoBorderImagePadded iter
-    $* ./runner.py -n 10 image/noborder.py main NoBorderImagePadded range
-    $* ./runner.py -n 10 image/noborder.py main NoBorderImage
-    $* ./runner.py -n 10 image/noborder.py main NoBorderImage iter
-    $* ./runner.py -n 10 image/noborder.py main NoBorderImage range
-    $* ./runner.py -n 10 image/sobel.py main NoBorderImagePadded
-    $* ./runner.py -n 10 image/sobel.py main NoBorderImagePadded uint8
+    if [ "$1" == "python2.7" ]; then
+        EXTRA_OPTS='-w 0 -n 1'
+    fi
+    if [ "$1" == "python2.6" ]; then
+        EXTRA_OPTS='-w 1 -n 1'
+    fi
+    $* ./runner.py $EXTRA_OPTS sqrt/sqrt.py main int
+    $* ./runner.py $EXTRA_OPTS sqrt/sqrt.py main float
+    $* ./runner.py $EXTRA_OPTS sqrt/sqrt.py main Fix16
+    #$* ./runner.py $EXTRA_OPTS convolution/convolution.py conv3 1
+    #$* ./runner.py $EXTRA_OPTS convolution/convolution.py conv5 1
+    $* ./runner.py $EXTRA_OPTS convolution/convolution.py conv3 100
+    $* ./runner.py $EXTRA_OPTS convolution/convolution.py conv5 100
+    $* ./runner.py $EXTRA_OPTS convolution/convolution.py conv3 1000
+    $* ./runner.py $EXTRA_OPTS convolution/convolution.py conv5 1000
+    $* ./runner.py $EXTRA_OPTS convolution/convolution.py conv3x3 1000000 3
+    $* ./runner.py $EXTRA_OPTS convolution/convolution.py conv3x3 1000 1000
+    $* ./runner.py $EXTRA_OPTS convolution/convolution.py dilate3x3 1000 1000
+    $* ./runner.py $EXTRA_OPTS image/noborder.py main NoBorderImagePadded
+    $* ./runner.py $EXTRA_OPTS image/noborder.py main NoBorderImagePadded iter
+    $* ./runner.py $EXTRA_OPTS image/noborder.py main NoBorderImagePadded range
+    $* ./runner.py $EXTRA_OPTS image/noborder.py main NoBorderImage
+    $* ./runner.py $EXTRA_OPTS image/noborder.py main NoBorderImage iter
+    $* ./runner.py $EXTRA_OPTS image/noborder.py main NoBorderImage range
+    $* ./runner.py $EXTRA_OPTS image/sobel.py main NoBorderImagePadded
+    $* ./runner.py $EXTRA_OPTS image/sobel.py main NoBorderImagePadded uint8
 fi
diff --git a/talk/iwtc11/benchmarks/new_result.txt b/talk/iwtc11/benchmarks/new_result.txt
--- a/talk/iwtc11/benchmarks/new_result.txt
+++ b/talk/iwtc11/benchmarks/new_result.txt
@@ -1,83 +1,82 @@
 
 pypy
-sqrt(int): 1.81218411922 +- 0.0217793211373
-sqrt(float): 0.987752747536 +- 0.010412866907
-sqrt(Fix16): 2.21716473103 +- 0.00636762886973
-conv3(1e6): 0.756063270569 +- 0.0225483799804
-conv5(1e6): 1.07853357792 +- 0.00813703177389
-conv3(1e5): 0.731228137016 +- 0.0161735983974
-conv5(1e5): 1.05882668495 +- 0.0341698164686
-conv3x3(3): 0.0685305118561 +- 0.000278350915493
-conv3x3(1000): 0.0520143270493 +- 0.0342524928471
-dilate3x3(1000): 0.415372800827 +- 0.044525202511
-NoBorderImagePadded: 1.89855155945 +- 0.00727766698755
-NoBorderImagePadded(iter): 0.477786374092 +- 0.00111654321125
-NoBorderImagePadded(range): 0.451609492302 +- 0.00325743719387
-NoBorderImage: 2.22490911484 +- 0.0605514739511
-NoBorderImage(iter): 1.46266727448 +- 0.00415733919764
-NoBorderImage(range): 1.37843291759 +- 0.0630244441411
-sobel(NoBorderImagePadded): 0.463502717018 +- 0.00257025834276
-sobel_uint8(NoBorderImagePadded): 0.52193570137 +- 0.0198069947724
+sqrt(int): 1.81961710453 +- 0.00969663499951
+sqrt(float): 0.997122144699 +- 0.00475528903922
+sqrt(Fix16): 2.14047310352 +- 0.0175369211294
+conv3(1e6): 0.765250277519 +- 0.0111246299589
+conv5(1e6): 1.08676469326 +- 0.0181131040106
+conv3(1e5): 0.675209879875 +- 0.0210395038414
+conv5(1e5): 1.05374486446 +- 0.0284513681407
+conv3x3(3): 0.0678671360016 +- 0.00108163728271
+conv3x3(1000): 0.0530683040619 +- 0.0344658980996
+dilate3x3(1000): 0.389708518982 +- 0.00835149413747
+NoBorderImagePadded: 1.93399097919 +- 0.0524961558513
+NoBorderImagePadded(iter): 0.488634562492 +- 0.0171516205712
+NoBorderImagePadded(range): 0.483622479439 +- 0.00925072290815
+NoBorderImage: 2.16889901161 +- 0.0157656334579
+NoBorderImage(iter): 1.47057991028 +- 0.0233604904862
+NoBorderImage(range): 1.39746711254 +- 0.0358702404701
+sobel(NoBorderImagePadded): 0.47727098465 +- 0.0285302209995
+sobel_uint8(NoBorderImagePadded): 0.513068723679 +- 0.00450907878019
 
 pypy --jit enable_opts=intbounds:rewrite:virtualize:heap
-sqrt(int): 2.25307536125 +- 0.00499253786735
-sqrt(float): 1.35373134613 +- 0.0033224112985
-sqrt(Fix16): 3.76334681511 +- 0.0499187749691
-conv3(1e6): 1.68589990139 +- 0.00487515973416
-conv5(1e6): 1.89262311459 +- 0.00528534075689
-conv3(1e5): 1.65629634857 +- 0.0281428596263
-conv5(1e5): 1.87937986851 +- 0.0451342539426
-conv3x3(3): 0.100474405289 +- 0.00342971298052
-conv3x3(1000): 0.0688207149506 +- 0.00054547298268
-dilate3x3(1000): 0.415153169632 +- 0.0341885675395
-NoBorderImagePadded: 1.9624298811 +- 0.0123395556581
-NoBorderImagePadded(iter): 1.1778367281 +- 0.00914555357417
-NoBorderImagePadded(range): 1.13030288219 +- 0.00631341257207
-NoBorderImage: 2.17767724991 +- 0.0252138424223
-NoBorderImage(iter): 1.45121192932 +- 0.0274908528137
-NoBorderImage(range): 1.33527753353 +- 0.00819077114708
-sobel(NoBorderImagePadded): 1.01328015327 +- 0.0191050555554
-sobel_uint8(NoBorderImagePadded): 1.10731520653 +- 0.0325015176484
+sqrt(int): 2.26462423801 +- 0.0076627615314
+sqrt(float): 1.35695979595 +- 0.0251587469884
+sqrt(Fix16): 3.93270061016 +- 0.109339327977
+conv3(1e6): 1.68973388672 +- 0.0142045606781
+conv5(1e6): 1.92141816616 +- 0.034837452752
+conv3(1e5): 1.77114777565 +- 0.0558894026315
+conv5(1e5): 1.86009068489 +- 0.0184543492536
+conv3x3(3): 0.0988693475723 +- 0.00115722747303
+conv3x3(1000): 0.0734650850296 +- 0.00267271135671
+dilate3x3(1000): 0.411496067047 +- 0.035852331563
+NoBorderImagePadded: 2.09047472477 +- 0.117371924965
+NoBorderImagePadded(iter): 1.2149545908 +- 0.0217855739412
+NoBorderImagePadded(range): 1.11978774071 +- 0.0280553099539
+NoBorderImage: 2.22395954132 +- 0.0316863806008
+NoBorderImage(iter): 1.44512989521 +- 0.0304946877295
+NoBorderImage(range): 1.34203736782 +- 0.0314288487567
+sobel(NoBorderImagePadded): 1.01348490715 +- 0.0263135905465
+sobel_uint8(NoBorderImagePadded): 1.04967999458 +- 0.0124143422099
 
 gcc -O2
-sqrt(float): 0.984 +- 0.00547722557505
-sqrt(int): 0.796 +- 0.00894427191
-sqrt(Fix16): 0.12 +- 0.0
-conv3(1e6): 0.754 +- 0.00547722557505
-conv5(1e6): 1.01 +- 0.01
-conv3(1e5): 0.666 +- 0.00547722557505
-conv5(1e5): 0.98 +- 0.0122474487139
-conv3x3(3): 0.268 +- 0.004472135955
-conv3x3(1000): 0.24 +- 0.0
-dilate3x3(1000): 0.26 +- 0.0
-sobel_magnitude: 0.19 +- 0.0122474487139
+sqrt(float): 0.98 +- 1.24126707662e-16
+sqrt(int): 0.806 +- 0.00894427191
+sqrt(Fix16): 0.972 +- 0.01788854382
+conv3(1e6): 0.84 +- 0.0452769256907
+conv5(1e6): 1.074 +- 0.0517687164222
+conv3(1e5): 0.702 +- 0.0465832587954
+conv5(1e5): 1.03 +- 0.0484767985742
+conv3x3(3): 0.274 +- 0.00894427191
+conv3x3(1000): 0.242 +- 0.004472135955
+dilate3x3(1000): 0.258 +- 0.004472135955
+sobel_magnitude: 0.194 +- 0.00894427191
 
 gcc -O3 -march=native -fno-tree-vectorize
-sqrt(float): 0.982 +- 0.0109544511501
-sqrt(int): 0.788 +- 0.004472135955
-sqrt(Fix16): 0.12 +- 0.0
-conv3(1e6): 0.73 +- 0.00707106781187
-conv5(1e6): 0.758 +- 0.0192353840617
-conv3(1e5): 0.572 +- 0.004472135955
-conv5(1e5): 0.636 +- 0.00547722557505
-conv3x3(3): 0.276 +- 0.00894427191
-conv3x3(1000): 0.24 +- 0.0
-dilate3x3(1000): 0.252 +- 0.004472135955
-sobel_magnitude: 0.182 +- 0.004472135955
+sqrt(float): 0.98 +- 1.24126707662e-16
+sqrt(int): 0.804 +- 0.00894427191
+sqrt(Fix16): 0.96 +- 0.0122474487139
+conv3(1e6): 0.744 +- 0.011401754251
+conv5(1e6): 0.8 +- 0.0122474487139
+conv3(1e5): 0.588 +- 0.0130384048104
+conv5(1e5): 0.65 +- 0.0122474487139
+conv3x3(3): 0.274 +- 0.00547722557505
+conv3x3(1000): 0.25 +- 0.00707106781187
+dilate3x3(1000): 0.256 +- 0.00894427191
+sobel_magnitude: 0.2 +- 0.0141421356237
 
 python2.7
-sqrt(int): 20.7507618427 +- 0.0820766211411
-sqrt(float): 24.1753005743 +- 0.0810562636584
-sqrt(Fix16): 720.625649452 +- 1.15280139821
-conv3(1e6): 77.3019948721 +- 0.459894508642
-conv5(1e6): 121.30363183 +- 0.199402921369
-conv3(1e5): 78.3221033573 +- 0.153951273532
-conv5(1e5): 121.907371497 +- 0.418968304706
-conv3x3(3): 23.9262646198 +- 0.00662884454029
-conv3x3(1000): 23.9549843311 +- 0.0459730336084
-dilate3x3(1000): 23.4804996014 +- 0.0165083869024
-NoBorderImagePadded: 545.637593651 +- 0.215675086283
-NoBorderImagePadded(iter): 549.07568202 +- 0.198266656745
-NoBorderImagePadded(range): 551.072267699 +- 2.18911451251
-
-python2.6 psyco-wrapper.py
+sqrt(int): 20.8419699669
+sqrt(float): 24.2056779861
+sqrt(Fix16): 744.34590292
+conv3(1e6): 77.1459159851
+conv5(1e6): 125.768272161
+conv3(1e5): 77.8904190063
+conv5(1e5): 122.540805101
+conv3x3(3): 23.8474378586
+conv3x3(1000): 23.7241849899
+dilate3x3(1000): 23.2892370224
+NoBorderImagePadded: 543.731127977
+NoBorderImagePadded(iter): 546.704558849
+NoBorderImagePadded(range): 550.923794985
+NoBorderImage: 537.306480885
diff --git a/talk/iwtc11/paper.tex b/talk/iwtc11/paper.tex
--- a/talk/iwtc11/paper.tex
+++ b/talk/iwtc11/paper.tex
@@ -917,13 +917,20 @@
 We also run PyPy with loop peeling optimization and without (but otherwise
 identical).
 
-For all JIT:ed runs (PyPy and Psyco) 3 iterations were run to warm up the JIT,
-followed by 10 iterations for averaging. For GCC and CPython, 5 iterations
+For PyPy, 10 iterations were run, preceded by 3 iterations for warming up.
+Because the benchmarks take a large amount of time on CPython, only one run
+was performed, preceded by one warm-up run for Psyco.
+For GCC, 5 iterations
 were run. In all cases, the standard deviation is very low, making benchmarks
 very well reproducible.
 
-XXX discuss some more, explain why gcc is faster
-XXX we need Psyco numbers
+We can observe that PyPy (even without loop peeling) is orders of magnitude
+faster than either CPython or Psyco. This is due to the advantages of JIT
+compilation and the optimizations we discussed in XXX [ref to other paper]. Loop
+peeling gives an additional XXX on average, which makes benchmark times
+comparable with natively compiled C code. We attribute the remaining gap to
+the relative immaturity of PyPy's JIT assembler backend as well as missing
+optimizations, such as instruction scheduling.
 
 \subsection{Numpy}
 
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to