Author: Carl Friedrich Bolz <[email protected]>
Branch:
Changeset: r87592:fb6bb835369e
Date: 2016-10-05 17:31 +0200
http://bitbucket.org/pypy/pypy/changeset/fb6bb835369e/
Log: - change timeit to report the average +- standard deviation
- print a warning and point to the perf module.
- increase default number of runs from 3 to 7
reporting the minimum is often quite misleading, see e.g.
"Statistically Rigorous Java Performance Evaluation" by Georges
et al. 2007
diff --git a/lib-python/2.7/test/test_timeit.py
b/lib-python/2.7/test/test_timeit.py
--- a/lib-python/2.7/test/test_timeit.py
+++ b/lib-python/2.7/test/test_timeit.py
@@ -13,7 +13,7 @@
DEFAULT_NUMBER = 1000000
# timeit's default number of repetitions.
-DEFAULT_REPEAT = 3
+DEFAULT_REPEAT = timeit.default_repeat
# XXX: some tests are commented out that would improve the coverage but take a
# long time to run because they test the default number of loops, which is
@@ -204,7 +204,7 @@
t.print_exc(s)
self.assert_exc_string(s.getvalue(), 'ZeroDivisionError')
- MAIN_DEFAULT_OUTPUT = "10 loops, best of 3: 1 sec per loop\n"
+ MAIN_DEFAULT_OUTPUT = "1 loops, average of 7: 1 +- 0 sec per loop (using
standard deviation)\n"
def run_main(self, seconds_per_increment=1.0, switches=None, timer=None):
if timer is None:
@@ -230,33 +230,35 @@
def test_main_seconds(self):
s = self.run_main(seconds_per_increment=5.5)
- self.assertEqual(s, "10 loops, best of 3: 5.5 sec per loop\n")
+ self.assertIn("1 loops, average of 7: 5.5 +- 0 sec per loop (using
standard deviation)\n", s)
def test_main_milliseconds(self):
s = self.run_main(seconds_per_increment=0.0055)
- self.assertEqual(s, "100 loops, best of 3: 5.5 msec per loop\n")
+ self.assertIn("100 loops, average of 7: 5.5 +-", s)
+ self.assertIn("msec per loop", s)
def test_main_microseconds(self):
s = self.run_main(seconds_per_increment=0.0000025, switches=['-n100'])
- self.assertEqual(s, "100 loops, best of 3: 2.5 usec per loop\n")
+ self.assertIn("100 loops, average of 7: 2.5", s)
+ self.assertIn("usec per loop", s)
def test_main_fixed_iters(self):
s = self.run_main(seconds_per_increment=2.0, switches=['-n35'])
- self.assertEqual(s, "35 loops, best of 3: 2 sec per loop\n")
+ self.assertIn("35 loops, average of 7: 2 +- 0 sec per loop (using
standard deviation)\n", s)
def test_main_setup(self):
s = self.run_main(seconds_per_increment=2.0,
switches=['-n35', '-s', 'print("CustomSetup")'])
- self.assertEqual(s, "CustomSetup\n" * 3 +
- "35 loops, best of 3: 2 sec per loop\n")
+ self.assertIn("CustomSetup\n" * DEFAULT_REPEAT +
+ "35 loops, average of 7: 2 +- 0 sec per loop (using standard
deviation)\n", s)
def test_main_fixed_reps(self):
s = self.run_main(seconds_per_increment=60.0, switches=['-r9'])
- self.assertEqual(s, "10 loops, best of 9: 60 sec per loop\n")
+ self.assertIn("1 loops, average of 9: 60 +- 0 sec per loop (using
standard deviation)\n", s)
def test_main_negative_reps(self):
s = self.run_main(seconds_per_increment=60.0, switches=['-r-5'])
- self.assertEqual(s, "10 loops, best of 1: 60 sec per loop\n")
+ self.assertIn("1 loops, average of 1: 60 +- 0 sec per loop (using
standard deviation)\n", s)
@unittest.skipIf(sys.flags.optimize >= 2, "need __doc__")
def test_main_help(self):
@@ -266,33 +268,34 @@
def test_main_using_time(self):
fake_timer = FakeTimer()
s = self.run_main(switches=['-t'], timer=fake_timer)
- self.assertEqual(s, self.MAIN_DEFAULT_OUTPUT)
+ self.assertIn(self.MAIN_DEFAULT_OUTPUT, s)
self.assertIs(fake_timer.saved_timer, time.time)
def test_main_using_clock(self):
fake_timer = FakeTimer()
s = self.run_main(switches=['-c'], timer=fake_timer)
- self.assertEqual(s, self.MAIN_DEFAULT_OUTPUT)
+ self.assertIn(self.MAIN_DEFAULT_OUTPUT, s)
self.assertIs(fake_timer.saved_timer, time.clock)
def test_main_verbose(self):
s = self.run_main(switches=['-v'])
- self.assertEqual(s, dedent("""\
- 10 loops -> 10 secs
- raw times: 10 10 10
- 10 loops, best of 3: 1 sec per loop
- """))
+ self.assertIn(dedent("""\
+ 1 loops -> 1 secs
+ raw times: 1 1 1 1 1 1 1
+ 1 loops, average of 7: 1 +- 0 sec per loop (using standard
deviation)
+ """), s)
def test_main_very_verbose(self):
s = self.run_main(seconds_per_increment=0.000050, switches=['-vv'])
- self.assertEqual(s, dedent("""\
+ self.assertIn(dedent("""\
+ 1 loops -> 5e-05 secs
10 loops -> 0.0005 secs
100 loops -> 0.005 secs
1000 loops -> 0.05 secs
10000 loops -> 0.5 secs
- raw times: 0.5 0.5 0.5
- 10000 loops, best of 3: 50 usec per loop
- """))
+ raw times: 0.5 0.5 0.5 0.5 0.5 0.5 0.5
+ 10000 loops, average of 7: 50 +- 0 usec per loop (using
standard deviation)
+ """), s)
def test_main_exception(self):
with captured_stderr() as error_stringio:
@@ -304,6 +307,15 @@
s = self.run_main(switches=['-n1', '1.0/0.0'])
self.assert_exc_string(error_stringio.getvalue(), 'ZeroDivisionError')
+ def test_main_recommends_perf(self):
+ s = self.run_main(seconds_per_increment=2.0, switches=['-n35', '-s',
'print("CustomSetup")'])
+ self.assertIn(dedent("""\
+ WARNING: timeit is a very unreliable tool. use perf or something
else for real measurements
+ pypy -m pip install perf
+ pypy -m perf timeit -n35 -s 'print("CustomSetup")' 'import timeit;
timeit._fake_timer.inc()'
+ """), s)
+
+
def test_main():
run_unittest(TestTimeit)
diff --git a/lib-python/2.7/timeit.py b/lib-python/2.7/timeit.py
--- a/lib-python/2.7/timeit.py
+++ b/lib-python/2.7/timeit.py
@@ -13,7 +13,7 @@
Options:
-n/--number N: how many times to execute 'statement' (default: see below)
- -r/--repeat N: how many times to repeat the timer (default 3)
+ -r/--repeat N: how many times to repeat the timer (default 7)
-s/--setup S: statement to be executed once initially (default 'pass')
-t/--time: use time.time() (default on Unix)
-c/--clock: use time.clock() (default on Windows)
@@ -53,6 +53,8 @@
"""
import gc
+import math
+import os
import sys
import time
@@ -60,7 +62,7 @@
dummy_src_name = "<timeit-src>"
default_number = 1000000
-default_repeat = 3
+default_repeat = 7
if sys.platform == "win32":
# On Windows, the best timer is time.clock()
@@ -265,6 +267,7 @@
"""
if args is None:
args = sys.argv[1:]
+ origargs = args
import getopt
try:
opts, args = getopt.getopt(args, "n:s:r:tcvh",
@@ -281,6 +284,7 @@
repeat = default_repeat
verbose = 0
precision = 3
+ units = {"sec": 1, "msec": 1e3, "usec": 1e6, "ns": 1e9}
for o, a in opts:
if o in ("-n", "--number"):
number = int(a)
@@ -302,17 +306,25 @@
print __doc__,
return 0
setup = "\n".join(setup) or "pass"
+
+ print "WARNING: timeit is a very unreliable tool. use perf or something
else for real measurements"
+ executable = os.path.basename(sys.executable)
+ print "%s -m pip install perf" % executable
+ print "%s -m perf timeit %s" % (
+ executable,
+ " ".join([(arg if arg.startswith("-") else repr(arg))
+ for arg in origargs]), )
+ print "-" * 60
# Include the current directory, so that local imports work (sys.path
# contains the directory of this script, rather than the current
# directory)
- import os
sys.path.insert(0, os.curdir)
if _wrap_timer is not None:
timer = _wrap_timer(timer)
t = Timer(stmt, setup, timer)
if number == 0:
# determine number so that 0.2 <= total time < 2.0
- for i in range(1, 10):
+ for i in range(0, 10):
number = 10**i
try:
x = t.timeit(number)
@@ -324,24 +336,34 @@
if x >= 0.2:
break
try:
- r = t.repeat(repeat, number)
+ timings = t.repeat(repeat, number)
except:
t.print_exc()
return 1
- best = min(r)
if verbose:
- print "raw times:", " ".join(["%.*g" % (precision, x) for x in r])
- print "%d loops," % number,
- usec = best * 1e6 / number
- if usec < 1000:
- print "best of %d: %.*g usec per loop" % (repeat, precision, usec)
- else:
- msec = usec / 1000
- if msec < 1000:
- print "best of %d: %.*g msec per loop" % (repeat, precision, msec)
- else:
- sec = msec / 1000
- print "best of %d: %.*g sec per loop" % (repeat, precision, sec)
+ print "raw times:", " ".join(["%.*g" % (precision, x) for x in
timings])
+
+ timings = [dt / number for dt in timings]
+
+ def _avg(l):
+ return math.fsum(l) / len(l)
+ def _stdev(l):
+ avg = _avg(l)
+ return (math.fsum([(x - avg) ** 2 for x in l]) / len(l)) ** 0.5
+
+ average = _avg(timings)
+
+ scales = [(scale, unit) for unit, scale in units.items()]
+ scales.sort()
+ for scale, time_unit in scales:
+ if average * scale >= 1.0:
+ break
+
+ stdev = _stdev(timings)
+ print("%s loops, average of %d: %.*g +- %.*g %s per loop (using standard
deviation)"
+ % (number, repeat,
+ precision, average * scale,
+ precision, stdev * scale, time_unit))
return None
if __name__ == "__main__":
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit