Author: Carl Friedrich Bolz <[email protected]>
Branch: 
Changeset: r87592:fb6bb835369e
Date: 2016-10-05 17:31 +0200
http://bitbucket.org/pypy/pypy/changeset/fb6bb835369e/

Log:    - change timeit to report the average +- standard deviation
        - print a warning and point to the perf module.
        - increase default number of runs from 3 to 7

        reporting the minimum is often quite misleading, see e.g.
        "Statistically Rigorous Java Performance Evaluation" by Georges
        et al., 2007
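
For reference, the new code averages with math.fsum and reports a population
standard deviation; below is a minimal standalone sketch of the _avg/_stdev
helpers added to timeit.main() in the patch (the sample timings are made-up
values, not measurements):

    import math

    def _avg(l):
        return math.fsum(l) / len(l)

    def _stdev(l):
        # population standard deviation of the per-loop timings
        avg = _avg(l)
        return (math.fsum([(x - avg) ** 2 for x in l]) / len(l)) ** 0.5

    timings = [1.02e-06, 0.98e-06, 1.00e-06]   # made-up per-loop times, in seconds
    average, stdev = _avg(timings), _stdev(timings)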

diff --git a/lib-python/2.7/test/test_timeit.py b/lib-python/2.7/test/test_timeit.py
--- a/lib-python/2.7/test/test_timeit.py
+++ b/lib-python/2.7/test/test_timeit.py
@@ -13,7 +13,7 @@
 DEFAULT_NUMBER = 1000000
 
 # timeit's default number of repetitions.
-DEFAULT_REPEAT = 3
+DEFAULT_REPEAT = timeit.default_repeat
 
 # XXX: some tests are commented out that would improve the coverage but take a
 # long time to run because they test the default number of loops, which is
@@ -204,7 +204,7 @@
             t.print_exc(s)
         self.assert_exc_string(s.getvalue(), 'ZeroDivisionError')
 
-    MAIN_DEFAULT_OUTPUT = "10 loops, best of 3: 1 sec per loop\n"
+    MAIN_DEFAULT_OUTPUT = "1 loops, average of 7: 1 +- 0 sec per loop (using standard deviation)\n"
 
     def run_main(self, seconds_per_increment=1.0, switches=None, timer=None):
         if timer is None:
@@ -230,33 +230,35 @@
 
     def test_main_seconds(self):
         s = self.run_main(seconds_per_increment=5.5)
-        self.assertEqual(s, "10 loops, best of 3: 5.5 sec per loop\n")
+        self.assertIn("1 loops, average of 7: 5.5 +- 0 sec per loop (using 
standard deviation)\n", s)
 
     def test_main_milliseconds(self):
         s = self.run_main(seconds_per_increment=0.0055)
-        self.assertEqual(s, "100 loops, best of 3: 5.5 msec per loop\n")
+        self.assertIn("100 loops, average of 7: 5.5 +-", s)
+        self.assertIn("msec per loop", s)
 
     def test_main_microseconds(self):
         s = self.run_main(seconds_per_increment=0.0000025, switches=['-n100'])
-        self.assertEqual(s, "100 loops, best of 3: 2.5 usec per loop\n")
+        self.assertIn("100 loops, average of 7: 2.5", s)
+        self.assertIn("usec per loop", s)
 
     def test_main_fixed_iters(self):
         s = self.run_main(seconds_per_increment=2.0, switches=['-n35'])
-        self.assertEqual(s, "35 loops, best of 3: 2 sec per loop\n")
+        self.assertIn("35 loops, average of 7: 2 +- 0 sec per loop (using 
standard deviation)\n", s)
 
     def test_main_setup(self):
         s = self.run_main(seconds_per_increment=2.0,
                 switches=['-n35', '-s', 'print("CustomSetup")'])
-        self.assertEqual(s, "CustomSetup\n" * 3 +
-                "35 loops, best of 3: 2 sec per loop\n")
+        self.assertIn("CustomSetup\n" * DEFAULT_REPEAT +
+                "35 loops, average of 7: 2 +- 0 sec per loop (using standard 
deviation)\n", s)
 
     def test_main_fixed_reps(self):
         s = self.run_main(seconds_per_increment=60.0, switches=['-r9'])
-        self.assertEqual(s, "10 loops, best of 9: 60 sec per loop\n")
+        self.assertIn("1 loops, average of 9: 60 +- 0 sec per loop (using 
standard deviation)\n", s)
 
     def test_main_negative_reps(self):
         s = self.run_main(seconds_per_increment=60.0, switches=['-r-5'])
-        self.assertEqual(s, "10 loops, best of 1: 60 sec per loop\n")
+        self.assertIn("1 loops, average of 1: 60 +- 0 sec per loop (using 
standard deviation)\n", s)
 
     @unittest.skipIf(sys.flags.optimize >= 2, "need __doc__")
     def test_main_help(self):
@@ -266,33 +268,34 @@
     def test_main_using_time(self):
         fake_timer = FakeTimer()
         s = self.run_main(switches=['-t'], timer=fake_timer)
-        self.assertEqual(s, self.MAIN_DEFAULT_OUTPUT)
+        self.assertIn(self.MAIN_DEFAULT_OUTPUT, s)
         self.assertIs(fake_timer.saved_timer, time.time)
 
     def test_main_using_clock(self):
         fake_timer = FakeTimer()
         s = self.run_main(switches=['-c'], timer=fake_timer)
-        self.assertEqual(s, self.MAIN_DEFAULT_OUTPUT)
+        self.assertIn(self.MAIN_DEFAULT_OUTPUT, s)
         self.assertIs(fake_timer.saved_timer, time.clock)
 
     def test_main_verbose(self):
         s = self.run_main(switches=['-v'])
-        self.assertEqual(s, dedent("""\
-                10 loops -> 10 secs
-                raw times: 10 10 10
-                10 loops, best of 3: 1 sec per loop
-            """))
+        self.assertIn(dedent("""\
+                1 loops -> 1 secs
+                raw times: 1 1 1 1 1 1 1
+                1 loops, average of 7: 1 +- 0 sec per loop (using standard deviation)
+            """), s)
 
     def test_main_very_verbose(self):
         s = self.run_main(seconds_per_increment=0.000050, switches=['-vv'])
-        self.assertEqual(s, dedent("""\
+        self.assertIn(dedent("""\
+                1 loops -> 5e-05 secs
                 10 loops -> 0.0005 secs
                 100 loops -> 0.005 secs
                 1000 loops -> 0.05 secs
                 10000 loops -> 0.5 secs
-                raw times: 0.5 0.5 0.5
-                10000 loops, best of 3: 50 usec per loop
-            """))
+                raw times: 0.5 0.5 0.5 0.5 0.5 0.5 0.5
+                10000 loops, average of 7: 50 +- 0 usec per loop (using standard deviation)
+            """), s)
 
     def test_main_exception(self):
         with captured_stderr() as error_stringio:
@@ -304,6 +307,15 @@
             s = self.run_main(switches=['-n1', '1.0/0.0'])
         self.assert_exc_string(error_stringio.getvalue(), 'ZeroDivisionError')
 
+    def test_main_recommends_perf(self):
+        s = self.run_main(seconds_per_increment=2.0, switches=['-n35', '-s', 'print("CustomSetup")'])
+        self.assertIn(dedent("""\
+            WARNING: timeit is a very unreliable tool. use perf or something else for real measurements
+            pypy -m pip install perf
+            pypy -m perf timeit -n35 -s 'print("CustomSetup")' 'import timeit; timeit._fake_timer.inc()'
+        """), s)
+
+
 
 def test_main():
     run_unittest(TestTimeit)
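
The timing line that those assertIn checks look for is produced by the format
string added to timeit.main() in the second half of this patch; as a rough
illustration, a hypothetical helper (not part of the patch) that rebuilds it:

    def expected_line(number, repeat, average, stdev, unit="sec", precision=3):
        return ("%s loops, average of %d: %.*g +- %.*g %s per loop"
                " (using standard deviation)"
                % (number, repeat, precision, average, precision, stdev, unit))

    # expected_line(35, 7, 2, 0) gives the line (minus the trailing newline)
    # matched in test_main_fixed_iters above:
    # "35 loops, average of 7: 2 +- 0 sec per loop (using standard deviation)"
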
diff --git a/lib-python/2.7/timeit.py b/lib-python/2.7/timeit.py
--- a/lib-python/2.7/timeit.py
+++ b/lib-python/2.7/timeit.py
@@ -13,7 +13,7 @@
 
 Options:
   -n/--number N: how many times to execute 'statement' (default: see below)
-  -r/--repeat N: how many times to repeat the timer (default 3)
+  -r/--repeat N: how many times to repeat the timer (default 7)
   -s/--setup S: statement to be executed once initially (default 'pass')
   -t/--time: use time.time() (default on Unix)
   -c/--clock: use time.clock() (default on Windows)
@@ -53,6 +53,8 @@
 """
 
 import gc
+import math
+import os
 import sys
 import time
 
@@ -60,7 +62,7 @@
 
 dummy_src_name = "<timeit-src>"
 default_number = 1000000
-default_repeat = 3
+default_repeat = 7
 
 if sys.platform == "win32":
     # On Windows, the best timer is time.clock()
@@ -265,6 +267,7 @@
     """
     if args is None:
         args = sys.argv[1:]
+    origargs = args
     import getopt
     try:
         opts, args = getopt.getopt(args, "n:s:r:tcvh",
@@ -281,6 +284,7 @@
     repeat = default_repeat
     verbose = 0
     precision = 3
+    units = {"sec": 1, "msec": 1e3, "usec": 1e6, "ns": 1e9}
     for o, a in opts:
         if o in ("-n", "--number"):
             number = int(a)
@@ -302,17 +306,25 @@
             print __doc__,
             return 0
     setup = "\n".join(setup) or "pass"
+
+    print "WARNING: timeit is a very unreliable tool. use perf or something 
else for real measurements"
+    executable = os.path.basename(sys.executable)
+    print "%s -m pip install perf" % executable
+    print "%s -m perf timeit %s" % (
+        executable,
+        " ".join([(arg if arg.startswith("-") else repr(arg))
+                        for arg in origargs]), )
+    print "-" * 60
     # Include the current directory, so that local imports work (sys.path
     # contains the directory of this script, rather than the current
     # directory)
-    import os
     sys.path.insert(0, os.curdir)
     if _wrap_timer is not None:
         timer = _wrap_timer(timer)
     t = Timer(stmt, setup, timer)
     if number == 0:
         # determine number so that 0.2 <= total time < 2.0
-        for i in range(1, 10):
+        for i in range(0, 10):
             number = 10**i
             try:
                 x = t.timeit(number)
@@ -324,24 +336,34 @@
             if x >= 0.2:
                 break
     try:
-        r = t.repeat(repeat, number)
+        timings = t.repeat(repeat, number)
     except:
         t.print_exc()
         return 1
-    best = min(r)
     if verbose:
-        print "raw times:", " ".join(["%.*g" % (precision, x) for x in r])
-    print "%d loops," % number,
-    usec = best * 1e6 / number
-    if usec < 1000:
-        print "best of %d: %.*g usec per loop" % (repeat, precision, usec)
-    else:
-        msec = usec / 1000
-        if msec < 1000:
-            print "best of %d: %.*g msec per loop" % (repeat, precision, msec)
-        else:
-            sec = msec / 1000
-            print "best of %d: %.*g sec per loop" % (repeat, precision, sec)
+        print "raw times:", " ".join(["%.*g" % (precision, x) for x in 
timings])
+
+    timings = [dt / number for dt in timings]
+
+    def _avg(l):
+        return math.fsum(l) / len(l)
+    def _stdev(l):
+        avg = _avg(l)
+        return (math.fsum([(x - avg) ** 2 for x in l]) / len(l)) ** 0.5
+
+    average = _avg(timings)
+
+    scales = [(scale, unit) for unit, scale in units.items()]
+    scales.sort()
+    for scale, time_unit in scales:
+        if average * scale >= 1.0:
+             break
+
+    stdev = _stdev(timings)
+    print("%s loops, average of %d: %.*g +- %.*g %s per loop (using standard 
deviation)"
+          % (number, repeat,
+             precision, average * scale,
+             precision, stdev * scale, time_unit))
     return None
 
 if __name__ == "__main__":
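
A detail of the new unit handling that is easy to miss: scales sorts ascending
by scale factor, so the loop stops at the coarsest unit whose scaled average is
at least 1.0 and falls through to nanoseconds otherwise. A standalone sketch of
that selection (the function name is only for illustration):

    units = {"sec": 1, "msec": 1e3, "usec": 1e6, "ns": 1e9}

    def pick_unit(average):
        # ascending scale factors: (1, "sec"), (1e3, "msec"), (1e6, "usec"), (1e9, "ns")
        scales = sorted((scale, unit) for unit, scale in units.items())
        for scale, unit in scales:
            if average * scale >= 1.0:
                break           # first hit is the coarsest unit giving a value >= 1
        return scale, unit      # ends at "ns" if nothing reaches 1.0

    # pick_unit(0.0000025) -> (1000000.0, "usec"); with precision 3 this is
    # printed as "2.5 usec per loop".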