Hello Jeremy,
ouch. it turns out the problem is neither your code, nor PyPy, but
timeit. The timeit module is just not a safe benchmarking tool unless
you use it in *exactly* the right way. The way that timeit runs code is
using an exec'ed string, eg f"recurse({i})" in your case. This
bytecode-compiles some code for every time you call timeit.repeat. This
means that the JIT has to compile that bytecode to machine code for
every call to repeat. That is the overhead that you were measuring.
So your code:
for i in range(1, 4000):
times["iter"][i] = repeat(
stmt = f"iterate({i})",
...
simply never warms up.
To fix this, you could write your own timing helper. I did that (I'm
attaching my version of the script).
My results (timing all the 4000 numbers together, per implementation)
are like this:
CPython 3.11.6:
iter 1.976924208
rcsv 2.300890293
PyPy3 7.3.17:
iter 0.265697421 (7.4x faster)
rcsv 0.643804392 (3.6x faster)
I also tried to use a list and "".join, a bytearray, and a pypy-specific
stringbuilder, all were slower. Yes, your string additions are
quadratic. But the longest string that is being built is 15, and doing
1+2+3+...+15=107 characters copies is still very cheap.
My main takeaway would be that micro-benchmarking is very hard :-(.
Would it be ok if I use your code in a small PSA blog post about timeit?
Cheers,
CF
On 2024-09-20 04:16, Jeremy Brown wrote:
Manuel,
Your code is more similar to the code example that shows a case where
the optimization doesn’t work.
Does that explain the behavior you’re seeing? Feel free to make
suggestions on how to clarify the wording.
I think I just interpreted that statement incorrectly, I thought the
linear code section explained how the JIT optimized the code, not the
preconditions needed for the JIT to function correctly.
CF,
Hi Jeremy,
Can you share how you are running this function? I tried a few variants, and
pypy is often faster than CPython on my attempts, so the rest of your code is
necessary to find out what's wrong.
Sure, here's a link to a copy of the benchmark: https://pastebin.com/fR0C6qcB.
I also know the JIT definitely can't optimize the iterative version,
but even if this is a problem specific to PyPy I wouldn't expect runs
to take twice as long compared to CPython.
-Jeremy
_______________________________________________
pypy-dev mailing list -- pypy-dev@python.org
To unsubscribe send an email to pypy-dev-le...@python.org
https://mail.python.org/mailman3/lists/pypy-dev.python.org/
Member address: cfb...@gmx.de
#!/usr/bin/env python
from csv import DictWriter
from dis import dis
from json import dump
from pathlib import Path
from platform import python_implementation
from sys import version_info
from timeit import Timer, repeat
from time import process_time_ns
def recurse(num):
if num >= 1000:
return "M" + recurse(num - 1000)
elif num >= 900:
return "CM" + recurse(num - 900)
elif num >= 500:
return "D" + recurse(num - 500)
elif num >= 400:
return "CD" + recurse(num - 400)
elif num >= 100:
return "C" + recurse(num - 100)
elif num >= 90:
return "XC" + recurse(num - 90)
elif num >= 50:
return "L" + recurse(num - 50)
elif num >= 40:
return "XL" + recurse(num - 40)
elif num >= 10:
return "X" + recurse(num - 10)
elif num >= 9:
return "IX" + recurse(num - 9)
elif num >= 5:
return "V" + recurse(num - 5)
elif num >= 4:
return "IV" + recurse(num - 4)
elif num >= 1:
return "I" + recurse(num - 1)
else:
return ""
def iterate(num):
result = ""
while True:
if num >= 1000:
result += "M"
num -= 1000
continue
if num >= 900:
result += "CM"
num -= 900
continue
if num >= 500:
result += "D"
num -= 500
continue
if num >= 400:
result += "CD"
num -= 400
continue
if num >= 100:
result += "C"
num -= 100
continue
if num >= 90:
result += "XC"
num -= 90
continue
if num >= 50:
result += "L"
num -= 50
continue
if num >= 40:
result += "XL"
num -= 40
continue
if num >= 10:
result += "X"
num -= 10
continue
if num >= 9:
result += "IX"
num -= 9
continue
if num >= 5:
result += "V"
num -= 5
continue
if num >= 4:
result += "IV"
num -= 4
continue
if num >= 1:
result += "I"
num -= 1
continue
else:
break
return result
def iterate_append(num):
result = []
while True:
if num >= 1000:
result.append("M")
num -= 1000
continue
if num >= 900:
result.append("CM")
num -= 900
continue
if num >= 500:
result.append("D")
num -= 500
continue
if num >= 400:
result.append("CD")
num -= 400
continue
if num >= 100:
result.append("C")
num -= 100
continue
if num >= 90:
result.append("XC")
num -= 90
continue
if num >= 50:
result.append("L")
num -= 50
continue
if num >= 40:
result.append("XL")
num -= 40
continue
if num >= 10:
result.append("X")
num -= 10
continue
if num >= 9:
result.append("IX")
num -= 9
continue
if num >= 5:
result.append("V")
num -= 5
continue
if num >= 4:
result.append("IV")
num -= 4
continue
if num >= 1:
result.append("I")
num -= 1
continue
else:
break
return "\n".join(result)
def iterate_stringbuilder(num):
from __pypy__.builders import StringBuilder
result = StringBuilder()
while True:
if num >= 1000:
result.append("M")
num -= 1000
continue
if num >= 900:
result.append("CM")
num -= 900
continue
if num >= 500:
result.append("D")
num -= 500
continue
if num >= 400:
result.append("CD")
num -= 400
continue
if num >= 100:
result.append("C")
num -= 100
continue
if num >= 90:
result.append("XC")
num -= 90
continue
if num >= 50:
result.append("L")
num -= 50
continue
if num >= 40:
result.append("XL")
num -= 40
continue
if num >= 10:
result.append("X")
num -= 10
continue
if num >= 9:
result.append("IX")
num -= 9
continue
if num >= 5:
result.append("V")
num -= 5
continue
if num >= 4:
result.append("IV")
num -= 4
continue
if num >= 1:
result.append("I")
num -= 1
continue
else:
break
return result.build()
def repeat(func, repeat, number, timer):
res = []
for num in range(number):
t1 = timer()
for _ in range(repeat):
func()
t2 = timer()
res.append(t2 - t1)
return res
def main():
bench = Path(__file__).stem
impl = f"{python_implementation().lower()}"
version = f"{version_info.major}-{version_info.minor}-{version_info.micro}"
base = f"{bench}_{impl}_{version}"
data_dir = Path(__file__).parents[1].joinpath("data")
bytecode_dir = Path(__file__).parents[1].joinpath("bytecode")
times = {"iter": {}, "rcsv": {}, "iterate_append": {}, "iterate_stringbuilder": {}}
t1 = process_time_ns()
for i in range(1, 4000):
times["iter"][i] = repeat(
func = lambda: iterate(i),
repeat = 1100,
number = 1,
timer = process_time_ns)
t2 = process_time_ns()
print("iter", (t2 - t1) / 1e9)
t1 = process_time_ns()
for i in range(1, 4000):
times["rcsv"][i] = repeat(
func = lambda: recurse(i),
repeat = 1100,
number = 1,
timer = process_time_ns)
t2 = process_time_ns()
print("rcsv", (t2 - t1) / 1e9)
t1 = process_time_ns()
for i in range(1, 4000):
times["iterate_append"][i] = repeat(
func = lambda: iterate_append(i),
repeat = 1100,
number = 1,
timer = process_time_ns)
t2 = process_time_ns()
print("iterate_append", (t2 - t1) / 1e9)
try:
from __pypy__.builders import StringBuilder
except ImportError:
pass
else:
t1 = process_time_ns()
for i in range(1, 4000):
times["iterate_stringbuilder"][i] = repeat(
func = lambda: iterate_stringbuilder(i),
repeat = 1100,
number = 1,
timer = process_time_ns)
t2 = process_time_ns()
print("iterate_stringbuilder", (t2 - t1) / 1e9)
with open(data_dir.joinpath(f"{base}_iter_times.csv").resolve(), "w") as timefile:
iterwriter = DictWriter(timefile, fieldnames=times["iter"].keys())
iterwriter.writeheader()
iterwriter.writerows({
num: times["iter"][num][run]
for num in times["iter"]
}
for run in range(len(next(iter(times["iter"].values())))))
with open(data_dir.joinpath(f"{base}_rcsv_times.csv").resolve(), "w") as timefile:
iterwriter = DictWriter(timefile, fieldnames=times["rcsv"].keys())
iterwriter.writeheader()
iterwriter.writerows({
num: times["rcsv"][num][run]
for num in times["rcsv"]
}
for run in range(len(next(iter(times["rcsv"].values())))))
with open(bytecode_dir.joinpath(f"{base}_dis_iter.txt").resolve(), "w") as disfile:
dis(iterate, file=disfile)
with open(bytecode_dir.joinpath(f"{base}_dis_rcsv.txt").resolve(), "w") as disfile:
dis(recurse, file=disfile)
if __name__ == "__main__":
main()
_______________________________________________
pypy-dev mailing list -- pypy-dev@python.org
To unsubscribe send an email to pypy-dev-le...@python.org
https://mail.python.org/mailman3/lists/pypy-dev.python.org/
Member address: arch...@mail-archive.com