New submission from Dan Snider <mr.assume.a...@gmail.com>:
The ones I know of are list.__getitem__, dict __contains__ & __getitem__, and (frozen)set.__contains__ but from what I can tell so far it seems like dict.__getitem__ takes the worst hit. For dicts, I've spent a few hours trying to figure out what's getting put into the heap type's mp_subscript slot to slow it down so drastically but I just can't figure it out. So I just forced `dict_subscript` into the heap type's mp_subscript slot to confirm the rather obvious impossibility of it breaking anything. Here's a "little" timeit script to demonstrate how horribly inefficient `dict.__getitem__` is when called on anything other than an extension type: if __name__ == '__main__': import timeit from collections import OrderedDict as FastDictSub from ctypes import * class mappingmethods(Structure): _fields_ = [ ('mp_length', c_void_p), ('mp_subscript', PYFUNCTYPE(py_object,py_object,py_object)), ('mp_ass_subscript', c_void_p)] class _type_object(Structure): _fields_ = [('junk', (c_uint8*56)), # <- brevity ('tp_as_mapping', POINTER(mappingmethods))] py_type = POINTER(_type_object) class SlowDictSub(dict): pass assert SlowDictSub.__base__ is dict and FastDictSub.__base__ is dict assert SlowDictSub.__getitem__ is dict.__getitem__ assert FastDictSub.__getitem__ is dict.__getitem__ mp = dict.fromkeys(range(15)) setup = 'from __main__ import mp, %s as Dict; d=Dict(mp)' print(f'Comparing execution time of heap allocated dict subtype ' f'versus PyODict_Type (who also inherits dict.__getitem__)...\n') slown, slowt = timeit.Timer('d[10]', setup%'SlowDictSub').autorange() print(f'avg. exec {slown/slowt:_.0f} SlowDictSub[x] statements per second') fastn, fastt = timeit.Timer('d[10]', setup%'FastDictSub').autorange() print(f'avg. exec {fastn/fastt:_.0f} OrderedDict[x] statements per second') print() print(f'SlowDictSub was {1/(fastt/slowt):.2f}x slower than OrderedDict... ' f"Let's see what happens when we fix SlowDictSub's 'broken' " "mp_subscript slot:\n") slow_tp = cast(id(SlowDictSub), py_type).contents fast_tp = cast(id(FastDictSub), py_type).contents slow_tp.tp_as_mapping.contents.mp_subscript = ( fast_tp.tp_as_mapping.contents.mp_subscript) postn, postt = timeit.Timer('d[10]', setup%'SlowDictSub').autorange() print(f'avg. exec {postn/postt:_.0f} SlowDictSub[x] after slot change') print(f'which is now {1/(fastt/postt):.2}x the speed of dict_item') Comparing execution time of heap allocated dict subtype versus PyODict_Type (who also inherits dict.__getitem__)... avg. exec 6_220_709 SlowDictSub[x] statements per second avg. exec 21_894_971 OrderedDict[x] statements per second SlowDictSub was 3.52x slower than OrderedDict... Let's see what happens when we fix SlowDictSub's 'broken' mp_subscript slot: avg. exec 21_665_595 SlowDictSub[x] after slot change which is now 1.0x the speed of dict_item ---------- components: Interpreter Core messages: 323490 nosy: bup priority: normal severity: normal status: open title: Certain methods that heap allocated subtypes inherit suffer a 50-80% performance penalty type: performance versions: Python 3.7, Python 3.8 _______________________________________ Python tracker <rep...@bugs.python.org> <https://bugs.python.org/issue34396> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com