Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: Changeset: r95905:cfad18a6fd4b Date: 2019-02-08 15:38 +0100 http://bitbucket.org/pypy/pypy/changeset/cfad18a6fd4b/
Log: merge regalloc-playground improve register allocation by using better heuristics. diff too long, truncating to 2000 out of 3847 lines diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -23,4 +23,8 @@ .. math-improvements Improve performance of long operations where one of the operands fits into -an int. \ No newline at end of file +an int. + +.. regalloc-playgrounds + +Improve register allocation in the JIT. diff --git a/pypy/doc/whatsnew-pypy2-5.10.0.rst b/pypy/doc/whatsnew-pypy2-5.10.0.rst --- a/pypy/doc/whatsnew-pypy2-5.10.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.10.0.rst @@ -1,42 +1,42 @@ -========================== -What's new in PyPy2.7 5.10 -========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - - -.. branch: cppyy-packaging - -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols - -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches - -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests - -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 - -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -.. branch: fix-vmprof-stacklet-switch-2 - -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - -.. branch: rdict-fast-hash - -Make it possible to declare that the hash function of an r_dict is fast in RPython. +========================== +What's new in PyPy2.7 5.10 +========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +.. branch: fix-vmprof-stacklet-switch-2 + +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch: rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/doc/whatsnew-pypy2-6.0.0.rst b/pypy/doc/whatsnew-pypy2-6.0.0.rst --- a/pypy/doc/whatsnew-pypy2-6.0.0.rst +++ b/pypy/doc/whatsnew-pypy2-6.0.0.rst @@ -1,128 +1,128 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.10.0 -.. startrev: 6b024edd9d12 - -.. branch: cpyext-avoid-roundtrip - -Big refactoring of some cpyext code, which avoids a lot of nonsense when -calling C from Python and vice-versa: the result is a big speedup in -function/method calls, up to 6 times faster. - -.. branch: cpyext-datetime2 - -Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD - - -.. branch: mapdict-size-limit - -Fix a corner case of mapdict: When an instance is used like a dict (using -``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are -added, then the performance using mapdict is linear in the number of -attributes. This is now fixed (by switching to a regular dict after 80 -attributes). - - -.. branch: cpyext-faster-arg-passing - -When using cpyext, improve the speed of passing certain objects from PyPy to C -code, most notably None, True, False, types, all instances of C-defined types. -Before, a dict lookup was needed every time such an object crossed over, now it -is just a field read. - - -.. branch: 2634_datetime_timedelta_performance - -Improve datetime + timedelta performance. - -.. branch: memory-accounting - -Improve way to describe memory - -.. branch: msvc14 - -Allow compilaiton with Visual Studio 2017 compiler suite on windows - -.. branch: refactor-slots - -Refactor cpyext slots. - - -.. branch: call-loopinvariant-into-bridges - -Speed up branchy code that does a lot of function inlining by saving one call -to read the TLS in most bridges. - -.. branch: rpython-sprint - -Refactor in rpython signatures - -.. branch: cpyext-tls-operror2 - -Store error state thread-locally in executioncontext, fixes issue #2764 - -.. branch: cpyext-fast-typecheck - -Optimize `Py*_Check` for `Bool`, `Float`, `Set`. Also refactor and simplify -`W_PyCWrapperObject` which is used to call slots from the C-API, greatly -improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks - - -.. branch: fix-sre-problems - -Fix two (unrelated) JIT bugs manifesting in the re module: - -- green fields are broken and were thus disabled, plus their usage removed from - the _sre implementation - -- in rare "trace is too long" situations, the JIT could break behaviour - arbitrarily. - -.. branch: jit-hooks-can-be-disabled - -Be more efficient about JIT hooks. Make it possible for the frontend to declare -that jit hooks are currently not enabled at all. in that case, the list of ops -does not have to be created in the case of the on_abort hook (which is -expensive). - - -.. branch: pyparser-improvements - -Improve speed of Python parser, improve ParseError messages slightly. - -.. branch: ioctl-arg-size - -Work around possible bugs in upstream ioctl users, like CPython allocate at -least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes -issue #2776 - -.. branch: cpyext-subclass-setattr - -Fix for python-level classes that inherit from C-API types, previously the -`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj` -which led to cases where instance attributes were lost. Fixes issue #2793 - - -.. branch: pyparser-improvements-2 - -Improve line offsets that are reported by SyntaxError. Improve error messages -for a few situations, including mismatched parenthesis. - -.. branch: issue2752 - -Fix a rare GC bug that was introduced more than one year ago, but was -not diagnosed before issue #2752. - -.. branch: gc-hooks - -Introduce GC hooks, as documented in doc/gc_info.rst - -.. branch: gc-hook-better-timestamp - -Improve GC hooks - -.. branch: cppyy-packaging - -Update backend to 0.6.0 and support exceptions through wrappers +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.10.0 +.. startrev: 6b024edd9d12 + +.. branch: cpyext-avoid-roundtrip + +Big refactoring of some cpyext code, which avoids a lot of nonsense when +calling C from Python and vice-versa: the result is a big speedup in +function/method calls, up to 6 times faster. + +.. branch: cpyext-datetime2 + +Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD + + +.. branch: mapdict-size-limit + +Fix a corner case of mapdict: When an instance is used like a dict (using +``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are +added, then the performance using mapdict is linear in the number of +attributes. This is now fixed (by switching to a regular dict after 80 +attributes). + + +.. branch: cpyext-faster-arg-passing + +When using cpyext, improve the speed of passing certain objects from PyPy to C +code, most notably None, True, False, types, all instances of C-defined types. +Before, a dict lookup was needed every time such an object crossed over, now it +is just a field read. + + +.. branch: 2634_datetime_timedelta_performance + +Improve datetime + timedelta performance. + +.. branch: memory-accounting + +Improve way to describe memory + +.. branch: msvc14 + +Allow compilaiton with Visual Studio 2017 compiler suite on windows + +.. branch: refactor-slots + +Refactor cpyext slots. + + +.. branch: call-loopinvariant-into-bridges + +Speed up branchy code that does a lot of function inlining by saving one call +to read the TLS in most bridges. + +.. branch: rpython-sprint + +Refactor in rpython signatures + +.. branch: cpyext-tls-operror2 + +Store error state thread-locally in executioncontext, fixes issue #2764 + +.. branch: cpyext-fast-typecheck + +Optimize `Py*_Check` for `Bool`, `Float`, `Set`. Also refactor and simplify +`W_PyCWrapperObject` which is used to call slots from the C-API, greatly +improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks + + +.. branch: fix-sre-problems + +Fix two (unrelated) JIT bugs manifesting in the re module: + +- green fields are broken and were thus disabled, plus their usage removed from + the _sre implementation + +- in rare "trace is too long" situations, the JIT could break behaviour + arbitrarily. + +.. branch: jit-hooks-can-be-disabled + +Be more efficient about JIT hooks. Make it possible for the frontend to declare +that jit hooks are currently not enabled at all. in that case, the list of ops +does not have to be created in the case of the on_abort hook (which is +expensive). + + +.. branch: pyparser-improvements + +Improve speed of Python parser, improve ParseError messages slightly. + +.. branch: ioctl-arg-size + +Work around possible bugs in upstream ioctl users, like CPython allocate at +least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes +issue #2776 + +.. branch: cpyext-subclass-setattr + +Fix for python-level classes that inherit from C-API types, previously the +`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj` +which led to cases where instance attributes were lost. Fixes issue #2793 + + +.. branch: pyparser-improvements-2 + +Improve line offsets that are reported by SyntaxError. Improve error messages +for a few situations, including mismatched parenthesis. + +.. branch: issue2752 + +Fix a rare GC bug that was introduced more than one year ago, but was +not diagnosed before issue #2752. + +.. branch: gc-hooks + +Introduce GC hooks, as documented in doc/gc_info.rst + +.. branch: gc-hook-better-timestamp + +Improve GC hooks + +.. branch: cppyy-packaging + +Update backend to 0.6.0 and support exceptions through wrappers diff --git a/pypy/doc/whatsnew-pypy2-7.0.0.rst b/pypy/doc/whatsnew-pypy2-7.0.0.rst --- a/pypy/doc/whatsnew-pypy2-7.0.0.rst +++ b/pypy/doc/whatsnew-pypy2-7.0.0.rst @@ -1,69 +1,69 @@ -========================== -What's new in PyPy2.7 6.0+ -========================== - -.. this is a revision shortly after release-pypy-6.0.0 -.. startrev: e50e11af23f1 - -.. branch: cppyy-packaging - -Main items: vastly better template resolution and improved performance. In -detail: upgrade to backend 1.4, improved handling of templated methods and -functions (in particular automatic deduction of types), improved pythonization -interface, range of compatibility fixes for Python3, free functions now take -fast libffi path when possible, moves for strings (incl. from Python str), -easier/faster handling of std::vector by numpy, improved and faster object -identity preservation - -.. branch: socket_default_timeout_blockingness - -Make sure 'blocking-ness' of socket is set along with default timeout - -.. branch: crypt_h - -Include crypt.h for crypt() on Linux - -.. branch: gc-more-logging - -Log additional gc-minor and gc-collect-step info in the PYPYLOG - -.. branch: reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb - - -.. branch: pyparser-improvements-3 - -Small refactorings in the Python parser. - -.. branch: fix-readme-typo - -.. branch: avoid_shell_injection_in_shutil - -Backport CPython fix for possible shell injection issue in `distutils.spawn`, -https://bugs.python.org/issue34540 - -.. branch: cffi_dlopen_unicode - -Enable use of unicode file names in `dlopen` - -.. branch: rlock-in-rpython - -Backport CPython fix for `thread.RLock` - - -.. branch: expose-gc-time - -Make GC hooks measure time in seconds (as opposed to an opaque unit). - -.. branch: cleanup-test_lib_pypy - -Update most test_lib_pypy/ tests and move them to extra_tests/. - -.. branch: gc-disable - -Make it possible to manually manage the GC by using a combination of -gc.disable() and gc.collect_step(). Make sure to write a proper release -announcement in which we explain that existing programs could leak memory if -they run for too much time between a gc.disable()/gc.enable() +========================== +What's new in PyPy2.7 6.0+ +========================== + +.. this is a revision shortly after release-pypy-6.0.0 +.. startrev: e50e11af23f1 + +.. branch: cppyy-packaging + +Main items: vastly better template resolution and improved performance. In +detail: upgrade to backend 1.4, improved handling of templated methods and +functions (in particular automatic deduction of types), improved pythonization +interface, range of compatibility fixes for Python3, free functions now take +fast libffi path when possible, moves for strings (incl. from Python str), +easier/faster handling of std::vector by numpy, improved and faster object +identity preservation + +.. branch: socket_default_timeout_blockingness + +Make sure 'blocking-ness' of socket is set along with default timeout + +.. branch: crypt_h + +Include crypt.h for crypt() on Linux + +.. branch: gc-more-logging + +Log additional gc-minor and gc-collect-step info in the PYPYLOG + +.. branch: reverse-debugger + +The reverse-debugger branch has been merged. For more information, see +https://bitbucket.org/pypy/revdb + + +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. + +.. branch: fix-readme-typo + +.. branch: avoid_shell_injection_in_shutil + +Backport CPython fix for possible shell injection issue in `distutils.spawn`, +https://bugs.python.org/issue34540 + +.. branch: cffi_dlopen_unicode + +Enable use of unicode file names in `dlopen` + +.. branch: rlock-in-rpython + +Backport CPython fix for `thread.RLock` + + +.. branch: expose-gc-time + +Make GC hooks measure time in seconds (as opposed to an opaque unit). + +.. branch: cleanup-test_lib_pypy + +Update most test_lib_pypy/ tests and move them to extra_tests/. + +.. branch: gc-disable + +Make it possible to manually manage the GC by using a combination of +gc.disable() and gc.collect_step(). Make sure to write a proper release +announcement in which we explain that existing programs could leak memory if +they run for too much time between a gc.disable()/gc.enable() diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py --- a/rpython/annotator/binaryop.py +++ b/rpython/annotator/binaryop.py @@ -727,6 +727,10 @@ return [get_setitem, op.simple_call(get_setitem.result, v_idx, v_value)] +@op.contains.register_transform(SomeInstance) +def contains_SomeInstance(annotator, v_ins, v_idx): + get_contains = op.getattr(v_ins, const('__contains__')) + return [get_contains, op.simple_call(get_contains.result, v_idx)] class __extend__(pairtype(SomeIterator, SomeIterator)): diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -4065,6 +4065,20 @@ assert len(a.translator.graphs) == 2 # fn, __setitem__ assert isinstance(s, annmodel.SomeInteger) + def test_instance_contains(self): + class A(object): + def __contains__(self, i): + return i & 1 == 0 + + def fn(i): + a = A() + return 0 in a and 1 not in a + + a = self.RPythonAnnotator() + s = a.build_types(fn, [int]) + assert len(a.translator.graphs) == 2 # fn, __contains__ + assert isinstance(s, annmodel.SomeBool) + def test_instance_getslice(self): class A(object): def __getslice__(self, stop, start): diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -1,4 +1,4 @@ -import os +import sys from rpython.jit.metainterp.history import Const, REF, JitCellToken from rpython.rlib.objectmodel import we_are_translated, specialize from rpython.jit.metainterp.resoperation import rop, AbstractValue @@ -67,6 +67,7 @@ @specialize.arg(1) def foreach(self, function, arg): + # XXX unused? node = self.master_node while node is not None: function(arg, node.val) @@ -297,12 +298,12 @@ def is_still_alive(self, v): # Check if 'v' is alive at the current position. # Return False if the last usage is strictly before. - return self.longevity[v][1] >= self.position + return self.longevity[v].last_usage >= self.position def stays_alive(self, v): # Check if 'v' stays alive after the current position. # Return False if the last usage is before or at position. - return self.longevity[v][1] > self.position + return self.longevity[v].last_usage > self.position def next_instruction(self, incr=1): self.position += incr @@ -319,7 +320,7 @@ self._check_type(v) if isinstance(v, Const): return - if v not in self.longevity or self.longevity[v][1] <= self.position: + if v not in self.longevity or self.longevity[v].last_usage <= self.position: if v in self.reg_bindings: self.free_regs.append(self.reg_bindings[v]) del self.reg_bindings[v] @@ -355,7 +356,7 @@ for v in self.reg_bindings: if v not in self.longevity: llop.debug_print(lltype.Void, "variable %s not in longevity\n" % v.repr({})) - assert self.longevity[v][1] > self.position + assert self.longevity[v].last_usage > self.position def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False): """ Try to allocate a register, if we have one free. @@ -366,6 +367,9 @@ returns allocated register or None, if not possible. """ self._check_type(v) + if isinstance(v, TempVar): + self.longevity[v] = Lifetime(self.position, self.position) + # YYY all subtly similar code assert not isinstance(v, Const) if selected_reg is not None: res = self.reg_bindings.get(v, None) @@ -385,42 +389,55 @@ loc = self.reg_bindings.get(v, None) if loc is not None and loc not in self.no_lower_byte_regs: return loc - for i in range(len(self.free_regs) - 1, -1, -1): - reg = self.free_regs[i] - if reg not in self.no_lower_byte_regs: - if loc is not None: - self.free_regs[i] = loc - else: - del self.free_regs[i] - self.reg_bindings[v] = reg - return reg - return None + free_regs = [reg for reg in self.free_regs + if reg not in self.no_lower_byte_regs] + newloc = self.longevity.try_pick_free_reg( + self.position, v, free_regs) + if newloc is None: + return None + self.free_regs.remove(newloc) + if loc is not None: + self.free_regs.append(loc) + self.reg_bindings[v] = newloc + return newloc try: return self.reg_bindings[v] except KeyError: - if self.free_regs: - loc = self.free_regs.pop() - self.reg_bindings[v] = loc - return loc + loc = self.longevity.try_pick_free_reg( + self.position, v, self.free_regs) + if loc is None: + return None + self.reg_bindings[v] = loc + self.free_regs.remove(loc) + return loc - def _spill_var(self, v, forbidden_vars, selected_reg, + def _spill_var(self, forbidden_vars, selected_reg, need_lower_byte=False): - v_to_spill = self._pick_variable_to_spill(v, forbidden_vars, + v_to_spill = self._pick_variable_to_spill(forbidden_vars, selected_reg, need_lower_byte=need_lower_byte) loc = self.reg_bindings[v_to_spill] + self._sync_var_to_stack(v_to_spill) del self.reg_bindings[v_to_spill] - if self.frame_manager.get(v_to_spill) is None: - newloc = self.frame_manager.loc(v_to_spill) - self.assembler.regalloc_mov(loc, newloc) return loc - def _pick_variable_to_spill(self, v, forbidden_vars, selected_reg=None, - need_lower_byte=False): - """ Slightly less silly algorithm. - """ - cur_max_age = -1 + def _pick_variable_to_spill(self, forbidden_vars, selected_reg=None, + need_lower_byte=False, regs=None): + + # try to spill a variable that has no further real usages, ie that only + # appears in failargs or in a jump + # if that doesn't exist, spill the variable that has a real_usage that + # is the furthest away from the current position + + # YYY check for fixed variable usages + if regs is None: + regs = self.reg_bindings.keys() + + cur_max_use_distance = -1 + position = self.position candidate = None - for next in self.reg_bindings: + cur_max_age_failargs = -1 + candidate_from_failargs = None + for next in regs: reg = self.reg_bindings[next] if next in forbidden_vars: continue @@ -431,13 +448,25 @@ continue if need_lower_byte and reg in self.no_lower_byte_regs: continue - max_age = self.longevity[next][1] - if cur_max_age < max_age: - cur_max_age = max_age - candidate = next - if candidate is None: - raise NoVariableToSpill - return candidate + lifetime = self.longevity[next] + if lifetime.is_last_real_use_before(position): + # this variable has no "real" use as an argument to an op left + # it is only used in failargs, and maybe in a jump. spilling is + # fine + max_age = lifetime.last_usage + if cur_max_age_failargs < max_age: + cur_max_age_failargs = max_age + candidate_from_failargs = next + else: + use_distance = lifetime.next_real_usage(position) - position + if cur_max_use_distance < use_distance: + cur_max_use_distance = use_distance + candidate = next + if candidate_from_failargs is not None: + return candidate_from_failargs + if candidate is not None: + return candidate + raise NoVariableToSpill def force_allocate_reg(self, v, forbidden_vars=[], selected_reg=None, need_lower_byte=False): @@ -450,12 +479,12 @@ """ self._check_type(v) if isinstance(v, TempVar): - self.longevity[v] = (self.position, self.position) + self.longevity[v] = Lifetime(self.position, self.position) loc = self.try_allocate_reg(v, selected_reg, need_lower_byte=need_lower_byte) if loc: return loc - loc = self._spill_var(v, forbidden_vars, selected_reg, + loc = self._spill_var(forbidden_vars, selected_reg, need_lower_byte=need_lower_byte) prev_loc = self.reg_bindings.get(v, None) if prev_loc is not None: @@ -468,7 +497,7 @@ self.bindings_to_frame_reg[v] = None def force_spill_var(self, var): - self._sync_var(var) + self._sync_var_to_stack(var) try: loc = self.reg_bindings[var] del self.reg_bindings[var] @@ -502,7 +531,7 @@ if selected_reg in self.free_regs: self.assembler.regalloc_mov(immloc, selected_reg) return selected_reg - loc = self._spill_var(v, forbidden_vars, selected_reg) + loc = self._spill_var(forbidden_vars, selected_reg) self.free_regs.append(loc) self.assembler.regalloc_mov(immloc, loc) return loc @@ -523,6 +552,7 @@ loc = self.force_allocate_reg(v, forbidden_vars, selected_reg, need_lower_byte=need_lower_byte) if prev_loc is not loc: + self.assembler.num_reloads += 1 self.assembler.regalloc_mov(prev_loc, loc) return loc @@ -530,15 +560,7 @@ reg = self.reg_bindings[from_v] del self.reg_bindings[from_v] self.reg_bindings[to_v] = reg - - def _move_variable_away(self, v, prev_loc): - if self.free_regs: - loc = self.free_regs.pop() - self.reg_bindings[v] = loc - self.assembler.regalloc_mov(prev_loc, loc) - else: - loc = self.frame_manager.loc(v) - self.assembler.regalloc_mov(prev_loc, loc) + return reg def force_result_in_reg(self, result_v, v, forbidden_vars=[]): """ Make sure that result is in the same register as v. @@ -548,41 +570,39 @@ self._check_type(result_v) self._check_type(v) if isinstance(v, Const): - if self.free_regs: - loc = self.free_regs.pop() - else: - loc = self._spill_var(v, forbidden_vars, None) - self.assembler.regalloc_mov(self.convert_to_imm(v), loc) - self.reg_bindings[result_v] = loc - return loc - if v not in self.reg_bindings: - # v not in a register. allocate one for result_v and move v there - prev_loc = self.frame_manager.loc(v) - loc = self.force_allocate_reg(result_v, forbidden_vars) - self.assembler.regalloc_mov(prev_loc, loc) - return loc - if self.longevity[v][1] > self.position: - # we need to find a new place for variable v and - # store result in the same place - loc = self.reg_bindings[v] - del self.reg_bindings[v] - if self.frame_manager.get(v) is None: - self._move_variable_away(v, loc) - self.reg_bindings[result_v] = loc - else: - self._reallocate_from_to(v, result_v) - loc = self.reg_bindings[result_v] - return loc + result_loc = self.force_allocate_reg(result_v, forbidden_vars) + self.assembler.regalloc_mov(self.convert_to_imm(v), result_loc) + return result_loc + v_keeps_living = self.longevity[v].last_usage > self.position + # there are two cases where we should allocate a new register for + # result: + # 1) v is itself not in a register + # 2) v keeps on being live. if there is a free register, we need a move + # anyway, so we can use force_allocate_reg on result_v to make sure any + # fixed registers are used + if (v not in self.reg_bindings or (v_keeps_living and self.free_regs)): + v_loc = self.loc(v) + result_loc = self.force_allocate_reg(result_v, forbidden_vars) + self.assembler.regalloc_mov(v_loc, result_loc) + return result_loc + if v_keeps_living: + # since there are no free registers, v needs to go to the stack. + # sync it there. + self._sync_var_to_stack(v) + return self._reallocate_from_to(v, result_v) - def _sync_var(self, v): + def _sync_var_to_stack(self, v): + self.assembler.num_spills += 1 if not self.frame_manager.get(v): reg = self.reg_bindings[v] to = self.frame_manager.loc(v) self.assembler.regalloc_mov(reg, to) + else: + self.assembler.num_spills_to_existing += 1 # otherwise it's clean def _bc_spill(self, v, new_free_regs): - self._sync_var(v) + self._sync_var_to_stack(v) new_free_regs.append(self.reg_bindings.pop(v)) def before_call(self, force_store=[], save_all_regs=0): @@ -650,7 +670,7 @@ move_or_spill = [] for v, reg in self.reg_bindings.items(): - max_age = self.longevity[v][1] + max_age = self.longevity[v].last_usage if v not in force_store and max_age <= self.position: # variable dies del self.reg_bindings[v] @@ -671,45 +691,32 @@ else: # this is a register like eax/rax, which needs either # spilling or moving. - move_or_spill.append((v, max_age)) + move_or_spill.append(v) if len(move_or_spill) > 0: - while len(self.free_regs) > 0: - new_reg = self.free_regs.pop() - if new_reg in self.save_around_call_regs: - new_free_regs.append(new_reg) # not this register... - continue - # This 'new_reg' is suitable for moving a candidate to. - # Pick the one with the smallest max_age. (This - # is one step of a naive sorting algo, slow in theory, - # but the list should always be very small so it - # doesn't matter.) - best_i = 0 - smallest_max_age = move_or_spill[0][1] - for i in range(1, len(move_or_spill)): - max_age = move_or_spill[i][1] - if max_age < smallest_max_age: - best_i = i - smallest_max_age = max_age - v, max_age = move_or_spill.pop(best_i) - # move from 'reg' to 'new_reg' + free_regs = [reg for reg in self.free_regs + if reg not in self.save_around_call_regs] + # chose which to spill using the usual spill heuristics + while len(move_or_spill) > len(free_regs): + v = self._pick_variable_to_spill([], regs=move_or_spill) + self._bc_spill(v, new_free_regs) + move_or_spill.remove(v) + assert len(move_or_spill) <= len(free_regs) + for v in move_or_spill: + # search next good reg + new_reg = None + while True: + new_reg = self.free_regs.pop() + if new_reg in self.save_around_call_regs: + new_free_regs.append(new_reg) # not this register... + continue + break + assert new_reg is not None # must succeed reg = self.reg_bindings[v] - if not we_are_translated(): - if move_or_spill: - assert max_age <= min([_a for _, _a in move_or_spill]) - assert reg in save_sublist - assert reg in self.save_around_call_regs - assert new_reg not in self.save_around_call_regs + self.assembler.num_moves_calls += 1 self.assembler.regalloc_mov(reg, new_reg) self.reg_bindings[v] = new_reg # change the binding new_free_regs.append(reg) - # - if len(move_or_spill) == 0: - break - else: - # no more free registers to move to, spill the rest - for v, max_age in move_or_spill: - self._bc_spill(v, new_free_regs) # re-add registers in 'new_free_regs', but in reverse order, # so that the last ones (added just above, from @@ -772,7 +779,7 @@ # of COND_CALL don't accept a cc as input if next_op.getarg(0) is not op: return False - if self.longevity[op][1] > i + 1: + if self.longevity[op].last_usage > i + 1: return False if opnum != rop.COND_CALL: if op in operations[i + 1].getfailargs(): @@ -786,61 +793,295 @@ descr = op.getdescr() assert isinstance(descr, JitCellToken) if op.numargs() == 2: - self.rm._sync_var(op.getarg(1)) + self.rm._sync_var_to_stack(op.getarg(1)) return [self.loc(op.getarg(0)), self.fm.loc(op.getarg(1))] else: assert op.numargs() == 1 return [self.loc(op.getarg(0))] +# ____________________________________________________________ + + + +UNDEF_POS = -42 + +class Lifetime(object): + def __init__(self, definition_pos=UNDEF_POS, last_usage=UNDEF_POS): + # all positions are indexes into the operations list + + # the position where the variable is defined + self.definition_pos = definition_pos + # the position where the variable is last used. this includes failargs + # and jumps + self.last_usage = last_usage + + # *real* usages, ie as an argument to an operation (as opposed to jump + # arguments or in failargs) + self.real_usages = None + + # fixed registers are positions where the variable *needs* to be in a + # specific register + self.fixed_positions = None + + # another Lifetime that lives after the current one that would like to + # share a register with this variable + self.share_with = None + + # the other lifetime will have this variable set to self.definition_pos + self._definition_pos_shared = UNDEF_POS + + def last_usage_including_sharing(self): + while self.share_with is not None: + self = self.share_with + return self.last_usage + + def is_last_real_use_before(self, position): + if self.real_usages is None: + return True + return self.real_usages[-1] <= position + + def next_real_usage(self, position): + assert position >= self.definition_pos + # binary search + l = self.real_usages + low = 0 + high = len(l) + if position >= l[-1]: + return -1 + while low < high: + mid = low + (high - low) // 2 # no overflow ;-) + if position < l[mid]: + high = mid + else: + low = mid + 1 + return l[low] + + def definition_pos_shared(self): + if self._definition_pos_shared != UNDEF_POS: + return self._definition_pos_shared + else: + return self.definition_pos + + def fixed_register(self, position, reg): + """ registers a fixed register use for the variable at position in + register reg. returns the position from where on the register should be + held free. """ + assert self.definition_pos <= position <= self.last_usage + if self.fixed_positions is None: + self.fixed_positions = [] + res = self.definition_pos_shared() + else: + assert position > self.fixed_positions[-1][0] + res = self.fixed_positions[-1][0] + self.fixed_positions.append((position, reg)) + return res + + def find_fixed_register(self, opindex): + # XXX could use binary search + if self.fixed_positions is not None: + for (index, reg) in self.fixed_positions: + if opindex <= index: + return reg + if self.share_with is not None: + return self.share_with.find_fixed_register(opindex) + + def _check_invariants(self): + assert self.definition_pos <= self.last_usage + if self.real_usages is not None: + assert sorted(self.real_usages) == self.real_usages + assert self.last_usage >= max(self.real_usages) + assert self.definition_pos < min(self.real_usages) + + def __repr__(self): + if self.fixed_positions: + s = " " + ", ".join("@%s in %s" % (index, reg) for (index, reg) in self.fixed_positions) + else: + s = "" + return "%s:%s(%s)%s" % (self.definition_pos, self.real_usages, self.last_usage, s) + + +class FixedRegisterPositions(object): + def __init__(self, register): + self.register = register + + self.index_lifetimes = [] + + def fixed_register(self, opindex, definition_pos): + if self.index_lifetimes: + assert opindex > self.index_lifetimes[-1][0] + self.index_lifetimes.append((opindex, definition_pos)) + + def free_until_pos(self, opindex): + # XXX could use binary search + for (index, definition_pos) in self.index_lifetimes: + if opindex <= index: + if definition_pos >= opindex: + return definition_pos + else: + # the variable doesn't exist or didn't make it into the + # register despite being defined already. so we don't care + # too much, and can say that the variable is free until + # index + return index + return sys.maxint + + def __repr__(self): + return "%s: fixed at %s" % (self.register, self.index_lifetimes) + + +class LifetimeManager(object): + def __init__(self, longevity): + self.longevity = longevity + + # dictionary maps register to FixedRegisterPositions + self.fixed_register_use = {} + + def fixed_register(self, opindex, register, var=None): + """ Tell the LifetimeManager that variable var *must* be in register at + operation opindex. var can be None, if no variable at all can be in + that register at the point.""" + if var is None: + definition_pos = opindex + else: + varlifetime = self.longevity[var] + definition_pos = varlifetime.fixed_register(opindex, register) + if register not in self.fixed_register_use: + self.fixed_register_use[register] = FixedRegisterPositions(register) + self.fixed_register_use[register].fixed_register(opindex, definition_pos) + + def try_use_same_register(self, v0, v1): + """ Try to arrange things to put v0 and v1 into the same register. + v0 must be defined before v1""" + # only works in limited situations now + longevityvar0 = self[v0] + longevityvar1 = self[v1] + assert longevityvar0.definition_pos < longevityvar1.definition_pos + if longevityvar0.last_usage != longevityvar1.definition_pos: + return # not supported for now + longevityvar0.share_with = longevityvar1 + longevityvar1._definition_pos_shared = longevityvar0.definition_pos_shared() + + def longest_free_reg(self, position, free_regs): + """ for every register in free_regs, compute how far into the future + that register can remain free, according to the constraints of the + fixed registers. Find the register that is free the longest. Return a + tuple (reg, free_until_pos). """ + free_until_pos = {} + max_free_pos = position + best_reg = None + # reverse for compatibility with old code + for i in range(len(free_regs) - 1, -1, -1): + reg = free_regs[i] + fixed_reg_pos = self.fixed_register_use.get(reg, None) + if fixed_reg_pos is None: + return reg, sys.maxint + else: + free_until_pos = fixed_reg_pos.free_until_pos(position) + if free_until_pos > max_free_pos: + best_reg = reg + max_free_pos = free_until_pos + return best_reg, max_free_pos + + def free_reg_whole_lifetime(self, position, v, free_regs): + """ try to find a register from free_regs for v at position that's + free for the whole lifetime of v. pick the one that is blocked first + *after* the lifetime of v. """ + longevityvar = self[v] + min_fixed_use_after = sys.maxint + best_reg = None + unfixed_reg = None + for reg in free_regs: + fixed_reg_pos = self.fixed_register_use.get(reg, None) + if fixed_reg_pos is None: + unfixed_reg = reg + continue + use_after = fixed_reg_pos.free_until_pos(position) + if use_after <= longevityvar.last_usage_including_sharing(): + # can't fit + continue + assert use_after >= longevityvar.last_usage_including_sharing() + if use_after < min_fixed_use_after: + best_reg = reg + min_fixed_use_after = use_after + if best_reg is not None: + return best_reg + + # no fitting fixed registers. pick a non-fixed one + return unfixed_reg + + def try_pick_free_reg(self, position, v, free_regs): + if not free_regs: + return None + longevityvar = self[v] + # check whether there is a fixed register and whether it's free + reg = longevityvar.find_fixed_register(position) + if reg is not None and reg in free_regs: + return reg + + # try to find a register that's free for the whole lifetime of v + # pick the one that is blocked first *after* the lifetime of v + loc = self.free_reg_whole_lifetime(position, v, free_regs) + if loc is not None: + return loc + + # can't fit v completely, so pick the register that's free the longest + loc, free_until = self.longest_free_reg(position, free_regs) + if loc is not None: + return loc + # YYY could check whether it's best to spill v here, but hard + # to do in the current system + return None + + def __contains__(self, var): + return var in self.longevity + + def __getitem__(self, var): + return self.longevity[var] + + def __setitem__(self, var, val): + self.longevity[var] = val + def compute_vars_longevity(inputargs, operations): - # compute a dictionary that maps variables to index in - # operations that is a "last-time-seen" - - # returns a pair longevity/useful. Non-useful variables are ones that - # never appear in the assembler or it does not matter if they appear on - # stack or in registers. Main example is loop arguments that go - # only to guard operations or to jump or to finish - last_used = {} - last_real_usage = {} + # compute a dictionary that maps variables to Lifetime information + # if a variable is not in the dictionary, it's operation is dead because + # it's side-effect-free and the result is unused + longevity = {} for i in range(len(operations)-1, -1, -1): op = operations[i] - if op.type != 'v': - if op not in last_used and rop.has_no_side_effect(op.opnum): + opnum = op.getopnum() + if op not in longevity: + if op.type != 'v' and rop.has_no_side_effect(opnum): + # result not used, operation has no side-effect, it can be + # removed continue - opnum = op.getopnum() + longevity[op] = Lifetime(definition_pos=i, last_usage=i) + else: + longevity[op].definition_pos = i for j in range(op.numargs()): arg = op.getarg(j) if isinstance(arg, Const): continue - if arg not in last_used: - last_used[arg] = i + if arg not in longevity: + lifetime = longevity[arg] = Lifetime(last_usage=i) + else: + lifetime = longevity[arg] if opnum != rop.JUMP and opnum != rop.LABEL: - if arg not in last_real_usage: - last_real_usage[arg] = i + if lifetime.real_usages is None: + lifetime.real_usages = [] + lifetime.real_usages.append(i) if rop.is_guard(op.opnum): for arg in op.getfailargs(): if arg is None: # hole continue assert not isinstance(arg, Const) - if arg not in last_used: - last_used[arg] = i + if arg not in longevity: + longevity[arg] = Lifetime(last_usage=i) # - longevity = {} - for i, arg in enumerate(operations): - if arg.type != 'v' and arg in last_used: - assert not isinstance(arg, Const) - assert i < last_used[arg] - longevity[arg] = (i, last_used[arg]) - del last_used[arg] for arg in inputargs: assert not isinstance(arg, Const) - if arg not in last_used: - longevity[arg] = (-1, -1) - else: - longevity[arg] = (0, last_used[arg]) - del last_used[arg] - assert len(last_used) == 0 + if arg not in longevity: + longevity[arg] = Lifetime(-1, -1) if not we_are_translated(): produced = {} @@ -851,9 +1092,15 @@ if not isinstance(arg, Const): assert arg in produced produced[op] = None - - return longevity, last_real_usage + for lifetime in longevity.itervalues(): + if lifetime.real_usages is not None: + lifetime.real_usages.reverse() + if not we_are_translated(): + lifetime._check_invariants() + return LifetimeManager(longevity) + +# YYY unused? def is_comparison_or_ovf_op(opnum): return rop.is_comparison(opnum) or rop.is_ovf(opnum) diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py --- a/rpython/jit/backend/llsupport/test/test_regalloc.py +++ b/rpython/jit/backend/llsupport/test/test_regalloc.py @@ -1,9 +1,19 @@ import py +import sys from rpython.jit.metainterp.history import ConstInt, INT, FLOAT -from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList -from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan +from rpython.jit.metainterp.history import BasicFailDescr, TargetToken +from rpython.jit.metainterp.resoperation import rop from rpython.jit.metainterp.resoperation import InputArgInt, InputArgRef,\ InputArgFloat +from rpython.jit.backend.detect_cpu import getcpuclass +from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList +from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan,\ + Lifetime as RealLifetime, UNDEF_POS, BaseRegalloc, compute_vars_longevity,\ + LifetimeManager +from rpython.jit.tool.oparser import parse +from rpython.jit.codewriter.effectinfo import EffectInfo +from rpython.rtyper.lltypesystem import lltype +from rpython.rtyper.annlowlevel import llhelper def newboxes(*values): return [InputArgInt(v) for v in values] @@ -11,33 +21,61 @@ def newrefboxes(count): return [InputArgRef() for _ in range(count)] +def Lifetime(definition_pos=UNDEF_POS, last_usage=UNDEF_POS, + real_usages=UNDEF_POS): + if real_usages == UNDEF_POS: + real_usages = last_usage + lifetime = RealLifetime(definition_pos, last_usage) + if isinstance(real_usages, int): + real_usages = [real_usages] + lifetime.real_usages = real_usages + return lifetime + + def boxes_and_longevity(num): res = [] longevity = {} for i in range(num): box = InputArgInt(0) res.append(box) - longevity[box] = (0, 1) + longevity[box] = Lifetime(0, 1) return res, longevity class FakeReg(object): def __init__(self, i): self.n = i + def _getregkey(self): + return self.n + def is_memory_reference(self): + return False def __repr__(self): return 'r%d' % self.n r0, r1, r2, r3 = [FakeReg(i) for i in range(4)] +r4, r5, r6, r7, r8, r9 = [FakeReg(i) for i in range(4, 10)] + regs = [r0, r1, r2, r3] class RegisterManager(BaseRegMan): all_regs = regs + + def __init__(self, longevity, frame_manager=None, assembler=None): + if isinstance(longevity, dict): + longevity = LifetimeManager(longevity) + BaseRegMan.__init__(self, longevity, frame_manager, assembler) + def convert_to_imm(self, v): return v class FakeFramePos(object): def __init__(self, pos, box_type): self.pos = pos + self.value = pos self.box_type = box_type + def _getregkey(self): + return ~self.value + def is_memory_reference(self): + return True def __repr__(self): return 'FramePos<%d,%s>' % (self.pos, self.box_type) def __eq__(self, other): @@ -66,17 +104,311 @@ assert isinstance(loc, FakeFramePos) return loc.pos +class FakeCPU(object): + def get_baseofs_of_frame_field(self): + return 0 + class MockAsm(object): def __init__(self): self.moves = [] - + self.emitted = [] + self.cpu = FakeCPU() + + # XXX register allocation statistics to be removed later + self.num_moves_calls = 0 + self.num_moves_jump = 0 + self.num_spills = 0 + self.num_spills_to_existing = 0 + self.num_reloads = 0 + + self.preamble_num_moves_calls = 0 + self.preamble_num_moves_jump = 0 + self.preamble_num_spills = 0 + self.preamble_num_spills_to_existing = 0 + self.preamble_num_reloads = 0 + def regalloc_mov(self, from_loc, to_loc): self.moves.append((from_loc, to_loc)) + self.emitted.append(("move", to_loc, from_loc)) + + +def test_lifetime_next_real_usage(): + lt = RealLifetime(0, 1000) + lt.real_usages = [0, 1, 5, 10, 24, 35, 55, 56, 57, 90, 92, 100] + for i in range(100): + next = lt.next_real_usage(i) + assert next in lt.real_usages + assert next > i + assert lt.real_usages[lt.real_usages.index(next) - 1] <= i + assert lt.next_real_usage(100) == -1 + assert lt.next_real_usage(101) == -1 + +def test_fixed_position(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(0, 9) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0, b0) + longevity.fixed_register(4, r2, b0) + longevity.fixed_register(5, r1, b1) + longevity.fixed_register(8, r1, b1) + + assert l0.fixed_positions == [(1, r0), (4, r2)] + assert l1.fixed_positions == [(5, r1), (8, r1)] + assert l2.fixed_positions is None + + fpr0 = longevity.fixed_register_use[r0] + fpr1 = longevity.fixed_register_use[r1] + fpr2 = longevity.fixed_register_use[r2] + assert r3 not in longevity.fixed_register_use + assert fpr0.index_lifetimes == [(1, 0)] + assert fpr1.index_lifetimes == [(5, 2), (8, 5)] + assert fpr2.index_lifetimes == [(4, 1)] + +def test_fixed_position_none(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(0, 9) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0) + longevity.fixed_register(4, r2) + longevity.fixed_register(5, r1) + longevity.fixed_register(8, r1) + + fpr0 = longevity.fixed_register_use[r0] + fpr1 = longevity.fixed_register_use[r1] + fpr2 = longevity.fixed_register_use[r2] + assert r3 not in longevity.fixed_register_use + assert fpr0.index_lifetimes == [(1, 1)] + assert fpr1.index_lifetimes == [(5, 5), (8, 8)] + assert fpr2.index_lifetimes == [(4, 4)] + + +def test_free_until_pos_none(): + longevity = LifetimeManager({}) + longevity.fixed_register(5, r1, None) + longevity.fixed_register(8, r1, None) + longevity.fixed_register(35, r1, None) + + fpr1 = longevity.fixed_register_use[r1] + + assert fpr1.free_until_pos(0) == 5 + assert fpr1.free_until_pos(1) == 5 + assert fpr1.free_until_pos(2) == 5 + assert fpr1.free_until_pos(3) == 5 + assert fpr1.free_until_pos(4) == 5 + assert fpr1.free_until_pos(5) == 5 + assert fpr1.free_until_pos(10) == 35 + assert fpr1.free_until_pos(20) == 35 + assert fpr1.free_until_pos(30) == 35 + assert fpr1.free_until_pos(36) == sys.maxint + +def test_free_until_pos(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(30, 40) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(5, r1, b1) + longevity.fixed_register(8, r1, b1) + longevity.fixed_register(35, r1, b2) + + fpr1 = longevity.fixed_register_use[r1] + + # simple cases: we are before the beginning of the lifetime of the variable + # in the fixed register, then it's free until the definition of the + # variable + assert fpr1.free_until_pos(0) == 2 + assert fpr1.free_until_pos(1) == 2 + assert fpr1.free_until_pos(2) == 2 + assert fpr1.free_until_pos(10) == 30 + assert fpr1.free_until_pos(20) == 30 + assert fpr1.free_until_pos(30) == 30 + + # after the fixed use, we are fine anyway + assert fpr1.free_until_pos(36) == sys.maxint + assert fpr1.free_until_pos(50) == sys.maxint + + # asking for a position *after* the definition of the variable in the fixed + # register means the variable didn't make it into the fixed register, but + # at the latest by the use point it will have to go there + assert fpr1.free_until_pos(3) == 5 + assert fpr1.free_until_pos(4) == 5 + assert fpr1.free_until_pos(5) == 5 + assert fpr1.free_until_pos(6) == 8 + assert fpr1.free_until_pos(7) == 8 + assert fpr1.free_until_pos(8) == 8 + assert fpr1.free_until_pos(31) == 35 + assert fpr1.free_until_pos(32) == 35 + assert fpr1.free_until_pos(33) == 35 + assert fpr1.free_until_pos(34) == 35 + assert fpr1.free_until_pos(35) == 35 + +def test_free_until_pos_different_regs(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(30, 40) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0, b0) + longevity.fixed_register(4, r2, b0) + fpr2 = longevity.fixed_register_use[r2] + # the definition of b0 is before the other fixed register use of r0, so the + # earliest b0 can be in r2 is that use point at index 1 + assert fpr2.free_until_pos(0) == 1 + + +def test_longest_free_reg(): + b0, b1, b2 = newboxes(0, 0, 0) + l0 = Lifetime(0, 5) + l1 = Lifetime(2, 9) + l2 = Lifetime(30, 40) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(1, r0, b0) + longevity.fixed_register(4, r2, b0) + longevity.fixed_register(5, r1, b1) + longevity.fixed_register(8, r1, b1) + longevity.fixed_register(35, r1, b2) + + assert longevity.longest_free_reg(0, [r0, r1, r2]) == (r1, 2) + +def test_try_pick_free_reg(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(0, 4) + l1 = Lifetime(2, 20) + l2 = Lifetime(6, 20) + l3 = Lifetime(8, 20) + l4 = Lifetime(0, 10) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b3: l3, b4: l4}) + longevity.fixed_register(3, r1, b1) + longevity.fixed_register(7, r2, b2) + longevity.fixed_register(9, r3, b3) + + # a best fit + loc = longevity.try_pick_free_reg(0, b0, [r1, r2, r3, r4, r5]) + assert loc is r2 + + # does not fit into any of the fixed regs, use a non-fixed one + loc = longevity.try_pick_free_reg(0, b4, [r5, r2, r3, r4, r1]) + assert loc in [r4, r5] + + # all available are fixed but var doesn't fit completely into any of these. + # pick the biggest interval + loc = longevity.try_pick_free_reg(0, b4, [r1, r2, r3]) + assert loc is r3 + +def test_try_pick_free_reg_bug(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(10, 30) + l1 = Lifetime(0, 15) + longevity = LifetimeManager({b0: l0, b1: l1}) + longevity.fixed_register(20, r0, b0) + + # does not fit into r0, use r1 + loc = longevity.try_pick_free_reg(0, b1, [r0, r1]) + assert loc == r1 + +def test_try_pick_free_reg_bug2(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(1, 2) + l1 = Lifetime(2, 4) + longevity = LifetimeManager({b0: l0, b1: l1}) + longevity.fixed_register(4, r1, b1) + + # does not fit into r0, use r1 + loc = longevity.try_pick_free_reg(0, b0, [r0, r1]) + assert loc == r0 + +def test_simple_coalescing(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(0, 4) + l1 = Lifetime(4, 20) + l2 = Lifetime(4, 20) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2}) + longevity.fixed_register(10, r1, b1) + longevity.fixed_register(10, r2, b2) + longevity.try_use_same_register(b0, b2) + + loc = longevity.try_pick_free_reg(0, b0, [r0, r1, r2, r3, r4]) + assert loc is r2 + +def test_coalescing_blocks_regs_correctly(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(10, 30) + l1 = Lifetime(30, 40) + l2 = Lifetime(30, 40) + l3 = Lifetime(0, 15) + l4 = Lifetime(0, 5) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b3: l3, b4: l4}) + longevity.try_use_same_register(b0, b1) + longevity.fixed_register(35, r1, b1) + longevity.fixed_register(35, r2, b2) + + loc = longevity.try_pick_free_reg(0, b3, [r1, r2]) + # r2 is picked, otherwise b0 can't end up in r1 + assert loc is r2 + + loc = longevity.try_pick_free_reg(0, b4, [r1, r2]) + # r1 is picked, because b4 fits before b0 + assert loc is r1 + +def test_coalescing_non_fixed_regs(): + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(0, 10) + l1 = Lifetime(10, 20) + l2 = Lifetime(25, 40) + l3 = Lifetime(15, 40) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b3: l3}) + longevity.try_use_same_register(b0, b1) + longevity.fixed_register(35, r2, b2) + longevity.fixed_register(35, r3, b3) + + loc = longevity.try_pick_free_reg(0, b0, [r1, r2, r3]) + # r2 is picked, otherwise b1 can't end up in the same reg as b0 + assert loc is r2 + + +def test_chained_coalescing(): + # 5 + b4 + # | + # 10 + b0 | + # | | + # | 15 + + # | + # + + # 20 + # + b1 + # | + # | + # | + # + + # 30 + # + b2 + # | + # r1 * + # | + # + + # 40 + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + l0 = Lifetime(10, 20) + l1 = Lifetime(20, 30) + l2 = Lifetime(30, 40) + l4 = Lifetime(5, 15) + longevity = LifetimeManager({b0: l0, b1: l1, b2: l2, b4: l4}) + longevity.try_use_same_register(b0, b1) + longevity.try_use_same_register(b1, b2) + longevity.fixed_register(35, r1, b2) + + loc = longevity.try_pick_free_reg(5, b4, [r0, r1]) + assert loc is r0 + class TestRegalloc(object): def test_freeing_vars(self): b0, b1, b2 = newboxes(0, 0, 0) - longevity = {b0: (0, 1), b1: (0, 2), b2: (0, 2)} + longevity = {b0: Lifetime(0, 1), b1: Lifetime(0, 2), b2: Lifetime(0, 2)} rm = RegisterManager(longevity) rm.next_instruction() for b in b0, b1, b2: @@ -99,7 +431,7 @@ rm._check_invariants() assert len(rm.free_regs) == 4 assert len(rm.reg_bindings) == 0 - + def test_register_exhaustion(self): boxes, longevity = boxes_and_longevity(5) rm = RegisterManager(longevity) @@ -115,7 +447,7 @@ class XRegisterManager(RegisterManager): no_lower_byte_regs = [r2, r3] - + rm = XRegisterManager(longevity) rm.next_instruction() loc0 = rm.try_allocate_reg(b0, need_lower_byte=True) @@ -150,7 +482,7 @@ class XRegisterManager(RegisterManager): no_lower_byte_regs = [r2, r3] - + rm = XRegisterManager(longevity, frame_manager=fm, assembler=MockAsm()) @@ -173,7 +505,7 @@ assert isinstance(loc, FakeReg) assert loc not in [r2, r3] rm._check_invariants() - + def test_make_sure_var_in_reg(self): boxes, longevity = boxes_and_longevity(5) fm = TFrameManager() @@ -187,74 +519,10 @@ loc = rm.make_sure_var_in_reg(b0) assert isinstance(loc, FakeReg) rm._check_invariants() - - def test_force_result_in_reg_1(self): - b0, b1 = newboxes(0, 0) - longevity = {b0: (0, 1), b1: (1, 3)} - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - # first path, var is already in reg and dies - loc0 = rm.force_allocate_reg(b0) - rm._check_invariants() - rm.next_instruction() - loc = rm.force_result_in_reg(b1, b0) - assert loc is loc0 - assert len(asm.moves) == 0 - rm._check_invariants() - - def test_force_result_in_reg_2(self): - b0, b1 = newboxes(0, 0) - longevity = {b0: (0, 2), b1: (1, 3)} - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - loc0 = rm.force_allocate_reg(b0) - rm._check_invariants() - rm.next_instruction() - loc = rm.force_result_in_reg(b1, b0) - assert loc is loc0 - assert rm.loc(b0) is not loc0 - assert len(asm.moves) == 1 - rm._check_invariants() - - def test_force_result_in_reg_3(self): - b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) - longevity = {b0: (0, 2), b1: (0, 2), b3: (0, 2), b2: (0, 2), b4: (1, 3)} - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - for b in b0, b1, b2, b3: - rm.force_allocate_reg(b) - assert not len(rm.free_regs) - rm._check_invariants() - rm.next_instruction() - rm.force_result_in_reg(b4, b0) - rm._check_invariants() - assert len(asm.moves) == 1 - - def test_force_result_in_reg_4(self): - b0, b1 = newboxes(0, 0) - longevity = {b0: (0, 1), b1: (0, 1)} - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) - rm.next_instruction() - fm.loc(b0) - rm.force_result_in_reg(b1, b0) - rm._check_invariants() - loc = rm.loc(b1) - assert isinstance(loc, FakeReg) - loc = rm.loc(b0) - assert isinstance(loc, FakeFramePos) - assert len(asm.moves) == 1 def test_bogus_make_sure_var_in_reg(self): b0, = newboxes(0) - longevity = {b0: (0, 1)} + longevity = {b0: Lifetime(0, 1)} fm = TFrameManager() asm = MockAsm() rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) @@ -281,17 +549,6 @@ assert len(rm.reg_bindings) == 4 rm._check_invariants() - def test_force_result_in_reg_const(self): - boxes, longevity = boxes_and_longevity(2) - fm = TFrameManager() - asm = MockAsm() - rm = RegisterManager(longevity, frame_manager=fm, - assembler=asm) - rm.next_instruction() - c = ConstInt(0) - rm.force_result_in_reg(boxes[0], c) - rm._check_invariants() - def test_loc_of_const(self): rm = RegisterManager({}) rm.next_instruction() @@ -311,6 +568,7 @@ assembler=asm) for b in boxes[:-1]: rm.force_allocate_reg(b) + rm.position = 0 rm.before_call() assert len(rm.reg_bindings) == 2 assert fm.get_frame_depth() == 2 @@ -342,7 +600,7 @@ rm.after_call(boxes[-1]) assert len(rm.reg_bindings) == 1 rm._check_invariants() - + def test_different_frame_width(self): class XRegisterManager(RegisterManager): @@ -350,19 +608,21 @@ fm = TFrameManager() b0 = InputArgInt() - longevity = {b0: (0, 1)} + longevity = {b0: Lifetime(0, 1)} asm = MockAsm() rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) f0 = InputArgFloat() - longevity = {f0: (0, 1)} + longevity = {f0: Lifetime(0, 1)} xrm = XRegisterManager(longevity, frame_manager=fm, assembler=asm) xrm.loc(f0) rm.loc(b0) assert fm.get_frame_depth() == 3 - + def test_spilling(self): b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5) - longevity = {b0: (0, 3), b1: (0, 3), b3: (0, 5), b2: (0, 2), b4: (1, 4), b5: (1, 3)} + longevity = {b0: Lifetime(0, 3), b1: Lifetime(0, 3), + b3: Lifetime(0, 5), b2: Lifetime(0, 2), + b4: Lifetime(1, 4), b5: Lifetime(1, 3)} fm = TFrameManager() asm = MockAsm() rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) @@ -378,6 +638,48 @@ assert spilled2 is loc rm._check_invariants() + def test_spilling_furthest_next_real_use(self): + b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5) + longevity = {b0: Lifetime(0, 3, [1, 2, 3]), b1: Lifetime(0, 3, [3]), + b3: Lifetime(0, 4, [1, 2, 3, 4]), b2: Lifetime(0, 2), + b4: Lifetime(1, 4), b5: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + for b in b0, b1, b2, b3: + rm.force_allocate_reg(b) + assert len(rm.free_regs) == 0 + rm.next_instruction() + loc = rm.loc(b1) + spilled = rm.force_allocate_reg(b4) + assert spilled is loc + spilled2 = rm.force_allocate_reg(b5) + assert spilled2 is loc + rm._check_invariants() + + + def test_spill_useless_vars_first(self): + b0, b1, b2, b3, b4, b5 = newboxes(0, 1, 2, 3, 4, 5) + longevity = {b0: Lifetime(0, 5), b1: Lifetime(0, 10), + # b2 and b3 become useless but b3 lives longer + b3: Lifetime(0, 7, 3), b2: Lifetime(0, 6, 3), + b4: Lifetime(4, 5), b5: Lifetime(4, 7)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + for b in b0, b1, b2, b3: + rm.force_allocate_reg(b) + rm.position = 4 + assert len(rm.free_regs) == 0 + loc = rm.loc(b3) + spilled = rm.force_allocate_reg(b4) + assert spilled is loc + loc = rm.loc(b2) + spilled2 = rm.force_allocate_reg(b5) + assert spilled2 is loc + rm._check_invariants() def test_hint_frame_locations_1(self): for hint_value in range(11): @@ -568,3 +870,503 @@ assert fm.get_loc_index(floc) == 0 for box in fm.bindings.keys(): fm.mark_as_free(box) + + +class TestForceResultInReg(object): + # use it's own class since there are so many cases + + def test_force_result_in_reg_1(self): + # var in reg, dies + b0, b1 = newboxes(0, 0) + longevity = {b0: Lifetime(0, 1), b1: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + loc0 = rm.force_allocate_reg(b0) + rm._check_invariants() + rm.next_instruction() + loc = rm.force_result_in_reg(b1, b0) + assert loc is loc0 + assert len(asm.moves) == 0 + rm._check_invariants() + + def test_force_result_in_reg_2(self): + # var in reg, survives + b0, b1 = newboxes(0, 0) + longevity = {b0: Lifetime(0, 2), b1: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + loc0 = rm.force_allocate_reg(b0) + rm._check_invariants() + rm.next_instruction() + loc = rm.force_result_in_reg(b1, b0) + assert loc is not loc0 + assert rm.loc(b0) is loc0 + assert len(asm.moves) == 1 + rm._check_invariants() + + def test_force_result_in_reg_3(self): + # var in reg, survives, no free registers + b0, b1, b2, b3, b4 = newboxes(0, 0, 0, 0, 0) + longevity = {b0: Lifetime(0, 2), b1: Lifetime(0, 2), + b3: Lifetime(0, 2), b2: Lifetime(0, 2), + b4: Lifetime(1, 3)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + for b in b0, b1, b2, b3: + rm.force_allocate_reg(b) + assert not len(rm.free_regs) + rm._check_invariants() + rm.next_instruction() + rm.force_result_in_reg(b4, b0) + rm._check_invariants() + assert len(asm.moves) == 1 + + def test_force_result_in_reg_4(self): + b0, b1 = newboxes(0, 0) + longevity = {b0: Lifetime(0, 1), b1: Lifetime(0, 1)} + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + fm.loc(b0) + rm.force_result_in_reg(b1, b0) + rm._check_invariants() + loc = rm.loc(b1) + assert isinstance(loc, FakeReg) + loc = rm.loc(b0) + assert isinstance(loc, FakeFramePos) + assert len(asm.moves) == 1 + + def test_force_result_in_reg_const(self): + # const + boxes, longevity = boxes_and_longevity(2) + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, + assembler=asm) + rm.next_instruction() + c = ConstInt(0) + rm.force_result_in_reg(boxes[0], c) + rm._check_invariants() + + # some tests where the result is supposed to go in a fixed register + + def test_force_result_in_reg_fixed_reg_1(self): + # var in reg, dies + b0, b1 = newboxes(0, 0) + longevity = LifetimeManager({b0: Lifetime(0, 1), b1: Lifetime(1, 3)}) + longevity.try_use_same_register(b0, b1) + longevity.fixed_register(1, r1, b1) + fm = TFrameManager() + asm = MockAsm() + rm = RegisterManager(longevity, frame_manager=fm, assembler=asm) + rm.next_instruction() + loc0 = rm.force_allocate_reg(b0) + rm._check_invariants() + rm.next_instruction() + loc = rm.force_result_in_reg(b1, b0) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit