Author: Ronan Lamy <ronan.l...@gmail.com> Branch: testing-cleanup Changeset: r85081:80829afb3cac Date: 2016-06-10 15:18 +0100 http://bitbucket.org/pypy/pypy/changeset/80829afb3cac/
Log: hg merge default diff too long, truncating to 2000 out of 2410 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -25,3 +25,4 @@ 80ef432a32d9baa4b3c5a54c215e8ebe499f6374 release-5.1.2 40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2 40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2 +c09c19272c990a0611b17569a0085ad1ab00c8ff release-pypy2.7-v5.3 diff --git a/pypy/doc/release-pypy2.7-v5.3.0.rst b/pypy/doc/release-pypy2.7-v5.3.0.rst --- a/pypy/doc/release-pypy2.7-v5.3.0.rst +++ b/pypy/doc/release-pypy2.7-v5.3.0.rst @@ -176,8 +176,8 @@ * Reduce the size of generated code by using the same function objects in all generated subclasses - * Share cpyext Py* function wrappers according to the signature, shrining the - translated libpypy.so by about + * Share cpyext Py* function wrappers according to the signature, shrinking the + translated libpypy.so by about 10% (measured without the JIT) * Compile c snippets with -Werror, and fix warnings it exposed diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,3 +5,20 @@ .. this is a revision shortly after release-pypy2.7-v5.3 .. startrev: 873218a739f1 +.. branch: fix-gen-dfa + +Resolves an issue with the generator script to build the dfa for Python syntax. + +.. branch: z196-support + +Fixes a critical issue in the register allocator and extends support on s390x. +PyPy runs and translates on the s390x revisions z10 (released February 2008, experimental) +and z196 (released August 2010) in addition to zEC12 and z13. +To target e.g. z196 on a zEC12 machine supply CFLAGS="-march=z196" to your shell environment. + +.. branch: s390x-5.3-catchup + +Implement the backend related changes for s390x. + +.. branch: incminimark-ll_assert +.. branch: vmprof-openbsd diff --git a/pypy/interpreter/pyparser/genpytokenize.py b/pypy/interpreter/pyparser/genpytokenize.py --- a/pypy/interpreter/pyparser/genpytokenize.py +++ b/pypy/interpreter/pyparser/genpytokenize.py @@ -191,7 +191,7 @@ newArcPair(states, EMPTY), pseudoExtras, number, funny, contStr, name)) dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken) - return DFA(dfaStates, dfaAccepts) + return DFA(dfaStates, dfaAccepts), dfaStates # ______________________________________________________________________ @@ -205,7 +205,9 @@ newArcPair(states, DEFAULT), any(states, notGroupStr(states, "'\\")))), newArcPair(states, "'")) - singleDFA = DFA(*nfaToDfa(states, *single)) + states, accepts = nfaToDfa(states, *single) + singleDFA = DFA(states, accepts) + states_singleDFA = states states = [] double = chain(states, any(states, notGroupStr(states, '"\\')), @@ -215,7 +217,9 @@ newArcPair(states, DEFAULT), any(states, notGroupStr(states, '"\\')))), newArcPair(states, '"')) - doubleDFA = DFA(*nfaToDfa(states, *double)) + states, accepts = nfaToDfa(states, *double) + doubleDFA = DFA(states, accepts) + states_doubleDFA = states states = [] single3 = chain(states, any(states, notGroupStr(states, "'\\")), @@ -230,7 +234,9 @@ notChainStr(states, "''"))), any(states, notGroupStr(states, "'\\")))), chainStr(states, "'''")) - single3DFA = NonGreedyDFA(*nfaToDfa(states, *single3)) + states, accepts = nfaToDfa(states, *single3) + single3DFA = NonGreedyDFA(states, accepts) + states_single3DFA = states states = [] double3 = chain(states, any(states, notGroupStr(states, '"\\')), @@ -245,9 +251,11 @@ notChainStr(states, '""'))), any(states, notGroupStr(states, '"\\')))), chainStr(states, '"""')) - double3DFA = NonGreedyDFA(*nfaToDfa(states, *double3)) - map = {"'" : singleDFA, - '"' : doubleDFA, + states, accepts = nfaToDfa(states, *double3) + double3DFA = NonGreedyDFA(states, accepts) + states_double3DFA = states + map = {"'" : (singleDFA, states_singleDFA), + '"' : (doubleDFA, states_doubleDFA), "r" : None, "R" : None, "u" : None, @@ -257,25 +265,30 @@ for uniPrefix in ("", "u", "U", "b", "B", ): for rawPrefix in ("", "r", "R"): prefix = uniPrefix + rawPrefix - map[prefix + "'''"] = single3DFA - map[prefix + '"""'] = double3DFA + map[prefix + "'''"] = (single3DFA, states_single3DFA) + map[prefix + '"""'] = (double3DFA, states_double3DFA) return map # ______________________________________________________________________ -def output(name, dfa_class, dfa): +def output(name, dfa_class, dfa, states): import textwrap + lines = [] i = 0 for line in textwrap.wrap(repr(dfa.accepts), width = 50): if i == 0: - print "accepts =", line + lines.append("accepts = ") else: - print " ", line + lines.append(" ") + lines.append(line) + lines.append("\n") i += 1 import StringIO - print "states = [" - for numstate, state in enumerate(dfa.states): - print " #", numstate + lines.append("states = [\n") + for numstate, state in enumerate(states): + lines.append(" # ") + lines.append(str(numstate)) + lines.append('\n') s = StringIO.StringIO() i = 0 for k, v in sorted(state.items()): @@ -298,22 +311,28 @@ for line in text: line = line.replace('::', ': ') if i == 0: - print ' {' + line + lines.append(' {') else: - print ' ' + line + lines.append(' ') + lines.append(line) + lines.append('\n') i += 1 - print " ]" - print "%s = automata.%s(states, accepts)" % (name, dfa_class) - print + lines.append(" ]\n") + lines.append("%s = automata.%s(states, accepts)\n" % (name, dfa_class)) + return ''.join(lines) def main (): - pseudoDFA = makePyPseudoDFA() - output("pseudoDFA", "DFA", pseudoDFA) + pseudoDFA, states_pseudoDFA = makePyPseudoDFA() + print output("pseudoDFA", "DFA", pseudoDFA, states_pseudoDFA) endDFAMap = makePyEndDFAMap() - output("double3DFA", "NonGreedyDFA", endDFAMap['"""']) - output("single3DFA", "NonGreedyDFA", endDFAMap["'''"]) - output("singleDFA", "DFA", endDFAMap["'"]) - output("doubleDFA", "DFA", endDFAMap['"']) + dfa, states = endDFAMap['"""'] + print output("double3DFA", "NonGreedyDFA", dfa, states) + dfa, states = endDFAMap["'''"] + print output("single3DFA", "NonGreedyDFA", dfa, states) + dfa, states = endDFAMap["'"] + print output("singleDFA", "DFA", dfa, states) + dfa, states = endDFAMap["\""] + print output("doubleDFA", "DFA", dfa, states) # ______________________________________________________________________ diff --git a/pypy/interpreter/pyparser/test/test_gendfa.py b/pypy/interpreter/pyparser/test/test_gendfa.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/pyparser/test/test_gendfa.py @@ -0,0 +1,16 @@ +from pypy.interpreter.pyparser.automata import DFA, DEFAULT +from pypy.interpreter.pyparser.genpytokenize import output + +def test_states(): + states = [{"\x00": 1}, {"\x01": 0}] + d = DFA(states[:], [False, True]) + assert output('test', DFA, d, states) == """\ +accepts = [False, True] +states = [ + # 0 + {'\\x00': 1}, + # 1 + {'\\x01': 0}, + ] +test = automata.pypy.interpreter.pyparser.automata.DFA(states, accepts) +""" diff --git a/pypy/module/__pypy__/interp_intop.py b/pypy/module/__pypy__/interp_intop.py --- a/pypy/module/__pypy__/interp_intop.py +++ b/pypy/module/__pypy__/interp_intop.py @@ -2,21 +2,10 @@ from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rlib.rarithmetic import r_uint, intmask +from rpython.rlib.rarithmetic import int_c_div, int_c_mod from rpython.rlib import jit -# XXX maybe temporary: hide llop.int_{floordiv,mod} from the JIT, -# because now it expects only Python-style divisions, not the -# C-style divisions of these two ll operations -@jit.dont_look_inside -def _int_floordiv(n, m): - return llop.int_floordiv(lltype.Signed, n, m) - -@jit.dont_look_inside -def _int_mod(n, m): - return llop.int_mod(lltype.Signed, n, m) - - @unwrap_spec(n=int, m=int) def int_add(space, n, m): return space.wrap(llop.int_add(lltype.Signed, n, m)) @@ -31,11 +20,11 @@ @unwrap_spec(n=int, m=int) def int_floordiv(space, n, m): - return space.wrap(_int_floordiv(n, m)) + return space.wrap(int_c_div(n, m)) @unwrap_spec(n=int, m=int) def int_mod(space, n, m): - return space.wrap(_int_mod(n, m)) + return space.wrap(int_c_mod(n, m)) @unwrap_spec(n=int, m=int) def int_lshift(space, n, m): diff --git a/pypy/module/_cffi_backend/ccallback.py b/pypy/module/_cffi_backend/ccallback.py --- a/pypy/module/_cffi_backend/ccallback.py +++ b/pypy/module/_cffi_backend/ccallback.py @@ -220,6 +220,11 @@ if rffi.cast(lltype.Signed, res) != clibffi.FFI_OK: raise oefmt(space.w_SystemError, "libffi failed to build this callback") + if closure_ptr.c_user_data != unique_id: + raise oefmt(space.w_SystemError, + "ffi_prep_closure(): bad user_data (it seems that the " + "version of the libffi library seen at runtime is " + "different from the 'ffi.h' file seen at compile-time)") def py_invoke(self, ll_res, ll_args): jitdriver1.jit_merge_point(callback=self, diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -201,6 +201,9 @@ else: copy_string_to_raw(llstr(src_string), dest_data, 0, n) else: + # nowadays this case should be rare or impossible: as far as + # I know, all common types implementing the *writable* buffer + # interface now support get_raw_address() if src_is_ptr: for i in range(n): dest_buf.setitem(i, src_data[i]) diff --git a/pypy/module/cpyext/include/pymem.h b/pypy/module/cpyext/include/pymem.h --- a/pypy/module/cpyext/include/pymem.h +++ b/pypy/module/cpyext/include/pymem.h @@ -1,5 +1,11 @@ #include <stdlib.h> +#ifndef Py_PYMEM_H +#define Py_PYMEM_H + +#ifdef __cplusplus +extern "C" { +#endif #define PyMem_MALLOC(n) malloc((n) ? (n) : 1) #define PyMem_REALLOC(p, n) realloc((p), (n) ? (n) : 1) @@ -44,3 +50,9 @@ */ #define PyMem_Del PyMem_Free #define PyMem_DEL PyMem_FREE + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_PYMEM_H */ diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -23,7 +23,7 @@ guard_true(i14, descr=...) guard_not_invalidated(descr=...) i16 = int_eq(i6, %d) - i19 = call_i(ConstClass(ll_int_mod__Signed_Signed), i6, i10, descr=<Calli . ii EF=0 OS=14>) + i19 = call_i(ConstClass(ll_int_py_mod__Signed_Signed), i6, i10, descr=<Calli . ii EF=0 OS=14>) i21 = int_lt(i19, 0) guard_false(i21, descr=...) i22 = int_ge(i19, i10) diff --git a/pypy/module/select/test/test_epoll.py b/pypy/module/select/test/test_epoll.py --- a/pypy/module/select/test/test_epoll.py +++ b/pypy/module/select/test/test_epoll.py @@ -20,6 +20,10 @@ self.w_sockets = self.space.wrap([]) if platform.machine().startswith('arm'): self.w_timeout = self.space.wrap(0.06) + if platform.machine().startswith('s390x'): + # s390x is not slow, but it seems there is one case when epoll + # modify method is called that takes longer on s390x + self.w_timeout = self.space.wrap(0.06) else: self.w_timeout = self.space.wrap(0.02) diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -3,10 +3,12 @@ It uses 'pypy/goal/pypy-c' and parts of the rest of the working copy. Usage: - package.py [--options] pypy-VER-PLATFORM + package.py [--options] --archive-name=pypy-VER-PLATFORM The output is found in the directory from --builddir, by default /tmp/usession-YOURNAME/build/. + +For a list of all options, see 'package.py --help'. """ import shutil @@ -61,6 +63,7 @@ name = options.name if not name: name = 'pypy-nightly' + assert '/' not in name rename_pypy_c = options.pypy_c override_pypy_c = options.override_pypy_c @@ -288,26 +291,12 @@ help='destination dir for archive') parser.add_argument('--override_pypy_c', type=str, default='', help='use as pypy exe instead of pypy/goal/pypy-c') - # Positional arguments, for backward compatability with buldbots - parser.add_argument('extra_args', help='optional interface to positional arguments', nargs=argparse.REMAINDER, - metavar='[archive-name] [rename_pypy_c] [targetdir] [override_pypy_c]', - ) options = parser.parse_args(args) - # Handle positional arguments, choke if both methods are used - for i,target, default in ([1, 'name', ''], [2, 'pypy_c', pypy_exe], - [3, 'targetdir', ''], [4,'override_pypy_c', '']): - if len(options.extra_args)>i: - if getattr(options, target) != default: - print 'positional argument',i,target,'already has value',getattr(options, target) - parser.print_help() - return - setattr(options, target, options.extra_args[i]) if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"): options.nostrip = True - if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"): - options.tk = True + options.no_tk = True if not options.builddir: # The import actually creates the udir directory from rpython.tool.udir import udir diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh --- a/pypy/tool/release/repackage.sh +++ b/pypy/tool/release/repackage.sh @@ -3,7 +3,7 @@ min=3 rev=0 branchname=release-$maj.x # ==OR== release-$maj.$min.x -tagname=release-$maj.$min.$rev # ==OR== release-$maj.$min +tagname=release-pypy2.7-v$maj.$min # ==OR== release-$maj.$min echo checking hg log -r $branchname hg log -r $branchname || exit 1 @@ -34,17 +34,19 @@ plat=win32 wget http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.zip unzip pypy-c-jit-latest-$plat.zip +rm pypy-c-jit-latest-$plat.zip mv pypy-c-jit-*-$plat $rel-$plat -zip -r $rel-$plat.zip $rel-$plat +zip -rq $rel-$plat.zip $rel-$plat rm -rf $rel-$plat # Do this after creating a tag, note the untarred directory is pypy-pypy-<hash> # so make sure there is not another one wget https://bitbucket.org/pypy/pypy/get/$tagname.tar.bz2 tar -xf $tagname.tar.bz2 +rm $tagname.tar.bz2 mv pypy-pypy-* $rel-src tar --owner=root --group=root --numeric-owner -cjf $rel-src.tar.bz2 $rel-src -zip -r $rel-src.zip $rel-src +zip -rq $rel-src.zip $rel-src rm -rf $rel-src # Print out the md5, sha1, sha256 diff --git a/pypy/tool/release/test/test_package.py b/pypy/tool/release/test/test_package.py --- a/pypy/tool/release/test/test_package.py +++ b/pypy/tool/release/test/test_package.py @@ -21,8 +21,10 @@ def test_dir_structure(self, test='test'): retval, builddir = package.package( - '--without-cffi', str(py.path.local(pypydir).dirpath()), - test, self.rename_pypy_c, _fake=True) + '--without-cffi', + '--archive-name', test, + '--rename_pypy_c', self.rename_pypy_c, + _fake=True) assert retval == 0 prefix = builddir.join(test) cpyver = '%d.%d' % CPYTHON_VERSION[:2] @@ -71,8 +73,9 @@ builddir = udir.ensure("build", dir=True) retval, builddir = package.package( '--without-cffi', '--builddir', str(builddir), - str(py.path.local(pypydir).dirpath()), - test, self.rename_pypy_c, _fake=True) + '--archive-name', test, + '--rename_pypy_c', self.rename_pypy_c, + _fake=True) def test_with_zipfile_module(self): prev = package.USE_ZIPFILE_MODULE diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -4610,6 +4610,19 @@ a.build_types(fd, []) py.test.raises(AnnotatorError, a.build_types, fb, []) + def test_annotate_generator_with_unreachable_yields(self): + def f(n): + if n < 0: + yield 42 + yield n + yield n + def main(n): + for x in f(abs(n)): + pass + # + a = self.RPythonAnnotator() + a.build_types(main, [int]) + def g(n): return [0, 1, 2, n] diff --git a/rpython/doc/arch/index.rst b/rpython/doc/arch/index.rst new file mode 100644 --- /dev/null +++ b/rpython/doc/arch/index.rst @@ -0,0 +1,11 @@ +.. _arch_index: + +Architecture specific notes +=========================== + +Here you can find some architecture specific notes. + +.. toctree:: + :maxdepth: 1 + + s390x diff --git a/rpython/doc/arch/s390x.rst b/rpython/doc/arch/s390x.rst new file mode 100644 --- /dev/null +++ b/rpython/doc/arch/s390x.rst @@ -0,0 +1,34 @@ +.. _s390x: + +IBM Mainframe S390X +=================== + +Our JIT implements the 64 bit version of the IBM Mainframe called s390x. +Note that this architecture is big endian. + +Currently supported ISAs: + +* z13 (released January 2015) +* zEC12 (released September 2012) +* z196 (released August 2010) +* z10 (released February 2008) + +To check if all the necessary CPU facilities are installed +on the subject machine, please run the test using a copy of the pypy +source code:: + + $ ./pytest.py rpython/jit/backend/zarch/test/test_assembler -v -k 'test_facility' + +In addition you can run the auto encoding test to check if your Linux GCC tool chain +is able to compile all instructions used in the JIT backend:: + + $ ./pytest.py rpython/jit/backend/zarch/test/test_auto_encoding.py -v + +Translating +----------- + +Specifically check for these two dependencies. On old versions of some +Linux distributions ship older versions. + +* libffi (version should do > 3.0.+). +* CPython 2.7.+. diff --git a/rpython/doc/index.rst b/rpython/doc/index.rst --- a/rpython/doc/index.rst +++ b/rpython/doc/index.rst @@ -37,7 +37,6 @@ arm logging - s390x Writing your own interpreter in RPython @@ -61,6 +60,7 @@ getting-started dir-reference jit/index + arch/index translation rtyper garbage_collection diff --git a/rpython/doc/s390x.rst b/rpython/doc/s390x.rst deleted file mode 100644 --- a/rpython/doc/s390x.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. _s390x: - -S390X JIT Backend -================= - -Our JIT implements the 64 bit version of the IBM Mainframe called s390x. -Note that this architecture is big endian. - -The following facilities need to be installed to operate -correctly (all of the machines used for development these where installed): - -* General-Instructions-Extension -* Long-Displacement -* Binary Floating Point (IEEE) - -Translating ------------ - -Ensure that libffi is installed (version should do > 3.0.+). -CPython should be version 2.7.+. diff --git a/rpython/flowspace/generator.py b/rpython/flowspace/generator.py --- a/rpython/flowspace/generator.py +++ b/rpython/flowspace/generator.py @@ -132,13 +132,14 @@ del block.operations[index] newlink = split_block(block, index) newblock = newlink.target + varnames = get_variable_names(newlink.args) # class Resume(AbstractPosition): _immutable_ = True + _attrs_ = varnames block = newblock Resume.__name__ = 'Resume%d' % len(mappings) mappings.append(Resume) - varnames = get_variable_names(newlink.args) # _insert_reads(newblock, varnames) # diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -901,6 +901,8 @@ size_box = op.getarg(0) assert isinstance(size_box, ConstInt) size = size_box.getint() + # hint: try to move unrelated registers away from r0 and r1 now + self.rm.spill_or_move_registers_before_call([r.r0, r.r1]) self.rm.force_allocate_reg(op, selected_reg=r.r0) t = TempInt() @@ -924,6 +926,7 @@ # sizeloc must be in a register, but we can free it now # (we take care explicitly of conflicts with r0 or r1) sizeloc = self.rm.make_sure_var_in_reg(size_box) + self.rm.spill_or_move_registers_before_call([r.r0, r.r1]) # sizeloc safe self.rm.possibly_free_var(size_box) # self.rm.force_allocate_reg(op, selected_reg=r.r0) @@ -951,6 +954,11 @@ arraydescr = op.getdescr() length_box = op.getarg(2) assert not isinstance(length_box, Const) # we cannot have a const here! + # can only use spill_or_move_registers_before_call() as a hint if + # we are sure that length_box stays alive and won't be freed now + # (it should always be the case, see below, but better safe than sorry) + if self.rm.stays_alive(length_box): + self.rm.spill_or_move_registers_before_call([r.r0, r.r1]) # the result will be in r0 self.rm.force_allocate_reg(op, selected_reg=r.r0) # we need r1 as a temporary diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -579,11 +579,26 @@ new_free_regs.append(self.reg_bindings.pop(v)) def before_call(self, force_store=[], save_all_regs=0): - """Spill or move some registers before a call. By default, - this means: for every register in 'self.save_around_call_regs', + self.spill_or_move_registers_before_call(self.save_around_call_regs, + force_store, save_all_regs) + + def spill_or_move_registers_before_call(self, save_sublist, + force_store=[], save_all_regs=0): + """Spill or move some registers before a call. + + By default, this means: for every register in 'save_sublist', if there is a variable there and it survives longer than the current operation, then it is spilled/moved somewhere else. + WARNING: this might do the equivalent of possibly_free_vars() + on variables dying in the current operation. It won't + immediately overwrite registers that used to be occupied by + these variables, though. Use this function *after* you finished + calling self.loc() or self.make_sure_var_in_reg(), i.e. when you + know the location of all input arguments. These locations stay + valid, but only *if they are in self.save_around_call_regs,* + not if they are callee-saved registers! + 'save_all_regs' can be 0 (default set of registers), 1 (do that for all registers), or 2 (default + gc ptrs). @@ -612,6 +627,16 @@ anyway, as a local hack in this function, because on x86 CPUs such register-register moves are almost free. """ + if not we_are_translated(): + # 'save_sublist' is either the whole + # 'self.save_around_call_regs', or a sublist thereof, and + # then only those registers are spilled/moved. But when + # we move them, we never move them to other registers in + # 'self.save_around_call_regs', to avoid ping-pong effects + # where the same value is constantly moved around. + for reg in save_sublist: + assert reg in self.save_around_call_regs + new_free_regs = [] move_or_spill = [] @@ -631,7 +656,7 @@ # we need to spill all GC ptrs in this mode self._bc_spill(v, new_free_regs) # - elif reg not in self.save_around_call_regs: + elif reg not in save_sublist: continue # in a register like ebx/rbx: it is fine where it is # else: @@ -663,6 +688,7 @@ if not we_are_translated(): if move_or_spill: assert max_age <= min([_a for _, _a in move_or_spill]) + assert reg in save_sublist assert reg in self.save_around_call_regs assert new_reg not in self.save_around_call_regs self.assembler.regalloc_mov(reg, new_reg) diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py b/rpython/jit/backend/llsupport/test/test_gc_integration.py --- a/rpython/jit/backend/llsupport/test/test_gc_integration.py +++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py @@ -324,17 +324,19 @@ def check(frame): expected_size = 1 idx = 0 + fixed_size = self.cpu.JITFRAME_FIXED_SIZE if self.cpu.backend_name.startswith('arm'): # jitframe fixed part is larger here expected_size = 2 idx = 1 + fixed_size -= 32 assert len(frame.jf_gcmap) == expected_size - if self.cpu.IS_64_BIT: - exp_idx = self.cpu.JITFRAME_FIXED_SIZE + 1 # +1 from i0 - else: - assert frame.jf_gcmap[idx] - exp_idx = self.cpu.JITFRAME_FIXED_SIZE - 32 * idx + 1 # +1 from i0 - assert frame.jf_gcmap[idx] == (1 << (exp_idx + 1)) | (1 << exp_idx) + # check that we have two bits set, and that they are in two + # registers (p0 and p1 are moved away when doing p2, but not + # spilled, just moved to different registers) + bits = [n for n in range(fixed_size) + if frame.jf_gcmap[idx] & (1<<n)] + assert len(bits) == 2 self.cpu = self.getcpu(check) ops = ''' diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -2825,6 +2825,7 @@ from rpython.rlib.rarithmetic import r_singlefloat from rpython.translator.c import primitive + def same_as_for_box(b): if b.type == 'i': return rop.SAME_AS_I @@ -2835,6 +2836,8 @@ cpu = self.cpu rnd = random.Random(525) + seed = py.test.config.option.randomseed + print("random seed %d" % seed) ALL_TYPES = [ (types.ulong, lltype.Unsigned), diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -204,20 +204,20 @@ def _build_malloc_slowpath(self, kind): """ While arriving on slowpath, we have a gcpattern on stack 0. - The arguments are passed in eax and edi, as follows: + The arguments are passed in ecx and edx, as follows: - kind == 'fixed': nursery_head in eax and the size in edi - eax. + kind == 'fixed': nursery_head in ecx and the size in (edx - ecx). - kind == 'str/unicode': length of the string to allocate in edi. + kind == 'str/unicode': length of the string to allocate in edx. - kind == 'var': length to allocate in edi, tid in eax, + kind == 'var': length to allocate in edx, tid in ecx, and itemsize in the stack 1 (position esp+WORD). - This function must preserve all registers apart from eax and edi. + This function must preserve all registers apart from ecx and edx. """ assert kind in ['fixed', 'str', 'unicode', 'var'] mc = codebuf.MachineCodeBlockWrapper() - self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats) + self._push_all_regs_to_frame(mc, [ecx, edx], self.cpu.supports_floats) # the caller already did push_gcmap(store=True) # if kind == 'fixed': @@ -231,32 +231,32 @@ mc.SUB_ri(esp.value, 16 - WORD) # restore 16-byte alignment # magically, the above is enough on X86_32 to reserve 3 stack places if kind == 'fixed': - mc.SUB_rr(edi.value, eax.value) # compute the size we want - # the arg is already in edi + mc.SUB_rr(edx.value, ecx.value) # compute the size we want if IS_X86_32: - mc.MOV_sr(0, edi.value) + mc.MOV_sr(0, edx.value) # store the length if hasattr(self.cpu.gc_ll_descr, 'passes_frame'): - mc.MOV_sr(WORD, ebp.value) - elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'): - # for tests only - mc.MOV_rr(esi.value, ebp.value) + mc.MOV_sr(WORD, ebp.value) # for tests only + else: + mc.MOV_rr(edi.value, edx.value) # length argument + if hasattr(self.cpu.gc_ll_descr, 'passes_frame'): + mc.MOV_rr(esi.value, ebp.value) # for tests only elif kind == 'str' or kind == 'unicode': if IS_X86_32: # stack layout: [---][---][---][ret].. with 3 free stack places - mc.MOV_sr(0, edi.value) # store the length - else: - pass # length already in edi + mc.MOV_sr(0, edx.value) # store the length + elif IS_X86_64: + mc.MOV_rr(edi.value, edx.value) # length argument else: if IS_X86_32: # stack layout: [---][---][---][ret][gcmap][itemsize]... - mc.MOV_sr(WORD * 2, edi.value) # store the length - mc.MOV_sr(WORD * 1, eax.value) # store the tid - mc.MOV_rs(edi.value, WORD * 5) # load the itemsize - mc.MOV_sr(WORD * 0, edi.value) # store the itemsize + mc.MOV_sr(WORD * 2, edx.value) # store the length + mc.MOV_sr(WORD * 1, ecx.value) # store the tid + mc.MOV_rs(edx.value, WORD * 5) # load the itemsize + mc.MOV_sr(WORD * 0, edx.value) # store the itemsize else: # stack layout: [---][ret][gcmap][itemsize]... - mc.MOV_rr(edx.value, edi.value) # length - mc.MOV_rr(esi.value, eax.value) # tid + # (already in edx) # length + mc.MOV_rr(esi.value, ecx.value) # tid mc.MOV_rs(edi.value, WORD * 3) # load the itemsize self.set_extra_stack_depth(mc, 16) mc.CALL(imm(follow_jump(addr))) @@ -267,10 +267,11 @@ mc.TEST_rr(eax.value, eax.value) mc.J_il(rx86.Conditions['Z'], 0xfffff) # patched later jz_location = mc.get_relative_pos() + mc.MOV_rr(ecx.value, eax.value) # nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr() - self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats) - mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI + self._pop_all_regs_from_frame(mc, [ecx, edx], self.cpu.supports_floats) + mc.MOV(edx, heap(nursery_free_adr)) # load this in EDX self.pop_gcmap(mc) # push_gcmap(store=True) done by the caller mc.RET() # @@ -2441,9 +2442,9 @@ def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap): assert size & (WORD-1) == 0 # must be correctly aligned - self.mc.MOV(eax, heap(nursery_free_adr)) - self.mc.LEA_rm(edi.value, (eax.value, size)) - self.mc.CMP(edi, heap(nursery_top_adr)) + self.mc.MOV(ecx, heap(nursery_free_adr)) + self.mc.LEA_rm(edx.value, (ecx.value, size)) + self.mc.CMP(edx, heap(nursery_top_adr)) self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later jmp_adr = self.mc.get_relative_pos() # save the gcmap @@ -2452,19 +2453,19 @@ offset = self.mc.get_relative_pos() - jmp_adr assert 0 < offset <= 127 self.mc.overwrite(jmp_adr-1, chr(offset)) - self.mc.MOV(heap(nursery_free_adr), edi) + self.mc.MOV(heap(nursery_free_adr), edx) def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr, sizeloc, gcmap): - if sizeloc is eax: - self.mc.MOV(edi, sizeloc) - sizeloc = edi - self.mc.MOV(eax, heap(nursery_free_adr)) - if sizeloc is edi: - self.mc.ADD_rr(edi.value, eax.value) + if sizeloc is ecx: + self.mc.MOV(edx, sizeloc) + sizeloc = edx + self.mc.MOV(ecx, heap(nursery_free_adr)) + if sizeloc is edx: + self.mc.ADD_rr(edx.value, ecx.value) else: - self.mc.LEA_ra(edi.value, (eax.value, sizeloc.value, 0, 0)) - self.mc.CMP(edi, heap(nursery_top_adr)) + self.mc.LEA_ra(edx.value, (ecx.value, sizeloc.value, 0, 0)) + self.mc.CMP(edx, heap(nursery_top_adr)) self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later jmp_adr = self.mc.get_relative_pos() # save the gcmap @@ -2473,7 +2474,7 @@ offset = self.mc.get_relative_pos() - jmp_adr assert 0 < offset <= 127 self.mc.overwrite(jmp_adr-1, chr(offset)) - self.mc.MOV(heap(nursery_free_adr), edi) + self.mc.MOV(heap(nursery_free_adr), edx) def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr, lengthloc, itemsize, maxlength, gcmap, @@ -2482,39 +2483,39 @@ assert isinstance(arraydescr, ArrayDescr) # lengthloc is the length of the array, which we must not modify! - assert lengthloc is not eax and lengthloc is not edi + assert lengthloc is not ecx and lengthloc is not edx if isinstance(lengthloc, RegLoc): varsizeloc = lengthloc else: - self.mc.MOV(edi, lengthloc) - varsizeloc = edi + self.mc.MOV(edx, lengthloc) + varsizeloc = edx self.mc.CMP(varsizeloc, imm(maxlength)) self.mc.J_il8(rx86.Conditions['A'], 0) # patched later jmp_adr0 = self.mc.get_relative_pos() - self.mc.MOV(eax, heap(nursery_free_adr)) + self.mc.MOV(ecx, heap(nursery_free_adr)) if valid_addressing_size(itemsize): shift = get_scale(itemsize) else: - shift = self._imul_const_scaled(self.mc, edi.value, + shift = self._imul_const_scaled(self.mc, edx.value, varsizeloc.value, itemsize) - varsizeloc = edi + varsizeloc = edx - # now varsizeloc is a register != eax. The size of + # now varsizeloc is a register != ecx. The size of # the variable part of the array is (varsizeloc << shift) assert arraydescr.basesize >= self.gc_minimal_size_in_nursery constsize = arraydescr.basesize + self.gc_size_of_header force_realignment = (itemsize % WORD) != 0 if force_realignment: constsize += WORD - 1 - self.mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift, + self.mc.LEA_ra(edx.value, (ecx.value, varsizeloc.value, shift, constsize)) if force_realignment: - self.mc.AND_ri(edi.value, ~(WORD - 1)) - # now edi contains the total size in bytes, rounded up to a multiple + self.mc.AND_ri(edx.value, ~(WORD - 1)) + # now edx contains the total size in bytes, rounded up to a multiple # of WORD, plus nursery_free_adr - self.mc.CMP(edi, heap(nursery_top_adr)) + self.mc.CMP(edx, heap(nursery_top_adr)) self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later jmp_adr1 = self.mc.get_relative_pos() # @@ -2525,8 +2526,8 @@ self.push_gcmap(self.mc, gcmap, store=True) if kind == rewrite.FLAG_ARRAY: self.mc.MOV_si(WORD, itemsize) - self.mc.MOV(edi, lengthloc) - self.mc.MOV_ri(eax.value, arraydescr.tid) + self.mc.MOV(edx, lengthloc) + self.mc.MOV_ri(ecx.value, arraydescr.tid) addr = self.malloc_slowpath_varsize else: if kind == rewrite.FLAG_STR: @@ -2534,7 +2535,7 @@ else: assert kind == rewrite.FLAG_UNICODE addr = self.malloc_slowpath_unicode - self.mc.MOV(edi, lengthloc) + self.mc.MOV(edx, lengthloc) self.mc.CALL(imm(follow_jump(addr))) self.mc.JMP_l8(0) # jump to done, patched later jmp_location = self.mc.get_relative_pos() @@ -2544,9 +2545,9 @@ self.mc.overwrite(jmp_adr1-1, chr(offset)) self.mc.force_frame_size(DEFAULT_FRAME_BYTES) # write down the tid, but not if it's the result of the CALL - self.mc.MOV(mem(eax, 0), imm(arraydescr.tid)) + self.mc.MOV(mem(ecx, 0), imm(arraydescr.tid)) # while we're at it, this line is not needed if we've done the CALL - self.mc.MOV(heap(nursery_free_adr), edi) + self.mc.MOV(heap(nursery_free_adr), edx) # offset = self.mc.get_relative_pos() - jmp_location assert 0 < offset <= 127 diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -952,14 +952,16 @@ size_box = op.getarg(0) assert isinstance(size_box, ConstInt) size = size_box.getint() - # looking at the result - self.rm.force_allocate_reg(op, selected_reg=eax) + # hint: try to move unrelated registers away from eax and edx now + self.rm.spill_or_move_registers_before_call([ecx, edx]) + # the result will be in ecx + self.rm.force_allocate_reg(op, selected_reg=ecx) # - # We need edi as a temporary, but otherwise don't save any more + # We need edx as a temporary, but otherwise don't save any more # register. See comments in _build_malloc_slowpath(). tmp_box = TempVar() - self.rm.force_allocate_reg(tmp_box, selected_reg=edi) - gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before* + self.rm.force_allocate_reg(tmp_box, selected_reg=edx) + gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before* self.rm.possibly_free_var(tmp_box) # gc_ll_descr = self.assembler.cpu.gc_ll_descr @@ -972,15 +974,16 @@ size_box = op.getarg(0) assert not isinstance(size_box, Const) # we cannot have a const here! # sizeloc must be in a register, but we can free it now - # (we take care explicitly of conflicts with eax or edi) + # (we take care explicitly of conflicts with ecx or edx) sizeloc = self.rm.make_sure_var_in_reg(size_box) + self.rm.spill_or_move_registers_before_call([ecx, edx]) # sizeloc safe self.rm.possibly_free_var(size_box) - # the result will be in eax - self.rm.force_allocate_reg(op, selected_reg=eax) - # we need edi as a temporary + # the result will be in ecx + self.rm.force_allocate_reg(op, selected_reg=ecx) + # we need edx as a temporary tmp_box = TempVar() - self.rm.force_allocate_reg(tmp_box, selected_reg=edi) - gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before* + self.rm.force_allocate_reg(tmp_box, selected_reg=edx) + gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before* self.rm.possibly_free_var(tmp_box) # gc_ll_descr = self.assembler.cpu.gc_ll_descr @@ -997,16 +1000,21 @@ arraydescr = op.getdescr() length_box = op.getarg(2) assert not isinstance(length_box, Const) # we cannot have a const here! - # the result will be in eax - self.rm.force_allocate_reg(op, selected_reg=eax) - # we need edi as a temporary + # can only use spill_or_move_registers_before_call() as a hint if + # we are sure that length_box stays alive and won't be freed now + # (it should always be the case, see below, but better safe than sorry) + if self.rm.stays_alive(length_box): + self.rm.spill_or_move_registers_before_call([ecx, edx]) + # the result will be in ecx + self.rm.force_allocate_reg(op, selected_reg=ecx) + # we need edx as a temporary tmp_box = TempVar() - self.rm.force_allocate_reg(tmp_box, selected_reg=edi) - gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before* + self.rm.force_allocate_reg(tmp_box, selected_reg=edx) + gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before* self.rm.possibly_free_var(tmp_box) # length_box always survives: it's typically also present in the # next operation that will copy it inside the new array. It's - # fine to load it from the stack too, as long as it's != eax, edi. + # fine to load it from the stack too, as long as it is != ecx, edx. lengthloc = self.rm.loc(length_box) self.rm.possibly_free_var(length_box) # @@ -1225,6 +1233,8 @@ raise AssertionError("bad unicode item size") def _consider_math_read_timestamp(self, op): + # hint: try to move unrelated registers away from eax and edx now + self.rm.spill_or_move_registers_before_call([eax, edx]) tmpbox_high = TempVar() self.rm.force_allocate_reg(tmpbox_high, selected_reg=eax) if longlong.is_64_bit: diff --git a/rpython/jit/backend/x86/test/test_zvmprof.py b/rpython/jit/backend/x86/test/test_zvmprof.py deleted file mode 100644 --- a/rpython/jit/backend/x86/test/test_zvmprof.py +++ /dev/null @@ -1,7 +0,0 @@ - -from rpython.jit.backend.llsupport.test.zrpy_vmprof_test import CompiledVmprofTest - -class TestZVMprof(CompiledVmprofTest): - - gcrootfinder = "shadowstack" - gc = "incminimark" \ No newline at end of file diff --git a/rpython/jit/backend/zarch/callbuilder.py b/rpython/jit/backend/zarch/callbuilder.py --- a/rpython/jit/backend/zarch/callbuilder.py +++ b/rpython/jit/backend/zarch/callbuilder.py @@ -12,6 +12,8 @@ from rpython.rtyper.lltypesystem import rffi from rpython.jit.backend.llsupport.descr import CallDescr +CALL_RELEASE_GIL_STACK_OFF = 6*WORD + class CallBuilder(AbstractCallBuilder): GPR_ARGS = [r.r2, r.r3, r.r4, r.r5, r.r6] FPR_ARGS = [r.f0, r.f2, r.f4, r.f6] @@ -85,8 +87,8 @@ self.subtracted_to_sp += len(stack_params) * WORD base = len(stack_params) * WORD if self.is_call_release_gil: - self.subtracted_to_sp += 8*WORD - base += 8*WORD + self.subtracted_to_sp += CALL_RELEASE_GIL_STACK_OFF + base += CALL_RELEASE_GIL_STACK_OFF for idx,i in enumerate(stack_params): loc = arglocs[i] offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx @@ -187,7 +189,7 @@ RSHADOWPTR = self.RSHADOWPTR RFASTGILPTR = self.RFASTGILPTR # - pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD + pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP)) # # Save this thread's shadowstack pointer into r8, for later comparison @@ -286,7 +288,7 @@ if gcrootmap: if gcrootmap.is_shadow_stack and self.is_call_release_gil: self.mc.LGR(r.SCRATCH, RSHADOWOLD) - pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD + pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP)) def write_real_errno(self, save_err): diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py --- a/rpython/jit/backend/zarch/instructions.py +++ b/rpython/jit/backend/zarch/instructions.py @@ -29,6 +29,7 @@ 'MGHI': ('ri', ['\xA7','\x0D']), 'MSGFI': ('ril', ['\xC2','\x00']), 'MLGR': ('rre', ['\xB9','\x86'], 'eo,r'), + 'MLG': ('rxy', ['\xE3','\x86'], 'eo,bid'), # div/mod 'DSGR': ('rre', ['\xB9','\x0D'], 'eo,r'), 'DSG': ('rxy', ['\xE3','\x0D'], 'eo,bidl'), @@ -44,7 +45,6 @@ # rotating 'RISBG': ('rie_f', ['\xEC','\x55']), - 'RISBGN': ('rie_f', ['\xEC','\x59']), # invert & negative & absolute 'LPGR': ('rre', ['\xB9','\x00']), diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py --- a/rpython/jit/backend/zarch/opassembler.py +++ b/rpython/jit/backend/zarch/opassembler.py @@ -160,11 +160,15 @@ omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow)) omc.overwrite() - emit_int_floordiv = gen_emit_div_mod('DSGR', 'DSG') - emit_uint_floordiv = gen_emit_div_mod('DLGR', 'DLG') - # NOTE division sets one register with the modulo value, thus - # the regalloc ensures the right register survives. - emit_int_mod = gen_emit_div_mod('DSGR', 'DSG') + def emit_uint_mul_high(self, op, arglocs, regalloc): + r0, _, a1 = arglocs + # _ carries the value, contents of r0 are ignored + assert not r0.is_imm() + assert not a1.is_imm() + if a1.is_core_reg(): + self.mc.MLGR(r0, a1) + else: + self.mc.MLG(r0, a1) def emit_int_invert(self, op, arglocs, regalloc): l0, = arglocs diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py --- a/rpython/jit/backend/zarch/regalloc.py +++ b/rpython/jit/backend/zarch/regalloc.py @@ -733,9 +733,6 @@ prepare_int_sub_ovf = helper.prepare_int_sub prepare_int_mul = helper.prepare_int_mul prepare_int_mul_ovf = helper.prepare_int_mul_ovf - prepare_int_floordiv = helper.prepare_int_div - prepare_uint_floordiv = helper.prepare_int_div - prepare_int_mod = helper.prepare_int_mod prepare_nursery_ptr_increment = prepare_int_add prepare_int_and = helper.prepare_int_logic @@ -746,6 +743,18 @@ prepare_int_lshift = helper.prepare_int_shift prepare_uint_rshift = helper.prepare_int_shift + def prepare_uint_mul_high(self, op): + a0 = op.getarg(0) + a1 = op.getarg(1) + if a0.is_constant(): + a0, a1 = a1, a0 + if helper.check_imm32(a1): + l1 = self.ensure_reg(a1) + else: + l1 = self.ensure_reg_or_pool(a1) + lr,lq = self.rm.ensure_even_odd_pair(a0, op, bind_first=True) + return [lr, lq, l1] + prepare_int_le = helper.generate_cmp_op() prepare_int_lt = helper.generate_cmp_op() prepare_int_ge = helper.generate_cmp_op() diff --git a/rpython/jit/backend/zarch/test/test_assembler.py b/rpython/jit/backend/zarch/test/test_assembler.py --- a/rpython/jit/backend/zarch/test/test_assembler.py +++ b/rpython/jit/backend/zarch/test/test_assembler.py @@ -155,7 +155,15 @@ s64 = bin(fac_data[1])[2:] print(f64) print(s64) + for i,c in enumerate(f64): + print('index: %d is set? %s' % (i,c)) + + assert f64[1] == '1' # The z/Architecture architectural mode is installed. + assert f64[2] == '1' # The z/Architecture architectural mode is active. assert f64[18] == '1' # long displacement facility + assert f64[21] == '1' # extended immediate facility + assert f64[34] == '1' # general instruction facility + assert f64[41] == '1' # floating-point-support-enhancement def test_load_byte_zero_extend(self): adr = self.a.datablockwrapper.malloc_aligned(16, 16) @@ -189,7 +197,7 @@ @py.test.mark.parametrize('p', [2**32,2**32+1,2**63-1,2**63-2,0,1,2,3,4,5,6,7,8,10001]) def test_align_withroll(self, p): self.a.mc.load_imm(r.r2, p & 0xffffFFFFffffFFFF) - self.a.mc.RISBGN(r.r2, r.r2, loc.imm(0), loc.imm(0x80 | 60), loc.imm(0)) + self.a.mc.RISBG(r.r2, r.r2, loc.imm(0), loc.imm(0x80 | 60), loc.imm(0)) self.a.mc.BCR(con.ANY, r.r14) assert run_asm(self.a) == rffi.cast(rffi.ULONG,p) & ~(7) @@ -214,7 +222,7 @@ n = 13 l = loc self.a.mc.load_imm(r.r2, 7<<n) - self.a.mc.RISBGN(r.r2, r.r2, l.imm(61), l.imm(0x80 | 63), l.imm(64-n)) + self.a.mc.RISBG(r.r2, r.r2, l.imm(61), l.imm(0x80 | 63), l.imm(64-n)) self.a.mc.BCR(con.ANY, r.r14) assert run_asm(self.a) == 7 @@ -222,7 +230,7 @@ n = 16 l = loc self.a.mc.load_imm(r.r2, 0xffFFffFF) - self.a.mc.RISBGN(r.r2, r.r2, l.imm(60), l.imm(0x80 | 63), l.imm(64-n)) + self.a.mc.RISBG(r.r2, r.r2, l.imm(60), l.imm(0x80 | 63), l.imm(64-n)) self.a.mc.BCR(con.ANY, r.r14) assert run_asm(self.a) == 15 diff --git a/rpython/jit/backend/zarch/test/test_auto_encoding.py b/rpython/jit/backend/zarch/test/test_auto_encoding.py --- a/rpython/jit/backend/zarch/test/test_auto_encoding.py +++ b/rpython/jit/backend/zarch/test/test_auto_encoding.py @@ -204,7 +204,7 @@ g.write('%s\n' % op) oplist.append(op) g.write('\t.string "%s"\n' % END_TAG) - proc = subprocess.Popen(['as', '-m64', '-mzarch', '-march=zEC12', + proc = subprocess.Popen(['as', '-m64', '-mzarch', '-march=z196', inputname, '-o', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/rpython/jit/backend/zarch/test/test_int.py b/rpython/jit/backend/zarch/test/test_int.py --- a/rpython/jit/backend/zarch/test/test_int.py +++ b/rpython/jit/backend/zarch/test/test_int.py @@ -35,41 +35,13 @@ fail = self.cpu.get_latest_descr(deadframe) assert fail == finishdescr # ensures that guard is not taken! - def test_double_evenodd_pair(self): - code = """ - [i0] - i1 = int_floordiv(i0, 2) - i2 = int_floordiv(i0, 3) - i3 = int_floordiv(i0, 4) - i4 = int_floordiv(i0, 5) - i5 = int_floordiv(i0, 6) - i6 = int_floordiv(i0, 7) - i7 = int_floordiv(i0, 8) - i8 = int_le(i1, 0) - guard_true(i8) [i1,i2,i3,i4,i5,i6,i7] - finish(i0, descr=faildescr) - """ - # the guard forces 3 spills because after 4 divisions - # all even slots of the managed registers are full - loop = parse(code, namespace={'faildescr': BasicFinalDescr(1)}) - looptoken = JitCellToken() - self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken) - deadframe = self.cpu.execute_token(looptoken, 100) - fail = self.cpu.get_latest_descr(deadframe) - for i in range(2,9): - assert self.cpu.get_int_value(deadframe, i-2) == 100//i - - - @py.test.mark.parametrize('value', [2,3,15,2**16]) def test_evenodd_pair_extensive(self, value): instrs = [] failargs = [] values = [] j = 0 - mapping = (('int_floordiv',lambda x,y: x // y), - ('int_mod', lambda x,y: x % y), - ('int_mul_ovf', lambda x,y: x * y)) + mapping = (('int_mul_ovf', lambda x,y: x * y),) for i in range(20): name, func = mapping[j] instrs.append("i{d} = {i}(i0, {d})".format(d=i+1, i=name)) diff --git a/rpython/jit/backend/zarch/test/test_regalloc.py b/rpython/jit/backend/zarch/test/test_regalloc.py --- a/rpython/jit/backend/zarch/test/test_regalloc.py +++ b/rpython/jit/backend/zarch/test/test_regalloc.py @@ -146,128 +146,3 @@ assert cpu.get_int_value(deadframe, 0) == 0 assert cpu.get_int_value(deadframe, 1) == -1000 -def test_bug_0(): - cpu, deadframe = run([-13, 10, 10, 8, -8, -16, -18, 46, -12, 26], ''' - [i1, i2, i3, i4, i5, i6, i7, i8, i9, i10] - i11 = uint_gt(i3, -48) - i12 = int_xor(i8, i1) - i13 = int_gt(i6, -9) - i14 = int_le(i13, i2) - i15 = int_le(i11, i5) - i16 = uint_ge(i13, i13) - i17 = int_or(i9, -23) - i18 = int_lt(i10, i13) - i19 = int_or(i15, i5) - i20 = int_xor(i17, 54) - i21 = int_mul(i8, i10) - i22 = int_or(i3, i9) - i41 = int_and(i11, -4) - i42 = int_or(i41, 1) - i23 = int_mod(i12, i42) - i24 = int_is_true(i6) - i25 = uint_rshift(i15, 6) - i26 = int_or(-4, i25) - i27 = int_invert(i8) - i28 = int_sub(-113, i11) - i29 = int_neg(i7) - i30 = int_neg(i24) - i31 = int_floordiv(i3, 53) - i32 = int_mul(i28, i27) - i43 = int_and(i18, -4) - i44 = int_or(i43, 1) - i33 = int_mod(i26, i44) - i34 = int_or(i27, i19) - i35 = uint_lt(i13, 1) - i45 = int_and(i21, 31) - i36 = int_rshift(i21, i45) - i46 = int_and(i20, 31) - i37 = uint_rshift(i4, i46) - i38 = uint_gt(i33, -11) - i39 = int_neg(i7) - i40 = int_gt(i24, i32) - i99 = same_as_i(0) - guard_true(i99) [i40, i36, i37, i31, i16, i34, i35, i23, i22, i29, i14, i39, i30, i38] - finish(42) - ''') - assert cpu.get_int_value(deadframe, 0) == 0 - assert cpu.get_int_value(deadframe, 1) == 0 - assert cpu.get_int_value(deadframe, 2) == 0 - assert cpu.get_int_value(deadframe, 3) == 0 - assert cpu.get_int_value(deadframe, 4) == 1 - assert cpu.get_int_value(deadframe, 5) == -7 - assert cpu.get_int_value(deadframe, 6) == 1 - assert cpu.get_int_value(deadframe, 7) == 0 - assert cpu.get_int_value(deadframe, 8) == -2 - assert cpu.get_int_value(deadframe, 9) == 18 - assert cpu.get_int_value(deadframe, 10) == 1 - assert cpu.get_int_value(deadframe, 11) == 18 - assert cpu.get_int_value(deadframe, 12) == -1 - assert cpu.get_int_value(deadframe, 13) == 0 - -def test_bug_1(): - cpu, deadframe = run([17, -20, -6, 6, 1, 13, 13, 9, 49, 8], ''' - [i1, i2, i3, i4, i5, i6, i7, i8, i9, i10] - i11 = uint_lt(i6, 0) - i41 = int_and(i3, 31) - i12 = int_rshift(i3, i41) - i13 = int_neg(i2) - i14 = int_add(i11, i7) - i15 = int_or(i3, i2) - i16 = int_or(i12, i12) - i17 = int_ne(i2, i5) - i42 = int_and(i5, 31) - i18 = uint_rshift(i14, i42) - i43 = int_and(i14, 31) - i19 = int_lshift(7, i43) - i20 = int_neg(i19) - i21 = int_mod(i3, 1) - i22 = uint_ge(i15, i1) - i44 = int_and(i16, 31) - i23 = int_lshift(i8, i44) - i24 = int_is_true(i17) - i45 = int_and(i5, 31) - i25 = int_lshift(i14, i45) - i26 = int_lshift(i5, 17) - i27 = int_eq(i9, i15) - i28 = int_ge(0, i6) - i29 = int_neg(i15) - i30 = int_neg(i22) - i31 = int_add(i7, i16) - i32 = uint_lt(i19, i19) - i33 = int_add(i2, 1) - i34 = int_neg(i5) - i35 = int_add(i17, i24) - i36 = uint_lt(2, i16) - i37 = int_neg(i9) - i38 = int_gt(i4, i11) - i39 = int_lt(i27, i22) - i40 = int_neg(i27) - i99 = same_as_i(0) - guard_true(i99) [i40, i10, i36, i26, i13, i30, i21, i33, i18, i25, i31, i32, i28, i29, i35, i38, i20, i39, i34, i23, i37] - finish(-42) - ''') - assert cpu.get_int_value(deadframe, 0) == 0 - assert cpu.get_int_value(deadframe, 1) == 8 - assert cpu.get_int_value(deadframe, 2) == 1 - assert cpu.get_int_value(deadframe, 3) == 131072 - assert cpu.get_int_value(deadframe, 4) == 20 - assert cpu.get_int_value(deadframe, 5) == -1 - assert cpu.get_int_value(deadframe, 6) == 0 - assert cpu.get_int_value(deadframe, 7) == -19 - assert cpu.get_int_value(deadframe, 8) == 6 - assert cpu.get_int_value(deadframe, 9) == 26 - assert cpu.get_int_value(deadframe, 10) == 12 - assert cpu.get_int_value(deadframe, 11) == 0 - assert cpu.get_int_value(deadframe, 12) == 0 - assert cpu.get_int_value(deadframe, 13) == 2 - assert cpu.get_int_value(deadframe, 14) == 2 - assert cpu.get_int_value(deadframe, 15) == 1 - assert cpu.get_int_value(deadframe, 16) == -57344 - assert cpu.get_int_value(deadframe, 17) == 1 - assert cpu.get_int_value(deadframe, 18) == -1 - if WORD == 4: - assert cpu.get_int_value(deadframe, 19) == -2147483648 - elif WORD == 8: - assert cpu.get_int_value(deadframe, 19) == 19327352832 - assert cpu.get_int_value(deadframe, 20) == -49 - diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -521,6 +521,8 @@ # XXX some of the following functions should not become residual calls # but be really compiled rewrite_op_int_abs = _do_builtin_call + rewrite_op_int_floordiv = _do_builtin_call + rewrite_op_int_mod = _do_builtin_call rewrite_op_llong_abs = _do_builtin_call rewrite_op_llong_floordiv = _do_builtin_call rewrite_op_llong_mod = _do_builtin_call @@ -530,7 +532,6 @@ rewrite_op_gc_id = _do_builtin_call rewrite_op_gc_pin = _do_builtin_call rewrite_op_gc_unpin = _do_builtin_call - rewrite_op_uint_mod = _do_builtin_call rewrite_op_cast_float_to_uint = _do_builtin_call rewrite_op_cast_uint_to_float = _do_builtin_call rewrite_op_weakref_create = _do_builtin_call diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py --- a/rpython/jit/codewriter/support.py +++ b/rpython/jit/codewriter/support.py @@ -248,6 +248,26 @@ mask = x >> (LONG_BIT - 1) return (x ^ mask) - mask + +def _ll_2_int_floordiv(x, y): + # this is used only if the RPython program uses llop.int_floordiv() + # explicitly. For 'a // b', see _handle_int_special() in jtransform.py. + # This is the reverse of rpython.rtyper.rint.ll_int_py_div(), i.e. + # the same logic as rpython.rtyper.lltypesystem.opimpl.op_int_floordiv + # but written in a no-branch style. + r = x // y + p = r * y + # the JIT knows that if x and y are both positive, this is just 'r' + return r + (((x ^ y) >> (LONG_BIT - 1)) & (p != x)) + +def _ll_2_int_mod(x, y): + # same comments as _ll_2_int_floordiv() + r = x % y + # the JIT knows that if x and y are both positive, this doesn't change 'r' + r -= y & (((x ^ y) & (r | -r)) >> (LONG_BIT - 1)) + return r + + def _ll_1_cast_uint_to_float(x): # XXX on 32-bit platforms, this should be done using cast_longlong_to_float # (which is a residual call right now in the x86 backend) @@ -417,6 +437,8 @@ # in the following calls to builtins, the JIT is allowed to look inside: inline_calls_to = [ ('int_abs', [lltype.Signed], lltype.Signed), + ('int_floordiv', [lltype.Signed, lltype.Signed], lltype.Signed), + ('int_mod', [lltype.Signed, lltype.Signed], lltype.Signed), ('ll_math.ll_math_sqrt', [lltype.Float], lltype.Float), ] diff --git a/rpython/jit/codewriter/test/test_flatten.py b/rpython/jit/codewriter/test/test_flatten.py --- a/rpython/jit/codewriter/test/test_flatten.py +++ b/rpython/jit/codewriter/test/test_flatten.py @@ -478,7 +478,7 @@ except ZeroDivisionError: return -42 self.encoding_test(f, [7, 2], """ - residual_call_ir_i $<* fn ll_int_floordiv_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2 + residual_call_ir_i $<* fn ll_int_py_div_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2 -live- catch_exception L1 int_return %i2 @@ -505,7 +505,7 @@ return 42 # XXX so far, this really produces a int_mod_ovf_zer... self.encoding_test(f, [7, 2], """ - residual_call_ir_i $<* fn ll_int_mod_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2 + residual_call_ir_i $<* fn ll_int_py_mod_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2 -live- catch_exception L1 int_return %i2 diff --git a/rpython/jit/codewriter/test/test_support.py b/rpython/jit/codewriter/test/test_support.py --- a/rpython/jit/codewriter/test/test_support.py +++ b/rpython/jit/codewriter/test/test_support.py @@ -3,7 +3,6 @@ from rpython.rtyper.annlowlevel import llstr from rpython.flowspace.model import Variable, Constant, SpaceOperation from rpython.jit.codewriter.support import decode_builtin_call, LLtypeHelpers -from rpython.jit.codewriter.support import _ll_1_int_abs def newconst(x): return Constant(x, lltype.typeOf(x)) @@ -136,6 +135,7 @@ py.test.raises(AttributeError, func, llstr(None), p2) def test_int_abs(): + from rpython.jit.codewriter.support import _ll_1_int_abs assert _ll_1_int_abs(0) == 0 assert _ll_1_int_abs(1) == 1 assert _ll_1_int_abs(10) == 10 @@ -143,3 +143,14 @@ assert _ll_1_int_abs(-1) == 1 assert _ll_1_int_abs(-10) == 10 assert _ll_1_int_abs(-sys.maxint) == sys.maxint + +def test_int_floordiv_mod(): + from rpython.rtyper.lltypesystem.lloperation import llop + from rpython.jit.codewriter.support import _ll_2_int_floordiv, _ll_2_int_mod + for x in range(-6, 7): + for y in range(-3, 4): + if y != 0: + assert (_ll_2_int_floordiv(x, y) == + llop.int_floordiv(lltype.Signed, x, y)) + assert (_ll_2_int_mod(x, y) == + llop.int_mod(lltype.Signed, x, y)) diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -97,17 +97,14 @@ self.emit_operation(op) r = self.getintbound(op) - if b2.is_constant(): - val = b2.lower - if val >= 0: - r.intersect(IntBound(0, val)) - elif b1.is_constant(): - val = b1.lower - if val >= 0: - r.intersect(IntBound(0, val)) - elif b1.known_ge(IntBound(0, 0)) and b2.known_ge(IntBound(0, 0)): - lesser = min(b1.upper, b2.upper) - r.intersect(IntBound(0, next_pow2_m1(lesser))) + pos1 = b1.known_ge(IntBound(0, 0)) + pos2 = b2.known_ge(IntBound(0, 0)) + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(b1) + if pos2: + r.make_le(b2) def optimize_INT_SUB(self, op): self.emit_operation(op) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -5188,6 +5188,25 @@ """ self.optimize_loop(ops, ops) + def test_int_and_positive(self): + ops = """ + [i0, i1] + i2 = int_ge(i1, 0) + guard_true(i2) [] + i3 = int_and(i0, i1) + i4 = int_ge(i3, 0) + guard_true(i4) [] + jump(i3) + """ + expected = """ + [i0, i1] + i2 = int_ge(i1, 0) + guard_true(i2) [] + i3 = int_and(i0, i1) + jump(i3) + """ + self.optimize_loop(ops, expected) + def test_int_or_cmp_above_bounds(self): ops = """ [p0,p1] @@ -5252,6 +5271,47 @@ """ self.optimize_loop(ops, ops) + def test_int_xor_positive_is_positive(self): + ops = """ + [i0, i1] + i2 = int_lt(i0, 0) + guard_false(i2) [] + i3 = int_lt(i1, 0) + guard_false(i3) [] + i4 = int_xor(i0, i1) + i5 = int_lt(i4, 0) + guard_false(i5) [] + jump(i4, i0) + """ + expected = """ + [i0, i1] + i2 = int_lt(i0, 0) + guard_false(i2) [] + i3 = int_lt(i1, 0) + guard_false(i3) [] + i4 = int_xor(i0, i1) + jump(i4, i0) + """ + self.optimize_loop(ops, expected) + + def test_positive_rshift_bits_minus_1(self): + ops = """ + [i0] + i2 = int_lt(i0, 0) + guard_false(i2) [] + i3 = int_rshift(i2, %d) + escape_n(i3) + jump(i0) + """ % (LONG_BIT - 1,) + expected = """ + [i0] + i2 = int_lt(i0, 0) + guard_false(i2) [] + escape_n(0) + jump(i0) + """ + self.optimize_loop(ops, expected) + def test_int_or_same_arg(self): ops = """ [i0] diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py --- a/rpython/jit/metainterp/test/test_ajit.py +++ b/rpython/jit/metainterp/test/test_ajit.py @@ -955,6 +955,75 @@ res = self.meta_interp(f, [-5]) assert res == 5+4+3+2+1+0+1+2+3+4+5+6+7+8+9 + def test_int_c_div(self): + from rpython.rlib.rarithmetic import int_c_div + myjitdriver = JitDriver(greens = [], reds = ['i', 't']) + def f(i): + t = 0 + while i < 10: + myjitdriver.can_enter_jit(i=i, t=t) + myjitdriver.jit_merge_point(i=i, t=t) + t += int_c_div(-100, i) + i += 1 + return t + expected = -sum([100 // n for n in range(1, 10)]) + assert f(1) == expected + res = self.meta_interp(f, [1]) + assert res == expected + # should contain a call_i(..., OS=OS_INT_PY_DIV) + + def test_int_c_mod(self): + from rpython.rlib.rarithmetic import int_c_mod + myjitdriver = JitDriver(greens = [], reds = ['i', 't']) + def f(i): + t = 0 + while i < 10: + myjitdriver.can_enter_jit(i=i, t=t) + myjitdriver.jit_merge_point(i=i, t=t) + t += int_c_mod(-100, i) + i += 1 + return t + expected = -sum([100 % n for n in range(1, 10)]) + assert f(1) == expected + res = self.meta_interp(f, [1]) + assert res == expected + # should contain a call_i(..., OS=OS_INT_PY_MOD) + + def test_positive_c_div_mod(self): + from rpython.rlib.rarithmetic import int_c_div, int_c_mod + myjitdriver = JitDriver(greens = [], reds = ['i', 't']) + def f(i): + t = 0 + while i < 10: + myjitdriver.can_enter_jit(i=i, t=t) + myjitdriver.jit_merge_point(i=i, t=t) + assert i > 0 + t += int_c_div(100, i) - int_c_mod(100, i) + i += 1 + return t + expected = sum([100 // n - 100 % n for n in range(1, 10)]) + assert f(1) == expected + res = self.meta_interp(f, [1]) + assert res == expected + # all the correction code should be dead now, xxx test that + + def test_int_c_div_by_constant(self): + from rpython.rlib.rarithmetic import int_c_div + myjitdriver = JitDriver(greens = ['k'], reds = ['i', 't']) + def f(i, k): + t = 0 + while i < 100: + myjitdriver.can_enter_jit(i=i, t=t, k=k) + myjitdriver.jit_merge_point(i=i, t=t, k=k) + t += int_c_div(i, k) + i += 1 + return t + expected = sum([i // 10 for i in range(51, 100)]) + assert f(-50, 10) == expected + res = self.meta_interp(f, [-50, 10]) + assert res == expected + self.check_resops(call=0, uint_mul_high=2) + def test_float(self): myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res']) def f(x, y): diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -281,11 +281,12 @@ large_object=8*WORD, ArenaCollectionClass=None, **kwds): + "NOT_RPYTHON" MovingGCBase.__init__(self, config, **kwds) assert small_request_threshold % WORD == 0 self.read_from_env = read_from_env self.nursery_size = nursery_size - + self.small_request_threshold = small_request_threshold self.major_collection_threshold = major_collection_threshold self.growth_rate_max = growth_rate_max @@ -644,6 +645,7 @@ # Get the memory from the nursery. If there is not enough space # there, do a collect first. result = self.nursery_free + ll_assert(result != llmemory.NULL, "uninitialized nursery") self.nursery_free = new_free = result + totalsize if new_free > self.nursery_top: result = self.collect_and_reserve(totalsize) @@ -703,6 +705,7 @@ # Get the memory from the nursery. If there is not enough space # there, do a collect first. result = self.nursery_free + ll_assert(result != llmemory.NULL, "uninitialized nursery") self.nursery_free = new_free = result + totalsize if new_free > self.nursery_top: result = self.collect_and_reserve(totalsize) @@ -1139,7 +1142,8 @@ Implemented a bit obscurely by checking an unrelated flag that can never be set on a young object -- except if tid == -42. """ - assert self.is_in_nursery(obj) + ll_assert(self.is_in_nursery(obj), + "Can't forward an object outside the nursery.") tid = self.header(obj).tid result = (tid & GCFLAG_FINALIZATION_ORDERING != 0) if result: @@ -1463,7 +1467,8 @@ objhdr.tid |= GCFLAG_CARDS_SET remember_young_pointer_from_array2._dont_inline_ = True - assert self.card_page_indices > 0 + ll_assert(self.card_page_indices > 0, + "non-positive card_page_indices") self.remember_young_pointer_from_array2 = ( remember_young_pointer_from_array2) @@ -1513,7 +1518,8 @@ return True # ^^^ a fast path of write-barrier # - if source_hdr.tid & GCFLAG_HAS_CARDS != 0: + if (self.card_page_indices > 0 and # check constant-folded + source_hdr.tid & GCFLAG_HAS_CARDS != 0): # if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0: # The source object may have random young pointers. @@ -1548,7 +1554,8 @@ def manually_copy_card_bits(self, source_addr, dest_addr, length): # manually copy the individual card marks from source to dest - assert self.card_page_indices > 0 + ll_assert(self.card_page_indices > 0, + "non-positive card_page_indices") bytes = self.card_marking_bytes_for_length(length) # anybyte = 0 @@ -1721,12 +1728,15 @@ nursery_barriers = self.AddressDeque() prev = self.nursery self.surviving_pinned_objects.sort() - assert self.pinned_objects_in_nursery == \ - self.surviving_pinned_objects.length() + ll_assert( + self.pinned_objects_in_nursery == \ + self.surviving_pinned_objects.length(), + "pinned_objects_in_nursery != surviving_pinned_objects.length()") while self.surviving_pinned_objects.non_empty(): # cur = self.surviving_pinned_objects.pop() - assert cur >= prev + ll_assert( + cur >= prev, "pinned objects encountered in backwards order") # # clear the arena between the last pinned object (or arena start) # and the pinned object @@ -1784,7 +1794,8 @@ debug_stop("gc-minor") def _reset_flag_old_objects_pointing_to_pinned(self, obj, ignore): - assert self.header(obj).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN + ll_assert(self.header(obj).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN != 0, + "!GCFLAG_PINNED_OBJECT_PARENT_KNOWN, but requested to reset.") self.header(obj).tid &= ~GCFLAG_PINNED_OBJECT_PARENT_KNOWN def _visit_old_objects_pointing_to_pinned(self, obj, ignore): diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -554,6 +554,7 @@ assert res # we optimized it assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS == 0 # and we copied the flag # + self.gc.card_page_indices = 128 # force > 0 hdr_src.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS hdr_dst.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS hdr_src.tid |= minimark.GCFLAG_HAS_CARDS diff --git a/rpython/rlib/clibffi.py b/rpython/rlib/clibffi.py --- a/rpython/rlib/clibffi.py +++ b/rpython/rlib/clibffi.py @@ -148,7 +148,8 @@ ('elements', FFI_TYPE_PP)]) ffi_cif = rffi_platform.Struct('ffi_cif', []) - ffi_closure = rffi_platform.Struct('ffi_closure', []) + ffi_closure = rffi_platform.Struct('ffi_closure', + [('user_data', rffi.VOIDP)]) def add_simple_type(type_name): for name in ['size', 'alignment', 'type']: diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py --- a/rpython/rlib/rarithmetic.py +++ b/rpython/rlib/rarithmetic.py @@ -650,6 +650,26 @@ from rpython.rtyper.lltypesystem.lloperation import llop return llop.int_force_ge_zero(lltype.Signed, n) +def int_c_div(x, y): + """Return the result of the C-style 'x / y'. This differs from the + Python-style division if (x < 0 xor y < 0). The JIT implements it + with a Python-style division followed by correction code. This + is not that bad, because the JIT removes the correction code if + x and y are both nonnegative, and if y is any nonnegative constant + then the division turns into a rshift or a mul. + """ + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem.lloperation import llop + return llop.int_floordiv(lltype.Signed, x, y) + +def int_c_mod(x, y): + """Return the result of the C-style 'x % y'. This differs from the + Python-style division if (x < 0 xor y < 0). + """ + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem.lloperation import llop + return llop.int_mod(lltype.Signed, x, y) + @objectmodel.specialize.ll() def byteswap(arg): """ Convert little->big endian and the opposite diff --git a/rpython/rlib/rvmprof/src/vmprof_config.h b/rpython/rlib/rvmprof/src/vmprof_config.h --- a/rpython/rlib/rvmprof/src/vmprof_config.h +++ b/rpython/rlib/rvmprof/src/vmprof_config.h @@ -1,10 +1,17 @@ -#define HAVE_SYS_UCONTEXT_H +#if !defined(__OpenBSD__) +# define HAVE_SYS_UCONTEXT_H +#else +# define HAVE_SIGNAL_H +#endif + #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) #ifdef __i386__ #define PC_FROM_UCONTEXT uc_mcontext.mc_eip #else #define PC_FROM_UCONTEXT uc_mcontext.mc_rip #endif +#elif defined(__OpenBSD__) +#define PC_FROM_UCONTEXT sc_rip #elif defined( __APPLE__) #if ((ULONG_MAX) == (UINT_MAX)) #define PC_FROM_UCONTEXT uc_mcontext->__ss.__eip diff --git a/rpython/rlib/rvmprof/src/vmprof_getpc.h b/rpython/rlib/rvmprof/src/vmprof_getpc.h --- a/rpython/rlib/rvmprof/src/vmprof_getpc.h +++ b/rpython/rlib/rvmprof/src/vmprof_getpc.h @@ -65,6 +65,10 @@ #elif defined(HAVE_CYGWIN_SIGNAL_H) #include <cygwin/signal.h> typedef ucontext ucontext_t; +#elif defined(HAVE_SIGNAL_H) +#include <signal.h> +#else +# error "don't know how to get the pc on this platform" #endif diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -2,6 +2,7 @@ from rpython.rtyper.test.test_llinterp import interpret from rpython.rlib.rarithmetic import * from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError +from hypothesis import given, strategies import sys import py @@ -393,6 +394,21 @@ assert not int_between(1, 2, 2) assert not int_between(1, 1, 1) +def test_int_force_ge_zero(): + assert int_force_ge_zero(42) == 42 + assert int_force_ge_zero(0) == 0 + assert int_force_ge_zero(-42) == 0 + +@given(strategies.integers(min_value=0, max_value=sys.maxint), + strategies.integers(min_value=1, max_value=sys.maxint)) +def test_int_c_div_mod(x, y): + assert int_c_div(~x, y) == -(abs(~x) // y) + assert int_c_div( x,-y) == -(x // y) + assert int_c_div(~x,-y) == +(abs(~x) // y) + for x1 in [x, ~x]: + for y1 in [y, -y]: + assert int_c_div(x1, y1) * y1 + int_c_mod(x1, y1) == x1 + # these can't be prebuilt on 32bit U1 = r_ulonglong(0x0102030405060708L) U2 = r_ulonglong(0x0807060504030201L) diff --git a/rpython/rtyper/rint.py b/rpython/rtyper/rint.py --- a/rpython/rtyper/rint.py +++ b/rpython/rtyper/rint.py @@ -236,11 +236,11 @@ return _rtype_template(hop, 'mul_ovf') def rtype_floordiv(_, hop): - return _rtype_call_helper(hop, 'floordiv', [ZeroDivisionError]) + return _rtype_call_helper(hop, 'py_div', [ZeroDivisionError]) rtype_inplace_floordiv = rtype_floordiv def rtype_floordiv_ovf(_, hop): - return _rtype_call_helper(hop, 'floordiv_ovf', [ZeroDivisionError]) + return _rtype_call_helper(hop, 'py_div_ovf', [ZeroDivisionError]) # turn 'div' on integers into 'floordiv' rtype_div = rtype_floordiv @@ -250,11 +250,11 @@ # 'def rtype_truediv' is delegated to the superclass FloatRepr def rtype_mod(_, hop): - return _rtype_call_helper(hop, 'mod', [ZeroDivisionError]) + return _rtype_call_helper(hop, 'py_mod', [ZeroDivisionError]) rtype_inplace_mod = rtype_mod def rtype_mod_ovf(_, hop): - return _rtype_call_helper(hop, 'mod_ovf', [ZeroDivisionError]) + return _rtype_call_helper(hop, 'py_mod_ovf', [ZeroDivisionError]) def rtype_xor(_, hop): return _rtype_template(hop, 'xor') @@ -319,7 +319,7 @@ vlist = hop.inputargs(repr, repr2) prefix = repr.opprefix - if '_ovf' in func or func.startswith(('mod', 'floordiv')): + if '_ovf' in func or func.startswith(('py_mod', 'py_div')): if prefix+func not in ('int_add_ovf', 'int_add_nonneg_ovf', 'int_sub_ovf', 'int_mul_ovf'): raise TyperError("%r should not be used here any more" % (func,)) @@ -353,7 +353,7 @@ any_implicit_exception = True if not any_implicit_exception: - if not func.startswith(('mod', 'floordiv')): + if not func.startswith(('py_mod', 'py_div')): return _rtype_template(hop, func) repr = hop.r_result @@ -388,7 +388,7 @@ # ---------- floordiv ---------- @jit.oopspec("int.py_div(x, y)") -def ll_int_floordiv(x, y): +def ll_int_py_div(x, y): # Python, and RPython, assume that integer division truncates # towards -infinity. However, in C, integer division truncates # towards 0. So assuming that, we need to apply a correction @@ -400,159 +400,159 @@ return r + (u >> INT_BITS_1) @jit.oopspec("int.py_div(x, y)") -def ll_int_floordiv_nonnegargs(x, y): +def ll_int_py_div_nonnegargs(x, y): from rpython.rlib.debug import ll_assert r = llop.int_floordiv(Signed, x, y) # <= truncates like in C - ll_assert(r >= 0, "int_floordiv_nonnegargs(): one arg is negative") + ll_assert(r >= 0, "int_py_div_nonnegargs(): one arg is negative") return r -def ll_int_floordiv_zer(x, y): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit