Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: testing-cleanup
Changeset: r85081:80829afb3cac
Date: 2016-06-10 15:18 +0100
http://bitbucket.org/pypy/pypy/changeset/80829afb3cac/

Log:    hg merge default

diff too long, truncating to 2000 out of 2410 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -25,3 +25,4 @@
 80ef432a32d9baa4b3c5a54c215e8ebe499f6374 release-5.1.2
 40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
 40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
+c09c19272c990a0611b17569a0085ad1ab00c8ff release-pypy2.7-v5.3
diff --git a/pypy/doc/release-pypy2.7-v5.3.0.rst 
b/pypy/doc/release-pypy2.7-v5.3.0.rst
--- a/pypy/doc/release-pypy2.7-v5.3.0.rst
+++ b/pypy/doc/release-pypy2.7-v5.3.0.rst
@@ -176,8 +176,8 @@
   * Reduce the size of generated code by using the same function objects in
     all generated subclasses
 
- * Share cpyext Py* function wrappers according to the signature, shrining the
-   translated libpypy.so by about 
+  * Share cpyext Py* function wrappers according to the signature, shrinking the
+    translated libpypy.so by about 10% (measured without the JIT)
 
   * Compile c snippets with -Werror, and fix warnings it exposed
 
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,20 @@
 .. this is a revision shortly after release-pypy2.7-v5.3
 .. startrev: 873218a739f1
 
+.. branch: fix-gen-dfa
+
+Resolves an issue with the generator script to build the dfa for Python syntax.
+
+.. branch: z196-support
+
+Fixes a critical issue in the register allocator and extends support on s390x.
+PyPy runs and translates on the s390x revisions z10 (released February 2008, experimental)
+and z196 (released August 2010) in addition to zEC12 and z13.
+To target e.g. z196 on a zEC12 machine supply CFLAGS="-march=z196" to your shell environment.
+
+.. branch: s390x-5.3-catchup
+
+Implement the backend related changes for s390x.
+
+.. branch: incminimark-ll_assert
+.. branch: vmprof-openbsd
diff --git a/pypy/interpreter/pyparser/genpytokenize.py 
b/pypy/interpreter/pyparser/genpytokenize.py
--- a/pypy/interpreter/pyparser/genpytokenize.py
+++ b/pypy/interpreter/pyparser/genpytokenize.py
@@ -191,7 +191,7 @@
                               newArcPair(states, EMPTY),
                               pseudoExtras, number, funny, contStr, name))
     dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken)
-    return DFA(dfaStates, dfaAccepts)
+    return DFA(dfaStates, dfaAccepts), dfaStates
 
 # ______________________________________________________________________
 
@@ -205,7 +205,9 @@
                              newArcPair(states, DEFAULT),
                              any(states, notGroupStr(states, "'\\")))),
                    newArcPair(states, "'"))
-    singleDFA = DFA(*nfaToDfa(states, *single))
+    states, accepts = nfaToDfa(states, *single)
+    singleDFA = DFA(states, accepts)
+    states_singleDFA = states
     states = []
     double = chain(states,
                    any(states, notGroupStr(states, '"\\')),
@@ -215,7 +217,9 @@
                              newArcPair(states, DEFAULT),
                              any(states, notGroupStr(states, '"\\')))),
                    newArcPair(states, '"'))
-    doubleDFA = DFA(*nfaToDfa(states, *double))
+    states, accepts = nfaToDfa(states, *double)
+    doubleDFA = DFA(states, accepts)
+    states_doubleDFA = states
     states = []
     single3 = chain(states,
                     any(states, notGroupStr(states, "'\\")),
@@ -230,7 +234,9 @@
                                           notChainStr(states, "''"))),
                               any(states, notGroupStr(states, "'\\")))),
                     chainStr(states, "'''"))
-    single3DFA = NonGreedyDFA(*nfaToDfa(states, *single3))
+    states, accepts = nfaToDfa(states, *single3)
+    single3DFA = NonGreedyDFA(states, accepts)
+    states_single3DFA = states
     states = []
     double3 = chain(states,
                     any(states, notGroupStr(states, '"\\')),
@@ -245,9 +251,11 @@
                                           notChainStr(states, '""'))),
                               any(states, notGroupStr(states, '"\\')))),
                     chainStr(states, '"""'))
-    double3DFA = NonGreedyDFA(*nfaToDfa(states, *double3))
-    map = {"'" : singleDFA,
-           '"' : doubleDFA,
+    states, accepts = nfaToDfa(states, *double3)
+    double3DFA = NonGreedyDFA(states, accepts)
+    states_double3DFA = states
+    map = {"'" : (singleDFA, states_singleDFA),
+           '"' : (doubleDFA, states_doubleDFA),
            "r" : None,
            "R" : None,
            "u" : None,
@@ -257,25 +265,30 @@
     for uniPrefix in ("", "u", "U", "b", "B", ):
         for rawPrefix in ("", "r", "R"):
             prefix = uniPrefix + rawPrefix
-            map[prefix + "'''"] = single3DFA
-            map[prefix + '"""'] = double3DFA
+            map[prefix + "'''"] = (single3DFA, states_single3DFA)
+            map[prefix + '"""'] = (double3DFA, states_double3DFA)
     return map
 
 # ______________________________________________________________________
 
-def output(name, dfa_class, dfa):
+def output(name, dfa_class, dfa, states):
     import textwrap
+    lines = []
     i = 0
     for line in textwrap.wrap(repr(dfa.accepts), width = 50):
         if i == 0:
-            print "accepts =", line
+            lines.append("accepts = ")
         else:
-            print "          ", line
+            lines.append("           ")
+        lines.append(line)
+        lines.append("\n")
         i += 1
     import StringIO
-    print "states = ["
-    for numstate, state in enumerate(dfa.states):
-        print "    #", numstate
+    lines.append("states = [\n")
+    for numstate, state in enumerate(states):
+        lines.append("    # ")
+        lines.append(str(numstate))
+        lines.append('\n')
         s = StringIO.StringIO()
         i = 0
         for k, v in sorted(state.items()):
@@ -298,22 +311,28 @@
         for line in text:
             line = line.replace('::', ': ')
             if i == 0:
-                print '    {' + line
+                lines.append('    {')
             else:
-                print '     ' + line
+                lines.append('     ')
+            lines.append(line)
+            lines.append('\n')
             i += 1
-    print "    ]"
-    print "%s = automata.%s(states, accepts)" % (name, dfa_class)
-    print
+    lines.append("    ]\n")
+    lines.append("%s = automata.%s(states, accepts)\n" % (name, dfa_class))
+    return ''.join(lines)
 
 def main ():
-    pseudoDFA = makePyPseudoDFA()
-    output("pseudoDFA", "DFA", pseudoDFA)
+    pseudoDFA, states_pseudoDFA = makePyPseudoDFA()
+    print output("pseudoDFA", "DFA", pseudoDFA, states_pseudoDFA)
     endDFAMap = makePyEndDFAMap()
-    output("double3DFA", "NonGreedyDFA", endDFAMap['"""'])
-    output("single3DFA", "NonGreedyDFA", endDFAMap["'''"])
-    output("singleDFA", "DFA", endDFAMap["'"])
-    output("doubleDFA", "DFA", endDFAMap['"'])
+    dfa, states = endDFAMap['"""']
+    print output("double3DFA", "NonGreedyDFA", dfa, states)
+    dfa, states = endDFAMap["'''"]
+    print output("single3DFA", "NonGreedyDFA", dfa, states)
+    dfa, states = endDFAMap["'"]
+    print output("singleDFA", "DFA", dfa, states)
+    dfa, states = endDFAMap["\""]
+    print output("doubleDFA", "DFA", dfa, states)
 
 # ______________________________________________________________________
 
diff --git a/pypy/interpreter/pyparser/test/test_gendfa.py 
b/pypy/interpreter/pyparser/test/test_gendfa.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/pyparser/test/test_gendfa.py
@@ -0,0 +1,16 @@
+from pypy.interpreter.pyparser.automata import DFA, DEFAULT
+from pypy.interpreter.pyparser.genpytokenize import output
+
+def test_states():
+    states = [{"\x00": 1}, {"\x01": 0}]
+    d = DFA(states[:], [False, True])
+    assert output('test', DFA, d, states) == """\
+accepts = [False, True]
+states = [
+    # 0
+    {'\\x00': 1},
+    # 1
+    {'\\x01': 0},
+    ]
+test = automata.pypy.interpreter.pyparser.automata.DFA(states, accepts)
+"""
diff --git a/pypy/module/__pypy__/interp_intop.py 
b/pypy/module/__pypy__/interp_intop.py
--- a/pypy/module/__pypy__/interp_intop.py
+++ b/pypy/module/__pypy__/interp_intop.py
@@ -2,21 +2,10 @@
 from rpython.rtyper.lltypesystem import lltype
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rlib.rarithmetic import r_uint, intmask
+from rpython.rlib.rarithmetic import int_c_div, int_c_mod
 from rpython.rlib import jit
 
 
-# XXX maybe temporary: hide llop.int_{floordiv,mod} from the JIT,
-#     because now it expects only Python-style divisions, not the
-#     C-style divisions of these two ll operations
-@jit.dont_look_inside
-def _int_floordiv(n, m):
-    return llop.int_floordiv(lltype.Signed, n, m)
-
-@jit.dont_look_inside
-def _int_mod(n, m):
-    return llop.int_mod(lltype.Signed, n, m)
-
-
 @unwrap_spec(n=int, m=int)
 def int_add(space, n, m):
     return space.wrap(llop.int_add(lltype.Signed, n, m))
@@ -31,11 +20,11 @@
 
 @unwrap_spec(n=int, m=int)
 def int_floordiv(space, n, m):
-    return space.wrap(_int_floordiv(n, m))
+    return space.wrap(int_c_div(n, m))
 
 @unwrap_spec(n=int, m=int)
 def int_mod(space, n, m):
-    return space.wrap(_int_mod(n, m))
+    return space.wrap(int_c_mod(n, m))
 
 @unwrap_spec(n=int, m=int)
 def int_lshift(space, n, m):
diff --git a/pypy/module/_cffi_backend/ccallback.py 
b/pypy/module/_cffi_backend/ccallback.py
--- a/pypy/module/_cffi_backend/ccallback.py
+++ b/pypy/module/_cffi_backend/ccallback.py
@@ -220,6 +220,11 @@
         if rffi.cast(lltype.Signed, res) != clibffi.FFI_OK:
             raise oefmt(space.w_SystemError,
                         "libffi failed to build this callback")
+        if closure_ptr.c_user_data != unique_id:
+            raise oefmt(space.w_SystemError,
+                "ffi_prep_closure(): bad user_data (it seems that the "
+                "version of the libffi library seen at runtime is "
+                "different from the 'ffi.h' file seen at compile-time)")
 
     def py_invoke(self, ll_res, ll_args):
         jitdriver1.jit_merge_point(callback=self,
diff --git a/pypy/module/_cffi_backend/func.py 
b/pypy/module/_cffi_backend/func.py
--- a/pypy/module/_cffi_backend/func.py
+++ b/pypy/module/_cffi_backend/func.py
@@ -201,6 +201,9 @@
         else:
             copy_string_to_raw(llstr(src_string), dest_data, 0, n)
     else:
+        # nowadays this case should be rare or impossible: as far as
+        # I know, all common types implementing the *writable* buffer
+        # interface now support get_raw_address()
         if src_is_ptr:
             for i in range(n):
                 dest_buf.setitem(i, src_data[i])
diff --git a/pypy/module/cpyext/include/pymem.h 
b/pypy/module/cpyext/include/pymem.h
--- a/pypy/module/cpyext/include/pymem.h
+++ b/pypy/module/cpyext/include/pymem.h
@@ -1,5 +1,11 @@
 #include <stdlib.h>
 
+#ifndef Py_PYMEM_H
+#define Py_PYMEM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #define PyMem_MALLOC(n)                malloc((n) ? (n) : 1)
 #define PyMem_REALLOC(p, n)    realloc((p), (n) ? (n) : 1)
@@ -44,3 +50,9 @@
  */
 #define PyMem_Del               PyMem_Free
 #define PyMem_DEL               PyMem_FREE
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !Py_PYMEM_H */
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py 
b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -23,7 +23,7 @@
             guard_true(i14, descr=...)
             guard_not_invalidated(descr=...)
             i16 = int_eq(i6, %d)
-            i19 = call_i(ConstClass(ll_int_mod__Signed_Signed), i6, i10, 
descr=<Calli . ii EF=0 OS=14>)
+            i19 = call_i(ConstClass(ll_int_py_mod__Signed_Signed), i6, i10, 
descr=<Calli . ii EF=0 OS=14>)
             i21 = int_lt(i19, 0)
             guard_false(i21, descr=...)
             i22 = int_ge(i19, i10)
diff --git a/pypy/module/select/test/test_epoll.py 
b/pypy/module/select/test/test_epoll.py
--- a/pypy/module/select/test/test_epoll.py
+++ b/pypy/module/select/test/test_epoll.py
@@ -20,6 +20,10 @@
         self.w_sockets = self.space.wrap([])
         if platform.machine().startswith('arm'):
             self.w_timeout = self.space.wrap(0.06)
+        if platform.machine().startswith('s390x'):
+            # s390x is not slow, but it seems there is one case when epoll
+            # modify method is called that takes longer on s390x
+            self.w_timeout = self.space.wrap(0.06)
         else:
             self.w_timeout = self.space.wrap(0.02)
 
diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -3,10 +3,12 @@
 It uses 'pypy/goal/pypy-c' and parts of the rest of the working
 copy.  Usage:
 
-    package.py [--options] pypy-VER-PLATFORM
+    package.py [--options] --archive-name=pypy-VER-PLATFORM
 
 The output is found in the directory from --builddir,
 by default /tmp/usession-YOURNAME/build/.
+
+For a list of all options, see 'package.py --help'.
 """
 
 import shutil
@@ -61,6 +63,7 @@
     name = options.name
     if not name:
         name = 'pypy-nightly'
+    assert '/' not in name
     rename_pypy_c = options.pypy_c
     override_pypy_c = options.override_pypy_c
 
@@ -288,26 +291,12 @@
         help='destination dir for archive')
     parser.add_argument('--override_pypy_c', type=str, default='',
         help='use as pypy exe instead of pypy/goal/pypy-c')
-    # Positional arguments, for backward compatability with buldbots
-    parser.add_argument('extra_args', help='optional interface to positional 
arguments', nargs=argparse.REMAINDER,
-        metavar='[archive-name] [rename_pypy_c] [targetdir] [override_pypy_c]',
-        )
     options = parser.parse_args(args)
 
-    # Handle positional arguments, choke if both methods are used
-    for i,target, default in ([1, 'name', ''], [2, 'pypy_c', pypy_exe],
-                              [3, 'targetdir', ''], [4,'override_pypy_c', '']):
-        if len(options.extra_args)>i:
-            if getattr(options, target) != default:
-                print 'positional argument',i,target,'already has 
value',getattr(options, target)
-                parser.print_help()
-                return
-            setattr(options, target, options.extra_args[i])
     if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"):
         options.nostrip = True
-
     if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"):
-        options.tk = True
+        options.no_tk = True
     if not options.builddir:
         # The import actually creates the udir directory
         from rpython.tool.udir import udir
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -3,7 +3,7 @@
 min=3
 rev=0
 branchname=release-$maj.x  # ==OR== release-$maj.$min.x
-tagname=release-$maj.$min.$rev  # ==OR== release-$maj.$min
+tagname=release-pypy2.7-v$maj.$min  # ==OR== release-$maj.$min
 
 echo checking hg log -r $branchname
 hg log -r $branchname || exit 1
@@ -34,17 +34,19 @@
 plat=win32
 wget http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.zip
 unzip pypy-c-jit-latest-$plat.zip
+rm pypy-c-jit-latest-$plat.zip
 mv pypy-c-jit-*-$plat $rel-$plat
-zip -r $rel-$plat.zip $rel-$plat
+zip -rq $rel-$plat.zip $rel-$plat
 rm -rf $rel-$plat
 
 # Do this after creating a tag, note the untarred directory is pypy-pypy-<hash>
 # so make sure there is not another one
 wget https://bitbucket.org/pypy/pypy/get/$tagname.tar.bz2
 tar -xf $tagname.tar.bz2
+rm $tagname.tar.bz2
 mv pypy-pypy-* $rel-src
 tar --owner=root --group=root --numeric-owner -cjf $rel-src.tar.bz2 $rel-src
-zip -r $rel-src.zip $rel-src
+zip -rq $rel-src.zip $rel-src
 rm -rf $rel-src
 
 # Print out the md5, sha1, sha256
diff --git a/pypy/tool/release/test/test_package.py 
b/pypy/tool/release/test/test_package.py
--- a/pypy/tool/release/test/test_package.py
+++ b/pypy/tool/release/test/test_package.py
@@ -21,8 +21,10 @@
 
     def test_dir_structure(self, test='test'):
         retval, builddir = package.package(
-            '--without-cffi', str(py.path.local(pypydir).dirpath()),
-            test, self.rename_pypy_c, _fake=True)
+            '--without-cffi',
+            '--archive-name', test,
+            '--rename_pypy_c', self.rename_pypy_c,
+            _fake=True)
         assert retval == 0
         prefix = builddir.join(test)
         cpyver = '%d.%d' % CPYTHON_VERSION[:2]
@@ -71,8 +73,9 @@
         builddir = udir.ensure("build", dir=True)
         retval, builddir = package.package(
             '--without-cffi', '--builddir', str(builddir),
-            str(py.path.local(pypydir).dirpath()),
-            test, self.rename_pypy_c, _fake=True)
+            '--archive-name', test,
+            '--rename_pypy_c', self.rename_pypy_c,
+            _fake=True)
 
     def test_with_zipfile_module(self):
         prev = package.USE_ZIPFILE_MODULE
diff --git a/rpython/annotator/test/test_annrpython.py 
b/rpython/annotator/test/test_annrpython.py
--- a/rpython/annotator/test/test_annrpython.py
+++ b/rpython/annotator/test/test_annrpython.py
@@ -4610,6 +4610,19 @@
         a.build_types(fd, [])
         py.test.raises(AnnotatorError, a.build_types, fb, [])
 
+    def test_annotate_generator_with_unreachable_yields(self):
+        def f(n):
+            if n < 0:
+                yield 42
+            yield n
+            yield n
+        def main(n):
+            for x in f(abs(n)):
+                pass
+        #
+        a = self.RPythonAnnotator()
+        a.build_types(main, [int])
+
 
 def g(n):
     return [0, 1, 2, n]
diff --git a/rpython/doc/arch/index.rst b/rpython/doc/arch/index.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/arch/index.rst
@@ -0,0 +1,11 @@
+.. _arch_index:
+
+Architecture specific notes
+===========================
+
+Here you can find some architecture specific notes.
+
+.. toctree::
+    :maxdepth: 1
+
+    s390x
diff --git a/rpython/doc/arch/s390x.rst b/rpython/doc/arch/s390x.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/arch/s390x.rst
@@ -0,0 +1,34 @@
+.. _s390x:
+
+IBM Mainframe S390X
+===================
+
+Our JIT implements the 64 bit version of the IBM Mainframe called s390x.
+Note that this architecture is big endian.
+
+Currently supported ISAs:
+
+* z13 (released January 2015)
+* zEC12 (released September 2012)
+* z196 (released August 2010)
+* z10 (released February 2008)
+
+To check if all the necessary CPU facilities are installed
+on the subject machine, please run the test using a copy of the pypy
+source code::
+
+    $ ./pytest.py rpython/jit/backend/zarch/test/test_assembler -v -k 'test_facility'
+
+In addition you can run the auto encoding test to check if your Linux GCC tool chain
+is able to compile all instructions used in the JIT backend::
+
+    $ ./pytest.py rpython/jit/backend/zarch/test/test_auto_encoding.py -v
+
+Translating
+-----------
+
+Specifically check for these two dependencies. Old versions of some
+Linux distributions ship versions that are too old.
+
+* libffi (version should be > 3.0.+).
+* CPython 2.7.+.
diff --git a/rpython/doc/index.rst b/rpython/doc/index.rst
--- a/rpython/doc/index.rst
+++ b/rpython/doc/index.rst
@@ -37,7 +37,6 @@
 
    arm
    logging
-   s390x
 
 
 Writing your own interpreter in RPython
@@ -61,6 +60,7 @@
    getting-started
    dir-reference
    jit/index
+   arch/index
    translation
    rtyper
    garbage_collection
diff --git a/rpython/doc/s390x.rst b/rpython/doc/s390x.rst
deleted file mode 100644
--- a/rpython/doc/s390x.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. _s390x:
-
-S390X JIT Backend
-=================
-
-Our JIT implements the 64 bit version of the IBM Mainframe called s390x.
-Note that this architecture is big endian.
-
-The following facilities need to be installed to operate
-correctly (all of the machines used for development these where installed):
-
-* General-Instructions-Extension
-* Long-Displacement
-* Binary Floating Point (IEEE)
-
-Translating
------------
-
-Ensure that libffi is installed (version should do > 3.0.+).
-CPython should be version 2.7.+.
diff --git a/rpython/flowspace/generator.py b/rpython/flowspace/generator.py
--- a/rpython/flowspace/generator.py
+++ b/rpython/flowspace/generator.py
@@ -132,13 +132,14 @@
                 del block.operations[index]
                 newlink = split_block(block, index)
                 newblock = newlink.target
+                varnames = get_variable_names(newlink.args)
                 #
                 class Resume(AbstractPosition):
                     _immutable_ = True
+                    _attrs_ = varnames
                     block = newblock
                 Resume.__name__ = 'Resume%d' % len(mappings)
                 mappings.append(Resume)
-                varnames = get_variable_names(newlink.args)
                 #
                 _insert_reads(newblock, varnames)
                 #
diff --git a/rpython/jit/backend/arm/regalloc.py 
b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -901,6 +901,8 @@
         size_box = op.getarg(0)
         assert isinstance(size_box, ConstInt)
         size = size_box.getint()
+        # hint: try to move unrelated registers away from r0 and r1 now
+        self.rm.spill_or_move_registers_before_call([r.r0, r.r1])
 
         self.rm.force_allocate_reg(op, selected_reg=r.r0)
         t = TempInt()
@@ -924,6 +926,7 @@
         # sizeloc must be in a register, but we can free it now
         # (we take care explicitly of conflicts with r0 or r1)
         sizeloc = self.rm.make_sure_var_in_reg(size_box)
+        self.rm.spill_or_move_registers_before_call([r.r0, r.r1]) # sizeloc 
safe
         self.rm.possibly_free_var(size_box)
         #
         self.rm.force_allocate_reg(op, selected_reg=r.r0)
@@ -951,6 +954,11 @@
         arraydescr = op.getdescr()
         length_box = op.getarg(2)
         assert not isinstance(length_box, Const) # we cannot have a const here!
+        # can only use spill_or_move_registers_before_call() as a hint if
+        # we are sure that length_box stays alive and won't be freed now
+        # (it should always be the case, see below, but better safe than sorry)
+        if self.rm.stays_alive(length_box):
+            self.rm.spill_or_move_registers_before_call([r.r0, r.r1])
         # the result will be in r0
         self.rm.force_allocate_reg(op, selected_reg=r.r0)
         # we need r1 as a temporary
diff --git a/rpython/jit/backend/llsupport/regalloc.py 
b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -579,11 +579,26 @@
         new_free_regs.append(self.reg_bindings.pop(v))
 
     def before_call(self, force_store=[], save_all_regs=0):
-        """Spill or move some registers before a call.  By default,
-        this means: for every register in 'self.save_around_call_regs',
+        self.spill_or_move_registers_before_call(self.save_around_call_regs,
+                                                 force_store, save_all_regs)
+
+    def spill_or_move_registers_before_call(self, save_sublist,
+                                            force_store=[], save_all_regs=0):
+        """Spill or move some registers before a call.
+
+        By default, this means: for every register in 'save_sublist',
         if there is a variable there and it survives longer than
         the current operation, then it is spilled/moved somewhere else.
 
+        WARNING: this might do the equivalent of possibly_free_vars()
+        on variables dying in the current operation.  It won't
+        immediately overwrite registers that used to be occupied by
+        these variables, though.  Use this function *after* you finished
+        calling self.loc() or self.make_sure_var_in_reg(), i.e. when you
+        know the location of all input arguments.  These locations stay
+        valid, but only *if they are in self.save_around_call_regs,*
+        not if they are callee-saved registers!
+
         'save_all_regs' can be 0 (default set of registers), 1 (do that
         for all registers), or 2 (default + gc ptrs).
 
@@ -612,6 +627,16 @@
         anyway, as a local hack in this function, because on x86 CPUs
         such register-register moves are almost free.
         """
+        if not we_are_translated():
+            # 'save_sublist' is either the whole
+            # 'self.save_around_call_regs', or a sublist thereof, and
+            # then only those registers are spilled/moved.  But when
+            # we move them, we never move them to other registers in
+            # 'self.save_around_call_regs', to avoid ping-pong effects
+            # where the same value is constantly moved around.
+            for reg in save_sublist:
+                assert reg in self.save_around_call_regs
+
         new_free_regs = []
         move_or_spill = []
 
@@ -631,7 +656,7 @@
                 # we need to spill all GC ptrs in this mode
                 self._bc_spill(v, new_free_regs)
                 #
-            elif reg not in self.save_around_call_regs:
+            elif reg not in save_sublist:
                 continue  # in a register like ebx/rbx: it is fine where it is
                 #
             else:
@@ -663,6 +688,7 @@
                 if not we_are_translated():
                     if move_or_spill:
                         assert max_age <= min([_a for _, _a in move_or_spill])
+                    assert reg in save_sublist
                     assert reg in self.save_around_call_regs
                     assert new_reg not in self.save_around_call_regs
                 self.assembler.regalloc_mov(reg, new_reg)
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py 
b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -324,17 +324,19 @@
         def check(frame):
             expected_size = 1
             idx = 0
+            fixed_size = self.cpu.JITFRAME_FIXED_SIZE
             if self.cpu.backend_name.startswith('arm'):
                 # jitframe fixed part is larger here
                 expected_size = 2
                 idx = 1
+                fixed_size -= 32
             assert len(frame.jf_gcmap) == expected_size
-            if self.cpu.IS_64_BIT:
-                exp_idx = self.cpu.JITFRAME_FIXED_SIZE + 1  # +1 from i0
-            else:
-                assert frame.jf_gcmap[idx]
-                exp_idx = self.cpu.JITFRAME_FIXED_SIZE - 32 * idx + 1 # +1 
from i0
-            assert frame.jf_gcmap[idx] == (1 << (exp_idx + 1)) | (1 << exp_idx)
+            # check that we have two bits set, and that they are in two
+            # registers (p0 and p1 are moved away when doing p2, but not
+            # spilled, just moved to different registers)
+            bits = [n for n in range(fixed_size)
+                      if frame.jf_gcmap[idx] & (1<<n)]
+            assert len(bits) == 2
 
         self.cpu = self.getcpu(check)
         ops = '''
diff --git a/rpython/jit/backend/test/runner_test.py 
b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2825,6 +2825,7 @@
         from rpython.rlib.rarithmetic import r_singlefloat
         from rpython.translator.c import primitive
 
+
         def same_as_for_box(b):
             if b.type == 'i':
                 return rop.SAME_AS_I
@@ -2835,6 +2836,8 @@
 
         cpu = self.cpu
         rnd = random.Random(525)
+        seed = py.test.config.option.randomseed
+        print("random seed %d" % seed)
 
         ALL_TYPES = [
             (types.ulong,  lltype.Unsigned),
diff --git a/rpython/jit/backend/x86/assembler.py 
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -204,20 +204,20 @@
 
     def _build_malloc_slowpath(self, kind):
         """ While arriving on slowpath, we have a gcpattern on stack 0.
-        The arguments are passed in eax and edi, as follows:
+        The arguments are passed in ecx and edx, as follows:
 
-        kind == 'fixed': nursery_head in eax and the size in edi - eax.
+        kind == 'fixed': nursery_head in ecx and the size in (edx - ecx).
 
-        kind == 'str/unicode': length of the string to allocate in edi.
+        kind == 'str/unicode': length of the string to allocate in edx.
 
-        kind == 'var': length to allocate in edi, tid in eax,
+        kind == 'var': length to allocate in edx, tid in ecx,
                        and itemsize in the stack 1 (position esp+WORD).
 
-        This function must preserve all registers apart from eax and edi.
+        This function must preserve all registers apart from ecx and edx.
         """
         assert kind in ['fixed', 'str', 'unicode', 'var']
         mc = codebuf.MachineCodeBlockWrapper()
-        self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
+        self._push_all_regs_to_frame(mc, [ecx, edx], self.cpu.supports_floats)
         # the caller already did push_gcmap(store=True)
         #
         if kind == 'fixed':
@@ -231,32 +231,32 @@
         mc.SUB_ri(esp.value, 16 - WORD)  # restore 16-byte alignment
         # magically, the above is enough on X86_32 to reserve 3 stack places
         if kind == 'fixed':
-            mc.SUB_rr(edi.value, eax.value) # compute the size we want
-            # the arg is already in edi
+            mc.SUB_rr(edx.value, ecx.value) # compute the size we want
             if IS_X86_32:
-                mc.MOV_sr(0, edi.value)
+                mc.MOV_sr(0, edx.value)     # store the length
                 if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
-                    mc.MOV_sr(WORD, ebp.value)
-            elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
-                # for tests only
-                mc.MOV_rr(esi.value, ebp.value)
+                    mc.MOV_sr(WORD, ebp.value)        # for tests only
+            else:
+                mc.MOV_rr(edi.value, edx.value)   # length argument
+                if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+                    mc.MOV_rr(esi.value, ebp.value)   # for tests only
         elif kind == 'str' or kind == 'unicode':
             if IS_X86_32:
                 # stack layout: [---][---][---][ret].. with 3 free stack places
-                mc.MOV_sr(0, edi.value)     # store the length
-            else:
-                pass                        # length already in edi
+                mc.MOV_sr(0, edx.value)     # store the length
+            elif IS_X86_64:
+                mc.MOV_rr(edi.value, edx.value)   # length argument
         else:
             if IS_X86_32:
                 # stack layout: [---][---][---][ret][gcmap][itemsize]...
-                mc.MOV_sr(WORD * 2, edi.value)  # store the length
-                mc.MOV_sr(WORD * 1, eax.value)  # store the tid
-                mc.MOV_rs(edi.value, WORD * 5)  # load the itemsize
-                mc.MOV_sr(WORD * 0, edi.value)  # store the itemsize
+                mc.MOV_sr(WORD * 2, edx.value)  # store the length
+                mc.MOV_sr(WORD * 1, ecx.value)  # store the tid
+                mc.MOV_rs(edx.value, WORD * 5)  # load the itemsize
+                mc.MOV_sr(WORD * 0, edx.value)  # store the itemsize
             else:
                 # stack layout: [---][ret][gcmap][itemsize]...
-                mc.MOV_rr(edx.value, edi.value) # length
-                mc.MOV_rr(esi.value, eax.value) # tid
+                # (already in edx)              # length
+                mc.MOV_rr(esi.value, ecx.value) # tid
                 mc.MOV_rs(edi.value, WORD * 3)  # load the itemsize
         self.set_extra_stack_depth(mc, 16)
         mc.CALL(imm(follow_jump(addr)))
@@ -267,10 +267,11 @@
         mc.TEST_rr(eax.value, eax.value)
         mc.J_il(rx86.Conditions['Z'], 0xfffff) # patched later
         jz_location = mc.get_relative_pos()
+        mc.MOV_rr(ecx.value, eax.value)
         #
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
-        self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
-        mc.MOV(edi, heap(nursery_free_adr))   # load this in EDI
+        self._pop_all_regs_from_frame(mc, [ecx, edx], self.cpu.supports_floats)
+        mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
         self.pop_gcmap(mc)   # push_gcmap(store=True) done by the caller
         mc.RET()
         #
@@ -2441,9 +2442,9 @@
 
     def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
         assert size & (WORD-1) == 0     # must be correctly aligned
-        self.mc.MOV(eax, heap(nursery_free_adr))
-        self.mc.LEA_rm(edi.value, (eax.value, size))
-        self.mc.CMP(edi, heap(nursery_top_adr))
+        self.mc.MOV(ecx, heap(nursery_free_adr))
+        self.mc.LEA_rm(edx.value, (ecx.value, size))
+        self.mc.CMP(edx, heap(nursery_top_adr))
         self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr = self.mc.get_relative_pos()
         # save the gcmap
@@ -2452,19 +2453,19 @@
         offset = self.mc.get_relative_pos() - jmp_adr
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr-1, chr(offset))
-        self.mc.MOV(heap(nursery_free_adr), edi)
+        self.mc.MOV(heap(nursery_free_adr), edx)
 
     def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
                                   sizeloc, gcmap):
-        if sizeloc is eax:
-            self.mc.MOV(edi, sizeloc)
-            sizeloc = edi
-        self.mc.MOV(eax, heap(nursery_free_adr))
-        if sizeloc is edi:
-            self.mc.ADD_rr(edi.value, eax.value)
+        if sizeloc is ecx:
+            self.mc.MOV(edx, sizeloc)
+            sizeloc = edx
+        self.mc.MOV(ecx, heap(nursery_free_adr))
+        if sizeloc is edx:
+            self.mc.ADD_rr(edx.value, ecx.value)
         else:
-            self.mc.LEA_ra(edi.value, (eax.value, sizeloc.value, 0, 0))
-        self.mc.CMP(edi, heap(nursery_top_adr))
+            self.mc.LEA_ra(edx.value, (ecx.value, sizeloc.value, 0, 0))
+        self.mc.CMP(edx, heap(nursery_top_adr))
         self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr = self.mc.get_relative_pos()
         # save the gcmap
@@ -2473,7 +2474,7 @@
         offset = self.mc.get_relative_pos() - jmp_adr
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr-1, chr(offset))
-        self.mc.MOV(heap(nursery_free_adr), edi)
+        self.mc.MOV(heap(nursery_free_adr), edx)
 
     def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
                             lengthloc, itemsize, maxlength, gcmap,
@@ -2482,39 +2483,39 @@
         assert isinstance(arraydescr, ArrayDescr)
 
         # lengthloc is the length of the array, which we must not modify!
-        assert lengthloc is not eax and lengthloc is not edi
+        assert lengthloc is not ecx and lengthloc is not edx
         if isinstance(lengthloc, RegLoc):
             varsizeloc = lengthloc
         else:
-            self.mc.MOV(edi, lengthloc)
-            varsizeloc = edi
+            self.mc.MOV(edx, lengthloc)
+            varsizeloc = edx
 
         self.mc.CMP(varsizeloc, imm(maxlength))
         self.mc.J_il8(rx86.Conditions['A'], 0) # patched later
         jmp_adr0 = self.mc.get_relative_pos()
 
-        self.mc.MOV(eax, heap(nursery_free_adr))
+        self.mc.MOV(ecx, heap(nursery_free_adr))
         if valid_addressing_size(itemsize):
             shift = get_scale(itemsize)
         else:
-            shift = self._imul_const_scaled(self.mc, edi.value,
+            shift = self._imul_const_scaled(self.mc, edx.value,
                                             varsizeloc.value, itemsize)
-            varsizeloc = edi
+            varsizeloc = edx
 
-        # now varsizeloc is a register != eax.  The size of
+        # now varsizeloc is a register != ecx.  The size of
         # the variable part of the array is (varsizeloc << shift)
         assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
         constsize = arraydescr.basesize + self.gc_size_of_header
         force_realignment = (itemsize % WORD) != 0
         if force_realignment:
             constsize += WORD - 1
-        self.mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift,
+        self.mc.LEA_ra(edx.value, (ecx.value, varsizeloc.value, shift,
                                    constsize))
         if force_realignment:
-            self.mc.AND_ri(edi.value, ~(WORD - 1))
-        # now edi contains the total size in bytes, rounded up to a multiple
+            self.mc.AND_ri(edx.value, ~(WORD - 1))
+        # now edx contains the total size in bytes, rounded up to a multiple
         # of WORD, plus nursery_free_adr
-        self.mc.CMP(edi, heap(nursery_top_adr))
+        self.mc.CMP(edx, heap(nursery_top_adr))
         self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr1 = self.mc.get_relative_pos()
         #
@@ -2525,8 +2526,8 @@
         self.push_gcmap(self.mc, gcmap, store=True)
         if kind == rewrite.FLAG_ARRAY:
             self.mc.MOV_si(WORD, itemsize)
-            self.mc.MOV(edi, lengthloc)
-            self.mc.MOV_ri(eax.value, arraydescr.tid)
+            self.mc.MOV(edx, lengthloc)
+            self.mc.MOV_ri(ecx.value, arraydescr.tid)
             addr = self.malloc_slowpath_varsize
         else:
             if kind == rewrite.FLAG_STR:
@@ -2534,7 +2535,7 @@
             else:
                 assert kind == rewrite.FLAG_UNICODE
                 addr = self.malloc_slowpath_unicode
-            self.mc.MOV(edi, lengthloc)
+            self.mc.MOV(edx, lengthloc)
         self.mc.CALL(imm(follow_jump(addr)))
         self.mc.JMP_l8(0)      # jump to done, patched later
         jmp_location = self.mc.get_relative_pos()
@@ -2544,9 +2545,9 @@
         self.mc.overwrite(jmp_adr1-1, chr(offset))
         self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
         # write down the tid, but not if it's the result of the CALL
-        self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+        self.mc.MOV(mem(ecx, 0), imm(arraydescr.tid))
         # while we're at it, this line is not needed if we've done the CALL
-        self.mc.MOV(heap(nursery_free_adr), edi)
+        self.mc.MOV(heap(nursery_free_adr), edx)
         #
         offset = self.mc.get_relative_pos() - jmp_location
         assert 0 < offset <= 127
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -952,14 +952,16 @@
         size_box = op.getarg(0)
         assert isinstance(size_box, ConstInt)
         size = size_box.getint()
-        # looking at the result
-        self.rm.force_allocate_reg(op, selected_reg=eax)
+        # hint: try to move unrelated registers away from ecx and edx now
+        self.rm.spill_or_move_registers_before_call([ecx, edx])
+        # the result will be in ecx
+        self.rm.force_allocate_reg(op, selected_reg=ecx)
         #
-        # We need edi as a temporary, but otherwise don't save any more
+        # We need edx as a temporary, but otherwise don't save any more
         # register.  See comments in _build_malloc_slowpath().
         tmp_box = TempVar()
-        self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
-        gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+        gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before*
         self.rm.possibly_free_var(tmp_box)
         #
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
@@ -972,15 +974,16 @@
         size_box = op.getarg(0)
         assert not isinstance(size_box, Const) # we cannot have a const here!
         # sizeloc must be in a register, but we can free it now
-        # (we take care explicitly of conflicts with eax or edi)
+        # (we take care explicitly of conflicts with ecx or edx)
         sizeloc = self.rm.make_sure_var_in_reg(size_box)
+        self.rm.spill_or_move_registers_before_call([ecx, edx])  # sizeloc safe
         self.rm.possibly_free_var(size_box)
-        # the result will be in eax
-        self.rm.force_allocate_reg(op, selected_reg=eax)
-        # we need edi as a temporary
+        # the result will be in ecx
+        self.rm.force_allocate_reg(op, selected_reg=ecx)
+        # we need edx as a temporary
         tmp_box = TempVar()
-        self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
-        gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+        gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before*
         self.rm.possibly_free_var(tmp_box)
         #
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
@@ -997,16 +1000,21 @@
         arraydescr = op.getdescr()
         length_box = op.getarg(2)
         assert not isinstance(length_box, Const) # we cannot have a const here!
-        # the result will be in eax
-        self.rm.force_allocate_reg(op, selected_reg=eax)
-        # we need edi as a temporary
+        # can only use spill_or_move_registers_before_call() as a hint if
+        # we are sure that length_box stays alive and won't be freed now
+        # (it should always be the case, see below, but better safe than sorry)
+        if self.rm.stays_alive(length_box):
+            self.rm.spill_or_move_registers_before_call([ecx, edx])
+        # the result will be in ecx
+        self.rm.force_allocate_reg(op, selected_reg=ecx)
+        # we need edx as a temporary
         tmp_box = TempVar()
-        self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
-        gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+        gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before*
         self.rm.possibly_free_var(tmp_box)
         # length_box always survives: it's typically also present in the
         # next operation that will copy it inside the new array.  It's
-        # fine to load it from the stack too, as long as it's != eax, edi.
+        # fine to load it from the stack too, as long as it is != ecx, edx.
         lengthloc = self.rm.loc(length_box)
         self.rm.possibly_free_var(length_box)
         #
@@ -1225,6 +1233,8 @@
             raise AssertionError("bad unicode item size")
 
     def _consider_math_read_timestamp(self, op):
+        # hint: try to move unrelated registers away from eax and edx now
+        self.rm.spill_or_move_registers_before_call([eax, edx])
         tmpbox_high = TempVar()
         self.rm.force_allocate_reg(tmpbox_high, selected_reg=eax)
         if longlong.is_64_bit:
diff --git a/rpython/jit/backend/x86/test/test_zvmprof.py b/rpython/jit/backend/x86/test/test_zvmprof.py
deleted file mode 100644
--- a/rpython/jit/backend/x86/test/test_zvmprof.py
+++ /dev/null
@@ -1,7 +0,0 @@
-
-from rpython.jit.backend.llsupport.test.zrpy_vmprof_test import CompiledVmprofTest
-
-class TestZVMprof(CompiledVmprofTest):
-    
-    gcrootfinder = "shadowstack"
-    gc = "incminimark"
\ No newline at end of file
diff --git a/rpython/jit/backend/zarch/callbuilder.py b/rpython/jit/backend/zarch/callbuilder.py
--- a/rpython/jit/backend/zarch/callbuilder.py
+++ b/rpython/jit/backend/zarch/callbuilder.py
@@ -12,6 +12,8 @@
 from rpython.rtyper.lltypesystem import rffi
 from rpython.jit.backend.llsupport.descr import CallDescr
 
+CALL_RELEASE_GIL_STACK_OFF = 6*WORD
+
 class CallBuilder(AbstractCallBuilder):
     GPR_ARGS = [r.r2, r.r3, r.r4, r.r5, r.r6]
     FPR_ARGS =  [r.f0, r.f2, r.f4, r.f6]
@@ -85,8 +87,8 @@
         self.subtracted_to_sp += len(stack_params) * WORD
         base = len(stack_params) * WORD
         if self.is_call_release_gil:
-            self.subtracted_to_sp += 8*WORD
-            base += 8*WORD
+            self.subtracted_to_sp += CALL_RELEASE_GIL_STACK_OFF
+            base += CALL_RELEASE_GIL_STACK_OFF
         for idx,i in enumerate(stack_params):
             loc = arglocs[i]
             offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx
@@ -187,7 +189,7 @@
         RSHADOWPTR = self.RSHADOWPTR
         RFASTGILPTR = self.RFASTGILPTR
         #
-        pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
+        pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
         self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP))
         #
         # Save this thread's shadowstack pointer into r8, for later comparison
@@ -286,7 +288,7 @@
         if gcrootmap:
             if gcrootmap.is_shadow_stack and self.is_call_release_gil:
                 self.mc.LGR(r.SCRATCH, RSHADOWOLD)
-        pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
+        pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
         self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP))
 
     def write_real_errno(self, save_err):
diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -29,6 +29,7 @@
     'MGHI':    ('ri',    ['\xA7','\x0D']),
     'MSGFI':   ('ril',   ['\xC2','\x00']),
     'MLGR':    ('rre',   ['\xB9','\x86'], 'eo,r'),
+    'MLG':     ('rxy',   ['\xE3','\x86'], 'eo,bid'),
     # div/mod
     'DSGR':    ('rre',   ['\xB9','\x0D'], 'eo,r'),
     'DSG':     ('rxy',   ['\xE3','\x0D'], 'eo,bidl'),
@@ -44,7 +45,6 @@
 
     # rotating
     'RISBG':   ('rie_f',   ['\xEC','\x55']),
-    'RISBGN':  ('rie_f',   ['\xEC','\x59']),
 
     # invert & negative & absolute
     'LPGR':    ('rre',   ['\xB9','\x00']),
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -160,11 +160,15 @@
         omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow))
         omc.overwrite()
 
-    emit_int_floordiv = gen_emit_div_mod('DSGR', 'DSG')
-    emit_uint_floordiv = gen_emit_div_mod('DLGR', 'DLG')
-    # NOTE division sets one register with the modulo value, thus
-    # the regalloc ensures the right register survives.
-    emit_int_mod = gen_emit_div_mod('DSGR', 'DSG')
+    def emit_uint_mul_high(self, op, arglocs, regalloc):
+        r0, _, a1 = arglocs
+        # _ carries the value, contents of r0 are ignored
+        assert not r0.is_imm()
+        assert not a1.is_imm()
+        if a1.is_core_reg():
+            self.mc.MLGR(r0, a1)
+        else:
+            self.mc.MLG(r0, a1)
 
     def emit_int_invert(self, op, arglocs, regalloc):
         l0, = arglocs
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -733,9 +733,6 @@
     prepare_int_sub_ovf = helper.prepare_int_sub
     prepare_int_mul = helper.prepare_int_mul
     prepare_int_mul_ovf = helper.prepare_int_mul_ovf
-    prepare_int_floordiv = helper.prepare_int_div
-    prepare_uint_floordiv = helper.prepare_int_div
-    prepare_int_mod = helper.prepare_int_mod
     prepare_nursery_ptr_increment = prepare_int_add
 
     prepare_int_and = helper.prepare_int_logic
@@ -746,6 +743,18 @@
     prepare_int_lshift  = helper.prepare_int_shift
     prepare_uint_rshift = helper.prepare_int_shift
 
+    def prepare_uint_mul_high(self, op):
+        a0 = op.getarg(0)
+        a1 = op.getarg(1)
+        if a0.is_constant():
+            a0, a1 = a1, a0
+        if helper.check_imm32(a1):
+            l1 = self.ensure_reg(a1)
+        else:
+            l1 = self.ensure_reg_or_pool(a1)
+        lr,lq = self.rm.ensure_even_odd_pair(a0, op, bind_first=True)
+        return [lr, lq, l1]
+
     prepare_int_le = helper.generate_cmp_op()
     prepare_int_lt = helper.generate_cmp_op()
     prepare_int_ge = helper.generate_cmp_op()
diff --git a/rpython/jit/backend/zarch/test/test_assembler.py b/rpython/jit/backend/zarch/test/test_assembler.py
--- a/rpython/jit/backend/zarch/test/test_assembler.py
+++ b/rpython/jit/backend/zarch/test/test_assembler.py
@@ -155,7 +155,15 @@
         s64 = bin(fac_data[1])[2:]
         print(f64)
         print(s64)
+        for i,c in enumerate(f64):
+            print('index: %d is set? %s' % (i,c))
+
+        assert f64[1] == '1' # The z/Architecture architectural mode is installed.
+        assert f64[2] == '1' # The z/Architecture architectural mode is active.
         assert f64[18] == '1' # long displacement facility
+        assert f64[21] == '1' # extended immediate facility
+        assert f64[34] == '1' # general instruction facility
+        assert f64[41] == '1' # floating-point-support-enhancement
 
     def test_load_byte_zero_extend(self):
         adr = self.a.datablockwrapper.malloc_aligned(16, 16)
@@ -189,7 +197,7 @@
     @py.test.mark.parametrize('p', [2**32,2**32+1,2**63-1,2**63-2,0,1,2,3,4,5,6,7,8,10001])
     def test_align_withroll(self, p):
         self.a.mc.load_imm(r.r2, p & 0xffffFFFFffffFFFF)
-        self.a.mc.RISBGN(r.r2, r.r2, loc.imm(0), loc.imm(0x80 | 60), loc.imm(0))
+        self.a.mc.RISBG(r.r2, r.r2, loc.imm(0), loc.imm(0x80 | 60), loc.imm(0))
         self.a.mc.BCR(con.ANY, r.r14)
         assert run_asm(self.a) == rffi.cast(rffi.ULONG,p) & ~(7)
 
@@ -214,7 +222,7 @@
         n = 13
         l = loc
         self.a.mc.load_imm(r.r2, 7<<n)
-        self.a.mc.RISBGN(r.r2, r.r2, l.imm(61), l.imm(0x80 | 63), l.imm(64-n))
+        self.a.mc.RISBG(r.r2, r.r2, l.imm(61), l.imm(0x80 | 63), l.imm(64-n))
         self.a.mc.BCR(con.ANY, r.r14)
         assert run_asm(self.a) == 7
 
@@ -222,7 +230,7 @@
         n = 16
         l = loc
         self.a.mc.load_imm(r.r2, 0xffFFffFF)
-        self.a.mc.RISBGN(r.r2, r.r2, l.imm(60), l.imm(0x80 | 63), l.imm(64-n))
+        self.a.mc.RISBG(r.r2, r.r2, l.imm(60), l.imm(0x80 | 63), l.imm(64-n))
         self.a.mc.BCR(con.ANY, r.r14)
         assert run_asm(self.a) == 15
 
diff --git a/rpython/jit/backend/zarch/test/test_auto_encoding.py b/rpython/jit/backend/zarch/test/test_auto_encoding.py
--- a/rpython/jit/backend/zarch/test/test_auto_encoding.py
+++ b/rpython/jit/backend/zarch/test/test_auto_encoding.py
@@ -204,7 +204,7 @@
                 g.write('%s\n' % op)
                 oplist.append(op)
             g.write('\t.string "%s"\n' % END_TAG)
-        proc = subprocess.Popen(['as', '-m64', '-mzarch', '-march=zEC12',
+        proc = subprocess.Popen(['as', '-m64', '-mzarch', '-march=z196',
                                  inputname, '-o', filename],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
diff --git a/rpython/jit/backend/zarch/test/test_int.py b/rpython/jit/backend/zarch/test/test_int.py
--- a/rpython/jit/backend/zarch/test/test_int.py
+++ b/rpython/jit/backend/zarch/test/test_int.py
@@ -35,41 +35,13 @@
         fail = self.cpu.get_latest_descr(deadframe)
         assert fail == finishdescr # ensures that guard is not taken!
 
-    def test_double_evenodd_pair(self):
-        code = """
-        [i0]
-        i1 = int_floordiv(i0, 2)
-        i2 = int_floordiv(i0, 3)
-        i3 = int_floordiv(i0, 4)
-        i4 = int_floordiv(i0, 5)
-        i5 = int_floordiv(i0, 6)
-        i6 = int_floordiv(i0, 7)
-        i7 = int_floordiv(i0, 8)
-        i8 = int_le(i1, 0)
-        guard_true(i8) [i1,i2,i3,i4,i5,i6,i7]
-        finish(i0, descr=faildescr)
-        """
-        # the guard forces 3 spills because after 4 divisions
-        # all even slots of the managed registers are full
-        loop = parse(code, namespace={'faildescr': BasicFinalDescr(1)})
-        looptoken = JitCellToken()
-        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-        deadframe = self.cpu.execute_token(looptoken, 100)
-        fail = self.cpu.get_latest_descr(deadframe)
-        for i in range(2,9):
-            assert self.cpu.get_int_value(deadframe, i-2) == 100//i
-
-
-
     @py.test.mark.parametrize('value', [2,3,15,2**16])
     def test_evenodd_pair_extensive(self, value):
         instrs = []
         failargs = []
         values = []
         j = 0
-        mapping = (('int_floordiv',lambda x,y: x // y),
-                   ('int_mod', lambda x,y: x % y),
-                   ('int_mul_ovf', lambda x,y: x * y))
+        mapping = (('int_mul_ovf', lambda x,y: x * y),)
         for i in range(20):
             name, func = mapping[j]
             instrs.append("i{d} = {i}(i0, {d})".format(d=i+1, i=name))
diff --git a/rpython/jit/backend/zarch/test/test_regalloc.py b/rpython/jit/backend/zarch/test/test_regalloc.py
--- a/rpython/jit/backend/zarch/test/test_regalloc.py
+++ b/rpython/jit/backend/zarch/test/test_regalloc.py
@@ -146,128 +146,3 @@
     assert cpu.get_int_value(deadframe, 0) == 0
     assert cpu.get_int_value(deadframe, 1) == -1000
 
-def test_bug_0():
-    cpu, deadframe = run([-13, 10, 10, 8, -8, -16, -18, 46, -12, 26], '''
-    [i1, i2, i3, i4, i5, i6, i7, i8, i9, i10]
-    i11 = uint_gt(i3, -48)
-    i12 = int_xor(i8, i1)
-    i13 = int_gt(i6, -9)
-    i14 = int_le(i13, i2)
-    i15 = int_le(i11, i5)
-    i16 = uint_ge(i13, i13)
-    i17 = int_or(i9, -23)
-    i18 = int_lt(i10, i13)
-    i19 = int_or(i15, i5)
-    i20 = int_xor(i17, 54)
-    i21 = int_mul(i8, i10)
-    i22 = int_or(i3, i9)
-    i41 = int_and(i11, -4)
-    i42 = int_or(i41, 1)
-    i23 = int_mod(i12, i42)
-    i24 = int_is_true(i6)
-    i25 = uint_rshift(i15, 6)
-    i26 = int_or(-4, i25)
-    i27 = int_invert(i8)
-    i28 = int_sub(-113, i11)
-    i29 = int_neg(i7)
-    i30 = int_neg(i24)
-    i31 = int_floordiv(i3, 53)
-    i32 = int_mul(i28, i27)
-    i43 = int_and(i18, -4)
-    i44 = int_or(i43, 1)
-    i33 = int_mod(i26, i44)
-    i34 = int_or(i27, i19)
-    i35 = uint_lt(i13, 1)
-    i45 = int_and(i21, 31)
-    i36 = int_rshift(i21, i45)
-    i46 = int_and(i20, 31)
-    i37 = uint_rshift(i4, i46)
-    i38 = uint_gt(i33, -11)
-    i39 = int_neg(i7)
-    i40 = int_gt(i24, i32)
-    i99 = same_as_i(0)
-    guard_true(i99) [i40, i36, i37, i31, i16, i34, i35, i23, i22, i29, i14, i39, i30, i38]
-    finish(42)
-    ''')
-    assert cpu.get_int_value(deadframe, 0) == 0
-    assert cpu.get_int_value(deadframe, 1) == 0
-    assert cpu.get_int_value(deadframe, 2) == 0
-    assert cpu.get_int_value(deadframe, 3) == 0
-    assert cpu.get_int_value(deadframe, 4) == 1
-    assert cpu.get_int_value(deadframe, 5) == -7
-    assert cpu.get_int_value(deadframe, 6) == 1
-    assert cpu.get_int_value(deadframe, 7) == 0
-    assert cpu.get_int_value(deadframe, 8) == -2
-    assert cpu.get_int_value(deadframe, 9) == 18
-    assert cpu.get_int_value(deadframe, 10) == 1
-    assert cpu.get_int_value(deadframe, 11) == 18
-    assert cpu.get_int_value(deadframe, 12) == -1
-    assert cpu.get_int_value(deadframe, 13) == 0
-
-def test_bug_1():
-    cpu, deadframe = run([17, -20, -6, 6, 1, 13, 13, 9, 49, 8], '''
-    [i1, i2, i3, i4, i5, i6, i7, i8, i9, i10]
-    i11 = uint_lt(i6, 0)
-    i41 = int_and(i3, 31)
-    i12 = int_rshift(i3, i41)
-    i13 = int_neg(i2)
-    i14 = int_add(i11, i7)
-    i15 = int_or(i3, i2)
-    i16 = int_or(i12, i12)
-    i17 = int_ne(i2, i5)
-    i42 = int_and(i5, 31)
-    i18 = uint_rshift(i14, i42)
-    i43 = int_and(i14, 31)
-    i19 = int_lshift(7, i43)
-    i20 = int_neg(i19)
-    i21 = int_mod(i3, 1)
-    i22 = uint_ge(i15, i1)
-    i44 = int_and(i16, 31)
-    i23 = int_lshift(i8, i44)
-    i24 = int_is_true(i17)
-    i45 = int_and(i5, 31)
-    i25 = int_lshift(i14, i45)
-    i26 = int_lshift(i5, 17)
-    i27 = int_eq(i9, i15)
-    i28 = int_ge(0, i6)
-    i29 = int_neg(i15)
-    i30 = int_neg(i22)
-    i31 = int_add(i7, i16)
-    i32 = uint_lt(i19, i19)
-    i33 = int_add(i2, 1)
-    i34 = int_neg(i5)
-    i35 = int_add(i17, i24)
-    i36 = uint_lt(2, i16)
-    i37 = int_neg(i9)
-    i38 = int_gt(i4, i11)
-    i39 = int_lt(i27, i22)
-    i40 = int_neg(i27)
-    i99 = same_as_i(0)
-    guard_true(i99) [i40, i10, i36, i26, i13, i30, i21, i33, i18, i25, i31, i32, i28, i29, i35, i38, i20, i39, i34, i23, i37]
-    finish(-42)
-    ''')
-    assert cpu.get_int_value(deadframe, 0) == 0
-    assert cpu.get_int_value(deadframe, 1) == 8
-    assert cpu.get_int_value(deadframe, 2) == 1
-    assert cpu.get_int_value(deadframe, 3) == 131072
-    assert cpu.get_int_value(deadframe, 4) == 20
-    assert cpu.get_int_value(deadframe, 5) == -1
-    assert cpu.get_int_value(deadframe, 6) == 0
-    assert cpu.get_int_value(deadframe, 7) == -19
-    assert cpu.get_int_value(deadframe, 8) == 6
-    assert cpu.get_int_value(deadframe, 9) == 26
-    assert cpu.get_int_value(deadframe, 10) == 12
-    assert cpu.get_int_value(deadframe, 11) == 0
-    assert cpu.get_int_value(deadframe, 12) == 0
-    assert cpu.get_int_value(deadframe, 13) == 2
-    assert cpu.get_int_value(deadframe, 14) == 2
-    assert cpu.get_int_value(deadframe, 15) == 1
-    assert cpu.get_int_value(deadframe, 16) == -57344
-    assert cpu.get_int_value(deadframe, 17) == 1
-    assert cpu.get_int_value(deadframe, 18) == -1
-    if WORD == 4:
-        assert cpu.get_int_value(deadframe, 19) == -2147483648
-    elif WORD == 8:
-        assert cpu.get_int_value(deadframe, 19) == 19327352832
-    assert cpu.get_int_value(deadframe, 20) == -49
-
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -521,6 +521,8 @@
     # XXX some of the following functions should not become residual calls
     # but be really compiled
     rewrite_op_int_abs                = _do_builtin_call
+    rewrite_op_int_floordiv           = _do_builtin_call
+    rewrite_op_int_mod                = _do_builtin_call
     rewrite_op_llong_abs              = _do_builtin_call
     rewrite_op_llong_floordiv         = _do_builtin_call
     rewrite_op_llong_mod              = _do_builtin_call
@@ -530,7 +532,6 @@
     rewrite_op_gc_id                  = _do_builtin_call
     rewrite_op_gc_pin                 = _do_builtin_call
     rewrite_op_gc_unpin               = _do_builtin_call
-    rewrite_op_uint_mod               = _do_builtin_call
     rewrite_op_cast_float_to_uint     = _do_builtin_call
     rewrite_op_cast_uint_to_float     = _do_builtin_call
     rewrite_op_weakref_create         = _do_builtin_call
diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py
--- a/rpython/jit/codewriter/support.py
+++ b/rpython/jit/codewriter/support.py
@@ -248,6 +248,26 @@
     mask = x >> (LONG_BIT - 1)
     return (x ^ mask) - mask
 
+
+def _ll_2_int_floordiv(x, y):
+    # this is used only if the RPython program uses llop.int_floordiv()
+    # explicitly.  For 'a // b', see _handle_int_special() in jtransform.py.
+    # This is the reverse of rpython.rtyper.rint.ll_int_py_div(), i.e.
+    # the same logic as rpython.rtyper.lltypesystem.opimpl.op_int_floordiv
+    # but written in a no-branch style.
+    r = x // y
+    p = r * y
+    # the JIT knows that if x and y are both positive, this is just 'r'
+    return r + (((x ^ y) >> (LONG_BIT - 1)) & (p != x))
+
+def _ll_2_int_mod(x, y):
+    # same comments as _ll_2_int_floordiv()
+    r = x % y
+    # the JIT knows that if x and y are both positive, this doesn't change 'r'
+    r -= y & (((x ^ y) & (r | -r)) >> (LONG_BIT - 1))
+    return r
+
+
 def _ll_1_cast_uint_to_float(x):
     # XXX on 32-bit platforms, this should be done using cast_longlong_to_float
     # (which is a residual call right now in the x86 backend)
@@ -417,6 +437,8 @@
 # in the following calls to builtins, the JIT is allowed to look inside:
 inline_calls_to = [
     ('int_abs',              [lltype.Signed],                lltype.Signed),
+    ('int_floordiv',         [lltype.Signed, lltype.Signed], lltype.Signed),
+    ('int_mod',              [lltype.Signed, lltype.Signed], lltype.Signed),
     ('ll_math.ll_math_sqrt', [lltype.Float],                 lltype.Float),
 ]
 
diff --git a/rpython/jit/codewriter/test/test_flatten.py b/rpython/jit/codewriter/test/test_flatten.py
--- a/rpython/jit/codewriter/test/test_flatten.py
+++ b/rpython/jit/codewriter/test/test_flatten.py
@@ -478,7 +478,7 @@
             except ZeroDivisionError:
                 return -42
         self.encoding_test(f, [7, 2], """
-            residual_call_ir_i $<* fn ll_int_floordiv_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2
+            residual_call_ir_i $<* fn ll_int_py_div_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2
             -live-
             catch_exception L1
             int_return %i2
@@ -505,7 +505,7 @@
                 return 42
         # XXX so far, this really produces a int_mod_ovf_zer...
         self.encoding_test(f, [7, 2], """
-            residual_call_ir_i $<* fn ll_int_mod_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2
+            residual_call_ir_i $<* fn ll_int_py_mod_ovf_zer__Signed_Signed>, I[%i0, %i1], R[], <Descr> -> %i2
             -live-
             catch_exception L1
             int_return %i2
diff --git a/rpython/jit/codewriter/test/test_support.py b/rpython/jit/codewriter/test/test_support.py
--- a/rpython/jit/codewriter/test/test_support.py
+++ b/rpython/jit/codewriter/test/test_support.py
@@ -3,7 +3,6 @@
 from rpython.rtyper.annlowlevel import llstr
 from rpython.flowspace.model import Variable, Constant, SpaceOperation
 from rpython.jit.codewriter.support import decode_builtin_call, LLtypeHelpers
-from rpython.jit.codewriter.support import _ll_1_int_abs
 
 def newconst(x):
     return Constant(x, lltype.typeOf(x))
@@ -136,6 +135,7 @@
     py.test.raises(AttributeError, func, llstr(None), p2)
 
 def test_int_abs():
+    from rpython.jit.codewriter.support import _ll_1_int_abs
     assert _ll_1_int_abs(0) == 0
     assert _ll_1_int_abs(1) == 1
     assert _ll_1_int_abs(10) == 10
@@ -143,3 +143,14 @@
     assert _ll_1_int_abs(-1) == 1
     assert _ll_1_int_abs(-10) == 10
     assert _ll_1_int_abs(-sys.maxint) == sys.maxint
+
+def test_int_floordiv_mod():
+    from rpython.rtyper.lltypesystem.lloperation import llop
+    from rpython.jit.codewriter.support import _ll_2_int_floordiv, _ll_2_int_mod
+    for x in range(-6, 7):
+        for y in range(-3, 4):
+            if y != 0:
+                assert (_ll_2_int_floordiv(x, y) ==
+                        llop.int_floordiv(lltype.Signed, x, y))
+                assert (_ll_2_int_mod(x, y) ==
+                        llop.int_mod(lltype.Signed, x, y))
diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py
--- a/rpython/jit/metainterp/optimizeopt/intbounds.py
+++ b/rpython/jit/metainterp/optimizeopt/intbounds.py
@@ -97,17 +97,14 @@
         self.emit_operation(op)
 
         r = self.getintbound(op)
-        if b2.is_constant():
-            val = b2.lower
-            if val >= 0:
-                r.intersect(IntBound(0, val))
-        elif b1.is_constant():
-            val = b1.lower
-            if val >= 0:
-                r.intersect(IntBound(0, val))
-        elif b1.known_ge(IntBound(0, 0)) and b2.known_ge(IntBound(0, 0)):
-            lesser = min(b1.upper, b2.upper)
-            r.intersect(IntBound(0, next_pow2_m1(lesser)))
+        pos1 = b1.known_ge(IntBound(0, 0))
+        pos2 = b2.known_ge(IntBound(0, 0))
+        if pos1 or pos2:
+            r.make_ge(IntBound(0, 0))
+        if pos1:
+            r.make_le(b1)
+        if pos2:
+            r.make_le(b2)
 
     def optimize_INT_SUB(self, op):
         self.emit_operation(op)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -5188,6 +5188,25 @@
         """
         self.optimize_loop(ops, ops)
 
+    def test_int_and_positive(self):
+        ops = """
+        [i0, i1]
+        i2 = int_ge(i1, 0)
+        guard_true(i2) []
+        i3 = int_and(i0, i1)
+        i4 = int_ge(i3, 0)
+        guard_true(i4) []
+        jump(i3)
+        """
+        expected = """
+        [i0, i1]
+        i2 = int_ge(i1, 0)
+        guard_true(i2) []
+        i3 = int_and(i0, i1)
+        jump(i3)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_int_or_cmp_above_bounds(self):
         ops = """
         [p0,p1]
@@ -5252,6 +5271,47 @@
         """
         self.optimize_loop(ops, ops)
 
+    def test_int_xor_positive_is_positive(self):
+        ops = """
+        [i0, i1]
+        i2 = int_lt(i0, 0)
+        guard_false(i2) []
+        i3 = int_lt(i1, 0)
+        guard_false(i3) []
+        i4 = int_xor(i0, i1)
+        i5 = int_lt(i4, 0)
+        guard_false(i5) []
+        jump(i4, i0)
+        """
+        expected = """
+        [i0, i1]
+        i2 = int_lt(i0, 0)
+        guard_false(i2) []
+        i3 = int_lt(i1, 0)
+        guard_false(i3) []
+        i4 = int_xor(i0, i1)
+        jump(i4, i0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_positive_rshift_bits_minus_1(self):
+        ops = """
+        [i0]
+        i2 = int_lt(i0, 0)
+        guard_false(i2) []
+        i3 = int_rshift(i2, %d)
+        escape_n(i3)
+        jump(i0)
+        """ % (LONG_BIT - 1,)
+        expected = """
+        [i0]
+        i2 = int_lt(i0, 0)
+        guard_false(i2) []
+        escape_n(0)
+        jump(i0)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_int_or_same_arg(self):
         ops = """
         [i0]
diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -955,6 +955,75 @@
         res = self.meta_interp(f, [-5])
         assert res == 5+4+3+2+1+0+1+2+3+4+5+6+7+8+9
 
+    def test_int_c_div(self):
+        from rpython.rlib.rarithmetic import int_c_div
+        myjitdriver = JitDriver(greens = [], reds = ['i', 't'])
+        def f(i):
+            t = 0
+            while i < 10:
+                myjitdriver.can_enter_jit(i=i, t=t)
+                myjitdriver.jit_merge_point(i=i, t=t)
+                t += int_c_div(-100, i)
+                i += 1
+            return t
+        expected = -sum([100 // n for n in range(1, 10)])
+        assert f(1) == expected
+        res = self.meta_interp(f, [1])
+        assert res == expected
+        # should contain a call_i(..., OS=OS_INT_PY_DIV)
+
+    def test_int_c_mod(self):
+        from rpython.rlib.rarithmetic import int_c_mod
+        myjitdriver = JitDriver(greens = [], reds = ['i', 't'])
+        def f(i):
+            t = 0
+            while i < 10:
+                myjitdriver.can_enter_jit(i=i, t=t)
+                myjitdriver.jit_merge_point(i=i, t=t)
+                t += int_c_mod(-100, i)
+                i += 1
+            return t
+        expected = -sum([100 % n for n in range(1, 10)])
+        assert f(1) == expected
+        res = self.meta_interp(f, [1])
+        assert res == expected
+        # should contain a call_i(..., OS=OS_INT_PY_MOD)
+
+    def test_positive_c_div_mod(self):
+        from rpython.rlib.rarithmetic import int_c_div, int_c_mod
+        myjitdriver = JitDriver(greens = [], reds = ['i', 't'])
+        def f(i):
+            t = 0
+            while i < 10:
+                myjitdriver.can_enter_jit(i=i, t=t)
+                myjitdriver.jit_merge_point(i=i, t=t)
+                assert i > 0
+                t += int_c_div(100, i) - int_c_mod(100, i)
+                i += 1
+            return t
+        expected = sum([100 // n - 100 % n for n in range(1, 10)])
+        assert f(1) == expected
+        res = self.meta_interp(f, [1])
+        assert res == expected
+        # all the correction code should be dead now, xxx test that
+
+    def test_int_c_div_by_constant(self):
+        from rpython.rlib.rarithmetic import int_c_div
+        myjitdriver = JitDriver(greens = ['k'], reds = ['i', 't'])
+        def f(i, k):
+            t = 0
+            while i < 100:
+                myjitdriver.can_enter_jit(i=i, t=t, k=k)
+                myjitdriver.jit_merge_point(i=i, t=t, k=k)
+                t += int_c_div(i, k)
+                i += 1
+            return t
+        expected = sum([i // 10 for i in range(51, 100)])
+        assert f(-50, 10) == expected
+        res = self.meta_interp(f, [-50, 10])
+        assert res == expected
+        self.check_resops(call=0, uint_mul_high=2)
+
     def test_float(self):
         myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res'])
         def f(x, y):
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -281,11 +281,12 @@
                  large_object=8*WORD,
                  ArenaCollectionClass=None,
                  **kwds):
+        "NOT_RPYTHON"
         MovingGCBase.__init__(self, config, **kwds)
         assert small_request_threshold % WORD == 0
         self.read_from_env = read_from_env
         self.nursery_size = nursery_size
-        
+
         self.small_request_threshold = small_request_threshold
         self.major_collection_threshold = major_collection_threshold
         self.growth_rate_max = growth_rate_max
@@ -644,6 +645,7 @@
             # Get the memory from the nursery.  If there is not enough space
             # there, do a collect first.
             result = self.nursery_free
+            ll_assert(result != llmemory.NULL, "uninitialized nursery")
             self.nursery_free = new_free = result + totalsize
             if new_free > self.nursery_top:
                 result = self.collect_and_reserve(totalsize)
@@ -703,6 +705,7 @@
             # Get the memory from the nursery.  If there is not enough space
             # there, do a collect first.
             result = self.nursery_free
+            ll_assert(result != llmemory.NULL, "uninitialized nursery")
             self.nursery_free = new_free = result + totalsize
             if new_free > self.nursery_top:
                 result = self.collect_and_reserve(totalsize)
@@ -1139,7 +1142,8 @@
         Implemented a bit obscurely by checking an unrelated flag
         that can never be set on a young object -- except if tid == -42.
         """
-        assert self.is_in_nursery(obj)
+        ll_assert(self.is_in_nursery(obj),
+                  "Can't forward an object outside the nursery.")
         tid = self.header(obj).tid
         result = (tid & GCFLAG_FINALIZATION_ORDERING != 0)
         if result:
@@ -1463,7 +1467,8 @@
                 objhdr.tid |= GCFLAG_CARDS_SET
 
         remember_young_pointer_from_array2._dont_inline_ = True
-        assert self.card_page_indices > 0
+        ll_assert(self.card_page_indices > 0,
+                  "non-positive card_page_indices")
         self.remember_young_pointer_from_array2 = (
             remember_young_pointer_from_array2)
 
@@ -1513,7 +1518,8 @@
             return True
         # ^^^ a fast path of write-barrier
         #
-        if source_hdr.tid & GCFLAG_HAS_CARDS != 0:
+        if (self.card_page_indices > 0 and     # check constant-folded
+            source_hdr.tid & GCFLAG_HAS_CARDS != 0):
             #
             if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
                 # The source object may have random young pointers.
@@ -1548,7 +1554,8 @@
 
     def manually_copy_card_bits(self, source_addr, dest_addr, length):
         # manually copy the individual card marks from source to dest
-        assert self.card_page_indices > 0
+        ll_assert(self.card_page_indices > 0,
+                  "non-positive card_page_indices")
         bytes = self.card_marking_bytes_for_length(length)
         #
         anybyte = 0
@@ -1721,12 +1728,15 @@
         nursery_barriers = self.AddressDeque()
         prev = self.nursery
         self.surviving_pinned_objects.sort()
-        assert self.pinned_objects_in_nursery == \
-            self.surviving_pinned_objects.length()
+        ll_assert(
+            self.pinned_objects_in_nursery == \
+            self.surviving_pinned_objects.length(),
+            "pinned_objects_in_nursery != surviving_pinned_objects.length()")
         while self.surviving_pinned_objects.non_empty():
             #
             cur = self.surviving_pinned_objects.pop()
-            assert cur >= prev
+            ll_assert(
+                cur >= prev, "pinned objects encountered in backwards order")
             #
             # clear the arena between the last pinned object (or arena start)
             # and the pinned object
@@ -1784,7 +1794,8 @@
         debug_stop("gc-minor")
 
     def _reset_flag_old_objects_pointing_to_pinned(self, obj, ignore):
-        assert self.header(obj).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN
+        ll_assert(self.header(obj).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN != 0,
+                  "!GCFLAG_PINNED_OBJECT_PARENT_KNOWN, but requested to reset.")
         self.header(obj).tid &= ~GCFLAG_PINNED_OBJECT_PARENT_KNOWN
 
     def _visit_old_objects_pointing_to_pinned(self, obj, ignore):
diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py
--- a/rpython/memory/gc/test/test_direct.py
+++ b/rpython/memory/gc/test/test_direct.py
@@ -554,6 +554,7 @@
         assert res # we optimized it
         assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS == 0 # and we copied the flag
         #
+        self.gc.card_page_indices = 128     # force > 0
         hdr_src.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
         hdr_dst.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
         hdr_src.tid |= minimark.GCFLAG_HAS_CARDS
diff --git a/rpython/rlib/clibffi.py b/rpython/rlib/clibffi.py
--- a/rpython/rlib/clibffi.py
+++ b/rpython/rlib/clibffi.py
@@ -148,7 +148,8 @@
                                                  ('elements', FFI_TYPE_PP)])
 
     ffi_cif = rffi_platform.Struct('ffi_cif', [])
-    ffi_closure = rffi_platform.Struct('ffi_closure', [])
+    ffi_closure = rffi_platform.Struct('ffi_closure',
+                                       [('user_data', rffi.VOIDP)])
 
 def add_simple_type(type_name):
     for name in ['size', 'alignment', 'type']:
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -650,6 +650,26 @@
     from rpython.rtyper.lltypesystem.lloperation import llop
     return llop.int_force_ge_zero(lltype.Signed, n)
 
+def int_c_div(x, y):
+    """Return the result of the C-style 'x / y'.  This differs from the
+    Python-style division if (x < 0  xor y < 0).  The JIT implements it
+    with a Python-style division followed by correction code.  This
+    is not that bad, because the JIT removes the correction code if
+    x and y are both nonnegative, and if y is any nonnegative constant
+    then the division turns into a rshift or a mul.
+    """
+    from rpython.rtyper.lltypesystem import lltype
+    from rpython.rtyper.lltypesystem.lloperation import llop
+    return llop.int_floordiv(lltype.Signed, x, y)
+
+def int_c_mod(x, y):
+    """Return the result of the C-style 'x % y'.  This differs from the
+    Python-style division if (x < 0  xor y < 0).
+    """
+    from rpython.rtyper.lltypesystem import lltype
+    from rpython.rtyper.lltypesystem.lloperation import llop
+    return llop.int_mod(lltype.Signed, x, y)
+
 @objectmodel.specialize.ll()
 def byteswap(arg):
     """ Convert little->big endian and the opposite
diff --git a/rpython/rlib/rvmprof/src/vmprof_config.h b/rpython/rlib/rvmprof/src/vmprof_config.h
--- a/rpython/rlib/rvmprof/src/vmprof_config.h
+++ b/rpython/rlib/rvmprof/src/vmprof_config.h
@@ -1,10 +1,17 @@
-#define HAVE_SYS_UCONTEXT_H
+#if !defined(__OpenBSD__)
+#  define HAVE_SYS_UCONTEXT_H
+#else
+#  define HAVE_SIGNAL_H
+#endif
+
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
   #ifdef __i386__
     #define PC_FROM_UCONTEXT uc_mcontext.mc_eip
   #else
     #define PC_FROM_UCONTEXT uc_mcontext.mc_rip
   #endif
+#elif defined(__OpenBSD__)
+#define PC_FROM_UCONTEXT sc_rip
 #elif defined( __APPLE__)
   #if ((ULONG_MAX) == (UINT_MAX))
     #define PC_FROM_UCONTEXT uc_mcontext->__ss.__eip
diff --git a/rpython/rlib/rvmprof/src/vmprof_getpc.h b/rpython/rlib/rvmprof/src/vmprof_getpc.h
--- a/rpython/rlib/rvmprof/src/vmprof_getpc.h
+++ b/rpython/rlib/rvmprof/src/vmprof_getpc.h
@@ -65,6 +65,10 @@
 #elif defined(HAVE_CYGWIN_SIGNAL_H)
 #include <cygwin/signal.h>
 typedef ucontext ucontext_t;
+#elif defined(HAVE_SIGNAL_H)
+#include <signal.h>
+#else
+#  error "don't know how to get the pc on this platform"
 #endif
 
 
diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py
--- a/rpython/rlib/test/test_rarithmetic.py
+++ b/rpython/rlib/test/test_rarithmetic.py
@@ -2,6 +2,7 @@
 from rpython.rtyper.test.test_llinterp import interpret
 from rpython.rlib.rarithmetic import *
 from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError
+from hypothesis import given, strategies
 import sys
 import py
 
@@ -393,6 +394,21 @@
     assert not int_between(1, 2, 2)
     assert not int_between(1, 1, 1)
 
+def test_int_force_ge_zero():
+    assert int_force_ge_zero(42) == 42
+    assert int_force_ge_zero(0) == 0
+    assert int_force_ge_zero(-42) == 0
+
+@given(strategies.integers(min_value=0, max_value=sys.maxint),
+       strategies.integers(min_value=1, max_value=sys.maxint))
+def test_int_c_div_mod(x, y):
+    assert int_c_div(~x, y) == -(abs(~x) // y)
+    assert int_c_div( x,-y) == -(x // y)
+    assert int_c_div(~x,-y) == +(abs(~x) // y)
+    for x1 in [x, ~x]:
+        for y1 in [y, -y]:
+            assert int_c_div(x1, y1) * y1 + int_c_mod(x1, y1) == x1
+
 # these can't be prebuilt on 32bit
 U1 = r_ulonglong(0x0102030405060708L)
 U2 = r_ulonglong(0x0807060504030201L)
diff --git a/rpython/rtyper/rint.py b/rpython/rtyper/rint.py
--- a/rpython/rtyper/rint.py
+++ b/rpython/rtyper/rint.py
@@ -236,11 +236,11 @@
         return _rtype_template(hop, 'mul_ovf')
 
     def rtype_floordiv(_, hop):
-        return _rtype_call_helper(hop, 'floordiv', [ZeroDivisionError])
+        return _rtype_call_helper(hop, 'py_div', [ZeroDivisionError])
     rtype_inplace_floordiv = rtype_floordiv
 
     def rtype_floordiv_ovf(_, hop):
-        return _rtype_call_helper(hop, 'floordiv_ovf', [ZeroDivisionError])
+        return _rtype_call_helper(hop, 'py_div_ovf', [ZeroDivisionError])
 
     # turn 'div' on integers into 'floordiv'
     rtype_div         = rtype_floordiv
@@ -250,11 +250,11 @@
     # 'def rtype_truediv' is delegated to the superclass FloatRepr
 
     def rtype_mod(_, hop):
-        return _rtype_call_helper(hop, 'mod', [ZeroDivisionError])
+        return _rtype_call_helper(hop, 'py_mod', [ZeroDivisionError])
     rtype_inplace_mod = rtype_mod
 
     def rtype_mod_ovf(_, hop):
-        return _rtype_call_helper(hop, 'mod_ovf', [ZeroDivisionError])
+        return _rtype_call_helper(hop, 'py_mod_ovf', [ZeroDivisionError])
 
     def rtype_xor(_, hop):
         return _rtype_template(hop, 'xor')
@@ -319,7 +319,7 @@
     vlist = hop.inputargs(repr, repr2)
     prefix = repr.opprefix
 
-    if '_ovf' in func or func.startswith(('mod', 'floordiv')):
+    if '_ovf' in func or func.startswith(('py_mod', 'py_div')):
         if prefix+func not in ('int_add_ovf', 'int_add_nonneg_ovf',
                                'int_sub_ovf', 'int_mul_ovf'):
             raise TyperError("%r should not be used here any more" % (func,))
@@ -353,7 +353,7 @@
             any_implicit_exception = True
 
     if not any_implicit_exception:
-        if not func.startswith(('mod', 'floordiv')):
+        if not func.startswith(('py_mod', 'py_div')):
             return _rtype_template(hop, func)
 
     repr = hop.r_result
@@ -388,7 +388,7 @@
 # ---------- floordiv ----------
 
 @jit.oopspec("int.py_div(x, y)")
-def ll_int_floordiv(x, y):
+def ll_int_py_div(x, y):
     # Python, and RPython, assume that integer division truncates
     # towards -infinity.  However, in C, integer division truncates
     # towards 0.  So assuming that, we need to apply a correction
@@ -400,159 +400,159 @@
     return r + (u >> INT_BITS_1)
 
 @jit.oopspec("int.py_div(x, y)")
-def ll_int_floordiv_nonnegargs(x, y):
+def ll_int_py_div_nonnegargs(x, y):
     from rpython.rlib.debug import ll_assert
     r = llop.int_floordiv(Signed, x, y)            # <= truncates like in C
-    ll_assert(r >= 0, "int_floordiv_nonnegargs(): one arg is negative")
+    ll_assert(r >= 0, "int_py_div_nonnegargs(): one arg is negative")
     return r
 
-def ll_int_floordiv_zer(x, y):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to