[pypy-commit] pypy default: merge the new s390x backend

plan_rich Thu, 10 Mar 2016 06:44:21 -0800

Author: Richard Plangger <[email protected]>
Branch: 
Changeset: r82941:308a29538cda
Date: 2016-03-10 15:40 +0100
http://bitbucket.org/pypy/pypy/changeset/308a29538cda/


Log:    merge the new s390x backend

diff too long, truncating to 2000 out of 11876 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -74,5 +74,6 @@
 ^rpython/doc/_build/.*$
 ^compiled
 ^.git/
+^.hypothesis/
 ^release/
 ^rpython/_cache$
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,15 @@
 .. this is a revision shortly after release-5.0
 .. startrev: b238b48f9138
 
+.. branch: memop-simplify3
+
+Simplification of zero_array. Start and end index are scaled using res ops (or 
cpu scaling) rather than doing it manually.
+
+.. branch: s390x-backend
+
+The jit compiler backend implementation for the s390x architecutre.
+
+.. branch: s390x-enhance-speed
+
+Refactoring to only store 64-bit values in the literal pool of the assembly. 
Generated machine code uses less space and runs faster.
+
diff --git a/pypy/module/__pypy__/test/test_signal.py 
b/pypy/module/__pypy__/test/test_signal.py
--- a/pypy/module/__pypy__/test/test_signal.py
+++ b/pypy/module/__pypy__/test/test_signal.py
@@ -2,7 +2,6 @@
 
 from pypy.module.thread.test.support import GenericTestThread
 
-
 class AppTestMinimal:
     spaceconfig = dict(usemodules=['__pypy__'])
 
diff --git a/pypy/module/_file/test/test_file_extra.py 
b/pypy/module/_file/test/test_file_extra.py
--- a/pypy/module/_file/test/test_file_extra.py
+++ b/pypy/module/_file/test/test_file_extra.py
@@ -389,6 +389,7 @@
 
     def test_writelines(self):
         import array
+        import sys
         fn = self.temptestfile
         with file(fn, 'w') as f:
             f.writelines(['abc'])
@@ -406,7 +407,10 @@
             exc = raises(TypeError, f.writelines, [memoryview('jkl')])
             assert str(exc.value) == "writelines() argument must be a sequence 
of strings"
         out = open(fn, 'rb').readlines()[0]
-        assert out[0:5] == 'abcd\x00'
+        if sys.byteorder == 'big':
+            assert out[0:7] == 'abc\x00\x00\x00d'
+        else:
+            assert out[0:5] == 'abcd\x00'
         assert out[-3:] == 'ghi'
 
         with file(fn, 'wb') as f:
diff --git a/pypy/module/_rawffi/callback.py b/pypy/module/_rawffi/callback.py
--- a/pypy/module/_rawffi/callback.py
+++ b/pypy/module/_rawffi/callback.py
@@ -1,17 +1,23 @@
-
+import sys
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from rpython.rtyper.lltypesystem import lltype, rffi
 from pypy.module._rawffi.interp_rawffi import write_ptr
 from pypy.module._rawffi.structure import W_Structure
 from pypy.module._rawffi.interp_rawffi import (W_DataInstance, letter2tp,
-     unwrap_value, unpack_argshapes, got_libffi_error)
+     unwrap_value, unpack_argshapes, got_libffi_error, is_narrow_integer_type,
+     LL_TYPEMAP, NARROW_INTEGER_TYPES)
 from rpython.rlib.clibffi import USERDATA_P, CallbackFuncPtr, FUNCFLAG_CDECL
 from rpython.rlib.clibffi import ffi_type_void, LibFFIError
 from rpython.rlib import rweakref
 from pypy.module._rawffi.tracker import tracker
 from pypy.interpreter.error import OperationError
 from pypy.interpreter import gateway
+from rpython.rlib.unroll import unrolling_iterable
+
+BIGENDIAN = sys.byteorder == 'big'
+
+unroll_narrow_integer_types = unrolling_iterable(NARROW_INTEGER_TYPES)
 
 app = gateway.applevel('''
     def tbprint(tb, err):
@@ -42,8 +48,17 @@
                 args_w[i] = space.wrap(rffi.cast(rffi.ULONG, ll_args[i]))
         w_res = space.call(w_callable, space.newtuple(args_w))
         if callback_ptr.result is not None: # don't return void
-            unwrap_value(space, write_ptr, ll_res, 0,
-                         callback_ptr.result, w_res)
+            ptr = ll_res
+            letter = callback_ptr.result
+            if BIGENDIAN:
+                # take care of narrow integers!
+                for int_type in unroll_narrow_integer_types:
+                    if int_type == letter:
+                        T = LL_TYPEMAP[int_type]
+                        n = rffi.sizeof(lltype.Signed) - rffi.sizeof(T)
+                        ptr = rffi.ptradd(ptr, n)
+                        break
+            unwrap_value(space, write_ptr, ptr, 0, letter, w_res)
     except OperationError, e:
         tbprint(space, space.wrap(e.get_traceback()),
                 space.wrap(e.errorstr(space)))
diff --git a/pypy/module/_rawffi/interp_rawffi.py 
b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -1,3 +1,4 @@
+import sys
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
 from pypy.interpreter.gateway import interp2app, unwrap_spec
@@ -19,6 +20,8 @@
 from pypy.module._rawffi.buffer import RawFFIBuffer
 from pypy.module._rawffi.tracker import tracker
 
+BIGENDIAN = sys.byteorder == 'big'
+
 TYPEMAP = {
     # XXX A mess with unsigned/signed/normal chars :-/
     'c' : ffi_type_uchar,
@@ -331,10 +334,14 @@
             if tracker.DO_TRACING:
                 ll_buf = rffi.cast(lltype.Signed, self.ll_buffer)
                 tracker.trace_allocation(ll_buf, self)
+        self._ll_buffer = self.ll_buffer
 
     def getbuffer(self, space):
         return space.wrap(rffi.cast(lltype.Unsigned, self.ll_buffer))
 
+    def buffer_advance(self, n):
+        self.ll_buffer = rffi.ptradd(self.ll_buffer, n)
+
     def byptr(self, space):
         from pypy.module._rawffi.array import ARRAY_OF_PTRS
         array = ARRAY_OF_PTRS.allocate(space, 1)
@@ -342,16 +349,17 @@
         return space.wrap(array)
 
     def free(self, space):
-        if not self.ll_buffer:
+        if not self._ll_buffer:
             raise segfault_exception(space, "freeing NULL pointer")
         self._free()
 
     def _free(self):
         if tracker.DO_TRACING:
-            ll_buf = rffi.cast(lltype.Signed, self.ll_buffer)
+            ll_buf = rffi.cast(lltype.Signed, self._ll_buffer)
             tracker.trace_free(ll_buf)
-        lltype.free(self.ll_buffer, flavor='raw')
+        lltype.free(self._ll_buffer, flavor='raw')
         self.ll_buffer = lltype.nullptr(rffi.VOIDP.TO)
+        self._ll_buffer = self.ll_buffer
 
     def buffer_w(self, space, flags):
         return RawFFIBuffer(self)
@@ -432,12 +440,19 @@
                          space.wrap("cannot directly read value"))
 wrap_value._annspecialcase_ = 'specialize:arg(1)'
 
+NARROW_INTEGER_TYPES = 'cbhiBIH?'
+
+def is_narrow_integer_type(letter):
+    return letter in NARROW_INTEGER_TYPES
 
 class W_FuncPtr(W_Root):
     def __init__(self, space, ptr, argshapes, resshape):
         self.ptr = ptr
         self.argshapes = argshapes
         self.resshape = resshape
+        self.narrow_integer = False
+        if resshape is not None:
+            self.narrow_integer = 
is_narrow_integer_type(resshape.itemcode.lower())
 
     def getbuffer(self, space):
         return space.wrap(rffi.cast(lltype.Unsigned, self.ptr.funcsym))
@@ -497,6 +512,10 @@
                 result = self.resshape.allocate(space, 1, autofree=True)
                 # adjust_return_size() was used here on result.ll_buffer
                 self.ptr.call(args_ll, result.ll_buffer)
+                if BIGENDIAN and self.narrow_integer:
+                    # we get a 8 byte value in big endian
+                    n = rffi.sizeof(lltype.Signed) - result.shape.size
+                    result.buffer_advance(n)
                 return space.wrap(result)
             else:
                 self.ptr.call(args_ll, lltype.nullptr(rffi.VOIDP.TO))
diff --git a/pypy/module/_rawffi/structure.py b/pypy/module/_rawffi/structure.py
--- a/pypy/module/_rawffi/structure.py
+++ b/pypy/module/_rawffi/structure.py
@@ -18,6 +18,9 @@
 from rpython.rlib.rarithmetic import intmask, signedtype, r_uint, \
     r_ulonglong
 from rpython.rtyper.lltypesystem import lltype, rffi
+import sys
+
+IS_BIG_ENDIAN = sys.byteorder == 'big'
 
 
 
@@ -114,20 +117,32 @@
                 size += intmask(fieldsize)
                 bitsizes.append(fieldsize)
             elif field_type == NEW_BITFIELD:
-                bitsizes.append((bitsize << 16) + bitoffset)
+                if IS_BIG_ENDIAN:
+                    off = last_size - bitoffset - bitsize
+                else:
+                    off = bitoffset
+                bitsizes.append((bitsize << 16) + off)
                 bitoffset = bitsize
                 size = round_up(size, fieldalignment)
                 pos.append(size)
                 size += fieldsize
             elif field_type == CONT_BITFIELD:
-                bitsizes.append((bitsize << 16) + bitoffset)
+                if IS_BIG_ENDIAN:
+                    off = last_size - bitoffset - bitsize
+                else:
+                    off = bitoffset
+                bitsizes.append((bitsize << 16) + off)
                 bitoffset += bitsize
                 # offset is already updated for the NEXT field
                 pos.append(size - fieldsize)
             elif field_type == EXPAND_BITFIELD:
                 size += fieldsize - last_size / 8
                 last_size = fieldsize * 8
-                bitsizes.append((bitsize << 16) + bitoffset)
+                if IS_BIG_ENDIAN:
+                    off = last_size - bitoffset - bitsize
+                else:
+                    off = bitoffset
+                bitsizes.append((bitsize << 16) + off)
                 bitoffset += bitsize
                 # offset is already updated for the NEXT field
                 pos.append(size - fieldsize)
diff --git a/pypy/module/_rawffi/test/test__rawffi.py 
b/pypy/module/_rawffi/test/test__rawffi.py
--- a/pypy/module/_rawffi/test/test__rawffi.py
+++ b/pypy/module/_rawffi/test/test__rawffi.py
@@ -704,7 +704,6 @@
         def compare(a, b):
             a1 = 
_rawffi.Array('i').fromaddress(_rawffi.Array('P').fromaddress(a, 1)[0], 1)
             a2 = 
_rawffi.Array('i').fromaddress(_rawffi.Array('P').fromaddress(b, 1)[0], 1)
-            print "comparing", a1[0], "with", a2[0]
             if a1[0] not in [1,2,3,4] or a2[0] not in [1,2,3,4]:
                 bogus_args.append((a1[0], a2[0]))
             if a1[0] > a2[0]:
@@ -715,7 +714,7 @@
         a2[0] = len(ll_to_sort)
         a3 = _rawffi.Array('l')(1)
         a3[0] = struct.calcsize('i')
-        cb = _rawffi.CallbackPtr(compare, ['P', 'P'], 'i')
+        cb = _rawffi.CallbackPtr(compare, ['P', 'P'], 'l')
         a4 = cb.byptr()
         qsort(a1, a2, a3, a4)
         res = [ll_to_sort[i] for i in range(len(ll_to_sort))]
@@ -896,11 +895,21 @@
         b = _rawffi.Array('c').fromaddress(a.buffer, 38)
         if sys.maxunicode > 65535:
             # UCS4 build
-            assert b[0] == 'x'
-            assert b[1] == '\x00'
-            assert b[2] == '\x00'
-            assert b[3] == '\x00'
-            assert b[4] == 'y'
+            if sys.byteorder == 'big':
+                assert b[0] == '\x00'
+                assert b[1] == '\x00'
+                assert b[2] == '\x00'
+                assert b[3] == 'x'
+                assert b[4] == '\x00'
+                assert b[5] == '\x00'
+                assert b[6] == '\x00'
+                assert b[7] == 'y'
+            else:
+                assert b[0] == 'x'
+                assert b[1] == '\x00'
+                assert b[2] == '\x00'
+                assert b[3] == '\x00'
+                assert b[4] == 'y'
         else:
             # UCS2 build
             assert b[0] == 'x'
diff --git a/pypy/module/_rawffi/test/test_struct.py 
b/pypy/module/_rawffi/test/test_struct.py
--- a/pypy/module/_rawffi/test/test_struct.py
+++ b/pypy/module/_rawffi/test/test_struct.py
@@ -1,4 +1,4 @@
-
+import sys
 from pypy.module._rawffi.structure import size_alignment_pos
 from pypy.module._rawffi.interp_rawffi import TYPEMAP, letter2tp
 
@@ -63,4 +63,7 @@
          for (name, t, size) in fields])
     assert size == 8
     assert pos == [0, 0, 0]
-    assert bitsizes == [0x10000, 0x3e0001, 0x1003f]
+    if sys.byteorder == 'little':
+        assert bitsizes == [0x10000, 0x3e0001, 0x1003f]
+    else:
+        assert bitsizes == [0x1003f, 0x3e0001, 0x10000]
diff --git a/pypy/module/_vmprof/conftest.py b/pypy/module/_vmprof/conftest.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/conftest.py
@@ -0,0 +1,6 @@
+import py, os
+
+def pytest_collect_directory(path, parent):
+    if os.uname()[4] == 's390x':
+        py.test.skip("zarch tests skipped")
+pytest_collect_file = pytest_collect_directory
diff --git a/pypy/module/cppyy/src/dummy_backend.cxx 
b/pypy/module/cppyy/src/dummy_backend.cxx
--- a/pypy/module/cppyy/src/dummy_backend.cxx
+++ b/pypy/module/cppyy/src/dummy_backend.cxx
@@ -390,7 +390,7 @@
         ((dummy::cppyy_test_data*)self)->destroy_arrays();
     } else if (idx == s_methods["cppyy_test_data::set_bool"]) {
         assert(self && nargs == 1);
-        
((dummy::cppyy_test_data*)self)->set_bool((bool)((CPPYY_G__value*)args)[0].obj.in);
+        
((dummy::cppyy_test_data*)self)->set_bool((bool)((CPPYY_G__value*)args)[0].obj.i);
     } else if (idx == s_methods["cppyy_test_data::set_char"]) {
         assert(self && nargs == 1);
         
((dummy::cppyy_test_data*)self)->set_char(((CPPYY_G__value*)args)[0].obj.ch);
diff --git a/pypy/module/cpyext/test/test_arraymodule.py 
b/pypy/module/cpyext/test/test_arraymodule.py
--- a/pypy/module/cpyext/test/test_arraymodule.py
+++ b/pypy/module/cpyext/test/test_arraymodule.py
@@ -51,13 +51,19 @@
         assert arr.tolist() == [1, 23, 4]
 
     def test_buffer(self):
+        import sys
         module = self.import_module(name='array')
         arr = module.array('i', [1,2,3,4])
         buf = buffer(arr)
         exc = raises(TypeError, "buf[1] = '1'")
         assert str(exc.value) == "buffer is read-only"
-        # XXX big-endian
-        assert str(buf) == ('\x01\0\0\0'
-                            '\x02\0\0\0'
-                            '\x03\0\0\0'
-                            '\x04\0\0\0')
+        if sys.byteorder == 'big':
+            assert str(buf) == ('\0\0\0\x01'
+                                '\0\0\0\x02'
+                                '\0\0\0\x03'
+                                '\0\0\0\x04')
+        else:
+            assert str(buf) == ('\x01\0\0\0'
+                                '\x02\0\0\0'
+                                '\x03\0\0\0'
+                                '\x04\0\0\0')
diff --git a/pypy/module/cpyext/test/test_typeobject.py 
b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -606,7 +606,7 @@
                 long intval;
                 PyObject *name;
 
-                if (!PyArg_ParseTuple(args, "i", &intval))
+                if (!PyArg_ParseTuple(args, "l", &intval))
                     return NULL;
 
                 IntLike_Type.tp_as_number = &intlike_as_number;
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py 
b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -386,11 +386,11 @@
                 lltype.free(pendian, flavor='raw')
 
         test("\x61\x00\x62\x00\x63\x00\x64\x00", -1)
-
-        test("\x61\x00\x62\x00\x63\x00\x64\x00", None)
-
+        if sys.byteorder == 'big':
+            test("\x00\x61\x00\x62\x00\x63\x00\x64", None)
+        else:
+            test("\x61\x00\x62\x00\x63\x00\x64\x00", None)
         test("\x00\x61\x00\x62\x00\x63\x00\x64", 1)
-
         test("\xFE\xFF\x00\x61\x00\x62\x00\x63\x00\x64", 0, 1)
         test("\xFF\xFE\x61\x00\x62\x00\x63\x00\x64\x00", 0, -1)
 
@@ -423,7 +423,10 @@
 
         test("\x61\x00\x00\x00\x62\x00\x00\x00", -1)
 
-        test("\x61\x00\x00\x00\x62\x00\x00\x00", None)
+        if sys.byteorder == 'big':
+            test("\x00\x00\x00\x61\x00\x00\x00\x62", None)
+        else:
+            test("\x61\x00\x00\x00\x62\x00\x00\x00", None)
 
         test("\x00\x00\x00\x61\x00\x00\x00\x62", 1)
 
diff --git a/pypy/module/marshal/test/test_marshalimpl.py 
b/pypy/module/marshal/test/test_marshalimpl.py
--- a/pypy/module/marshal/test/test_marshalimpl.py
+++ b/pypy/module/marshal/test/test_marshalimpl.py
@@ -64,14 +64,17 @@
     import marshal, struct
 
     class FakeM:
+        # NOTE: marshal is platform independent, running this test must assume
+        # that self.seen gets values from the endianess of the marshal module.
+        # (which is little endian!)
         def __init__(self):
             self.seen = []
         def start(self, code):
             self.seen.append(code)
         def put_int(self, value):
-            self.seen.append(struct.pack("i", value))
+            self.seen.append(struct.pack("<i", value))
         def put_short(self, value):
-            self.seen.append(struct.pack("h", value))
+            self.seen.append(struct.pack("<h", value))
 
     def _marshal_check(x):
         expected = marshal.dumps(long(x))
diff --git a/pypy/module/micronumpy/test/test_dtypes.py 
b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -351,7 +351,10 @@
         assert np.dtype(xyz).name == 'xyz'
         # another obscure API, used in numpy record.py
         a = np.dtype((xyz, [('x', 'int32'), ('y', 'float32')]))
-        assert "[('x', '<i4'), ('y', '<f4')]" in repr(a)
+        if sys.byteorder == 'big':
+            assert "[('x', '>i4'), ('y', '>f4')]" in repr(a)
+        else:
+            assert "[('x', '<i4'), ('y', '<f4')]" in repr(a)
         assert 'xyz' in repr(a)
         data = [(1, 'a'), (2, 'bbb')]
         b = np.dtype((xyz, [('a', int), ('b', object)]))
@@ -361,7 +364,10 @@
             arr = np.array(data, dtype=b)
             assert arr[0][0] == 1
             assert arr[0][1] == 'a'
-        b = np.dtype((xyz, [("col1", "<i4"), ("col2", "<i4"), ("col3", 
"<i4")]))
+        # NOTE if micronumpy is completed, we might extend this test to check 
both
+        # "<i4" and ">i4"
+        E = '<' if sys.byteorder == 'little' else '>'
+        b = np.dtype((xyz, [("col1", E+"i4"), ("col2", E+"i4"), ("col3", 
E+"i4")]))
         data = [(1, 2,3), (4, 5, 6)]
         a = np.array(data, dtype=b)
         x = pickle.loads(pickle.dumps(a))
@@ -423,18 +429,20 @@
         assert hash(t5) != hash(t6)
 
     def test_pickle(self):
+        import sys
         import numpy as np
         from numpy import array, dtype
         from cPickle import loads, dumps
         a = array([1,2,3])
+        E = '<' if sys.byteorder == 'little' else '>'
         if self.ptr_size == 8:
-            assert a.dtype.__reduce__() == (dtype, ('i8', 0, 1), (3, '<', 
None, None, None, -1, -1, 0))
+            assert a.dtype.__reduce__() == (dtype, ('i8', 0, 1), (3, E, None, 
None, None, -1, -1, 0))
         else:
-            assert a.dtype.__reduce__() == (dtype, ('i4', 0, 1), (3, '<', 
None, None, None, -1, -1, 0))
+            assert a.dtype.__reduce__() == (dtype, ('i4', 0, 1), (3, E, None, 
None, None, -1, -1, 0))
         assert loads(dumps(a.dtype)) == a.dtype
         assert np.dtype('bool').__reduce__() == (dtype, ('b1', 0, 1), (3, '|', 
None, None, None, -1, -1, 0))
         assert np.dtype('|V16').__reduce__() == (dtype, ('V16', 0, 1), (3, 
'|', None, None, None, 16, 1, 0))
-        assert np.dtype(('<f8', 2)).__reduce__() == (dtype, ('V16', 0, 1), (3, 
'|', (dtype('float64'), (2,)), None, None, 16, 8, 0))
+        assert np.dtype((E+'f8', 2)).__reduce__() == (dtype, ('V16', 0, 1), 
(3, '|', (dtype('float64'), (2,)), None, None, 16, 8, 0))
 
     def test_newbyteorder(self):
         import numpy as np
@@ -916,6 +924,7 @@
 
     def test_dtype_str(self):
         from numpy import dtype
+        import sys
         byteorder = self.native_prefix
         assert dtype('i8').str == byteorder + 'i8'
         assert dtype('<i8').str == '<i8'
@@ -937,7 +946,8 @@
         assert dtype('unicode').str == byteorder + 'U0'
         assert dtype(('string', 7)).str == '|S7'
         assert dtype('=S5').str == '|S5'
-        assert dtype(('unicode', 7)).str == '<U7'
+        assert dtype(('unicode', 7)).str == \
+               ('<' if sys.byteorder == 'little' else '>')+'U7'
         assert dtype([('', 'f8')]).str == "|V8"
         assert dtype(('f8', 2)).str == "|V16"
 
@@ -968,8 +978,12 @@
 
     def test_isnative(self):
         from numpy import dtype
+        import sys
         assert dtype('i4').isnative == True
-        assert dtype('>i8').isnative == False
+        if sys.byteorder == 'big':
+            assert dtype('<i8').isnative == False
+        else:
+            assert dtype('>i8').isnative == False
 
     def test_any_all_nonzero(self):
         import numpy
@@ -1185,6 +1199,7 @@
     def test_setstate(self):
         import numpy as np
         import sys
+        E = '<' if sys.byteorder == 'little' else '>'
         d = np.dtype('f8')
         d.__setstate__((3, '|', (np.dtype('float64'), (2,)), None, None, 20, 
1, 0))
         assert d.str == ('<' if sys.byteorder == 'little' else '>') + 'f8'
@@ -1201,7 +1216,7 @@
         assert d.shape == (2,)
         assert d.itemsize == 8
         assert d.subdtype is not None
-        assert repr(d) == "dtype(('<f8', (2,)))"
+        assert repr(d) == "dtype(('{E}f8', (2,)))".format(E=E)
 
         d = np.dtype(('<f8', 2))
         assert d.fields is None
@@ -1216,7 +1231,7 @@
         assert d.shape == (2,)
         assert d.itemsize == 20
         assert d.subdtype is not None
-        assert repr(d) == "dtype(('<f8', (2,)))"
+        assert repr(d) == "dtype(('{E}f8', (2,)))".format(E=E)
 
         d = np.dtype(('<f8', 2))
         d.__setstate__((3, '|', (np.dtype('float64'), 2), None, None, 20, 1, 
0))
@@ -1224,7 +1239,7 @@
         assert d.shape == (2,)
         assert d.itemsize == 20
         assert d.subdtype is not None
-        assert repr(d) == "dtype(('<f8', (2,)))"
+        assert repr(d) == "dtype(('{E}f8', (2,)))".format(E=E)
 
         d = np.dtype(('<f8', 2))
         exc = raises(ValueError, "d.__setstate__((3, '|', None, ('f0', 'f1'), 
None, 16, 1, 0))")
@@ -1256,14 +1271,14 @@
         assert d.fields is not None
         assert d.shape == (2,)
         assert d.subdtype is not None
-        assert repr(d) == "dtype([('f0', '<f8'), ('f1', '<f8')])"
+        assert repr(d) == "dtype([('f0', '{E}f8'), ('f1', 
'{E}f8')])".format(E=E)
 
         d = np.dtype(('<f8', 2))
         d.__setstate__((3, '|', None, ('f0', 'f1'), {'f0': 
(np.dtype('float64'), 0), 'f1': (np.dtype('float64'), 8)}, 16, 1, 0))
         assert d.fields is not None
         assert d.shape == ()
         assert d.subdtype is None
-        assert repr(d) == "dtype([('f0', '<f8'), ('f1', '<f8')])"
+        assert repr(d) == "dtype([('f0', '{E}f8'), ('f1', 
'{E}f8')])".format(E=E)
 
         d = np.dtype(('<f8', 2))
         d.__setstate__((3, '|', None, ('f0', 'f1'), {'f0': 
(np.dtype('float64'), 0), 'f1': (np.dtype('float64'), 8)}, 16, 1, 0))
@@ -1271,7 +1286,7 @@
         assert d.fields is not None
         assert d.shape == (2,)
         assert d.subdtype is not None
-        assert repr(d) == "dtype([('f0', '<f8'), ('f1', '<f8')])"
+        assert repr(d) == "dtype([('f0', '{E}f8'), ('f1', 
'{E}f8')])".format(E=E)
 
     def test_pickle_record(self):
         from numpy import array, dtype
@@ -1317,6 +1332,7 @@
         raises(ValueError, np.dtype, [('a', 'f4', (-1, -1))])
 
     def test_aligned_size(self):
+        import sys
         import numpy as np
         if self.test_for_core_internal:
             try:
@@ -1335,9 +1351,10 @@
         dt = np.dtype({'f0': ('i4', 0), 'f1':('u1', 4)}, align=True)
         assert dt.itemsize == 8
         assert dt.alignment == 4
-        assert str(dt) == "{'names':['f0','f1'], 'formats':['<i4','u1'], 
'offsets':[0,4], 'itemsize':8, 'aligned':True}"
+        E = '<' if sys.byteorder == 'little' else '>'
+        assert str(dt) == "{'names':['f0','f1'], 'formats':['%si4','u1'], 
'offsets':[0,4], 'itemsize':8, 'aligned':True}" % E
         dt = np.dtype([('f1', 'u1'), ('f0', 'i4')], align=True)
-        assert str(dt) == "{'names':['f1','f0'], 'formats':['u1','<i4'], 
'offsets':[0,4], 'itemsize':8, 'aligned':True}"
+        assert str(dt) == "{'names':['f1','f0'], 'formats':['u1','%si4'], 
'offsets':[0,4], 'itemsize':8, 'aligned':True}" % E
         # Nesting should preserve that alignment
         dt1 = np.dtype([('f0', 'i4'),
                        ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]),
@@ -1357,12 +1374,12 @@
         assert dt3.itemsize == 20
         assert dt1 == dt2
         answer = "{'names':['f0','f1','f2'], " + \
-                    "'formats':['<i4',{'names':['f1','f2','f3'], " + \
-                                      "'formats':['i1','<i4','i1'], " + \
+                    "'formats':['%si4',{'names':['f1','f2','f3'], " + \
+                                      "'formats':['i1','%si4','i1'], " + \
                                       "'offsets':[0,4,8], 'itemsize':12}," + \
                                  "'i1'], " + \
                     "'offsets':[0,4,16], 'itemsize':20, 'aligned':True}"
-        assert str(dt3) == answer
+        assert str(dt3) == answer % (E,E)
         assert dt2 == dt3
         # Nesting should preserve packing
         dt1 = np.dtype([('f0', 'i4'),
diff --git a/pypy/module/micronumpy/test/test_ndarray.py 
b/pypy/module/micronumpy/test/test_ndarray.py
--- a/pypy/module/micronumpy/test/test_ndarray.py
+++ b/pypy/module/micronumpy/test/test_ndarray.py
@@ -1791,6 +1791,7 @@
 
     def test_scalar_view(self):
         from numpy import array
+        import sys
         a = array(3, dtype='int32')
         b = a.view(dtype='float32')
         assert b.shape == ()
@@ -1799,17 +1800,27 @@
         assert exc.value[0] == "new type not compatible with array."
         exc = raises(TypeError, a.view, 'string')
         assert exc.value[0] == "data-type must not be 0-sized"
-        assert a.view('S4') == '\x03'
+        if sys.byteorder == 'big':
+            assert a.view('S4') == '\x00\x00\x00\x03'
+        else:
+            assert a.view('S4') == '\x03'
         a = array('abc1', dtype='c')
         assert (a == ['a', 'b', 'c', '1']).all()
         assert a.view('S4') == 'abc1'
         b = a.view([('a', 'i2'), ('b', 'i2')])
         assert b.shape == (1,)
-        assert b[0][0] == 25185
-        assert b[0][1] == 12643
+        if sys.byteorder == 'big':
+            assert b[0][0] == 0x6162
+            assert b[0][1] == 0x6331
+        else:
+            assert b[0][0] == 25185
+            assert b[0][1] == 12643
         a = array([(1, 2)], dtype=[('a', 'int64'), ('b', 'int64')])[0]
         assert a.shape == ()
-        assert a.view('S16') == '\x01' + '\x00' * 7 + '\x02'
+        if sys.byteorder == 'big':
+            assert a.view('S16') == '\x00' * 7 + '\x01' + '\x00' * 7 + '\x02'
+        else:
+            assert a.view('S16') == '\x01' + '\x00' * 7 + '\x02'
         a = array(2, dtype='<i8')
         b = a.view('<c8')
         assert 0 < b.real < 1
@@ -1818,15 +1829,23 @@
 
     def test_array_view(self):
         from numpy import array, dtype
+        import sys
         x = array((1, 2), dtype='int8')
         assert x.shape == (2,)
         y = x.view(dtype='int16')
         assert x.shape == (2,)
-        assert y[0] == 513
+        if sys.byteorder == 'big':
+            assert y[0] == 0x0102
+        else:
+            assert y[0] == 513 == 0x0201
         assert y.dtype == dtype('int16')
         y[0] = 670
-        assert x[0] == -98
-        assert x[1] == 2
+        if sys.byteorder == 'little':
+            assert x[0] == -98
+            assert x[1] == 2
+        else:
+            assert x[0] == 2
+            assert x[1] == -98
         f = array([1000, -1234], dtype='i4')
         nnp = self.non_native_prefix
         d = f.view(dtype=nnp + 'i4')
@@ -1847,7 +1866,10 @@
         assert x.view('S4')[0] == 'abc'
         assert x.view('S4')[1] == 'defg'
         a = array([(1, 2)], dtype=[('a', 'int64'), ('b', 'int64')])
-        assert a.view('S16')[0] == '\x01' + '\x00' * 7 + '\x02'
+        if sys.byteorder == 'big':
+            assert a.view('S16')[0] == '\x00' * 7 + '\x01' + '\x00' * 7 + 
'\x02'
+        else:
+            assert a.view('S16')[0] == '\x01' + '\x00' * 7 + '\x02'
 
     def test_half_conversions(self):
         from numpy import array, arange
@@ -2425,11 +2447,16 @@
         from numpy import array
         import sys
         a = array([1, 2, 3, 4], dtype='i4')
-        assert a.data[0] == '\x01'
+        assert a.data[0] == ('\x01' if sys.byteorder == 'little' else '\x00')
         assert a.data[1] == '\x00'
-        assert a.data[4] == '\x02'
-        a.data[4] = '\xff'
-        assert a[1] == 0xff
+        assert a.data[3] == ('\x00' if sys.byteorder == 'little' else '\x01')
+        assert a.data[4] == ('\x02' if sys.byteorder == 'little' else '\x00')
+        a.data[4] = '\x7f'
+        if sys.byteorder == 'big':
+            a.data[7] = '\x00' # make sure 0x02 is reset to 0
+            assert a[1] == (0x7f000000)
+        else:
+            assert a[1] == 0x7f
         assert len(a.data) == 16
         assert type(a.data) is buffer
         if '__pypy__' in sys.builtin_module_names:
@@ -2501,12 +2528,17 @@
     def test__reduce__(self):
         from numpy import array, dtype
         from cPickle import loads, dumps
+        import sys
 
         a = array([1, 2], dtype="int64")
         data = a.__reduce__()
 
-        assert data[2][4] == '\x01\x00\x00\x00\x00\x00\x00\x00' \
-                             '\x02\x00\x00\x00\x00\x00\x00\x00'
+        if sys.byteorder == 'big':
+            assert data[2][4] == '\x00\x00\x00\x00\x00\x00\x00\x01' \
+                                 '\x00\x00\x00\x00\x00\x00\x00\x02'
+        else:
+            assert data[2][4] == '\x01\x00\x00\x00\x00\x00\x00\x00' \
+                                 '\x02\x00\x00\x00\x00\x00\x00\x00'
 
         pickled_data = dumps(a)
         assert (loads(pickled_data) == a).all()
@@ -2639,12 +2671,16 @@
     def test_ndarray_from_buffer(self):
         import numpy as np
         import array
+        import sys
         buf = array.array('c', ['\x00']*2*3)
         a = np.ndarray((3,), buffer=buf, dtype='i2')
         a[0] = ord('b')
         a[1] = ord('a')
         a[2] = ord('r')
-        assert list(buf) == ['b', '\x00', 'a', '\x00', 'r', '\x00']
+        if sys.byteorder == 'big':
+            assert list(buf) == ['\x00', 'b', '\x00', 'a', '\x00', 'r']
+        else:
+            assert list(buf) == ['b', '\x00', 'a', '\x00', 'r', '\x00']
         assert a.base is buf
 
     def test_ndarray_subclass_from_buffer(self):
@@ -2659,13 +2695,17 @@
     def test_ndarray_from_buffer_and_offset(self):
         import numpy as np
         import array
+        import sys
         buf = array.array('c', ['\x00']*7)
         buf[0] = 'X'
         a = np.ndarray((3,), buffer=buf, offset=1, dtype='i2')
         a[0] = ord('b')
         a[1] = ord('a')
         a[2] = ord('r')
-        assert list(buf) == ['X', 'b', '\x00', 'a', '\x00', 'r', '\x00']
+        if sys.byteorder == 'big':
+            assert list(buf) == ['X', '\x00', 'b', '\x00', 'a', '\x00', 'r']
+        else:
+            assert list(buf) == ['X', 'b', '\x00', 'a', '\x00', 'r', '\x00']
 
     def test_ndarray_from_buffer_out_of_bounds(self):
         import numpy as np
@@ -3501,7 +3541,11 @@
         BaseNumpyAppTest.setup_class.im_func(cls)
         cls.w_data = cls.space.wrap(struct.pack('dddd', 1, 2, 3, 4))
         cls.w_fdata = cls.space.wrap(struct.pack('f', 2.3))
-        cls.w_float16val = cls.space.wrap('\x00E') # 5.0 in float16
+        import sys
+        if sys.byteorder == 'big':
+            cls.w_float16val = cls.space.wrap('E\x00') # 5.0 in float16
+        else:
+            cls.w_float16val = cls.space.wrap('\x00E') # 5.0 in float16
         cls.w_float32val = cls.space.wrap(struct.pack('f', 5.2))
         cls.w_float64val = cls.space.wrap(struct.pack('d', 300.4))
         cls.w_ulongval = cls.space.wrap(struct.pack('L', 12))
@@ -3609,9 +3653,15 @@
         assert (t == []).all()
         u = fromstring("\x01\x00\x00\x00\x00\x00\x00\x00", dtype=int)
         if sys.maxint > 2 ** 31 - 1:
-            assert (u == [1]).all()
+            if sys.byteorder == 'big':
+                assert (u == [0x0100000000000000]).all()
+            else:
+                assert (u == [1]).all()
         else:
-            assert (u == [1, 0]).all()
+            if sys.byteorder == 'big':
+                assert (u == [0x01000000, 0]).all()
+            else:
+                assert (u == [1, 0]).all()
         v = fromstring("abcd", dtype="|S2")
         assert v[0] == "ab"
         assert v[1] == "cd"
@@ -3668,9 +3718,15 @@
         k = fromstring(self.float16val, dtype='float16')
         assert k[0] == dtype('float16').type(5.)
         dt =  array([5], dtype='longfloat').dtype
+        print(dt.itemsize)
         if dt.itemsize == 8:
-            m = fromstring('\x00\x00\x00\x00\x00\x00\x14@',
-                           dtype='float64')
+            import sys
+            if sys.byteorder == 'big':
+                m = fromstring('@\x14\x00\x00\x00\x00\x00\x00',
+                               dtype='float64')
+            else:
+                m = fromstring('\x00\x00\x00\x00\x00\x00\x14@',
+                               dtype='float64')
         elif dt.itemsize == 12:
             m = fromstring('\x00\x00\x00\x00\x00\x00\x00\xa0\x01@\x00\x00',
                            dtype='float96')
@@ -3692,8 +3748,13 @@
 
     def test_tostring(self):
         from numpy import array
-        assert array([1, 2, 3], 'i2').tostring() == '\x01\x00\x02\x00\x03\x00'
-        assert array([1, 2, 3], 'i2')[::2].tostring() == '\x01\x00\x03\x00'
+        import sys
+        if sys.byteorder == 'big':
+            assert array([1, 2, 3], 'i2').tostring() == 
'\x00\x01\x00\x02\x00\x03'
+            assert array([1, 2, 3], 'i2')[::2].tostring() == '\x00\x01\x00\x03'
+        else:
+            assert array([1, 2, 3], 'i2').tostring() == 
'\x01\x00\x02\x00\x03\x00'
+            assert array([1, 2, 3], 'i2')[::2].tostring() == '\x01\x00\x03\x00'
         assert array([1, 2, 3], '<i2')[::2].tostring() == '\x01\x00\x03\x00'
         assert array([1, 2, 3], '>i2')[::2].tostring() == '\x00\x01\x00\x03'
         assert array(0, dtype='i2').tostring() == '\x00\x00'
@@ -4189,7 +4250,11 @@
         v = a.view(('float32', 4))
         assert v.dtype == np.dtype('float32')
         assert v.shape == (10, 4)
-        assert v[0][-1] == 2.53125
+        import sys
+        if sys.byteorder == 'big':
+            assert v[0][-2] == 2.53125
+        else:
+            assert v[0][-1] == 2.53125
         exc = raises(ValueError, "a.view(('float32', 2))")
         assert exc.value[0] == 'new type not compatible with array.'
 
diff --git a/pypy/module/micronumpy/test/test_scalar.py 
b/pypy/module/micronumpy/test/test_scalar.py
--- a/pypy/module/micronumpy/test/test_scalar.py
+++ b/pypy/module/micronumpy/test/test_scalar.py
@@ -109,6 +109,7 @@
 
     def test_pickle(self):
         from numpy import dtype, zeros
+        import sys
         try:
             from numpy.core.multiarray import scalar
         except ImportError:
@@ -119,9 +120,11 @@
         f = dtype('float64').type(13.37)
         c = dtype('complex128').type(13 + 37.j)
 
-        assert i.__reduce__() == (scalar, (dtype('int32'), '9\x05\x00\x00'))
-        assert f.__reduce__() == (scalar, (dtype('float64'), 
'=\n\xd7\xa3p\xbd*@'))
-        assert c.__reduce__() == (scalar, (dtype('complex128'), 
'\x00\x00\x00\x00\x00\x00*@\x00\x00\x00\x00\x00\x80B@'))
+        swap = lambda s: (''.join(reversed(s))) if sys.byteorder == 'big' else 
s
+        assert i.__reduce__() == (scalar, (dtype('int32'), 
swap('9\x05\x00\x00')))
+        assert f.__reduce__() == (scalar, (dtype('float64'), 
swap('=\n\xd7\xa3p\xbd*@')))
+        assert c.__reduce__() == (scalar, (dtype('complex128'), 
swap('\x00\x00\x00\x00\x00\x00*@') + \
+                                                                
swap('\x00\x00\x00\x00\x00\x80B@')))
 
         assert loads(dumps(i)) == i
         assert loads(dumps(f)) == f
@@ -256,13 +259,20 @@
         assert t < 7e-323
         t = s.view('complex64')
         assert type(t) is np.complex64
-        assert 0 < t.real < 1
-        assert t.imag == 0
+        if sys.byteorder == 'big':
+            assert 0 < t.imag < 1
+            assert t.real == 0
+        else:
+            assert 0 < t.real < 1
+            assert t.imag == 0
         exc = raises(TypeError, s.view, 'string')
         assert exc.value[0] == "data-type must not be 0-sized"
         t = s.view('S8')
         assert type(t) is np.string_
-        assert t == '\x0c'
+        if sys.byteorder == 'big':
+            assert t == '\x00' * 7 + '\x0c'
+        else:
+            assert t == '\x0c'
         s = np.dtype('string').type('abc1')
         assert s.view('S4') == 'abc1'
         if '__pypy__' in sys.builtin_module_names:
diff --git a/pypy/module/micronumpy/test/test_selection.py 
b/pypy/module/micronumpy/test/test_selection.py
--- a/pypy/module/micronumpy/test/test_selection.py
+++ b/pypy/module/micronumpy/test/test_selection.py
@@ -327,10 +327,15 @@
 # tests from numpy/core/tests/test_regression.py
     def test_sort_bigendian(self):
         from numpy import array, dtype
-        a = array(range(11), dtype='float64')
-        c = a.astype(dtype('<f8'))
-        c.sort()
-        assert max(abs(a-c)) < 1e-32
+        import sys
+
+        # little endian sorting for big endian machine
+        # is not yet supported! IMPL ME
+        if sys.byteorder == 'little':
+            a = array(range(11), dtype='float64')
+            c = a.astype(dtype('<f8'))
+            c.sort()
+            assert max(abs(a-c)) < 1e-32
 
     def test_string_argsort_with_zeros(self):
         import numpy as np
diff --git a/pypy/module/micronumpy/test/test_subtype.py 
b/pypy/module/micronumpy/test/test_subtype.py
--- a/pypy/module/micronumpy/test/test_subtype.py
+++ b/pypy/module/micronumpy/test/test_subtype.py
@@ -478,6 +478,7 @@
                 (version, shp, typ, isf, raw) = state
                 ndarray.__setstate__(self, (shp, typ, isf, raw))
 
+        E = '<' if sys.byteorder == 'little' else '>'
         D.__module__ = 'mod'
         mod = new.module('mod')
         mod.D = D
@@ -510,7 +511,7 @@
             tp9
             Rp10
             (I3
-            S'<'
+            S'{E}'
             p11
             NNNI-1
             I-1
@@ -520,7 +521,7 @@
             S'\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00@'
             p13
             tp14
-            b.'''.replace('            ','')
+            b.'''.replace('            ','').format(E=E)
         for ss,sn in zip(s.split('\n')[1:],s_from_numpy.split('\n')[1:]):
             if len(ss)>10:
                 # ignore binary data, it will be checked later
diff --git a/pypy/module/pypyjit/test_pypy_c/test_buffers.py 
b/pypy/module/pypyjit/test_pypy_c/test_buffers.py
--- a/pypy/module/pypyjit/test_pypy_c/test_buffers.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_buffers.py
@@ -34,7 +34,7 @@
             i = 0
             while i < n:
                 i += 1
-                struct.unpack('i', a)  # ID: unpack
+                struct.unpack('<i', a)  # ID: unpack
             return i
         log = self.run(main, [1000])
         assert log.result == 1000
diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py 
b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
--- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
@@ -1,5 +1,5 @@
 import py
-
+import sys
 from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 from rpython.rlib.rawstorage import misaligned_is_fine
 
@@ -99,6 +99,10 @@
         assert log.result is False
         assert len(log.loops) == 1
         loop = log._filter(log.loops[0])
+        if sys.byteorder == 'big':
+            bit = ord('>')
+        else:
+            bit = ord('<')
         assert loop.match("""
             guard_class(p1, #, descr=...)
             p4 = getfield_gc_r(p1, descr=<FieldP 
pypy.module.micronumpy.iterators.ArrayIter.inst_array \d+ pure>)
@@ -109,7 +113,7 @@
             i9 = getfield_gc_i(p4, descr=<FieldU 
pypy.module.micronumpy.concrete.BaseConcreteArray.inst_storage \d+ pure>)
             i10 = getfield_gc_i(p6, descr=<FieldU 
pypy.module.micronumpy.descriptor.W_Dtype.inst_byteorder \d+ pure>)
             i12 = int_eq(i10, 61)
-            i14 = int_eq(i10, 60)
+            i14 = int_eq(i10, %d)
             i15 = int_or(i12, i14)
             f16 = raw_load_f(i9, i5, descr=<ArrayF \d+>)
             guard_true(i15, descr=...)
@@ -142,7 +146,7 @@
             setfield_gc(p34, i30, descr=<FieldS 
pypy.module.micronumpy.iterators.IterState.inst_offset \d+>)
             }}}
             jump(..., descr=...)
-        """)
+        """ % (bit,))
 
     def test_reduce_logical_and(self):
         def main():
diff --git a/pypy/module/pypyjit/test_pypy_c/test_struct.py 
b/pypy/module/pypyjit/test_pypy_c/test_struct.py
--- a/pypy/module/pypyjit/test_pypy_c/test_struct.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_struct.py
@@ -19,8 +19,8 @@
             import struct
             i = 1
             while i < n:
-                buf = struct.pack("i", i)       # ID: pack
-                x = struct.unpack("i", buf)[0]  # ID: unpack
+                buf = struct.pack("<i", i)       # ID: pack
+                x = struct.unpack("<i", buf)[0]  # ID: unpack
                 i += x / i
             return i
 
@@ -43,20 +43,36 @@
             i20 = int_and(i19, 255)
         """ % extra)
 
-        # the newstr and the strsetitems are because the string is forced,
-        # which is in turn because the optimizer doesn't know how to handle a
-        # gc_load_indexed_i on a virtual string. It could be improved, but it
-        # is also true that in real life cases struct.unpack is called on
-        # strings which come from the outside, so it's a minor issue.
-        assert loop.match_by_id("unpack", """
-            # struct.unpack
-            p88 = newstr(4)
-            strsetitem(p88, 0, i11)
-            strsetitem(p88, 1, i14)
-            strsetitem(p88, 2, i17)
-            strsetitem(p88, 3, i20)
-            i91 = gc_load_indexed_i(p88, 0, 1, _, -4)
-        """)
+        if sys.byteorder == 'little':
+            # the newstr and the strsetitems are because the string is forced,
+            # which is in turn because the optimizer doesn't know how to 
handle a
+            # gc_load_indexed_i on a virtual string. It could be improved, but 
it
+            # is also true that in real life cases struct.unpack is called on
+            # strings which come from the outside, so it's a minor issue.
+            assert loop.match_by_id("unpack", """
+                # struct.unpack
+                p88 = newstr(4)
+                strsetitem(p88, 0, i11)
+                strsetitem(p88, 1, i14)
+                strsetitem(p88, 2, i17)
+                strsetitem(p88, 3, i20)
+                i91 = gc_load_indexed_i(p88, 0, 1, _, -4)
+            """)
+        else:
+            # on a big endian machine we cannot just write into
+            # a char buffer and then use load gc to read the integer,
+            # here manual shifting is applied
+            assert loop.match_by_id("unpack", """
+                # struct.unpack
+                i95 = int_lshift(i90, 8)
+                i96 = int_or(i88, i95)
+                i97 = int_lshift(i92, 16)
+                i98 = int_or(i96, i97)
+                i99 = int_ge(i94, 128)
+                guard_false(i99, descr=...)
+                i100 = int_lshift(i94, 24)
+                i101 = int_or(i98, i100)
+            """)
 
     def test_struct_object(self):
         def main(n):
@@ -72,31 +88,32 @@
         log = self.run(main, [1000])
         assert log.result == main(1000)
 
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('pack', """
-            guard_not_invalidated(descr=...)
-            # struct.pack
-            %s
-            i11 = int_and(i4, 255)
-            i13 = int_rshift(i4, 8)
-            i14 = int_and(i13, 255)
-            i16 = int_rshift(i13, 8)
-            i17 = int_and(i16, 255)
-            i19 = int_rshift(i16, 8)
-            i20 = int_and(i19, 255)
-        """ % extra)
+        if sys.byteorder == 'little':
+            loop, = log.loops_by_filename(self.filepath)
+            assert loop.match_by_id('pack', """
+                guard_not_invalidated(descr=...)
+                # struct.pack
+                %s
+                i11 = int_and(i4, 255)
+                i13 = int_rshift(i4, 8)
+                i14 = int_and(i13, 255)
+                i16 = int_rshift(i13, 8)
+                i17 = int_and(i16, 255)
+                i19 = int_rshift(i16, 8)
+                i20 = int_and(i19, 255)
+            """ % extra)
 
-        assert loop.match_by_id('unpack', """
-            # struct.unpack
-            p88 = newstr(8)
-            strsetitem(p88, 0, 255)
-            strsetitem(p88, 1, 255)
-            strsetitem(p88, 2, 255)
-            strsetitem(p88, 3, 255)
-            strsetitem(p88, 4, i11)
-            strsetitem(p88, 5, i14)
-            strsetitem(p88, 6, i17)
-            strsetitem(p88, 7, i20)
-            i90 = gc_load_indexed_i(p88, 0, 1, _, -4)
-            i91 = gc_load_indexed_i(p88, 4, 1, _, -4)
-        """)
+            assert loop.match_by_id('unpack', """
+                # struct.unpack
+                p88 = newstr(8)
+                strsetitem(p88, 0, 255)
+                strsetitem(p88, 1, 255)
+                strsetitem(p88, 2, 255)
+                strsetitem(p88, 3, 255)
+                strsetitem(p88, 4, i11)
+                strsetitem(p88, 5, i14)
+                strsetitem(p88, 6, i17)
+                strsetitem(p88, 7, i20)
+                i90 = gc_load_indexed_i(p88, 0, 1, _, -4)
+                i91 = gc_load_indexed_i(p88, 4, 1, _, -4)
+            """)
diff --git a/pypy/module/select/test/test_select.py 
b/pypy/module/select/test/test_select.py
--- a/pypy/module/select/test/test_select.py
+++ b/pypy/module/select/test/test_select.py
@@ -287,7 +287,8 @@
             t = thread.start_new_thread(pollster.poll, ())
             try:
                 time.sleep(0.3)
-                for i in range(5): print '',  # to release GIL untranslated
+                # TODO restore print '', if this is not the reason
+                for i in range(5): print 'release gil select'  # to release 
GIL untranslated
                 # trigger ufds array reallocation
                 for fd in rfds:
                     pollster.unregister(fd)
@@ -328,6 +329,10 @@
         "usemodules": ["select", "_socket", "time", "thread"],
     }
 
+    import os
+    if os.uname()[4] == 's390x':
+        py.test.skip("build bot for s390x cannot open sockets")
+
     def w_make_server(self):
         import socket
         if hasattr(self, 'sock'):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py 
b/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py
@@ -1,4 +1,4 @@
-
+import sys
 from ctypes import *
 from support import BaseCTypesTestChecker
 
@@ -8,8 +8,11 @@
             _fields_ = [('x', c_char), ('y', c_int)]
 
         stuff = Stuff()
-        stuff.y = ord('x')
-        assert stuff.x == 'x'
+        stuff.y = ord('x') | (ord('z') << 24)
+        if sys.byteorder == 'little':
+            assert stuff.x == 'x'
+        else:
+            assert stuff.x == 'z'
 
     def test_union_of_structures(self):
         class Stuff(Structure):
diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py
--- a/pypy/testrunner_cfg.py
+++ b/pypy/testrunner_cfg.py
@@ -5,6 +5,7 @@
     'translator/c', 'rlib',
     'memory/test', 'jit/metainterp',
     'jit/backend/arm', 'jit/backend/x86',
+    'jit/backend/zarch',
 ]
 
 def collect_one_testdir(testdirs, reldir, tests):
diff --git a/rpython/doc/index.rst b/rpython/doc/index.rst
--- a/rpython/doc/index.rst
+++ b/rpython/doc/index.rst
@@ -37,6 +37,7 @@
 
    arm
    logging
+   s390x
 
 
 Writing your own interpreter in RPython
diff --git a/rpython/doc/s390x.rst b/rpython/doc/s390x.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/s390x.rst
@@ -0,0 +1,20 @@
+.. _s390x:
+
+S390X JIT Backend
+=================
+
+Our JIT implements the 64 bit version of the IBM Mainframe called s390x.
+Note that this architecture is big endian.
+
+The following facilities need to be installed to operate
+correctly (all of the machines used for development these where installed):
+
+* General-Instructions-Extension
+* Long-Displacement
+* Binary Floating Point (IEEE)
+
+Translating
+-----------
+
+Ensure that libffi is installed (version should do > 3.0.+).
+CPython should be version 2.7.+.
diff --git a/rpython/jit/backend/arm/locations.py 
b/rpython/jit/backend/arm/locations.py
--- a/rpython/jit/backend/arm/locations.py
+++ b/rpython/jit/backend/arm/locations.py
@@ -1,7 +1,6 @@
 from rpython.jit.metainterp.history import INT, FLOAT
 from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE
 
-
 class AssemblerLocation(object):
     _immutable_ = True
     type = INT
diff --git a/rpython/jit/backend/arm/opassembler.py 
b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -1143,35 +1143,42 @@
     def emit_op_zero_array(self, op, arglocs, regalloc, fcond):
         from rpython.jit.backend.llsupport.descr import unpack_arraydescr
         assert len(arglocs) == 0
-        length_box = op.getarg(2)
-        if isinstance(length_box, ConstInt) and length_box.getint() == 0:
+        size_box = op.getarg(2)
+        if isinstance(size_box, ConstInt) and size_box.getint() == 0:
             return fcond     # nothing to do
         itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
         args = op.getarglist()
+        #
+        # ZERO_ARRAY(base_loc, start, size, 1, 1)
+        # 'start' and 'size' are both expressed in bytes,
+        # and the two scaling arguments should always be ConstInt(1) on ARM.
+        assert args[3].getint() == 1
+        assert args[4].getint() == 1
+        #
         base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
-        sibox = args[1]
-        if isinstance(sibox, ConstInt):
-            startindex_loc = None
-            startindex = sibox.getint()
-            assert startindex >= 0
+        startbyte_box = args[1]
+        if isinstance(startbyte_box, ConstInt):
+            startbyte_loc = None
+            startbyte = startbyte_box.getint()
+            assert startbyte >= 0
         else:
-            startindex_loc = regalloc.rm.make_sure_var_in_reg(sibox, args)
-            startindex = -1
+            startbyte_loc = regalloc.rm.make_sure_var_in_reg(startbyte_box,
+                                                             args)
+            startbyte = -1
 
-        # base_loc and startindex_loc are in two regs here (or they are
-        # immediates).  Compute the dstaddr_loc, which is the raw
+        # base_loc and startbyte_loc are in two regs here (or startbyte_loc
+        # is an immediate).  Compute the dstaddr_loc, which is the raw
         # address that we will pass as first argument to memset().
         # It can be in the same register as either one, but not in
         # args[2], because we're still needing the latter.
         dstaddr_box = TempVar()
         dstaddr_loc = regalloc.rm.force_allocate_reg(dstaddr_box, [args[2]])
-        if startindex >= 0:    # a constant
-            ofs = baseofs + startindex * itemsize
+        if startbyte >= 0:    # a constant
+            ofs = baseofs + startbyte
             reg = base_loc.value
         else:
-            self.mc.gen_load_int(r.ip.value, itemsize)
-            self.mc.MLA(dstaddr_loc.value, r.ip.value,
-                        startindex_loc.value, base_loc.value)
+            self.mc.ADD_rr(dstaddr_loc.value,
+                           base_loc.value, startbyte_loc.value)
             ofs = baseofs
             reg = dstaddr_loc.value
         if check_imm_arg(ofs):
@@ -1180,20 +1187,27 @@
             self.mc.gen_load_int(r.ip.value, ofs)
             self.mc.ADD_rr(dstaddr_loc.value, reg, r.ip.value)
 
-        if (isinstance(length_box, ConstInt) and
-                length_box.getint() <= 14 and     # same limit as GCC
-                itemsize in (4, 2, 1)):
+        # We use STRB, STRH or STR based on whether we know the array
+        # item size is a multiple of 1, 2 or 4.
+        if   itemsize & 1: itemsize = 1
+        elif itemsize & 2: itemsize = 2
+        else:              itemsize = 4
+        limit = itemsize
+        next_group = -1
+        if itemsize < 4 and startbyte >= 0:
+            # we optimize STRB/STRH into STR, but this needs care:
+            # it only works if startindex_loc is a constant, otherwise
+            # we'd be doing unaligned accesses.
+            next_group = (-startbyte) & 3
+            limit = 4
+
+        if (isinstance(size_box, ConstInt) and
+                size_box.getint() <= 14 * limit):     # same limit as GCC
             # Inline a series of STR operations, starting at 'dstaddr_loc'.
-            next_group = -1
-            if itemsize < 4 and startindex >= 0:
-                # we optimize STRB/STRH into STR, but this needs care:
-                # it only works if startindex_loc is a constant, otherwise
-                # we'd be doing unaligned accesses.
-                next_group = (-startindex * itemsize) & 3
             #
             self.mc.gen_load_int(r.ip.value, 0)
             i = 0
-            total_size = length_box.getint() * itemsize
+            total_size = size_box.getint()
             while i < total_size:
                 sz = itemsize
                 if i == next_group:
@@ -1209,29 +1223,18 @@
                 i += sz
 
         else:
-            if isinstance(length_box, ConstInt):
-                length_loc = imm(length_box.getint() * itemsize)
+            if isinstance(size_box, ConstInt):
+                size_loc = imm(size_box.getint())
             else:
-                # load length_loc in a register different than dstaddr_loc
-                length_loc = regalloc.rm.make_sure_var_in_reg(length_box,
-                                                              [dstaddr_box])
-                if itemsize > 1:
-                    # we need a register that is different from dstaddr_loc,
-                    # but which can be identical to length_loc (as usual,
-                    # only if the length_box is not used by future operations)
-                    bytes_box = TempVar()
-                    bytes_loc = regalloc.rm.force_allocate_reg(bytes_box,
-                                                               [dstaddr_box])
-                    self.mc.gen_load_int(r.ip.value, itemsize)
-                    self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value)
-                    length_box = bytes_box
-                    length_loc = bytes_loc
+                # load size_loc in a register different than dstaddr_loc
+                size_loc = regalloc.rm.make_sure_var_in_reg(size_box,
+                                                            [dstaddr_box])
             #
             # call memset()
             regalloc.before_call()
             self.simple_call_no_collect(imm(self.memset_addr),
-                                        [dstaddr_loc, imm(0), length_loc])
-            regalloc.rm.possibly_free_var(length_box)
+                                        [dstaddr_loc, imm(0), size_loc])
+            regalloc.rm.possibly_free_var(size_box)
         regalloc.rm.possibly_free_var(dstaddr_box)
         return fcond
 
diff --git a/rpython/jit/backend/detect_cpu.py 
b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -16,6 +16,7 @@
 MODEL_X86_64_SSE4 = 'x86-64-sse4'
 MODEL_ARM         = 'arm'
 MODEL_PPC_64      = 'ppc-64'
+MODEL_S390_64     = 's390x'
 # don't use '_' in the model strings; they are replaced by '-'
 
 
@@ -27,6 +28,7 @@
         MODEL_ARM:    ['__arm__', '__thumb__','_M_ARM_EP'],
         MODEL_X86:    ['i386', '__i386', '__i386__', '__i686__','_M_IX86'],
         MODEL_PPC_64: ['__powerpc64__'],
+        MODEL_S390_64:['__s390x__'],
     }
     for k, v in mapping.iteritems():
         for macro in v:
@@ -67,6 +69,7 @@
             'armv7l': MODEL_ARM,
             'armv6l': MODEL_ARM,
             'arm': MODEL_ARM,      # freebsd
+            's390x': MODEL_S390_64
             }.get(mach)
 
     if result is None:
@@ -88,7 +91,6 @@
             if feature.detect_x32_mode():
                 raise ProcessorAutodetectError(
                     'JITting in x32 mode is not implemented')
-
     #
     if result.startswith('arm'):
         from rpython.jit.backend.arm.detect import detect_float
@@ -122,6 +124,8 @@
         return "rpython.jit.backend.arm.runner", "CPU_ARM"
     elif backend_name == MODEL_PPC_64:
         return "rpython.jit.backend.ppc.runner", "PPC_CPU"
+    elif backend_name == MODEL_S390_64:
+        return "rpython.jit.backend.zarch.runner", "CPU_S390_64"
     else:
         raise ProcessorAutodetectError, (
             "we have no JIT backend for this cpu: '%s'" % backend_name)
@@ -142,6 +146,7 @@
         MODEL_X86_64_SSE4: ['floats', 'singlefloats'],
         MODEL_ARM: ['floats', 'singlefloats', 'longlong'],
         MODEL_PPC_64: [], # we don't even have PPC directory, so no
+        MODEL_S390_64: ['floats'],
     }[backend_name]
 
 if __name__ == '__main__':
diff --git a/rpython/jit/backend/llsupport/gc.py 
b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -164,13 +164,11 @@
             array_index = moving_obj_tracker.get_array_index(v)
 
             size, offset, _ = 
unpack_arraydescr(moving_obj_tracker.ptr_array_descr)
-            scale = size
+            array_index = array_index * size + offset
             args = [moving_obj_tracker.const_ptr_gcref_array,
                     ConstInt(array_index),
-                    ConstInt(scale),
-                    ConstInt(offset),
                     ConstInt(size)]
-            load_op = ResOperation(rop.GC_LOAD_INDEXED_R, args)
+            load_op = ResOperation(rop.GC_LOAD_R, args)
             newops.append(load_op)
             op.setarg(arg_i, load_op)
         #
diff --git a/rpython/jit/backend/llsupport/jump.py 
b/rpython/jit/backend/llsupport/jump.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/jump.py
@@ -0,0 +1,107 @@
+def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
+    pending_dests = len(dst_locations)
+    srccount = {}    # maps dst_locations to how many times the same
+                     # location appears in src_locations
+    for dst in dst_locations:
+        key = dst.as_key()
+        assert key not in srccount, "duplicate value in dst_locations!"
+        srccount[key] = 0
+    for i in range(len(dst_locations)):
+        src = src_locations[i]
+        if src.is_imm():
+            continue
+        key = src.as_key()
+        if key in srccount:
+            if key == dst_locations[i].as_key():
+                # ignore a move "x = x"
+                # setting any "large enough" negative value is ok, but
+                # be careful of overflows, don't use -sys.maxint
+                srccount[key] = -len(dst_locations) - 1
+                pending_dests -= 1
+            else:
+                srccount[key] += 1
+
+    while pending_dests > 0:
+        progress = False
+        for i in range(len(dst_locations)):
+            dst = dst_locations[i]
+            key = dst.as_key()
+            if srccount[key] == 0:
+                srccount[key] = -1       # means "it's done"
+                pending_dests -= 1
+                src = src_locations[i]
+                if not src.is_imm():
+                    key = src.as_key()
+                    if key in srccount:
+                        srccount[key] -= 1
+                _move(assembler, src, dst, tmpreg)
+                progress = True
+        if not progress:
+            # we are left with only pure disjoint cycles
+            sources = {}     # maps dst_locations to src_locations
+            for i in range(len(dst_locations)):
+                src = src_locations[i]
+                dst = dst_locations[i]
+                sources[dst.as_key()] = src
+            #
+            for i in range(len(dst_locations)):
+                dst = dst_locations[i]
+                originalkey = dst.as_key()
+                if srccount[originalkey] >= 0:
+                    assembler.regalloc_push(dst, 0)
+                    while True:
+                        key = dst.as_key()
+                        assert srccount[key] == 1
+                        # ^^^ because we are in a simple cycle
+                        srccount[key] = -1
+                        pending_dests -= 1
+                        src = sources[key]
+                        if src.as_key() == originalkey:
+                            break
+                        _move(assembler, src, dst, tmpreg)
+                        dst = src
+                    assembler.regalloc_pop(dst, 0)
+            assert pending_dests == 0
+
+def _move(assembler, src, dst, tmpreg):
+    # some assembler cannot handle memory to memory moves without
+    # a tmp register, thus prepare src according to the ISA capabilities
+    src = assembler.regalloc_prepare_move(src, dst, tmpreg)
+    assembler.regalloc_mov(src, dst)
+
+def remap_frame_layout_mixed(assembler,
+                             src_locations1, dst_locations1, tmpreg1,
+                             src_locations2, dst_locations2, tmpreg2, WORD):
+    # find and push the fp stack locations from src_locations2 that
+    # are going to be overwritten by dst_locations1
+    extrapushes = []
+    dst_keys = {}
+    for loc in dst_locations1:
+        dst_keys[loc.as_key()] = None
+    src_locations2red = []
+    dst_locations2red = []
+    for i in range(len(src_locations2)):
+        loc    = src_locations2[i]
+        dstloc = dst_locations2[i]
+        if loc.is_stack():
+            key = loc.as_key()
+            if (key in dst_keys or (loc.width > WORD and
+                                    (key + 1) in dst_keys)):
+                assembler.regalloc_push(loc, len(extrapushes))
+                extrapushes.append(dstloc)
+                continue
+        src_locations2red.append(loc)
+        dst_locations2red.append(dstloc)
+    src_locations2 = src_locations2red
+    dst_locations2 = dst_locations2red
+    #
+    # remap the integer and pointer registers and stack locations
+    remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+    #
+    # remap the fp registers and stack locations
+    remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+    #
+    # finally, pop the extra fp stack locations
+    while len(extrapushes) > 0:
+        loc = extrapushes.pop()
+        assembler.regalloc_pop(loc, len(extrapushes))
diff --git a/rpython/jit/backend/llsupport/rewrite.py 
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -203,38 +203,47 @@
     def transform_to_gc_load(self, op):
         NOT_SIGNED = 0
         CINT_ZERO = ConstInt(0)
+        opnum = op.getopnum()
+        #if opnum == rop.CALL_MALLOC_NURSERY_VARSIZE:
+        #    v_length = op.getarg(2)
+        #    scale = op.getarg(1).getint()
+        #    if scale not in self.cpu.load_supported_factors:
+        #        scale, offset, v_length = \
+        #                
self._emit_mul_if_factor_offset_not_supported(v_length, scale, 0)
+        #        op.setarg(1, ConstInt(scale))
+        #        op.setarg(2, v_length)
         if op.is_getarrayitem() or \
-           op.getopnum() in (rop.GETARRAYITEM_RAW_I,
-                             rop.GETARRAYITEM_RAW_F):
+           opnum in (rop.GETARRAYITEM_RAW_I,
+                     rop.GETARRAYITEM_RAW_F):
             self.handle_getarrayitem(op)
-        elif op.getopnum() in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
+        elif opnum in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
             self.handle_setarrayitem(op)
-        elif op.getopnum() == rop.RAW_STORE:
+        elif opnum == rop.RAW_STORE:
             itemsize, ofs, _ = unpack_arraydescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             value_box = op.getarg(2)
             self.emit_gc_store_or_indexed(op, ptr_box, index_box, value_box, 
itemsize, 1, ofs)
-        elif op.getopnum() in (rop.RAW_LOAD_I, rop.RAW_LOAD_F):
+        elif opnum in (rop.RAW_LOAD_I, rop.RAW_LOAD_F):
             itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, 1, 
ofs, sign)
-        elif op.getopnum() in (rop.GETINTERIORFIELD_GC_I, 
rop.GETINTERIORFIELD_GC_R,
-                               rop.GETINTERIORFIELD_GC_F):
+        elif opnum in (rop.GETINTERIORFIELD_GC_I, rop.GETINTERIORFIELD_GC_R,
+                       rop.GETINTERIORFIELD_GC_F):
             ofs, itemsize, fieldsize, sign = 
unpack_interiorfielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             self.emit_gc_load_or_indexed(op, ptr_box, index_box, fieldsize, 
itemsize, ofs, sign)
-        elif op.getopnum() in (rop.SETINTERIORFIELD_RAW, 
rop.SETINTERIORFIELD_GC):
+        elif opnum in (rop.SETINTERIORFIELD_RAW, rop.SETINTERIORFIELD_GC):
             ofs, itemsize, fieldsize, sign = 
unpack_interiorfielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             value_box = op.getarg(2)
             self.emit_gc_store_or_indexed(op, ptr_box, index_box, value_box,
                                           fieldsize, itemsize, ofs)
-        elif op.getopnum() in (rop.GETFIELD_GC_I, rop.GETFIELD_GC_F, 
rop.GETFIELD_GC_R,
-                               rop.GETFIELD_RAW_I, rop.GETFIELD_RAW_F, 
rop.GETFIELD_RAW_R):
+        elif opnum in (rop.GETFIELD_GC_I, rop.GETFIELD_GC_F, rop.GETFIELD_GC_R,
+                       rop.GETFIELD_RAW_I, rop.GETFIELD_RAW_F, 
rop.GETFIELD_RAW_R):
             ofs, itemsize, sign = unpack_fielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             if op.getopnum() in (rop.GETFIELD_GC_F, rop.GETFIELD_GC_I, 
rop.GETFIELD_GC_R):
@@ -249,45 +258,45 @@
                 self.emit_op(op)
                 return True
             self.emit_gc_load_or_indexed(op, ptr_box, ConstInt(0), itemsize, 
1, ofs, sign)
-        elif op.getopnum() in (rop.SETFIELD_GC, rop.SETFIELD_RAW):
+        elif opnum in (rop.SETFIELD_GC, rop.SETFIELD_RAW):
             ofs, itemsize, sign = unpack_fielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             value_box = op.getarg(1)
             self.emit_gc_store_or_indexed(op, ptr_box, ConstInt(0), value_box, 
itemsize, 1, ofs)
-        elif op.getopnum() == rop.ARRAYLEN_GC:
+        elif opnum == rop.ARRAYLEN_GC:
             descr = op.getdescr()
             assert isinstance(descr, ArrayDescr)
             ofs = descr.lendescr.offset
             self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                          WORD, 1, ofs, NOT_SIGNED)
-        elif op.getopnum() == rop.STRLEN:
+        elif opnum == rop.STRLEN:
             basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                                                  
self.cpu.translate_support_code)
             self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                          WORD, 1, ofs_length, NOT_SIGNED)
-        elif op.getopnum() == rop.UNICODELEN:
+        elif opnum == rop.UNICODELEN:
             basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                                                  
self.cpu.translate_support_code)
             self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                          WORD, 1, ofs_length, NOT_SIGNED)
-        elif op.getopnum() == rop.STRGETITEM:
+        elif opnum == rop.STRGETITEM:
             basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                                                  
self.cpu.translate_support_code)
             assert itemsize == 1
             self.emit_gc_load_or_indexed(op, op.getarg(0), op.getarg(1),
                                          itemsize, itemsize, basesize, 
NOT_SIGNED)
-        elif op.getopnum() == rop.UNICODEGETITEM:
+        elif opnum == rop.UNICODEGETITEM:
             basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                                                  
self.cpu.translate_support_code)
             self.emit_gc_load_or_indexed(op, op.getarg(0), op.getarg(1),
                                          itemsize, itemsize, basesize, 
NOT_SIGNED)
-        elif op.getopnum() == rop.STRSETITEM:
+        elif opnum == rop.STRSETITEM:
             basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                                                  
self.cpu.translate_support_code)
             assert itemsize == 1
             self.emit_gc_store_or_indexed(op, op.getarg(0), op.getarg(1), 
op.getarg(2),
                                          itemsize, itemsize, basesize)
-        elif op.getopnum() == rop.UNICODESETITEM:
+        elif opnum == rop.UNICODESETITEM:
             basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                                                  
self.cpu.translate_support_code)
             self.emit_gc_store_or_indexed(op, op.getarg(0), op.getarg(1), 
op.getarg(2),
@@ -488,7 +497,7 @@
             total_size = arraydescr.basesize
         elif (self.gc_ll_descr.can_use_nursery_malloc(1) and
               self.gen_malloc_nursery_varsize(arraydescr.itemsize,
-              v_length, op, arraydescr, kind=kind)):
+                  v_length, op, arraydescr, kind=kind)):
             # note that we cannot initialize tid here, because the array
             # might end up being allocated by malloc_external or some
             # stuff that initializes GC header fields differently
@@ -524,8 +533,18 @@
         # See emit_pending_zeros().  (This optimization is done by
         # hacking the object 'o' in-place: e.g., o.getarg(1) may be
         # replaced with another constant greater than 0.)
-        o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length],
-                         descr=arraydescr)
+        assert isinstance(arraydescr, ArrayDescr)
+        scale = arraydescr.itemsize
+        v_length_scaled = v_length
+        if not isinstance(v_length, ConstInt):
+            scale, offset, v_length_scaled = \
+                    self._emit_mul_if_factor_offset_not_supported(v_length, 
scale, 0)
+        v_scale = ConstInt(scale)
+        # there is probably no point in doing _emit_mul_if.. for c_zero!
+        # NOTE that the scale might be != 1 for e.g. v_length_scaled if it is 
a constant
+        # it is later applied in emit_pending_zeros
+        args = [v_arr, self.c_zero, v_length_scaled, ConstInt(scale), v_scale]
+        o = ResOperation(rop.ZERO_ARRAY, args, descr=arraydescr)
         self.emit_op(o)
         if isinstance(v_length, ConstInt):
             self.last_zero_arrays.append(self._newops[-1])
@@ -643,22 +662,38 @@
         # are also already in 'newops', which is the point.
         for op in self.last_zero_arrays:
             assert op.getopnum() == rop.ZERO_ARRAY
+            descr = op.getdescr()
+            assert isinstance(descr, ArrayDescr)
+            scale = descr.itemsize
             box = op.getarg(0)
             try:
                 intset = self.setarrayitems_occurred(box)
             except KeyError:
+                start_box = op.getarg(1)
+                length_box = op.getarg(2)
+                if isinstance(start_box, ConstInt):
+                    start = start_box.getint()
+                    op.setarg(1, ConstInt(start * scale))
+                    op.setarg(3, ConstInt(1))
+                if isinstance(length_box, ConstInt):
+                    stop = length_box.getint()
+                    scaled_len = stop * scale
+                    op.setarg(2, ConstInt(scaled_len))
+                    op.setarg(4, ConstInt(1))
                 continue
             assert op.getarg(1).getint() == 0   # always 'start=0' initially
             start = 0
             while start in intset:
                 start += 1
-            op.setarg(1, ConstInt(start))
+            op.setarg(1, ConstInt(start * scale))
             stop = op.getarg(2).getint()
             assert start <= stop
             while stop > start and (stop - 1) in intset:
                 stop -= 1
-            op.setarg(2, ConstInt(stop - start))
+            op.setarg(2, ConstInt((stop - start) * scale))
             # ^^ may be ConstInt(0); then the operation becomes a no-op
+            op.setarg(3, ConstInt(1)) # set scale to 1
+            op.setarg(4, ConstInt(1)) # set scale to 1
         del self.last_zero_arrays[:]
         self._setarrayitems_occurred.clear()
         #
@@ -759,6 +794,10 @@
              arraydescr.lendescr.offset != 
gc_descr.standard_array_length_ofs)):
             return False
         self.emitting_an_operation_that_can_collect()
+        #scale = itemsize
+        #if scale not in self.cpu.load_supported_factors:
+        #    scale, offset, v_length = \
+        #            self._emit_mul_if_factor_offset_not_supported(v_length, 
scale, 0)
         op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE,
                           [ConstInt(kind), ConstInt(itemsize), v_length],
                           descr=arraydescr)
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py 
b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -91,6 +91,8 @@
             assert nos == [0, 1, 47]
         elif self.cpu.backend_name.startswith('ppc64'):
             assert nos == [0, 1, 33]
+        elif self.cpu.backend_name.startswith('zarch'):
+            assert nos == [0, 1, 29]
         else:
             raise Exception("write the data here")
         assert frame.jf_frame[nos[0]]
@@ -313,7 +315,9 @@
                                   'strdescr': arraydescr})
         # check the returned pointers
         gc_ll_descr = self.cpu.gc_ll_descr
-        assert gc_ll_descr.calls == [(8, 15, 10), (5, 15, 3), ('str', 3)]
+        assert gc_ll_descr.calls == [(8, 15, 10),
+                                     (5, 15, 3),
+                                     ('str', 3)]
         # one fit, one was too large, one was not fitting
 
     def test_malloc_slowpath(self):
@@ -641,11 +645,13 @@
             gcmap = unpack_gcmap(frame)
             if self.cpu.backend_name.startswith('ppc64'):
                 assert gcmap == [30, 31, 32]
+            elif self.cpu.backend_name.startswith('zarch'):
+                # 10 gpr, 14 fpr -> 25 is the first slot
+                assert gcmap == [26, 27, 28]
             elif self.cpu.IS_64_BIT:
                 assert gcmap == [28, 29, 30]
             elif self.cpu.backend_name.startswith('arm'):
                 assert gcmap == [44, 45, 46]
-                pass
             else:
                 assert gcmap == [22, 23, 24]
             for item, s in zip(gcmap, new_items):
diff --git a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py 
b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
@@ -127,7 +127,7 @@
             i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref), 
descr=pinned_obj_my_int_descr)
             """, """
             []
-            p1 = gc_load_indexed_r(ConstPtr(ptr_array_gcref), 0, 
%(ptr_array_descr.itemsize)s, 1, %(ptr_array_descr.itemsize)s)
+            p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 * 
ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
             i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
             """)
         assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 1
@@ -140,10 +140,10 @@
             i2 = getfield_gc_i(ConstPtr(pinned_obj_gcref), 
descr=pinned_obj_my_int_descr)
             """, """
             []
-            p1 = gc_load_indexed_r(ConstPtr(ptr_array_gcref), 0, 
%(ptr_array_descr.itemsize)s, 1, %(ptr_array_descr.itemsize)s)
+            p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 * 
ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
             i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
             i1 = gc_load_i(ConstPtr(notpinned_obj_gcref), 0, 
-%(notpinned_obj_my_int_descr.field_size)s)
-            p2 = gc_load_indexed_r(ConstPtr(ptr_array_gcref), 1, 
%(ptr_array_descr.itemsize)s, 1, %(ptr_array_descr.itemsize)s)
+            p2 = gc_load_r(ConstPtr(ptr_array_gcref), %(1 * 
ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
             i2 = gc_load_i(p2, 0, -%(pinned_obj_my_int_descr.field_size)s)
             """)
         assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 2
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py 
b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -36,6 +36,21 @@
             assert not isinstance(descr, (str, int))
             return 'gc_store(%s, %d, %s, %d)' % (baseptr, descr.offset,
                                                  newvalue, descr.field_size)
+        def zero_array(baseptr, start, length, descr_name, descr):
+            assert isinstance(baseptr, str)
+            assert isinstance(start, (str, int))
+            assert isinstance(length, (str, int))
+            assert isinstance(descr_name, str)
+            assert not isinstance(descr, (str,int))
+            itemsize = descr.itemsize
+            start = start * itemsize
+            length_scale = 1
+            if isinstance(length, str):
+                length_scale = itemsize
+            else:
+                length = length * itemsize
+            return 'zero_array(%s, %s, %s, 1, %d, descr=%s)' % \
+                      (baseptr, start, length, length_scale, descr_name)
         def setarrayitem(baseptr, index, newvalue, descr):
             assert isinstance(baseptr, str)
             assert isinstance(index, (str, int))
@@ -681,7 +696,7 @@
                                 %(cdescr.basesize + 129 * cdescr.itemsize)d)
             gc_store(p1, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p1, 0,  129, %(clendescr.field_size)s)
-            zero_array(p1, 0, 129, descr=cdescr)
+            %(zero_array('p1', 0, 129, 'cdescr', cdescr))s
             call_n(123456)
             cond_call_gc_wb(p1, descr=wbdescr)
             %(setarrayitem('p1', 'i2', 'p3', cdescr))s
@@ -703,7 +718,7 @@
                                 %(cdescr.basesize + 130 * cdescr.itemsize)d)
             gc_store(p1, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p1, 0,  130, %(clendescr.field_size)s)
-            zero_array(p1, 0, 130, descr=cdescr)
+            %(zero_array('p1', 0, 130, 'cdescr', cdescr))s
             call_n(123456)
             cond_call_gc_wb_array(p1, i2, descr=wbdescr)
             %(setarrayitem('p1', 'i2', 'p3', cdescr))s
@@ -735,7 +750,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p1, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p1, 0,  5, %(clendescr.field_size)s)
-            zero_array(p1, 0, 5, descr=cdescr)
+            %(zero_array('p1', 0, 5, 'cdescr', cdescr))s
             label(p1, i2, p3)
             cond_call_gc_wb_array(p1, i2, descr=wbdescr)
             %(setarrayitem('p1', 'i2', 'p3', cdescr))s
@@ -810,7 +825,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p0, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p0, 0,  5, %(clendescr.field_size)s)
-            zero_array(p0, 0, 5, descr=cdescr)
+            %(zero_array('p0', 0, 5, 'cdescr', cdescr))s
             %(setarrayitem('p0', 'i2', 'p1', cdescr))s
             jump()
         """)
@@ -828,7 +843,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p0, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p0, 0,  5, %(clendescr.field_size)s)
-            zero_array(p0, 2, 3, descr=cdescr)
+            %(zero_array('p0', 2, 3, 'cdescr', cdescr))s
             %(setarrayitem('p0', 1, 'p1', cdescr))s
             %(setarrayitem('p0', 0, 'p2', cdescr))s
             jump()
@@ -847,7 +862,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p0, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p0, 0,  5, %(clendescr.field_size)s)
-            zero_array(p0, 0, 3, descr=cdescr)
+            %(zero_array('p0', 0, 3, 'cdescr', cdescr))s
             %(setarrayitem('p0', 3, 'p1', cdescr))s
             %(setarrayitem('p0', 4, 'p2', cdescr))s
             jump()
@@ -867,7 +882,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p0, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p0, 0,  5, %(clendescr.field_size)s)
-            zero_array(p0, 0, 5, descr=cdescr)
+            %(zero_array('p0', 0, 5, 'cdescr', cdescr))s
             %(setarrayitem('p0', 3, 'p1', cdescr))s
             %(setarrayitem('p0', 2, 'p2', cdescr))s
             %(setarrayitem('p0', 1, 'p2', cdescr))s
@@ -890,7 +905,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p0, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p0, 0,  5, %(clendescr.field_size)s)
-            zero_array(p0, 5, 0, descr=cdescr)
+            %(zero_array('p0', 5, 0, 'cdescr', cdescr))s
             %(setarrayitem('p0', 3, 'p1', cdescr))s
             %(setarrayitem('p0', 4, 'p2', cdescr))s
             %(setarrayitem('p0', 0, 'p1', cdescr))s
@@ -913,7 +928,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p0, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p0, 0,  5, %(clendescr.field_size)s)
-            zero_array(p0, 1, 4, descr=cdescr)
+            %(zero_array('p0', 1, 4, 'cdescr', cdescr))s
             %(setarrayitem('p0', 0, 'p1', cdescr))s
             call_n(321321)
             cond_call_gc_wb(p0, descr=wbdescr)
@@ -935,7 +950,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             gc_store(p0, 0,  8111, %(tiddescr.field_size)s)
             gc_store(p0, 0,  5, %(clendescr.field_size)s)
-            zero_array(p0, 1, 4, descr=cdescr)
+            %(zero_array('p0', 1, 4, 'cdescr', cdescr))s
             %(setarrayitem('p0', 0, 'p1', cdescr))s
             label(p0, p2)
             cond_call_gc_wb_array(p0, 1, descr=wbdescr)
@@ -952,7 +967,7 @@
             [p1, p2, i3]
             p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
             gc_store(p0, 0,  i3, %(blendescr.field_size)s)
-            zero_array(p0, 0, i3, descr=bdescr)
+            %(zero_array('p0', 0, 'i3', 'bdescr', bdescr))s
             jump()
         """)
 
@@ -966,7 +981,7 @@
             [p1, p2, i3]
             p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
             gc_store(p0, 0,  i3, %(blendescr.field_size)s)
-            zero_array(p0, 0, i3, descr=bdescr)
+            %(zero_array('p0', 0, 'i3', 'bdescr', bdescr))s
             cond_call_gc_wb_array(p0, 0, descr=wbdescr)
             %(setarrayitem('p0', 0, 'p1', bdescr))s
             jump()
diff --git a/rpython/jit/backend/ppc/callbuilder.py 
b/rpython/jit/backend/ppc/callbuilder.py
--- a/rpython/jit/backend/ppc/callbuilder.py
+++ b/rpython/jit/backend/ppc/callbuilder.py
@@ -98,7 +98,7 @@
 
         # We must also copy fnloc into FNREG
         non_float_locs.append(self.fnloc)
-        non_float_regs.append(self.mc.RAW_CALL_REG)     # r2 or r12
+        non_float_regs.append(self.mc.RAW_CALL_REG)
 
         if float_locs:
             assert len(float_locs) <= len(self.FPR_ARGS)
diff --git a/rpython/jit/backend/ppc/opassembler.py 
b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -851,40 +851,6 @@
             self.mc.sldi(scratch_loc.value, loc.value, scale)
         return scratch_loc
 
-    def _apply_scale(self, ofs, index_loc, itemsize):
-        # XXX should die now that getarrayitem and getinteriorfield are gone
-        # but can't because of emit_zero_array() at the moment
-
-        # For arrayitem and interiorfield reads and writes: this returns an
-        # offset suitable for use in ld/ldx or similar instructions.
-        # The result will be either the register r2 or a 16-bit immediate.
-        # The arguments stand for "ofs + index_loc * itemsize",
-        # with the following constrains:
-        assert ofs.is_imm()                # must be an immediate...
-        assert _check_imm_arg(ofs.getint())   # ...that fits 16 bits
-        assert index_loc is not r.SCRATCH2 # can be a reg or imm (any size)
-        assert itemsize.is_imm()           # must be an immediate (any size)
-
-        multiply_by = itemsize.value
-        offset = ofs.getint()
-        if index_loc.is_imm():
-            offset += index_loc.getint() * multiply_by
-            if _check_imm_arg(offset):
-                return imm(offset)
-            else:
-                self.mc.load_imm(r.SCRATCH2, offset)
-                return r.SCRATCH2
-        else:
-            index_loc = self._multiply_by_constant(index_loc, multiply_by,
-                                                   r.SCRATCH2)
-            # here, the new index_loc contains 'index_loc * itemsize'.
-            # If offset != 0 then we have to add it here.  Note that
-            # mc.addi() would not be valid with operand r0.
-            if offset != 0:
-                self.mc.addi(r.SCRATCH2.value, index_loc.value, offset)
-                index_loc = r.SCRATCH2
-            return index_loc
-
     def _copy_in_scratch2(self, loc):
         if loc.is_imm():
             self.mc.li(r.SCRATCH2.value, loc.value)
@@ -903,86 +869,94 @@
         elif itemsize & 2:                self.mc.sthu(a, b, c)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: merge the new s390x backend

Reply via email to