Author: Wim Lavrijsen <wlavrij...@lbl.gov>
Branch: reflex-support
Changeset: r63535:608cb355ea66
Date: 2013-04-19 10:48 -0700
http://bitbucket.org/pypy/pypy/changeset/608cb355ea66/
Log:	merge default into branch; hopefully solves gc crash ...

diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -56,7 +56,7 @@
         if we_are_translated():
             self.debug = False
         self.current_clt = looptoken.compiled_loop_token
-        self.mc = InstrBuilder(self.cpu.arch_version)
+        self.mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         self.pending_guards = []
         assert self.datablockwrapper is None
         allblocks = self.get_asmmemmgr_blocks(looptoken)
@@ -80,7 +80,7 @@
         if not self.cpu.propagate_exception_descr:
             return      # not supported (for tests, or non-translated)
         #
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         self._store_and_reset_exception(mc, r.r0)
         ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
         # make sure ofs fits into a register
@@ -165,7 +165,7 @@
         #    |  my own retaddr       |    <-- sp
         #    +-----------------------+
         #
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         # save argument registers and return address
         mc.PUSH([reg.value for reg in r.argument_regs] + [r.ip.value, r.lr.value])
         # stack is aligned here
@@ -206,7 +206,7 @@
        # write barriers.  It must save all registers, and optionally
        # all vfp registers.  It takes a single argument which is in r0.
        # It must keep stack alignment accordingly.
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         #
         exc0 = exc1 = None
         mc.PUSH([r.ip.value, r.lr.value]) # push two words to keep alignment
@@ -249,8 +249,10 @@
         else:
             self.wb_slowpath[withcards + 2 * withfloats] = rawstart
 
-    def _build_malloc_slowpath(self):
-        mc = InstrBuilder(self.cpu.arch_version)
+    def _build_malloc_slowpath(self, kind):
+        if kind != 'fixed':
+            return 0
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         self._push_all_regs_to_jitframe(mc, [r.r0, r.r1], self.cpu.supports_floats)
         ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
         # store the gc pattern
@@ -287,9 +289,7 @@
         self.store_reg(mc, r.ip, r.fp, ofs)
         # return
         mc.POP([r.ip.value, r.pc.value])
-
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
-        self.malloc_slowpath = rawstart
+        return mc.materialize(self.cpu.asmmemmgr, [])
 
     def _reload_frame_if_necessary(self, mc):
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
@@ -362,7 +362,7 @@
             self.load_reg(mc, vfpr, r.fp, ofs)
 
     def _build_failure_recovery(self, exc, withfloats=False):
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         self._push_all_regs_to_jitframe(mc, [], withfloats)
 
         if exc:
@@ -645,7 +645,7 @@
                                         expected_size=expected_size)
 
     def _patch_frame_depth(self, adr, allocated_depth):
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         mc.gen_load_int(r.lr.value, allocated_depth)
         mc.copy_to_raw_memory(adr)
 
@@ -721,7 +721,7 @@
        # f) store the address of the new jitframe in the shadowstack
        # c) set the gcmap field to 0 in the new jitframe
        # g) restore registers and return
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats)
         # this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame
         # and the expected_size pushed in _check_stack_frame
@@ -781,7 +781,7 @@
         self.target_tokens_currently_compiling = None
 
     def _patch_stackadjust(self, adr, allocated_depth):
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         mc.gen_load_int(r.lr.value, allocated_depth)
         mc.copy_to_raw_memory(adr)
 
@@ -821,7 +821,7 @@
                 # patch the guard jumpt to the stub
                 # overwrite the generate NOP with a B_offs to the pos of the
                 # stub
-                mc = InstrBuilder(self.cpu.arch_version)
+                mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
                 mc.B_offs(relative_offset, c.get_opposite_of(tok.fcond))
                 mc.copy_to_raw_memory(guard_pos)
             else:
@@ -900,7 +900,7 @@
             self.mc.ASR_ri(resloc.value, resloc.value, 16)
 
     def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
-        b = InstrBuilder(self.cpu.arch_version)
+        b = InstrBuilder(self.cpu.cpuinfo.arch_version)
         patch_addr = faildescr._arm_failure_recovery_block
         assert patch_addr != 0
         b.B(bridge_addr)
diff --git a/rpython/jit/backend/arm/detect.py b/rpython/jit/backend/arm/detect.py
--- a/rpython/jit/backend/arm/detect.py
+++ b/rpython/jit/backend/arm/detect.py
@@ -1,7 +1,6 @@
 import os
 
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.rlib.clibffi import FFI_DEFAULT_ABI, FFI_SYSV, FFI_VFP
 from rpython.rtyper.tool import rffi_platform
 from rpython.translator.platform import CompilationError
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -46,9 +45,11 @@
     # "Processor : ARMv%d-compatible processor rev 7 (v6l)"
     i = buf.find('ARMv')
     if i == -1:
-        raise ValueError("Unknown Processor entry")
-
-    n = int(buf[i + 4])
+        n = 6
+        debug_print("Could not detect architecture version, "
+                    "falling back to", "ARMv%d" % n)
+    else:
+        n = int(buf[i + 4])
 
     if n < 6:
         raise ValueError("Unsupported ARM architecture version")
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -354,7 +354,7 @@
                    # whether to worry about a CALL that can collect; this
                    # is always true except in call_release_gil
                    can_collect=True):
-        if self.cpu.hf_abi:
+        if self.cpu.cpuinfo.hf_abi:
             stack_args, adr = self._setup_call_hf(adr, arglocs, fcond,
                                             resloc, result_info)
         else:
@@ -382,7 +382,7 @@
 
         # ensure the result is wellformed and stored in the correct location
         if resloc is not None:
-            if resloc.is_vfp_reg() and not self.cpu.hf_abi:
+            if resloc.is_vfp_reg() and not self.cpu.cpuinfo.hf_abi:
                 # move result to the allocated register
                 self.mov_to_vfp_loc(r.r0, r.r1, resloc)
             elif resloc.is_reg() and result_info != (-1, -1):
@@ -1230,7 +1230,7 @@
         baseofs = self.cpu.get_baseofs_of_frame_field()
         newlooptoken.compiled_loop_token.update_frame_info(
             oldlooptoken.compiled_loop_token, baseofs)
-        mc = InstrBuilder(self.cpu.arch_version)
+        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         mc.B(target)
         mc.copy_to_raw_memory(oldadr)
 
diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -11,6 +11,10 @@
 
 jitframe.STATICSIZE = JITFRAME_FIXED_SIZE
 
+class CPUInfo(object):
+    hf_abi = False
+    arch_version = 6
+
 class AbstractARMCPU(AbstractLLCPU):
 
     IS_64_BIT = False
@@ -26,13 +30,11 @@
     float_regs = VFPRegisterManager.all_regs
     frame_reg = fp
 
-    hf_abi = False        # use hard float abi flag
-    arch_version = 6      # assume ARMv6 as base case
-
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
         AbstractLLCPU.__init__(self, rtyper, stats, opts,
                                translate_support_code, gcdescr)
+        self.cpuinfo = CPUInfo()
 
     def set_debug(self, flag):
         return self.assembler.set_debug(flag)
@@ -47,8 +49,8 @@
         self.assembler = AssemblerARM(self, self.translate_support_code)
 
     def setup_once(self):
-        self.arch_version = detect_arch_version()
-        self.hf_abi = detect_hardfloat()
+        self.cpuinfo.arch_version = detect_arch_version()
+        self.cpuinfo.hf_abi = detect_hardfloat()
         self.assembler.setup_once()
 
     def finish_once(self):
@@ -92,7 +94,7 @@
         from rpython.jit.backend.arm.codebuilder import InstrBuilder
 
         for jmp, tgt in looptoken.compiled_loop_token.invalidate_positions:
-            mc = InstrBuilder(self.arch_version)
+            mc = InstrBuilder(self.cpuinfo.arch_version)
             mc.B_offs(tgt)
             mc.copy_to_raw_memory(jmp)
         # positions invalidated
diff --git a/rpython/jit/backend/arm/test/test_detect.py b/rpython/jit/backend/arm/test/test_detect.py
--- a/rpython/jit/backend/arm/test/test_detect.py
+++ b/rpython/jit/backend/arm/test/test_detect.py
@@ -3,7 +3,32 @@
 from rpython.jit.backend.arm.detect import detect_arch_version
 
 cpuinfo = "Processor : ARMv%d-compatible processor rev 7 (v6l)"""
-
+cpuinfo2 = """processor : 0
+vendor_id : GenuineIntel
+cpu family : 6
+model : 23
+model name : Intel(R) Core(TM)2 Duo CPU E8400 @ 3.00GHz
+stepping : 10
+microcode : 0xa07
+cpu MHz : 2997.000
+cache size : 6144 KB
+physical id : 0
+siblings : 2
+core id : 0
+cpu cores : 2
+apicid : 0
+initial apicid : 0
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme ...
+bogomips : 5993.08
+clflush size : 64
+cache_alignment : 64
+address sizes : 36 bits physical, 48 bits virtual
+power management:
+"""
 
 def write_cpuinfo(info):
     filepath = udir.join('get_arch_version')
@@ -20,5 +45,4 @@
     assert detect_arch_version(write_cpuinfo(cpuinfo % 8)) == 7
     py.test.raises(ValueError,
                    'detect_arch_version(write_cpuinfo(cpuinfo % 5))')
-    py.test.raises(ValueError,
-                   'detect_arch_version(write_cpuinfo("Lorem ipsum dolor sit amet, consectetur"))')
+    assert detect_arch_version(write_cpuinfo(cpuinfo2)) == 6
diff --git a/rpython/jit/backend/arm/test/test_runner.py b/rpython/jit/backend/arm/test/test_runner.py
--- a/rpython/jit/backend/arm/test/test_runner.py
+++ b/rpython/jit/backend/arm/test/test_runner.py
@@ -10,6 +10,7 @@
 from rpython.rtyper.annlowlevel import llhelper
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.metainterp.history import JitCellToken, TargetToken
+from rpython.jit.backend.arm.detect import detect_arch_version
 
 CPU = getcpuclass()
 
@@ -27,7 +28,8 @@
     bridge_loop_instructions = ['ldr', 'mov', 'nop', 'cmp', 'bge',
                                 'push', 'mov', 'mov', 'push', 'mov', 'mov',
                                 'blx', 'mov', 'mov', 'bx']
-    if CPU.arch_version == 7:
+    arch_version = detect_arch_version()
+    if arch_version == 7:
         bridge_loop_instructions = ['ldr', 'mov', 'nop', 'cmp', 'bge',
                                     'push', 'mov', 'mov', 'push', 'mov', 'mov',
                                     'blx', 'mov', 'mov', 'bx']
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -3935,3 +3935,19 @@
         descr = self.cpu.get_latest_descr(frame)
         assert descr.identifier == 42
         assert not self.cpu.grab_exc_value(frame)
+
+    def test_setarrayitem_raw_short(self):
+        # setarrayitem_raw(140737353744432, 0, 30583, descr=<ArrayS 2>)
+        A = rffi.CArray(rffi.SHORT)
+        arraydescr = self.cpu.arraydescrof(A)
+        a = lltype.malloc(A, 2, flavor='raw')
+        a[0] = rffi.cast(rffi.SHORT, 666)
+        a[1] = rffi.cast(rffi.SHORT, 777)
+        a_int = rffi.cast(lltype.Signed, a)
+        print 'a_int:', a_int
+        self.execute_operation(rop.SETARRAYITEM_RAW,
+                               [ConstInt(a_int), ConstInt(0), ConstInt(-7654)],
+                               'void', descr=arraydescr)
+        assert rffi.cast(lltype.Signed, a[0]) == -7654
+        assert rffi.cast(lltype.Signed, a[1]) == 777
+        lltype.free(a, flavor='raw')
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -521,17 +521,6 @@
 
         return func_with_new_name(INSN, "INSN_" + name)
 
-    def _16_bit_binaryop(name):
-        def INSN(self, loc1, loc2):
-            # Select 16-bit operand mode
-            self.writechar('\x66')
-            # XXX: Hack to let immediate() in rx86 know to do a 16-bit encoding
-            self._use_16_bit_immediate = True
-            getattr(self, name)(loc1, loc2)
-            self._use_16_bit_immediate = False
-
-        return INSN
-
     def _addr_as_reg_offset(self, addr):
         # Encodes a (64-bit) address as an offset from the scratch register.
         # If we are within a "reuse_scratch_register" block, we remember the
@@ -616,10 +605,10 @@
     NEG = _unaryop('NEG')
 
     CMP = _binaryop('CMP')
-    CMP16 = _16_bit_binaryop('CMP')
+    CMP16 = _binaryop('CMP16')
    MOV = _binaryop('MOV')
    MOV8 = _binaryop('MOV8')
-    MOV16 = _16_bit_binaryop('MOV')
+    MOV16 = _binaryop('MOV16')
    MOVZX8 = _binaryop('MOVZX8')
    MOVSX8 = _binaryop('MOVSX8')
    MOVZX16 = _binaryop('MOVZX16')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -125,10 +125,7 @@
     elif width == 'q' and mc.WORD == 8:
         mc.writeimm64(immediate)
     else:
-        if mc._use_16_bit_immediate:
-            mc.writeimm16(immediate)
-        else:
-            mc.writeimm32(immediate)
+        mc.writeimm32(immediate)
     return 0
 
 def immediate(argnum, width='i'):
@@ -282,16 +279,20 @@
 
 # (the immediate address itself must be explicitely encoded as well,
 # with immediate(argnum)).
-def encode_abs(mc, _1, _2, orbyte):
+@specialize.arg(2)
+def encode_abs(mc, immediate, _, orbyte):
     # expands to either '\x05' on 32-bit, or '\x04\x25' on 64-bit
     if mc.WORD == 8:
         mc.writechar(chr(0x04 | orbyte))
         mc.writechar(chr(0x25))
     else:
         mc.writechar(chr(0x05 | orbyte))
+    # followed by an immediate, always 32 bits
+    mc.writeimm32(immediate)
     return 0
 
-abs_ = encode_abs, 0, None, None
+def abs_(argnum):
+    return encode_abs, argnum, None, None
 
 # ____________________________________________________________
 # For 64-bits mode: the REX.W, REX.R, REX.X, REG.B prefixes
@@ -305,9 +306,6 @@
 def encode_rex(mc, rexbyte, basevalue, orbyte):
     if mc.WORD == 8:
         assert 0 <= rexbyte < 8
-        # XXX: Hack. Ignore REX.W if we are using 16-bit operands
-        if mc._use_16_bit_immediate:
-            basevalue &= ~REX_W
         if basevalue != 0 or rexbyte != 0:
             if basevalue == 0:
                 basevalue = 0x40
@@ -374,9 +372,8 @@
     INSN_br = insn(rex_w, chr(base+1), register(2,8), stack_bp(1))
     INSN_rb = insn(rex_w, chr(base+3), register(1,8), stack_bp(2))
     INSN_rm = insn(rex_w, chr(base+3), register(1,8), mem_reg_plus_const(2))
-    INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_, immediate(2))
-    INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_, immediate(1),
-                    immediate(2,'b'))
+    INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_(2))
+    INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_(1), immediate(2,'b'))
     INSN_mi8 = insn(rex_w, '\x83', orbyte(base), mem_reg_plus_const(1),
                     immediate(2,'b'))
     INSN_bi8 = insn(rex_w, '\x83', orbyte(base), stack_bp(1), immediate(2,'b'))
@@ -449,9 +446,6 @@
 class AbstractX86CodeBuilder(object):
     """Abstract base class."""
 
-    # Used by the 16-bit version of instructions
-    _use_16_bit_immediate = False
-
     def writechar(self, char):
         raise NotImplementedError
 
@@ -489,15 +483,13 @@
     CMP_mi = select_8_or_32_bit_immed(CMP_mi8, CMP_mi32)
     CMP_mr = insn(rex_w, '\x39', register(2, 8), mem_reg_plus_const(1))
 
-    CMP_ji8 = insn(rex_w, '\x83', orbyte(7<<3), abs_,
-                   immediate(1), immediate(2, 'b'))
-    CMP_ji32 = insn(rex_w, '\x81', orbyte(7<<3), abs_,
-                    immediate(1), immediate(2))
+    CMP_ji8 = insn(rex_w, '\x83', orbyte(7<<3), abs_(1), immediate(2, 'b'))
+    CMP_ji32 = insn(rex_w, '\x81', orbyte(7<<3), abs_(1), immediate(2))
     CMP_ji = select_8_or_32_bit_immed(CMP_ji8, CMP_ji32)
-    CMP_jr = insn(rex_w, '\x39', register(2, 8), abs_, immediate(1))
+    CMP_jr = insn(rex_w, '\x39', register(2, 8), abs_(1))
     CMP32_mi = insn(rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1),
                     immediate(2))
-
+    CMP16_mi = insn('\x66', rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'h'))
     CMP8_ri = insn(rex_fw, '\x80', byte_register(1), '\xF8', immediate(2, 'b'))
 
     AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
@@ -505,7 +497,7 @@
     OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0')
     OR8_mi = insn(rex_nw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
                   immediate(2, 'b'))
-    OR8_ji = insn(rex_nw, '\x80', orbyte(1<<3), abs_, immediate(1),
+    OR8_ji = insn(rex_nw, '\x80', orbyte(1<<3), abs_(1),
                   immediate(2, 'b'))
 
     NEG_r = insn(rex_w, '\xF7', register(1), '\xD8')
@@ -556,7 +548,7 @@
     LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
     LEA_ra = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
     LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
-    LEA_rj = insn(rex_w, '\x8D', register(1, 8), abs_, immediate(2))
+    LEA_rj = insn(rex_w, '\x8D', register(1, 8), abs_(2))
 
     CALL_l = insn('\xE8', relative(1))
     CALL_r = insn(rex_nw, '\xFF', register(1), chr(0xC0 | (2<<3)))
@@ -583,11 +575,11 @@
     TEST8_mi = insn(rex_nw, '\xF6', orbyte(0<<3), mem_reg_plus_const(1),
                     immediate(2, 'b'))
     TEST8_bi = insn(rex_nw, '\xF6', orbyte(0<<3), stack_bp(1), immediate(2, 'b'))
-    TEST8_ji = insn(rex_nw, '\xF6', orbyte(0<<3), abs_, immediate(1), immediate(2, 'b'))
+    TEST8_ji = insn(rex_nw, '\xF6', orbyte(0<<3), abs_(1), immediate(2, 'b'))
     TEST_rr = insn(rex_w, '\x85', register(2,8), register(1), '\xC0')
 
     BTS_mr = insn(rex_w, '\x0F\xAB', register(2,8), mem_reg_plus_const(1))
-    BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_, immediate(1))
+    BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_(1))
 
     # x87 instructions
     FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
@@ -718,7 +710,7 @@
     add_insn('s', stack_sp(modrm_argnum))
     add_insn('m', mem_reg_plus_const(modrm_argnum))
     add_insn('a', mem_reg_plus_scaled_reg_plus_const(modrm_argnum))
-    add_insn('j', abs_, immediate(modrm_argnum))
+    add_insn('j', abs_(modrm_argnum))
 
 # Define a regular MOV, and a variant MOV32 that only uses the low 4 bytes of a
 # register
@@ -729,6 +721,8 @@
 define_modrm_modes('MOV8_*r', [rex_fw, '\x88', byte_register(2, 8)], regtype='BYTE')
 define_modrm_modes('MOV8_*i', [rex_fw, '\xC6', orbyte(0<<3)], [immediate(2, 'b')],
                    regtype='BYTE')
+define_modrm_modes('MOV16_*r', ['\x66', rex_nw, '\x89', register(2, 8)])
+define_modrm_modes('MOV16_*i', ['\x66', rex_nw, '\xC7', orbyte(0<<3)], [immediate(2, 'h')])
 
 define_modrm_modes('MOVZX8_r*', [rex_w, '\x0F\xB6', register(1, 8)], regtype='BYTE')
 define_modrm_modes('MOVSX8_r*', [rex_w, '\x0F\xBE', register(1, 8)], regtype='BYTE')
@@ -766,7 +760,7 @@
     #
     assert insnname_template.count('*') == 1
     add_insn('x', register(2), '\xC0')
-    add_insn('j', abs_, immediate(2))
+    add_insn('j', abs_(2))
     add_insn('m', mem_reg_plus_const(2))
 
 define_pxmm_insn('PADDQ_x*', '\xD4')
diff --git a/rpython/memory/gc/env.py b/rpython/memory/gc/env.py
--- a/rpython/memory/gc/env.py
+++ b/rpython/memory/gc/env.py
@@ -279,7 +279,7 @@
 def best_nursery_size_for_L2cache(L2cache):
     # Heuristically, the best nursery size to choose is about half
     # of the L2 cache.
-    if L2cache > 1024 * 1024: # we don't want to have nursery estimated
+    if L2cache > 2 * 1024 * 1024: # we don't want to have nursery estimated
         # on L2 when L3 is present
         return L2cache // 2
     else:
diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py
--- a/rpython/memory/gc/minimark.py
+++ b/rpython/memory/gc/minimark.py
@@ -130,6 +130,7 @@
 FORWARDSTUB = lltype.GcStruct('forwarding_stub',
                               ('forw', llmemory.Address))
 FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
+NURSARRAY = lltype.Array(llmemory.Address)
 
 # ____________________________________________________________
 
@@ -263,7 +264,7 @@
         self.nursery_top = NULL
         self.nursery_real_top = NULL
         self.debug_tiny_nursery = -1
-        self.debug_rotating_nurseries = None
+        self.debug_rotating_nurseries = lltype.nullptr(NURSARRAY)
         self.extra_threshold = 0
         #
         # The ArenaCollection() handles the nonmovable objects allocation.
@@ -350,8 +351,6 @@
         # hacking at the current nursery position in collect_and_reserve().
         if newsize <= 0:
             newsize = env.estimate_best_nursery_size()
-            #    4*1024*1024    # fixed to 4MB by default
-            #    (it was env.estimate_best_nursery_size())
             if newsize <= 0:
                 newsize = defaultsize
         if newsize < minsize:
@@ -471,23 +470,32 @@
         # and use them alternatively, while mprotect()ing the unused
         # ones to detect invalid access.
         debug_start("gc-debug")
-        self.debug_rotating_nurseries = []
-        for i in range(22):
+        self.debug_rotating_nurseries = lltype.malloc(
+            NURSARRAY, 22, flavor='raw', track_allocation=False)
+        i = 0
+        while i < 22:
             nurs = self._alloc_nursery()
             llarena.arena_protect(nurs, self._nursery_memory_size(), True)
-            self.debug_rotating_nurseries.append(nurs)
+            self.debug_rotating_nurseries[i] = nurs
+            i += 1
         debug_print("allocated", len(self.debug_rotating_nurseries),
                     "extra nurseries")
         debug_stop("gc-debug")
 
     def debug_rotate_nursery(self):
-        if self.debug_rotating_nurseries is not None:
+        if self.debug_rotating_nurseries:
             debug_start("gc-debug")
             oldnurs = self.nursery
             llarena.arena_protect(oldnurs, self._nursery_memory_size(), True)
-            self.debug_rotating_nurseries.append(oldnurs)
             #
-            newnurs = self.debug_rotating_nurseries.pop(0)
+            newnurs = self.debug_rotating_nurseries[0]
+            i = 0
+            while i < len(self.debug_rotating_nurseries) - 1:
+                self.debug_rotating_nurseries[i] = (
+                    self.debug_rotating_nurseries[i + 1])
+                i += 1
+            self.debug_rotating_nurseries[i] = oldnurs
+            #
             llarena.arena_protect(newnurs, self._nursery_memory_size(), False)
             self.nursery = newnurs
             self.nursery_top = self.nursery + self.initial_cleanup
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -619,6 +619,12 @@
         func = getattr(graph, 'func', None)
         if func and getattr(func, '_gc_no_collect_', False):
             if self.collect_analyzer.analyze_direct_call(graph):
+                print '!'*79
+                ca = CollectAnalyzer(self.translator)
+                ca.verbose = True
+                ca.analyze_direct_call(graph)
+                # ^^^ for the dump of which operation in which graph actually
+                #     causes it to return True
                 raise Exception("'no_collect' function can trigger collection:"
                                 " %s" % func)
 
diff --git a/rpython/translator/backendopt/graphanalyze.py b/rpython/translator/backendopt/graphanalyze.py
--- a/rpython/translator/backendopt/graphanalyze.py
+++ b/rpython/translator/backendopt/graphanalyze.py
@@ -80,21 +80,21 @@
             if graph is None:
                 x = self.analyze_external_call(op, seen)
                 if self.verbose and x:
-                    print '\tanalyze_external_call %s: %r' % (op, x)
+                    self.dump_info('analyze_external_call %s: %r' % (op, x))
                 return x
             x = self.analyze_direct_call(graph, seen)
             if self.verbose and x:
-                print '\tanalyze_direct_call(%s): %r' % (graph, x)
+                self.dump_info('analyze_direct_call(%s): %r' % (graph, x))
             return x
         elif op.opname == "indirect_call":
             graphs = op.args[-1].value
             if graphs is None:
                 if self.verbose:
-                    print '\t%s to unknown' % (op,)
+                    self.dump_info('%s to unknown' % (op,))
                 return self.top_result()
             x = self.analyze_indirect_call(graphs, seen)
             if self.verbose and x:
-                print '\tanalyze_indirect_call(%s): %r' % (graphs, x)
+                self.dump_info('analyze_indirect_call(%s): %r' % (graphs, x))
             return x
         elif op.opname == "oosend":
             name = op.args[0].value
@@ -106,9 +106,12 @@
             return self.analyze_oosend(TYPE, name, seen)
         x = self.analyze_simple_operation(op, graphinfo)
         if self.verbose and x:
-            print '\t%s: %r' % (op, x)
+            self.dump_info('%s: %r' % (op, x))
         return x
 
+    def dump_info(self, info):
+        print '[%s] %s' % (self.__class__.__name__, info)
+
     def analyze_direct_call(self, graph, seen=None):
         if seen is None:
             seen = DependencyTracker(self)
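As context for the detect.py hunk above: the fallback behaviour that the new
test_detect.py cases exercise can be summarised by the standalone sketch below.
This is plain Python with an illustrative function name, not the actual rpython
detect_arch_version (which reads /proc/cpuinfo and logs via debug_print); it
only mirrors the behaviour asserted by the tests.

    def detect_arch_version_sketch(cpuinfo_text):
        # Look for an "ARMv<N>" entry, e.g.
        # "Processor : ARMv7-compatible processor rev 7 (v6l)".
        i = cpuinfo_text.find('ARMv')
        if i == -1:
            # No entry found (e.g. the x86 cpuinfo2 sample in the test):
            # fall back to the ARMv6 baseline instead of raising.
            n = 6
        else:
            n = int(cpuinfo_text[i + 4])
        if n < 6:
            raise ValueError("Unsupported ARM architecture version")
        # Versions newer than ARMv7 are treated as ARMv7, matching the
        # "cpuinfo % 8 == 7" assertion in the test.
        return min(n, 7)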