Author: fijal Branch: Changeset: r80270:3bc924586ed6 Date: 2015-10-16 12:52 +0200 http://bitbucket.org/pypy/pypy/changeset/3bc924586ed6/
Log: merge diff too long, truncating to 2000 out of 9622 lines diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -76,6 +76,11 @@ if "cppyy" in working_modules: working_modules.remove("cppyy") # depends on ctypes +if sys.platform.startswith("linux"): + _mach = os.popen('uname -m', 'r').read().strip() + if _mach.startswith('ppc'): + working_modules.remove("_continuation") + module_dependencies = { '_multiprocessing': [('objspace.usemodules.time', True), diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -224,6 +224,10 @@ if not for_frame: self._push_all_regs_to_jitframe(mc, [], withfloats, callee_only=True) else: + # NOTE: don't save registers on the jitframe here! It might + # override already-saved values that will be restored + # later... + # # we're possibly called from the slowpath of malloc # save the caller saved registers # assuming we do not collect here diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -41,10 +41,6 @@ from rpython.jit.backend.llsupport.descr import CallDescr -# xxx hack: set a default value for TargetToken._ll_loop_code. If 0, we know -# that it is a LABEL that was not compiled yet. -TargetToken._ll_loop_code = 0 - class TempInt(TempVar): type = INT @@ -1257,18 +1253,6 @@ prepare_op_call_assembler_f = _prepare_op_call_assembler prepare_op_call_assembler_n = _prepare_op_call_assembler - def _prepare_args_for_new_op(self, new_args): - gc_ll_descr = self.cpu.gc_ll_descr - args = gc_ll_descr.args_for_new(new_args) - arglocs = [] - for i in range(len(args)): - arg = args[i] - t = TempInt() - l = self.force_allocate_reg(t, selected_reg=r.all_regs[i]) - self.assembler.load(l, imm(arg)) - arglocs.append(t) - return arglocs - prepare_op_float_add = prepare_two_regs_op prepare_op_float_sub = prepare_two_regs_op prepare_op_float_mul = prepare_two_regs_op diff --git a/rpython/jit/backend/arm/test/test_runner.py b/rpython/jit/backend/arm/test/test_runner.py --- a/rpython/jit/backend/arm/test/test_runner.py +++ b/rpython/jit/backend/arm/test/test_runner.py @@ -26,24 +26,24 @@ # for the individual tests see # ====> ../../test/runner_test.py - add_loop_instructions = ['ldr', 'adds', 'cmp', 'beq', 'b'] - bridge_loop_instructions = ['ldr', 'mov', 'nop', 'cmp', 'bge', - 'push', 'mov', 'mov', 'push', 'mov', 'mov', - 'blx', 'mov', 'mov', 'bx'] + add_loop_instructions = 'ldr; adds; cmp; beq; b;$' + bridge_loop_instructions = ('ldr; mov; nop; cmp; bge; ' + 'push; mov; mov; push; mov; mov; ' + 'blx; mov; mov; bx;$') arch_version = detect_arch_version() if arch_version == 7: - bridge_loop_instructions = ['ldr', 'mov', 'nop', 'cmp', 'bge', - 'push', 'mov', 'mov', 'push', 'mov', 'mov', - 'blx', 'mov', 'mov', 'bx'] + bridge_loop_instructions = ('ldr; mov; nop; cmp; bge; ' + 'push; mov; mov; push; mov; mov; ' + 'blx; mov; mov; bx;$') else: - bridge_loop_instructions = ['ldr', 'mov', 'nop', 'nop', 'nop', 'cmp', 'bge', - 'push', 'ldr', 'mov', - '*', # inline constant - 'push', 'ldr', 'mov', - '*', # inline constant - 'blx', 'ldr', 'mov', - '*', # inline constant - 'bx'] + bridge_loop_instructions = ('ldr; mov; nop; nop; nop; cmp; bge; ' + 'push; ldr; mov; ' + '[^;]+; ' # inline constant + 'push; ldr; mov; ' + '[^;]+; ' # inline constant + 'blx; ldr; mov; ' + '[^;]+; ' # inline constant + 'bx;$') def get_cpu(self): cpu = CPU(rtyper=None, stats=FakeStats()) diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py --- a/rpython/jit/backend/detect_cpu.py +++ b/rpython/jit/backend/detect_cpu.py @@ -59,6 +59,8 @@ 'i86pc': MODEL_X86, # Solaris/Intel 'x86': MODEL_X86, # Apple 'Power Macintosh': MODEL_PPC_64, + 'ppc64': MODEL_PPC_64, + 'ppc64le': MODEL_PPC_64, 'x86_64': MODEL_X86, 'amd64': MODEL_X86, # freebsd 'AMD64': MODEL_X86, # win64 @@ -118,6 +120,8 @@ return "rpython.jit.backend.x86.runner", "CPU_X86_64_SSE4" elif backend_name == MODEL_ARM: return "rpython.jit.backend.arm.runner", "CPU_ARM" + elif backend_name == MODEL_PPC_64: + return "rpython.jit.backend.ppc.runner", "PPC_CPU" else: raise ProcessorAutodetectError, ( "we have no JIT backend for this cpu: '%s'" % backend_name) diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -243,6 +243,23 @@ self.mc.get_relative_pos()) def call_assembler(self, op, argloc, vloc, result_loc, tmploc): + """ + * argloc: location of the frame argument that we're passing to + the called assembler (this is the first return value + of locs_for_call_assembler()) + + * vloc: location of the virtualizable (not in a register; + this is the optional second return value of + locs_for_call_assembler(), or imm(0) if none returned) + + * result_loc: location of op.result (which is not be + confused with the next one) + + * tmploc: location where the actual call to the other piece + of assembler will return its jitframe result + (which is always a REF), before the helper may be + called + """ descr = op.getdescr() assert isinstance(descr, JitCellToken) # diff --git a/rpython/jit/backend/llsupport/llerrno.py b/rpython/jit/backend/llsupport/llerrno.py --- a/rpython/jit/backend/llsupport/llerrno.py +++ b/rpython/jit/backend/llsupport/llerrno.py @@ -1,14 +1,22 @@ +import sys from rpython.rtyper.lltypesystem import lltype, rffi from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.jit.backend.llsupport.symbolic import WORD +if sys.byteorder == 'little' or sys.maxint <= 2**32: + long2int = int2long = lambda x: x +else: + def long2int(x): return x >> 32 + def int2long(x): return x << 32 + + def get_debug_saved_errno(cpu): - return cpu._debug_errno_container[3] + return long2int(cpu._debug_errno_container[3]) def set_debug_saved_errno(cpu, nerrno): assert nerrno >= 0 - cpu._debug_errno_container[3] = nerrno + cpu._debug_errno_container[3] = int2long(nerrno) def get_rpy_errno_offset(cpu): if cpu.translate_support_code: @@ -19,11 +27,11 @@ def get_debug_saved_alterrno(cpu): - return cpu._debug_errno_container[4] + return long2int(cpu._debug_errno_container[4]) def set_debug_saved_alterrno(cpu, nerrno): assert nerrno >= 0 - cpu._debug_errno_container[4] = nerrno + cpu._debug_errno_container[4] = int2long(nerrno) def get_alt_errno_offset(cpu): if cpu.translate_support_code: diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py b/rpython/jit/backend/llsupport/test/test_gc_integration.py --- a/rpython/jit/backend/llsupport/test/test_gc_integration.py +++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py @@ -3,7 +3,7 @@ """ import py -import re +import re, sys, struct from rpython.jit.metainterp.history import TargetToken, BasicFinalDescr,\ JitCellToken, BasicFailDescr, AbstractDescr from rpython.jit.backend.llsupport.gc import GcLLDescription, GcLLDescr_boehm,\ @@ -90,6 +90,8 @@ assert nos == [0, 1, 25] elif self.cpu.backend_name.startswith('arm'): assert nos == [0, 1, 47] + elif self.cpu.backend_name.startswith('ppc64'): + assert nos == [0, 1, 33] else: raise Exception("write the data here") assert frame.jf_frame[nos[0]] @@ -155,6 +157,8 @@ self.nursery = lltype.malloc(NTP, 64, flavor='raw') for i in range(64): self.nursery[i] = NOT_INITIALIZED + self.nursery_words = rffi.cast(rffi.CArrayPtr(lltype.Signed), + self.nursery) self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 2, flavor='raw') self.addrs[0] = rffi.cast(lltype.Signed, self.nursery) @@ -263,11 +267,11 @@ # slowpath never called assert gc_ll_descr.calls == [] - def test_malloc_nursery_varsize(self): + def test_malloc_nursery_varsize_nonframe(self): self.cpu = self.getcpu(None) A = lltype.GcArray(lltype.Signed) arraydescr = self.cpu.arraydescrof(A) - arraydescr.tid = 15 + arraydescr.tid = 1515 ops = ''' [i0, i1, i2] p0 = call_malloc_nursery_varsize(0, 8, i0, descr=arraydescr) @@ -283,8 +287,8 @@ assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0 assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 2*WORD + 8*1 # check the nursery content and state - assert gc_ll_descr.nursery[0] == chr(15) - assert gc_ll_descr.nursery[2 * WORD + 8] == chr(15) + assert gc_ll_descr.nursery_words[0] == 1515 + assert gc_ll_descr.nursery_words[2 + 8 // WORD] == 1515 assert gc_ll_descr.addrs[0] == nurs_adr + (((4 * WORD + 8*1 + 5*2) + (WORD - 1)) & ~(WORD - 1)) # slowpath never called assert gc_ll_descr.calls == [] @@ -323,11 +327,11 @@ idx = 1 assert len(frame.jf_gcmap) == expected_size if self.cpu.IS_64_BIT: - assert frame.jf_gcmap[idx] == (1<<29) | (1 << 30) + exp_idx = self.cpu.JITFRAME_FIXED_SIZE + 1 # +1 from i0 else: assert frame.jf_gcmap[idx] exp_idx = self.cpu.JITFRAME_FIXED_SIZE - 32 * idx + 1 # +1 from i0 - assert frame.jf_gcmap[idx] == (1 << (exp_idx + 1)) | (1 << exp_idx) + assert frame.jf_gcmap[idx] == (1 << (exp_idx + 1)) | (1 << exp_idx) self.cpu = self.getcpu(check) ops = ''' @@ -609,7 +613,10 @@ cpu = CPU(None, None) cpu.gc_ll_descr = GCDescrShadowstackDirect() wbd = cpu.gc_ll_descr.write_barrier_descr - wbd.jit_wb_if_flag_byteofs = 0 # directly into 'hdr' field + if sys.byteorder == 'little': + wbd.jit_wb_if_flag_byteofs = 0 # directly into 'hdr' field + else: + wbd.jit_wb_if_flag_byteofs = struct.calcsize("l") - 1 S = lltype.GcForwardReference() S.become(lltype.GcStruct('S', ('hdr', lltype.Signed), @@ -636,7 +643,9 @@ frames.append(frame) new_frame = JITFRAME.allocate(frame.jf_frame_info) gcmap = unpack_gcmap(frame) - if self.cpu.IS_64_BIT: + if self.cpu.backend_name.startswith('ppc64'): + assert gcmap == [30, 31, 32] + elif self.cpu.IS_64_BIT: assert gcmap == [28, 29, 30] elif self.cpu.backend_name.startswith('arm'): assert gcmap == [44, 45, 46] @@ -647,6 +656,8 @@ new_frame.jf_frame[item] = rffi.cast(lltype.Signed, s) assert cpu.gc_ll_descr.gcrootmap.stack[0] == rffi.cast(lltype.Signed, frame) cpu.gc_ll_descr.gcrootmap.stack[0] = rffi.cast(lltype.Signed, new_frame) + print '"Collecting" moved the frame from %d to %d' % ( + i, cpu.gc_ll_descr.gcrootmap.stack[0]) frames.append(new_frame) def check2(i): diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -307,7 +307,7 @@ for line in open(str(logfile)): if 'guard_class' in line: guard_class += 1 - # if we get many more guard_classes, it means that we generate + # if we get many more guard_classes (~93), it means that we generate # guards that always fail (the following assert's original purpose # is to catch the following case: each GUARD_CLASS is misgenerated # and always fails with "gcremovetypeptr") diff --git a/rpython/jit/backend/ppc/__init__.py b/rpython/jit/backend/ppc/__init__.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/ppc/__init__.py @@ -0,0 +1,1 @@ +# diff --git a/rpython/jit/backend/ppc/arch.py b/rpython/jit/backend/ppc/arch.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/ppc/arch.py @@ -0,0 +1,82 @@ +# Constants that depend on whether we are on 32-bit or 64-bit + +import sys +from rpython.jit.backend.ppc import register as r + +import sys +if sys.maxint == (2**31 - 1): + assert False, "the ppc backend only supports PPC-64 for now" + WORD = 4 + #DWORD = 2 * WORD + IS_PPC_32 = True + #BACKCHAIN_SIZE = 2 + #FPR_SAVE_AREA = len(NONVOLATILES_FLOAT) * DWORD +else: + WORD = 8 + #DWORD = 2 * WORD + IS_PPC_32 = False + #BACKCHAIN_SIZE = 6 + #FPR_SAVE_AREA = len(NONVOLATILES_FLOAT) * WORD + +IS_PPC_64 = not IS_PPC_32 +MY_COPY_OF_REGS = 0 + +IS_BIG_ENDIAN = sys.byteorder == 'big' +IS_LITTLE_ENDIAN = sys.byteorder == 'little' +assert IS_BIG_ENDIAN ^ IS_LITTLE_ENDIAN + +#FORCE_INDEX = WORD +#GPR_SAVE_AREA = len(NONVOLATILES) * WORD +#FLOAT_INT_CONVERSION = WORD +MAX_REG_PARAMS = 8 +MAX_FREG_PARAMS = 13 +# we need at most 5 instructions to load a constant +# and one instruction to patch the stack pointer +#SIZE_LOAD_IMM_PATCH_SP = 6 + +#FORCE_INDEX_OFS = (len(MANAGED_REGS) + len(MANAGED_FP_REGS)) * WORD + + +# BIG ENDIAN LITTLE ENDIAN +# +# +--------------------+ <- SP + STD_FRAME_SIZE +# | general registers | +# | save area | +# +--------------------+ <- SP + 120 SP + 104 +# | Local vars | +# +--------------------+ <- SP + 112 SP + 96 +# | Parameter save | +# | area (8 args max) | +# +--------------------+ <- SP + 48 SP + 32 +# | TOC (unused) | +# +--------------------+ <- SP + 40 SP + 24 +# | link ed. (unused) | +# +--------------------+ <- SP + 32 absent +# | compiler (unused) | +# +--------------------+ <- SP + 24 absent +# | LR save area | +# +--------------------+ <- SP + 16 SP + 16 +# | CR save (unused) | +# +--------------------+ <- SP + 8 SP + 8 +# | SP back chain | +# +--------------------+ <- SP SP + +# The local variables area contains only a copy of the 2nd argument +# passed to the machine code function, which is the ll_threadlocal_addr. +# The 1st argument, i.e. the GC-managed jitframe, is stored in the +# register r31. + + +LR_BC_OFFSET = 16 +_GAP = 0 if IS_BIG_ENDIAN else 16 +PARAM_SAVE_AREA_OFFSET = 48 - _GAP +LOCAL_VARS_OFFSET = 112 - _GAP +THREADLOCAL_ADDR_OFFSET = LOCAL_VARS_OFFSET +GPR_SAVE_AREA_OFFSET = 120 - _GAP + +REGISTERS_SAVED = [r.r25, r.r26, r.r27, r.r28, r.r29, r.r30, r.r31] +assert REGISTERS_SAVED == [_r for _r in r.NONVOLATILES + if _r in r.MANAGED_REGS or _r == r.r31] + +STD_FRAME_SIZE_IN_BYTES = GPR_SAVE_AREA_OFFSET + len(REGISTERS_SAVED) * WORD +assert STD_FRAME_SIZE_IN_BYTES % 16 == 0 diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/ppc/callbuilder.py @@ -0,0 +1,278 @@ +from rpython.jit.backend.ppc.arch import IS_PPC_64, WORD, PARAM_SAVE_AREA_OFFSET +from rpython.jit.backend.ppc.arch import THREADLOCAL_ADDR_OFFSET +import rpython.jit.backend.ppc.register as r +from rpython.jit.metainterp.history import INT, FLOAT +from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder +from rpython.jit.backend.ppc.jump import remap_frame_layout +from rpython.rlib.objectmodel import we_are_translated +from rpython.jit.backend.llsupport import llerrno +from rpython.rtyper.lltypesystem import rffi + + +def follow_jump(addr): + # xxx implement me + return addr + + +class CallBuilder(AbstractCallBuilder): + GPR_ARGS = [r.r3, r.r4, r.r5, r.r6, r.r7, r.r8, r.r9, r.r10] + FPR_ARGS = r.MANAGED_FP_REGS + assert FPR_ARGS == [r.f1, r.f2, r.f3, r.f4, r.f5, r.f6, r.f7, + r.f8, r.f9, r.f10, r.f11, r.f12, r.f13] + RSHADOWPTR = r.RCS1 + RFASTGILPTR = r.RCS2 + RSHADOWOLD = r.RCS3 + + def __init__(self, assembler, fnloc, arglocs, resloc): + AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs, + resloc, restype=INT, ressize=None) + + def prepare_arguments(self): + assert IS_PPC_64 + self.subtracted_to_sp = 0 + + # Prepare arguments. Note that this follows the convention where + # a prototype is in scope, and doesn't take "..." arguments. If + # you were to call a C function with a "..." argument with cffi, + # it would not go there but instead via libffi. If you pretend + # instead that it takes fixed arguments, then it would arrive here + # but the convention is bogus for floating-point arguments. (And, + # to add to the mess, at least CPython's ctypes cannot be used + # to call a "..." function with floating-point arguments. As I + # guess that it's a problem with libffi, it means PyPy inherits + # the same problem.) + arglocs = self.arglocs + num_args = len(arglocs) + + non_float_locs = [] + non_float_regs = [] + float_locs = [] + for i in range(min(num_args, 8)): + if arglocs[i].type != FLOAT: + non_float_locs.append(arglocs[i]) + non_float_regs.append(self.GPR_ARGS[i]) + else: + float_locs.append(arglocs[i]) + # now 'non_float_locs' and 'float_locs' together contain the + # locations of the first 8 arguments + + if num_args > 8: + # We need to make a larger PPC stack frame, as shown on the + # picture in arch.py. It needs to be 48 bytes + 8 * num_args. + # The new SP back chain location should point to the top of + # the whole stack frame, i.e. jumping over both the existing + # fixed-sise part and the new variable-sized part. + base = PARAM_SAVE_AREA_OFFSET + varsize = base + 8 * num_args + varsize = (varsize + 15) & ~15 # align + self.mc.load(r.SCRATCH2.value, r.SP.value, 0) # SP back chain + self.mc.store_update(r.SCRATCH2.value, r.SP.value, -varsize) + self.subtracted_to_sp = varsize + + # In this variable-sized part, only the arguments from the 8th + # one need to be written, starting at SP + 112 + for n in range(8, num_args): + loc = arglocs[n] + if loc.type != FLOAT: + # after the 8th argument, a non-float location is + # always stored in the stack + if loc.is_reg(): + src = loc + else: + src = r.r2 + self.asm.regalloc_mov(loc, src) + self.mc.std(src.value, r.SP.value, base + 8 * n) + else: + # the first 13 floating-point arguments are all passed + # in the registers f1 to f13, independently on their + # index in the complete list of arguments + if len(float_locs) < len(self.FPR_ARGS): + float_locs.append(loc) + else: + if loc.is_fp_reg(): + src = loc + else: + src = r.FP_SCRATCH + self.asm.regalloc_mov(loc, src) + self.mc.stfd(src.value, r.SP.value, base + 8 * n) + + # We must also copy fnloc into FNREG + non_float_locs.append(self.fnloc) + non_float_regs.append(self.mc.RAW_CALL_REG) # r2 or r12 + + if float_locs: + assert len(float_locs) <= len(self.FPR_ARGS) + remap_frame_layout(self.asm, float_locs, + self.FPR_ARGS[:len(float_locs)], + r.FP_SCRATCH) + + remap_frame_layout(self.asm, non_float_locs, non_float_regs, + r.SCRATCH) + + + def push_gcmap(self): + # we push *now* the gcmap, describing the status of GC registers + # after the rearrangements done just before, ignoring the return + # value r3, if necessary + assert not self.is_call_release_gil + noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack() + gcmap = self.asm._regalloc.get_gcmap([r.r3], noregs=noregs) + self.asm.push_gcmap(self.mc, gcmap, store=True) + + def pop_gcmap(self): + ssreg = None + gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap + if gcrootmap: + if gcrootmap.is_shadow_stack and self.is_call_release_gil: + # in this mode, RSHADOWOLD happens to contain the shadowstack + # top at this point, so reuse it instead of loading it again + ssreg = self.RSHADOWOLD + self.asm._reload_frame_if_necessary(self.mc, shadowstack_reg=ssreg) + + def emit_raw_call(self): + self.mc.raw_call() + + def restore_stack_pointer(self): + if self.subtracted_to_sp != 0: + self.mc.addi(r.SP.value, r.SP.value, self.subtracted_to_sp) + + def load_result(self): + assert (self.resloc is None or + self.resloc is r.r3 or + self.resloc is r.f1) + + + def call_releasegil_addr_and_move_real_arguments(self, fastgil): + assert self.is_call_release_gil + RSHADOWPTR = self.RSHADOWPTR + RFASTGILPTR = self.RFASTGILPTR + RSHADOWOLD = self.RSHADOWOLD + # + # Save this thread's shadowstack pointer into r29, for later comparison + gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap + if gcrootmap: + if gcrootmap.is_shadow_stack: + rst = gcrootmap.get_root_stack_top_addr() + self.mc.load_imm(RSHADOWPTR, rst) + self.mc.load(RSHADOWOLD.value, RSHADOWPTR.value, 0) + # + # change 'rpy_fastgil' to 0 (it should be non-zero right now) + self.mc.load_imm(RFASTGILPTR, fastgil) + self.mc.li(r.r0.value, 0) + self.mc.lwsync() + self.mc.std(r.r0.value, RFASTGILPTR.value, 0) + # + if not we_are_translated(): # for testing: we should not access + self.mc.addi(r.SPP.value, r.SPP.value, 1) # r31 any more + + + def move_real_result_and_call_reacqgil_addr(self, fastgil): + from rpython.jit.backend.ppc.codebuilder import OverwritingBuilder + + # try to reacquire the lock. The following registers are still + # valid from before the call: + RSHADOWPTR = self.RSHADOWPTR # r30: &root_stack_top + RFASTGILPTR = self.RFASTGILPTR # r29: &fastgil + RSHADOWOLD = self.RSHADOWOLD # r28: previous val of root_stack_top + + # Equivalent of 'r10 = __sync_lock_test_and_set(&rpy_fastgil, 1);' + self.mc.li(r.r9.value, 1) + retry_label = self.mc.currpos() + self.mc.ldarx(r.r10.value, 0, RFASTGILPTR.value) # load the lock value + self.mc.stdcxx(r.r9.value, 0, RFASTGILPTR.value) # try to claim lock + self.mc.bc(6, 2, retry_label - self.mc.currpos()) # retry if failed + self.mc.isync() + + self.mc.cmpdi(0, r.r10.value, 0) + b1_location = self.mc.currpos() + self.mc.trap() # boehm: patched with a BEQ: jump if r10 is zero + # shadowstack: patched with BNE instead + + if self.asm.cpu.gc_ll_descr.gcrootmap: + # When doing a call_release_gil with shadowstack, there + # is the risk that the 'rpy_fastgil' was free but the + # current shadowstack can be the one of a different + # thread. So here we check if the shadowstack pointer + # is still the same as before we released the GIL (saved + # in RSHADOWOLD), and if not, we fall back to 'reacqgil_addr'. + self.mc.load(r.r9.value, RSHADOWPTR.value, 0) + self.mc.cmpdi(0, r.r9.value, RSHADOWOLD.value) + bne_location = b1_location + b1_location = self.mc.currpos() + self.mc.trap() + + # revert the rpy_fastgil acquired above, so that the + # general 'reacqgil_addr' below can acquire it again... + # (here, r10 is conveniently zero) + self.mc.std(r.r10.value, RFASTGILPTR.value, 0) + + pmc = OverwritingBuilder(self.mc, bne_location, 1) + pmc.bne(self.mc.currpos() - bne_location) + pmc.overwrite() + # + # Yes, we need to call the reacqgil() function. + # save the result we just got + RSAVEDRES = RFASTGILPTR # can reuse this reg here + reg = self.resloc + if reg is not None: + if reg.is_core_reg(): + self.mc.mr(RSAVEDRES.value, reg.value) + elif reg.is_fp_reg(): + self.mc.stfd(reg.value, r.SP.value, + PARAM_SAVE_AREA_OFFSET + 7 * WORD) + self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr) + self.mc.raw_call() + if reg is not None: + if reg.is_core_reg(): + self.mc.mr(reg.value, RSAVEDRES.value) + elif reg.is_fp_reg(): + self.mc.lfd(reg.value, r.SP.value, + PARAM_SAVE_AREA_OFFSET + 7 * WORD) + + # replace b1_location with BEQ(here) + pmc = OverwritingBuilder(self.mc, b1_location, 1) + pmc.beq(self.mc.currpos() - b1_location) + pmc.overwrite() + + if not we_are_translated(): # for testing: now we can access + self.mc.addi(r.SPP.value, r.SPP.value, -1) # r31 again + + + def write_real_errno(self, save_err): + if save_err & rffi.RFFI_READSAVED_ERRNO: + # Just before a call, read '*_errno' and write it into the + # real 'errno'. A lot of registers are free here, notably + # r11 and r0. + if save_err & rffi.RFFI_ALT_ERRNO: + rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu) + else: + rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu) + p_errno = llerrno.get_p_errno_offset(self.asm.cpu) + self.mc.ld(r.r11.value, r.SP.value, + THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp) + self.mc.lwz(r.r0.value, r.r11.value, rpy_errno) + self.mc.ld(r.r11.value, r.r11.value, p_errno) + self.mc.stw(r.r0.value, r.r11.value, 0) + elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE: + # Same, but write zero. + p_errno = llerrno.get_p_errno_offset(self.asm.cpu) + self.mc.ld(r.r11.value, r.SP.value, + THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp) + self.mc.ld(r.r11.value, r.r11.value, p_errno) + self.mc.li(r.r0.value, 0) + self.mc.stw(r.r0.value, r.r11.value, 0) + + def read_real_errno(self, save_err): + if save_err & rffi.RFFI_SAVE_ERRNO: + # Just after a call, read the real 'errno' and save a copy of + # it inside our thread-local '*_errno'. Registers r4-r10 + # never contain anything after the call. + if save_err & rffi.RFFI_ALT_ERRNO: + rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu) + else: + rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu) + p_errno = llerrno.get_p_errno_offset(self.asm.cpu) + self.mc.ld(r.r9.value, r.SP.value, THREADLOCAL_ADDR_OFFSET) + self.mc.ld(r.r10.value, r.r9.value, p_errno) + self.mc.lwz(r.r10.value, r.r10.value, 0) + self.mc.stw(r.r10.value, r.r9.value, rpy_errno) diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/ppc/codebuilder.py @@ -0,0 +1,1292 @@ +import os +from rpython.jit.backend.ppc.ppc_form import PPCForm as Form +from rpython.jit.backend.ppc.locations import RegisterLocation +from rpython.jit.backend.ppc.ppc_field import ppc_fields +from rpython.jit.backend.ppc.arch import (IS_PPC_32, WORD, IS_PPC_64, + LR_BC_OFFSET, IS_BIG_ENDIAN, IS_LITTLE_ENDIAN) +import rpython.jit.backend.ppc.register as r +import rpython.jit.backend.ppc.condition as c +from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin +from rpython.jit.backend.llsupport.assembler import GuardToken +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.jit.metainterp.resoperation import rop +from rpython.tool.udir import udir +from rpython.rlib.objectmodel import we_are_translated + +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.jit.backend.ppc.rassemblermaker import make_rassembler + + +# the following instructions can't accept "r0" as the second argument +# (i.e. the base address): it is recognized as "0" instead, or is +# even invalid (load-with-update, store-with-update). +# +# any load or store instruction +# addi rD, r0, immed +# subi rD, r0, immed +# addis rD, r0, immed +# subis rD, r0, immed + + +A = Form("frD", "frA", "frB", "XO3", "Rc") +A1 = Form("frD", "frB", "XO3", "Rc") +A2 = Form("frD", "frA", "frC", "XO3", "Rc") +A3 = Form("frD", "frA", "frC", "frB", "XO3", "Rc") + +I = Form("LI", "AA", "LK") + +B = Form("BO", "BI", "BD", "AA", "LK") + +SC = Form("AA") # fudge + +DD = Form("rD", "rA", "SIMM") +DDO = Form("rD", "rA", "ds", "XO4") +DS = Form("rA", "rS", "UIMM") + +X = Form("XO1") +XS = Form("rA", "rS", "rB", "XO1", "Rc") +XSO = Form("rS", "rA", "rB", "XO1") +XD = Form("rD", "rA", "rB", "XO1") +XO = Form("rD", "rA", "rB", "OE", "XO2", "Rc") +XO0 = Form("rD", "rA", "OE", "XO2", "Rc") +XDB = Form("frD", "frB", "XO1", "Rc") +XS0 = Form("rA", "rS", "XO1", "Rc") +X0 = Form("rA", "rB", "XO1") +XcAB = Form("crfD", "rA", "rB", "XO1") +XN = Form("rD", "rA", "NB", "XO1") +XL = Form("crbD", "crbA", "crbB", "XO1") +XL1 = Form("crfD", "crfS") +XL2 = Form("crbD", "XO1", "Rc") +XFL = Form("FM", "frB", "XO1", "Rc") +XFX = Form("CRM", "rS", "XO1") +XLL = Form("LL", "XO1") + +MI = Form("rA", "rS", "SH", "MB", "ME", "Rc") +MB = Form("rA", "rS", "rB", "MB", "ME", "Rc") +MDI = Form("rA", "rS", "sh", "mbe", "XO5", "Rc") +MDS = Form("rA", "rS", "rB", "mbe", "XO7", "Rc") + +class BasicPPCAssembler(object): + + def disassemble(cls, inst, labels={}, pc=0): + cache = cls.__dict__.get('idesc cache') + if cache is None: + idescs = cls.get_idescs() + cache = {} + for n, i in idescs: + cache.setdefault(i.specializations[ppc_fields['opcode']], + []).append((n,i)) + setattr(cls, 'idesc cache', cache) + matches = [] + idescs = cache[ppc_fields['opcode'].decode(inst)] + for name, idesc in idescs: + m = idesc.match(inst) + if m > 0: + matches.append((m, idesc, name)) + if matches: + score, idesc, name = max(matches) + return idesc.disassemble(name, inst, labels, pc) + disassemble = classmethod(disassemble) + + # "basic" means no simplified mnemonics + + # I form + b = I(18, AA=0, LK=0) + ba = I(18, AA=1, LK=0) + bl = I(18, AA=0, LK=1) + bla = I(18, AA=1, LK=1) + + # B form + bc = B(16, AA=0, LK=0) + bcl = B(16, AA=0, LK=1) + bca = B(16, AA=1, LK=0) + bcla = B(16, AA=1, LK=1) + + # SC form + sc = SC(17, AA=1) # it's not really the aa field... + + # D form + addi = DD(14) + addic = DD(12) + addicx = DD(13) + addis = DD(15) + + andix = DS(28) + andisx = DS(29) + + cmpi = Form("crfD", "L", "rA", "SIMM")(11) + cmpi.default(L=0).default(crfD=0) + cmpli = Form("crfD", "L", "rA", "UIMM")(10) + cmpli.default(L=0).default(crfD=0) + + lbz = DD(34) + lbzu = DD(35) + ld = DDO(58, XO4=0) + ldu = DDO(58, XO4=1) + lfd = DD(50) + lfdu = DD(51) + lfs = DD(48) + lfsu = DD(49) + lha = DD(42) + lhau = DD(43) + lhz = DD(40) + lhzu = DD(41) + lmw = DD(46) + lwa = DDO(58, XO4=2) + lwz = DD(32) + lwzu = DD(33) + + mulli = DD(7) + ori = DS(24) + oris = DS(25) + + stb = DD(38) + stbu = DD(39) + std = DDO(62, XO4=0) + stdu = DDO(62, XO4=1) + stfd = DD(54) + stfdu = DD(55) + stfs = DD(52) + stfsu = DD(53) + sth = DD(44) + sthu = DD(45) + stmw = DD(47) + stw = DD(36) + stwu = DD(37) + + subfic = DD(8) + tdi = Form("TO", "rA", "SIMM")(2) + twi = Form("TO", "rA", "SIMM")(3) + xori = DS(26) + xoris = DS(27) + + # X form + + and_ = XS(31, XO1=28, Rc=0) + and_x = XS(31, XO1=28, Rc=1) + + andc_ = XS(31, XO1=60, Rc=0) + andc_x = XS(31, XO1=60, Rc=1) + + # is the L bit for 64 bit compares? hmm + cmp = Form("crfD", "L", "rA", "rB", "XO1")(31, XO1=0) + cmp.default(L=0).default(crfD=0) + cmpl = Form("crfD", "L", "rA", "rB", "XO1")(31, XO1=32) + cmpl.default(L=0).default(crfD=0) + + cntlzd = XS0(31, XO1=58, Rc=0) + cntlzdx = XS0(31, XO1=58, Rc=1) + cntlzw = XS0(31, XO1=26, Rc=0) + cntlzwx = XS0(31, XO1=26, Rc=1) + + dcba = X0(31, XO1=758) + dcbf = X0(31, XO1=86) + dcbi = X0(31, XO1=470) + dcbst = X0(31, XO1=54) + dcbt = X0(31, XO1=278) + dcbtst = X0(31, XO1=246) + dcbz = X0(31, XO1=1014) + + eciwx = XD(31, XO1=310) + ecowx = XS(31, XO1=438, Rc=0) + + eieio = X(31, XO1=854) + + eqv = XS(31, XO1=284, Rc=0) + eqvx = XS(31, XO1=284, Rc=1) + + extsb = XS0(31, XO1=954, Rc=0) + extsbx = XS0(31, XO1=954, Rc=1) + + extsh = XS0(31, XO1=922, Rc=0) + extshx = XS0(31, XO1=922, Rc=1) + + extsw = XS0(31, XO1=986, Rc=0) + extswx = XS0(31, XO1=986, Rc=1) + + fabs = XDB(63, XO1=264, Rc=0) + fabsx = XDB(63, XO1=264, Rc=1) + + fcmpo = XcAB(63, XO1=32) + fcmpu = XcAB(63, XO1=0) + + fcfid = XDB(63, XO1=846, Rc=0) + fcfidx = XDB(63, XO1=846, Rc=1) + + fctid = XDB(63, XO1=814, Rc=0) + fctidx = XDB(63, XO1=814, Rc=1) + + fctidz = XDB(63, XO1=815, Rc=0) + fctidzx = XDB(63, XO1=815, Rc=1) + + fctiw = XDB(63, XO1=14, Rc=0) + fctiwx = XDB(63, XO1=14, Rc=1) + + fctiwz = XDB(63, XO1=15, Rc=0) + fctiwzx = XDB(63, XO1=15, Rc=1) + + fmr = XDB(63, XO1=72, Rc=0) + fmrx = XDB(63, XO1=72, Rc=1) + + fnabs = XDB(63, XO1=136, Rc=0) + fnabsx = XDB(63, XO1=136, Rc=1) + + fneg = XDB(63, XO1=40, Rc=0) + fnegx = XDB(63, XO1=40, Rc=1) + + frsp = XDB(63, XO1=12, Rc=0) + frspx = XDB(63, XO1=12, Rc=1) + + fsqrt = XDB(63, XO1=22, Rc=0) + + mffgpr = XS(31, XO1=607, Rc=0) + mftgpr = XS(31, XO1=735, Rc=0) + + icbi = X0(31, XO1=982) + + lbzux = XD(31, XO1=119) + lbzx = XD(31, XO1=87) + ldarx = XD(31, XO1=84) + ldux = XD(31, XO1=53) + ldx = XD(31, XO1=21) + lfdux = XD(31, XO1=631) + lfdx = XD(31, XO1=599) + lfsux = XD(31, XO1=567) + lfsx = XD(31, XO1=535) + lhaux = XD(31, XO1=375) + lhax = XD(31, XO1=343) + lhbrx = XD(31, XO1=790) + lhzux = XD(31, XO1=311) + lhzx = XD(31, XO1=279) + lswi = XD(31, XO1=597) + lswx = XD(31, XO1=533) + lwarx = XD(31, XO1=20) + lwaux = XD(31, XO1=373) + lwax = XD(31, XO1=341) + lwbrx = XD(31, XO1=534) + lwzux = XD(31, XO1=55) + lwzx = XD(31, XO1=23) + + mcrfs = Form("crfD", "crfS", "XO1")(63, XO1=64) + mcrxr = Form("crfD", "XO1")(31, XO1=512) + mfcr = Form("rD", "XO1")(31, XO1=19) + mffs = Form("frD", "XO1", "Rc")(63, XO1=583, Rc=0) + mffsx = Form("frD", "XO1", "Rc")(63, XO1=583, Rc=1) + mfmsr = Form("rD", "XO1")(31, XO1=83) + mfsr = Form("rD", "SR", "XO1")(31, XO1=595) + mfsrin = XDB(31, XO1=659, Rc=0) + + add = XO(31, XO2=266, OE=0, Rc=0) + addx = XO(31, XO2=266, OE=0, Rc=1) + addo = XO(31, XO2=266, OE=1, Rc=0) + addox = XO(31, XO2=266, OE=1, Rc=1) + + addc = XO(31, XO2=10, OE=0, Rc=0) + addcx = XO(31, XO2=10, OE=0, Rc=1) + addco = XO(31, XO2=10, OE=1, Rc=0) + addcox = XO(31, XO2=10, OE=1, Rc=1) + + adde = XO(31, XO2=138, OE=0, Rc=0) + addex = XO(31, XO2=138, OE=0, Rc=1) + addeo = XO(31, XO2=138, OE=1, Rc=0) + addeox = XO(31, XO2=138, OE=1, Rc=1) + + addme = XO(31, rB=0, XO2=234, OE=0, Rc=0) + addmex = XO(31, rB=0, XO2=234, OE=0, Rc=1) + addmeo = XO(31, rB=0, XO2=234, OE=1, Rc=0) + addmeox = XO(31, rB=0, XO2=234, OE=1, Rc=1) + + addze = XO(31, rB=0, XO2=202, OE=0, Rc=0) + addzex = XO(31, rB=0, XO2=202, OE=0, Rc=1) + addzeo = XO(31, rB=0, XO2=202, OE=1, Rc=0) + addzeox = XO(31, rB=0, XO2=202, OE=1, Rc=1) + + bcctr = Form("BO", "BI", "XO1", "LK")(19, XO1=528, LK=0) + bcctrl = Form("BO", "BI", "XO1", "LK")(19, XO1=528, LK=1) + + bclr = Form("BO", "BI", "XO1", "LK")(19, XO1=16, LK=0) + bclrl = Form("BO", "BI", "XO1", "LK")(19, XO1=16, LK=1) + + crand = XL(19, XO1=257) + crandc = XL(19, XO1=129) + creqv = XL(19, XO1=289) + crnand = XL(19, XO1=225) + crnor = XL(19, XO1=33) + cror = XL(19, XO1=449) + crorc = XL(19, XO1=417) + crxor = XL(19, XO1=193) + + divd = XO(31, XO2=489, OE=0, Rc=0) + divdx = XO(31, XO2=489, OE=0, Rc=1) + divdo = XO(31, XO2=489, OE=1, Rc=0) + divdox = XO(31, XO2=489, OE=1, Rc=1) + + divdu = XO(31, XO2=457, OE=0, Rc=0) + divdux = XO(31, XO2=457, OE=0, Rc=1) + divduo = XO(31, XO2=457, OE=1, Rc=0) + divduox = XO(31, XO2=457, OE=1, Rc=1) + + divw = XO(31, XO2=491, OE=0, Rc=0) + divwx = XO(31, XO2=491, OE=0, Rc=1) + divwo = XO(31, XO2=491, OE=1, Rc=0) + divwox = XO(31, XO2=491, OE=1, Rc=1) + + divwu = XO(31, XO2=459, OE=0, Rc=0) + divwux = XO(31, XO2=459, OE=0, Rc=1) + divwuo = XO(31, XO2=459, OE=1, Rc=0) + divwuox = XO(31, XO2=459, OE=1, Rc=1) + + fadd = A(63, XO3=21, Rc=0) + faddx = A(63, XO3=21, Rc=1) + fadds = A(59, XO3=21, Rc=0) + faddsx = A(59, XO3=21, Rc=1) + + fdiv = A(63, XO3=18, Rc=0) + fdivx = A(63, XO3=18, Rc=1) + fdivs = A(59, XO3=18, Rc=0) + fdivsx = A(59, XO3=18, Rc=1) + + fmadd = A3(63, XO3=19, Rc=0) + fmaddx = A3(63, XO3=19, Rc=1) + fmadds = A3(59, XO3=19, Rc=0) + fmaddsx = A3(59, XO3=19, Rc=1) + + fmsub = A3(63, XO3=28, Rc=0) + fmsubx = A3(63, XO3=28, Rc=1) + fmsubs = A3(59, XO3=28, Rc=0) + fmsubsx = A3(59, XO3=28, Rc=1) + + fmul = A2(63, XO3=25, Rc=0) + fmulx = A2(63, XO3=25, Rc=1) + fmuls = A2(59, XO3=25, Rc=0) + fmulsx = A2(59, XO3=25, Rc=1) + + fnmadd = A3(63, XO3=31, Rc=0) + fnmaddx = A3(63, XO3=31, Rc=1) + fnmadds = A3(59, XO3=31, Rc=0) + fnmaddsx = A3(59, XO3=31, Rc=1) + + fnmsub = A3(63, XO3=30, Rc=0) + fnmsubx = A3(63, XO3=30, Rc=1) + fnmsubs = A3(59, XO3=30, Rc=0) + fnmsubsx = A3(59, XO3=30, Rc=1) + + fres = A1(59, XO3=24, Rc=0) + fresx = A1(59, XO3=24, Rc=1) + + frsp = A1(63, XO3=12, Rc=0) + frspx = A1(63, XO3=12, Rc=1) + + frsqrte = A1(63, XO3=26, Rc=0) + frsqrtex = A1(63, XO3=26, Rc=1) + + fsel = A3(63, XO3=23, Rc=0) + fselx = A3(63, XO3=23, Rc=1) + + frsqrt = A1(63, XO3=22, Rc=0) + frsqrtx = A1(63, XO3=22, Rc=1) + frsqrts = A1(59, XO3=22, Rc=0) + frsqrtsx = A1(59, XO3=22, Rc=1) + + fsub = A(63, XO3=20, Rc=0) + fsubx = A(63, XO3=20, Rc=1) + fsubs = A(59, XO3=20, Rc=0) + fsubsx = A(59, XO3=20, Rc=1) + + isync = X(19, XO1=150) + + mcrf = XL1(19) + + mfspr = Form("rD", "spr", "XO1")(31, XO1=339) + mftb = Form("rD", "spr", "XO1")(31, XO1=371) + + mtcrf = XFX(31, XO1=144) + + mtfsb0 = XL2(63, XO1=70, Rc=0) + mtfsb0x = XL2(63, XO1=70, Rc=1) + mtfsb1 = XL2(63, XO1=38, Rc=0) + mtfsb1x = XL2(63, XO1=38, Rc=1) + + mtfsf = XFL(63, XO1=711, Rc=0) + mtfsfx = XFL(63, XO1=711, Rc=1) + + mtfsfi = Form("crfD", "IMM", "XO1", "Rc")(63, XO1=134, Rc=0) + mtfsfix = Form("crfD", "IMM", "XO1", "Rc")(63, XO1=134, Rc=1) + + mtmsr = Form("rS", "XO1")(31, XO1=146) + + mtspr = Form("rS", "spr", "XO1")(31, XO1=467) + + mtsr = Form("rS", "SR", "XO1")(31, XO1=210) + mtsrin = Form("rS", "rB", "XO1")(31, XO1=242) + + mulhd = XO(31, OE=0, XO2=73, Rc=0) + mulhdx = XO(31, OE=0, XO2=73, Rc=1) + + mulhdu = XO(31, OE=0, XO2=9, Rc=0) + mulhdux = XO(31, OE=0, XO2=9, Rc=1) + + mulld = XO(31, OE=0, XO2=233, Rc=0) + mulldx = XO(31, OE=0, XO2=233, Rc=1) + mulldo = XO(31, OE=1, XO2=233, Rc=0) + mulldox = XO(31, OE=1, XO2=233, Rc=1) + + mulhw = XO(31, OE=0, XO2=75, Rc=0) + mulhwx = XO(31, OE=0, XO2=75, Rc=1) + + mulhwu = XO(31, OE=0, XO2=11, Rc=0) + mulhwux = XO(31, OE=0, XO2=11, Rc=1) + + mullw = XO(31, OE=0, XO2=235, Rc=0) + mullwx = XO(31, OE=0, XO2=235, Rc=1) + mullwo = XO(31, OE=1, XO2=235, Rc=0) + mullwox = XO(31, OE=1, XO2=235, Rc=1) + + nand = XS(31, XO1=476, Rc=0) + nandx = XS(31, XO1=476, Rc=1) + + neg = XO0(31, OE=0, XO2=104, Rc=0) + negx = XO0(31, OE=0, XO2=104, Rc=1) + nego = XO0(31, OE=1, XO2=104, Rc=0) + negox = XO0(31, OE=1, XO2=104, Rc=1) + + nor = XS(31, XO1=124, Rc=0) + norx = XS(31, XO1=124, Rc=1) + + or_ = XS(31, XO1=444, Rc=0) + or_x = XS(31, XO1=444, Rc=1) + + orc = XS(31, XO1=412, Rc=0) + orcx = XS(31, XO1=412, Rc=1) + + rfi = X(19, XO1=50) + + rfid = X(19, XO1=18) + + rldcl = MDS(30, XO7=8, Rc=0) + rldclx = MDS(30, XO7=8, Rc=1) + rldcr = MDS(30, XO7=9, Rc=0) + rldcrx = MDS(30, XO7=9, Rc=1) + + rldic = MDI(30, XO5=2, Rc=0) + rldicx = MDI(30, XO5=2, Rc=1) + rldicl = MDI(30, XO5=0, Rc=0) + rldiclx = MDI(30, XO5=0, Rc=1) + rldicr = MDI(30, XO5=1, Rc=0) + rldicrx = MDI(30, XO5=1, Rc=1) + rldimi = MDI(30, XO5=3, Rc=0) + rldimix = MDI(30, XO5=3, Rc=1) + + rlwimi = MI(20, Rc=0) + rlwimix = MI(20, Rc=1) + + rlwinm = MI(21, Rc=0) + rlwinmx = MI(21, Rc=1) + + rlwnm = MB(23, Rc=0) + rlwnmx = MB(23, Rc=1) + + sld = XS(31, XO1=27, Rc=0) + sldx = XS(31, XO1=27, Rc=1) + + slw = XS(31, XO1=24, Rc=0) + slwx = XS(31, XO1=24, Rc=1) + + srad = XS(31, XO1=794, Rc=0) + sradx = XS(31, XO1=794, Rc=1) + + sradi = Form("rA", "rS", "SH", "XO6", "sh", "Rc")(31, XO6=413, Rc=0) + sradix = Form("rA", "rS", "SH", "XO6", "sh", "Rc")(31, XO6=413, Rc=1) + + sraw = XS(31, XO1=792, Rc=0) + srawx = XS(31, XO1=792, Rc=1) + + srawi = Form("rA", "rS", "SH", "XO1", "Rc")(31, XO1=824, Rc=0) + srawix = Form("rA", "rS", "SH", "XO1", "Rc")(31, XO1=824, Rc=1) + + srd = XS(31, XO1=539, Rc=0) + srdx = XS(31, XO1=539, Rc=1) + + srw = XS(31, XO1=536, Rc=0) + srwx = XS(31, XO1=536, Rc=1) + + stbux = XSO(31, XO1=247) + stbx = XSO(31, XO1=215) + stdcxx = Form("rS", "rA", "rB", "XO1", "Rc")(31, XO1=214, Rc=1) + stdux = XSO(31, XO1=181) + stdx = XSO(31, XO1=149) + stfdux = XSO(31, XO1=759) + stfdx = XSO(31, XO1=727) + stfiwx = XSO(31, XO1=983) + stfsux = XSO(31, XO1=695) + stfsx = XSO(31, XO1=663) + sthbrx = XSO(31, XO1=918) + sthux = XSO(31, XO1=439) + sthx = XSO(31, XO1=407) + stswi = Form("rS", "rA", "NB", "XO1")(31, XO1=725) + stswx = XSO(31, XO1=661) + stwbrx = XSO(31, XO1=662) + stwcxx = Form("rS", "rA", "rB", "XO1", "Rc")(31, XO1=150, Rc=1) + stwux = XSO(31, XO1=183) + stwx = XSO(31, XO1=151) + + subf = XO(31, XO2=40, OE=0, Rc=0) + subfx = XO(31, XO2=40, OE=0, Rc=1) + subfo = XO(31, XO2=40, OE=1, Rc=0) + subfox = XO(31, XO2=40, OE=1, Rc=1) + + subfc = XO(31, XO2=8, OE=0, Rc=0) + subfcx = XO(31, XO2=8, OE=0, Rc=1) + subfco = XO(31, XO2=8, OE=1, Rc=0) + subfcox = XO(31, XO2=8, OE=1, Rc=1) + + subfe = XO(31, XO2=136, OE=0, Rc=0) + subfex = XO(31, XO2=136, OE=0, Rc=1) + subfeo = XO(31, XO2=136, OE=1, Rc=0) + subfeox = XO(31, XO2=136, OE=1, Rc=1) + + subfme = XO0(31, OE=0, XO2=232, Rc=0) + subfmex = XO0(31, OE=0, XO2=232, Rc=1) + subfmeo = XO0(31, OE=1, XO2=232, Rc=0) + subfmeox= XO0(31, OE=1, XO2=232, Rc=1) + + subfze = XO0(31, OE=0, XO2=200, Rc=0) + subfzex = XO0(31, OE=0, XO2=200, Rc=1) + subfzeo = XO0(31, OE=1, XO2=200, Rc=0) + subfzeox= XO0(31, OE=1, XO2=200, Rc=1) + + sync = XLL(31, LL=0, XO1=598) + lwsync = XLL(31, LL=1, XO1=598) + + tlbia = X(31, XO1=370) + tlbie = Form("rB", "XO1")(31, XO1=306) + tlbsync = X(31, XO1=566) + + td = Form("TO", "rA", "rB", "XO1")(31, XO1=68) + tw = Form("TO", "rA", "rB", "XO1")(31, XO1=4) + + xor = XS(31, XO1=316, Rc=0) + xorx = XS(31, XO1=316, Rc=1) + +class PPCAssembler(BasicPPCAssembler): + BA = BasicPPCAssembler + + # awkward mnemonics: + # mftb + # most of the branch mnemonics... + + # F.2 Simplified Mnemonics for Subtract Instructions + + def subi(self, rD, rA, value): + self.addi(rD, rA, -value) + def subis(self, rD, rA, value): + self.addis(rD, rA, -value) + def subic(self, rD, rA, value): + self.addic(rD, rA, -value) + def subicx(self, rD, rA, value): + self.addicx(rD, rA, -value) + + def sub(self, rD, rA, rB): + self.subf(rD, rB, rA) + def subc(self, rD, rA, rB): + self.subfc(rD, rB, rA) + def subx(self, rD, rA, rB): + self.subfx(rD, rB, rA) + def subcx(self, rD, rA, rB): + self.subfcx(rD, rB, rA) + def subo(self, rD, rA, rB): + self.subfo(rD, rB, rA) + def subco(self, rD, rA, rB): + self.subfco(rD, rB, rA) + def subox(self, rD, rA, rB): + self.subfox(rD, rB, rA) + def subcox(self, rD, rA, rB): + self.subfcox(rD, rB, rA) + + # F.3 Simplified Mnemonics for Compare Instructions + + cmpdi = BA.cmpi(L=1) + cmpwi = BA.cmpi(L=0) + cmpldi = BA.cmpli(L=1) + cmplwi = BA.cmpli(L=0) + cmpd = BA.cmp(L=1) + cmpw = BA.cmp(L=0) + cmpld = BA.cmpl(L=1) + cmplw = BA.cmpl(L=0) + + # F.4 Simplified Mnemonics for Rotate and Shift Instructions + + def extlwi(self, rA, rS, n, b): + self.rlwinm(rA, rS, b, 0, n-1) + + def extrwi(self, rA, rS, n, b): + self.rlwinm(rA, rS, b+n, 32-n, 31) + + def inslwi(self, rA, rS, n, b): + self.rwlimi(rA, rS, 32-b, b, b + n -1) + + def insrwi(self, rA, rS, n, b): + self.rwlimi(rA, rS, 32-(b+n), b, b + n -1) + + def rotlwi(self, rA, rS, n): + self.rlwinm(rA, rS, n, 0, 31) + + def rotrwi(self, rA, rS, n): + self.rlwinm(rA, rS, 32-n, 0, 31) + + def rotlw(self, rA, rS, rB): + self.rlwnm(rA, rS, rB, 0, 31) + + def slwi(self, rA, rS, n): + self.rlwinm(rA, rS, n, 0, 31-n) + + def srwi(self, rA, rS, n): + self.rlwinm(rA, rS, 32-n, n, 31) + + def sldi(self, rA, rS, n): + self.rldicr(rA, rS, n, 63-n) + + def srdi(self, rA, rS, n): + self.rldicl(rA, rS, 64-n, n) + + # F.5 Simplified Mnemonics for Branch Instructions + + # there's a lot of these! + bt = BA.bc(BO=12) + bf = BA.bc(BO=4) + bdnz = BA.bc(BO=16, BI=0) + bdnzt = BA.bc(BO=8) + bdnzf = BA.bc(BO=0) + bdz = BA.bc(BO=18, BI=0) + bdzt = BA.bc(BO=10) + bdzf = BA.bc(BO=2) + + bta = BA.bca(BO=12) + bfa = BA.bca(BO=4) + bdnza = BA.bca(BO=16, BI=0) + bdnzta = BA.bca(BO=8) + bdnzfa = BA.bca(BO=0) + bdza = BA.bca(BO=18, BI=0) + bdzta = BA.bca(BO=10) + bdzfa = BA.bca(BO=2) + + btl = BA.bcl(BO=12) + bfl = BA.bcl(BO=4) + bdnzl = BA.bcl(BO=16, BI=0) + bdnztl = BA.bcl(BO=8) + bdnzfl = BA.bcl(BO=0) + bdzl = BA.bcl(BO=18, BI=0) + bdztl = BA.bcl(BO=10) + bdzfl = BA.bcl(BO=2) + + btla = BA.bcla(BO=12) + bfla = BA.bcla(BO=4) + bdnzla = BA.bcla(BO=16, BI=0) + bdnztla = BA.bcla(BO=8) + bdnzfla = BA.bcla(BO=0) + bdzla = BA.bcla(BO=18, BI=0) + bdztla = BA.bcla(BO=10) + bdzfla = BA.bcla(BO=2) + + blr = BA.bclr(BO=20, BI=0) + btlr = BA.bclr(BO=12) + bflr = BA.bclr(BO=4) + bdnzlr = BA.bclr(BO=16, BI=0) + bdnztlr = BA.bclr(BO=8) + bdnzflr = BA.bclr(BO=0) + bdzlr = BA.bclr(BO=18, BI=0) + bdztlr = BA.bclr(BO=10) + bdzflr = BA.bclr(BO=2) + + bctr = BA.bcctr(BO=20, BI=0) + btctr = BA.bcctr(BO=12) + bfctr = BA.bcctr(BO=4) + + blrl = BA.bclrl(BO=20, BI=0) + btlrl = BA.bclrl(BO=12) + bflrl = BA.bclrl(BO=4) + bdnzlrl = BA.bclrl(BO=16, BI=0) + bdnztlrl = BA.bclrl(BO=8) + bdnzflrl = BA.bclrl(BO=0) + bdzlrl = BA.bclrl(BO=18, BI=0) + bdztlrl = BA.bclrl(BO=10) + bdzflrl = BA.bclrl(BO=2) + + bctrl = BA.bcctrl(BO=20, BI=0) + btctrl = BA.bcctrl(BO=12) + bfctrl = BA.bcctrl(BO=4) + + # these should/could take a[n optional] crf argument, but it's a + # bit hard to see how to arrange that. + + blt = BA.bc(BO=12, BI=0) + ble = BA.bc(BO=4, BI=1) + beq = BA.bc(BO=12, BI=2) + bge = BA.bc(BO=4, BI=0) + bgt = BA.bc(BO=12, BI=1) + bnl = BA.bc(BO=4, BI=0) + bne = BA.bc(BO=4, BI=2) + bng = BA.bc(BO=4, BI=1) + bso = BA.bc(BO=12, BI=3) + bns = BA.bc(BO=4, BI=3) + bun = BA.bc(BO=12, BI=3) + bnu = BA.bc(BO=4, BI=3) + + blta = BA.bca(BO=12, BI=0) + blea = BA.bca(BO=4, BI=1) + beqa = BA.bca(BO=12, BI=2) + bgea = BA.bca(BO=4, BI=0) + bgta = BA.bca(BO=12, BI=1) + bnla = BA.bca(BO=4, BI=0) + bnea = BA.bca(BO=4, BI=2) + bnga = BA.bca(BO=4, BI=1) + bsoa = BA.bca(BO=12, BI=3) + bnsa = BA.bca(BO=4, BI=3) + buna = BA.bca(BO=12, BI=3) + bnua = BA.bca(BO=4, BI=3) + + bltl = BA.bcl(BO=12, BI=0) + blel = BA.bcl(BO=4, BI=1) + beql = BA.bcl(BO=12, BI=2) + bgel = BA.bcl(BO=4, BI=0) + bgtl = BA.bcl(BO=12, BI=1) + bnll = BA.bcl(BO=4, BI=0) + bnel = BA.bcl(BO=4, BI=2) + bngl = BA.bcl(BO=4, BI=1) + bsol = BA.bcl(BO=12, BI=3) + bnsl = BA.bcl(BO=4, BI=3) + bunl = BA.bcl(BO=12, BI=3) + bnul = BA.bcl(BO=4, BI=3) + + bltla = BA.bcla(BO=12, BI=0) + blela = BA.bcla(BO=4, BI=1) + beqla = BA.bcla(BO=12, BI=2) + bgela = BA.bcla(BO=4, BI=0) + bgtla = BA.bcla(BO=12, BI=1) + bnlla = BA.bcla(BO=4, BI=0) + bnela = BA.bcla(BO=4, BI=2) + bngla = BA.bcla(BO=4, BI=1) + bsola = BA.bcla(BO=12, BI=3) + bnsla = BA.bcla(BO=4, BI=3) + bunla = BA.bcla(BO=12, BI=3) + bnula = BA.bcla(BO=4, BI=3) + + bltlr = BA.bclr(BO=12, BI=0) + blelr = BA.bclr(BO=4, BI=1) + beqlr = BA.bclr(BO=12, BI=2) + bgelr = BA.bclr(BO=4, BI=0) + bgtlr = BA.bclr(BO=12, BI=1) + bnllr = BA.bclr(BO=4, BI=0) + bnelr = BA.bclr(BO=4, BI=2) + bnglr = BA.bclr(BO=4, BI=1) + bsolr = BA.bclr(BO=12, BI=3) + bnslr = BA.bclr(BO=4, BI=3) + bunlr = BA.bclr(BO=12, BI=3) + bnulr = BA.bclr(BO=4, BI=3) + + bltctr = BA.bcctr(BO=12, BI=0) + blectr = BA.bcctr(BO=4, BI=1) + beqctr = BA.bcctr(BO=12, BI=2) + bgectr = BA.bcctr(BO=4, BI=0) + bgtctr = BA.bcctr(BO=12, BI=1) + bnlctr = BA.bcctr(BO=4, BI=0) + bnectr = BA.bcctr(BO=4, BI=2) + bngctr = BA.bcctr(BO=4, BI=1) + bsoctr = BA.bcctr(BO=12, BI=3) + bnsctr = BA.bcctr(BO=4, BI=3) + bunctr = BA.bcctr(BO=12, BI=3) + bnuctr = BA.bcctr(BO=4, BI=3) + + bltlrl = BA.bclrl(BO=12, BI=0) + blelrl = BA.bclrl(BO=4, BI=1) + beqlrl = BA.bclrl(BO=12, BI=2) + bgelrl = BA.bclrl(BO=4, BI=0) + bgtlrl = BA.bclrl(BO=12, BI=1) + bnllrl = BA.bclrl(BO=4, BI=0) + bnelrl = BA.bclrl(BO=4, BI=2) + bnglrl = BA.bclrl(BO=4, BI=1) + bsolrl = BA.bclrl(BO=12, BI=3) + bnslrl = BA.bclrl(BO=4, BI=3) + bunlrl = BA.bclrl(BO=12, BI=3) + bnulrl = BA.bclrl(BO=4, BI=3) + + bltctrl = BA.bcctrl(BO=12, BI=0) + blectrl = BA.bcctrl(BO=4, BI=1) + beqctrl = BA.bcctrl(BO=12, BI=2) + bgectrl = BA.bcctrl(BO=4, BI=0) + bgtctrl = BA.bcctrl(BO=12, BI=1) + bnlctrl = BA.bcctrl(BO=4, BI=0) + bnectrl = BA.bcctrl(BO=4, BI=2) + bngctrl = BA.bcctrl(BO=4, BI=1) + bsoctrl = BA.bcctrl(BO=12, BI=3) + bnsctrl = BA.bcctrl(BO=4, BI=3) + bunctrl = BA.bcctrl(BO=12, BI=3) + bnuctrl = BA.bcctrl(BO=4, BI=3) + + # whew! and we haven't even begun the predicted versions... + + # F.6 Simplified Mnemonics for Condition Register + # Logical Instructions + + crset = BA.creqv(crbA="crbD", crbB="crbD") + crclr = BA.crxor(crbA="crbD", crbB="crbD") + crmove = BA.cror(crbA="crbB") + crnot = BA.crnor(crbA="crbB") + + # F.7 Simplified Mnemonics for Trap Instructions + + trap = BA.tw(TO=31, rA=0, rB=0) + twlt = BA.tw(TO=16) + twle = BA.tw(TO=20) + tweq = BA.tw(TO=4) + twge = BA.tw(TO=12) + twgt = BA.tw(TO=8) + twnl = BA.tw(TO=12) + twng = BA.tw(TO=24) + twllt = BA.tw(TO=2) + twlle = BA.tw(TO=6) + twlge = BA.tw(TO=5) + twlgt = BA.tw(TO=1) + twlnl = BA.tw(TO=5) + twlng = BA.tw(TO=6) + + twlti = BA.twi(TO=16) + twlei = BA.twi(TO=20) + tweqi = BA.twi(TO=4) + twgei = BA.twi(TO=12) + twgti = BA.twi(TO=8) + twnli = BA.twi(TO=12) + twnei = BA.twi(TO=24) + twngi = BA.twi(TO=20) + twllti = BA.twi(TO=2) + twllei = BA.twi(TO=6) + twlgei = BA.twi(TO=5) + twlgti = BA.twi(TO=1) + twlnli = BA.twi(TO=5) + twlngi = BA.twi(TO=6) + + # F.8 Simplified Mnemonics for Special-Purpose + # Registers + + mfctr = BA.mfspr(spr=9) + mflr = BA.mfspr(spr=8) + mftbl = BA.mftb(spr=268) + mftbu = BA.mftb(spr=269) + mfxer = BA.mfspr(spr=1) + + mtctr = BA.mtspr(spr=9) + mtlr = BA.mtspr(spr=8) + mtxer = BA.mtspr(spr=1) + + # F.9 Recommended Simplified Mnemonics + + nop = BA.ori(rS=0, rA=0, UIMM=0) + + li = BA.addi(rA=0) + lis = BA.addis(rA=0) + + mr = BA.or_(rB="rS") + mrx = BA.or_x(rB="rS") + + not_ = BA.nor(rB="rS") + not_x = BA.norx(rB="rS") + + mtcr = BA.mtcrf(CRM=0xFF) + +PPCAssembler = make_rassembler(PPCAssembler) + +def hi(w): + return w >> 16 + +def ha(w): + if (w >> 15) & 1: + return (w >> 16) + 1 + else: + return w >> 16 + +def lo(w): + return w & 0x0000FFFF + +def la(w): + v = w & 0x0000FFFF + if v & 0x8000: + return -((v ^ 0xFFFF) + 1) # "sign extend" to 32 bits + return v + +def highest(w): + return w >> 48 + +def higher(w): + return (w >> 32) & 0x0000FFFF + +def high(w): + return (w >> 16) & 0x0000FFFF + +_eci = ExternalCompilationInfo(post_include_bits=[ + '#define rpython_flush_icache() asm("isync":::"memory")\n' + ]) +flush_icache = rffi.llexternal( + "rpython_flush_icache", + [], + lltype.Void, + compilation_info=_eci, + _nowrapper=True, + sandboxsafe=True) + + +class PPCGuardToken(GuardToken): + def __init__(self, cpu, gcmap, descr, failargs, faillocs, + guard_opnum, frame_depth, fcond=c.cond_none): + GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs, + guard_opnum, frame_depth) + self.fcond = fcond + + +class OverwritingBuilder(PPCAssembler): + def __init__(self, mc, start, num_insts=0): + PPCAssembler.__init__(self) + self.mc = mc + self.index = start + + def currpos(self): + assert 0, "not implemented" + + def write32(self, word): + index = self.index + if IS_BIG_ENDIAN: + self.mc.overwrite(index, chr((word >> 24) & 0xff)) + self.mc.overwrite(index + 1, chr((word >> 16) & 0xff)) + self.mc.overwrite(index + 2, chr((word >> 8) & 0xff)) + self.mc.overwrite(index + 3, chr(word & 0xff)) + elif IS_LITTLE_ENDIAN: + self.mc.overwrite(index , chr(word & 0xff)) + self.mc.overwrite(index + 1, chr((word >> 8) & 0xff)) + self.mc.overwrite(index + 2, chr((word >> 16) & 0xff)) + self.mc.overwrite(index + 3, chr((word >> 24) & 0xff)) + self.index = index + 4 + + def overwrite(self): + pass + +class PPCBuilder(BlockBuilderMixin, PPCAssembler): + def __init__(self): + PPCAssembler.__init__(self) + self.init_block_builder() + self.ops_offset = {} + + def mark_op(self, op): + pos = self.get_relative_pos() + self.ops_offset[op] = pos + + def check(self, desc, v, *args): + desc.__get__(self)(*args) + ins = self.insts.pop() + expected = ins.assemble() + if expected < 0: + expected += 1<<32 + assert v == expected + + def load_imm(self, dest_reg, word): + rD = dest_reg.value + if word <= 32767 and word >= -32768: + self.li(rD, word) + elif IS_PPC_32 or (word <= 2147483647 and word >= -2147483648): + self.lis(rD, hi(word)) + if word & 0xFFFF != 0: + self.ori(rD, rD, lo(word)) + else: + self.load_imm(dest_reg, word>>32) + self.sldi(rD, rD, 32) + if word & 0xFFFF0000 != 0: + self.oris(rD, rD, high(word)) + if word & 0xFFFF != 0: + self.ori(rD, rD, lo(word)) + + def load_imm_plus(self, dest_reg, word): + """Like load_imm(), but with one instruction less, and + leaves the loaded value off by some signed 16-bit difference. + Returns that difference.""" + diff = rffi.cast(lltype.Signed, rffi.cast(rffi.SHORT, word)) + word -= diff + assert word & 0xFFFF == 0 + self.load_imm(dest_reg, word) + return diff + + def load_from_addr(self, rD, addr): + assert rD is not r.r0 + diff = self.load_imm_plus(rD, addr) + if IS_PPC_32: + self.lwz(rD.value, rD.value, diff) + else: + self.ld(rD.value, rD.value, diff) + + def b_offset(self, target): + curpos = self.currpos() + offset = target - curpos + assert offset < (1 << 24) + self.b(offset) + + def b_cond_offset(self, offset, condition): + assert condition != c.cond_none + BI, BO = c.encoding[condition] + + pos = self.currpos() + target_ofs = offset - pos + self.bc(BO, BI, target_ofs) + + def b_cond_abs(self, addr, condition): + assert condition != c.cond_none + BI, BO = c.encoding[condition] + + with scratch_reg(self): + self.load_imm(r.SCRATCH, addr) + self.mtctr(r.SCRATCH.value) + self.bcctr(BO, BI) + + def b_abs(self, address, trap=False): + with scratch_reg(self): + self.load_imm(r.SCRATCH, address) + self.mtctr(r.SCRATCH.value) + if trap: + self.trap() + self.bctr() + + def bl_abs(self, address): + with scratch_reg(self): + self.load_imm(r.SCRATCH, address) + self.mtctr(r.SCRATCH.value) + self.bctrl() + + if IS_BIG_ENDIAN: + RAW_CALL_REG = r.r2 + else: + RAW_CALL_REG = r.r12 + + def raw_call(self, call_reg=RAW_CALL_REG): + """Emit a call to the address stored in the register 'call_reg', + which must be either RAW_CALL_REG or r12. This is a regular C + function pointer, which means on big-endian that it is actually + the address of a three-words descriptor. + """ + if IS_BIG_ENDIAN: + # Load the function descriptor (currently in r2) from memory: + # [r2 + 0] -> ctr + # [r2 + 16] -> r11 + # [r2 + 8] -> r2 (= TOC) + assert self.RAW_CALL_REG is r.r2 + assert call_reg is r.r2 or call_reg is r.r12 + self.ld(r.SCRATCH.value, call_reg.value, 0) + self.ld(r.r11.value, call_reg.value, 16) + self.mtctr(r.SCRATCH.value) + self.ld(r.TOC.value, call_reg.value, 8) # must be last: TOC is r2 + elif IS_LITTLE_ENDIAN: + assert self.RAW_CALL_REG is r.r12 # 'r12' is fixed by this ABI + assert call_reg is r.r12 + self.mtctr(r.r12.value) + # Call the function + self.bctrl() + + + def load(self, target_reg, base_reg, offset): + if IS_PPC_32: + self.lwz(target_reg, base_reg, offset) + else: + self.ld(target_reg, base_reg, offset) + + def loadx(self, target_reg, base_reg, offset_reg): + if IS_PPC_32: + self.lwzx(target_reg, base_reg, offset_reg) + else: + self.ldx(target_reg, base_reg, offset_reg) + + def store(self, from_reg, base_reg, offset): + if IS_PPC_32: + self.stw(from_reg, base_reg, offset) + else: + self.std(from_reg, base_reg, offset) + + def storex(self, from_reg, base_reg, offset_reg): + if IS_PPC_32: + self.stwx(from_reg, base_reg, offset_reg) + else: + self.stdx(from_reg, base_reg, offset_reg) + + def store_update(self, target_reg, from_reg, offset): + if IS_PPC_32: + self.stwu(target_reg, from_reg, offset) + else: + self.stdu(target_reg, from_reg, offset) + + def srli_op(self, target_reg, from_reg, numbits): + if IS_PPC_32: + self.srwi(target_reg, from_reg, numbits) + else: + self.srdi(target_reg, from_reg, numbits) + + def sl_op(self, target_reg, from_reg, numbit_reg): + if IS_PPC_32: + self.slw(target_reg, from_reg, numbit_reg) + else: + self.sld(target_reg, from_reg, numbit_reg) + + def _dump_trace(self, addr, name, formatter=-1): + if not we_are_translated(): + if formatter != -1: + name = name % formatter + dir = udir.ensure('asm', dir=True) + f = dir.join(name).open('wb') + data = rffi.cast(rffi.CCHARP, addr) + for i in range(self.currpos()): + f.write(data[i]) + f.close() + + def write32(self, word): + if IS_BIG_ENDIAN: + self.writechar(chr((word >> 24) & 0xFF)) + self.writechar(chr((word >> 16) & 0xFF)) + self.writechar(chr((word >> 8) & 0xFF)) + self.writechar(chr(word & 0xFF)) + elif IS_LITTLE_ENDIAN: + self.writechar(chr(word & 0xFF)) + self.writechar(chr((word >> 8) & 0xFF)) + self.writechar(chr((word >> 16) & 0xFF)) + self.writechar(chr((word >> 24) & 0xFF)) + + def write64(self, word): + if IS_BIG_ENDIAN: + self.writechar(chr((word >> 56) & 0xFF)) + self.writechar(chr((word >> 48) & 0xFF)) + self.writechar(chr((word >> 40) & 0xFF)) + self.writechar(chr((word >> 32) & 0xFF)) + self.writechar(chr((word >> 24) & 0xFF)) + self.writechar(chr((word >> 16) & 0xFF)) + self.writechar(chr((word >> 8) & 0xFF)) + self.writechar(chr(word & 0xFF)) + elif IS_LITTLE_ENDIAN: + self.writechar(chr(word & 0xFF)) + self.writechar(chr((word >> 8) & 0xFF)) + self.writechar(chr((word >> 16) & 0xFF)) + self.writechar(chr((word >> 24) & 0xFF)) + self.writechar(chr((word >> 32) & 0xFF)) + self.writechar(chr((word >> 40) & 0xFF)) + self.writechar(chr((word >> 48) & 0xFF)) + self.writechar(chr((word >> 56) & 0xFF)) + + def currpos(self): + return self.get_relative_pos() + + def copy_to_raw_memory(self, addr): + self._copy_to_raw_memory(addr) + if we_are_translated(): + flush_icache() + self._dump(addr, "jit-backend-dump", 'ppc') + + def cmp_op(self, block, a, b, imm=False, signed=True, fp=False): + if fp == True: + self.fcmpu(block, a, b) + elif IS_PPC_32: + if signed: + if imm: + # 32 bit immediate signed + self.cmpwi(block, a, b) + else: + # 32 bit signed + self.cmpw(block, a, b) + else: + if imm: + # 32 bit immediate unsigned + self.cmplwi(block, a, b) + else: + # 32 bit unsigned + self.cmplw(block, a, b) + else: + if signed: + if imm: + # 64 bit immediate signed + self.cmpdi(block, a, b) + else: + # 64 bit signed + self.cmpd(block, a, b) + else: + if imm: + # 64 bit immediate unsigned + self.cmpldi(block, a, b) + else: + # 64 bit unsigned + self.cmpld(block, a, b) + + def alloc_scratch_reg(self): + pass + #assert not self.r0_in_use + #self.r0_in_use = True + + def free_scratch_reg(self): + pass + #assert self.r0_in_use + #self.r0_in_use = False + + def get_assembler_function(self): + "NOT_RPYTHON: tests only" + from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager + class FakeCPU: + HAS_CODEMAP = False + asmmemmgr = AsmMemoryManager() + addr = self.materialize(FakeCPU(), []) + if IS_BIG_ENDIAN: + mc = PPCBuilder() + mc.write64(addr) # the 3-words descriptor + mc.write64(0) + mc.write64(0) + addr = mc.materialize(FakeCPU(), []) + return rffi.cast(lltype.Ptr(lltype.FuncType([], lltype.Signed)), addr) + + +class scratch_reg(object): + def __init__(self, mc): + self.mc = mc + + def __enter__(self): + self.mc.alloc_scratch_reg() + + def __exit__(self, *args): + self.mc.free_scratch_reg() + +class BranchUpdater(PPCAssembler): + def __init__(self): + PPCAssembler.__init__(self) + self.init_block_builder() + + def write_to_mem(self, addr): + self.assemble() + self.copy_to_raw_memory(addr) + + def assemble(self, dump=os.environ.has_key('PYPY_DEBUG')): + insns = self.assemble0(dump) + for i in insns: + self.emit(i) + +def b(n): + r = [] + for i in range(32): + r.append(n&1) + n >>= 1 + r.reverse() + return ''.join(map(str, r)) + +def make_operations(): + def not_implemented(builder, trace_op, cpu, *rest_args): + import pdb; pdb.set_trace() + + oplist = [None] * (rop._LAST + 1) + for key, val in rop.__dict__.items(): + if key.startswith("_"): + continue + opname = key.lower() + methname = "emit_%s" % opname + if hasattr(PPCBuilder, methname): + oplist[val] = getattr(PPCBuilder, methname).im_func + else: + oplist[val] = not_implemented + return oplist + +PPCBuilder.operations = make_operations() diff --git a/rpython/jit/backend/ppc/condition.py b/rpython/jit/backend/ppc/condition.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/ppc/condition.py @@ -0,0 +1,32 @@ +EQ = 0 +NE = 1 +LE = 2 +GT = 3 +LT = 4 +GE = 5 +SO = 6 +NS = 7 +cond_none = -1 # invalid + +def negate(cond): + return cond ^ 1 + +assert negate(EQ) == NE _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit