Author: Richard Plangger <planri...@gmail.com>
Branch: ppc-vsx-support
Changeset: r85487:1360aa62b1ed
Date: 2016-07-01 14:00 +0200
http://bitbucket.org/pypy/pypy/changeset/1360aa62b1ed/
Log:	add many details to implement the reduction pattern (ppc, partly working already)

diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -605,12 +605,14 @@
     # add
     xvadddp = XX3(60, XO9=96)
     xvaddsp = XX3(60, XO9=64)
+    xsadddp = XX3(60, XO9=32)
     # sub
     xvsubdp = XX3(60, XO9=104)
     xvsubsp = XX3(60, XO9=72)
     # mul
     xvmuldp = XX3(60, XO9=112)
     xvmulsp = XX3(60, XO9=80)
+    xsmuldp = XX3(60, XO9=46)
     # div
     xvdivdp = XX3(60, XO9=102)
     xvdivsp = XX3(60, XO9=88)
@@ -662,6 +664,12 @@
     # generic splat
     xxspltd = XX3_splat(60, XO13=10, OE=0)
 
+    xxlxor = XX3(60, XO9=154)
+    xxlor = XX3(60, XO9=146)
+
+    # vector move register is alias to vector or
+    xvmr = xxlor
+
     # INTEGER
     # -------
 
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -771,7 +771,7 @@
         self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
-        self.write_pending_failure_recoveries()
+        self.write_pending_failure_recoveries(regalloc)
         full_size = self.mc.get_relative_pos()
         #
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
@@ -852,10 +852,12 @@
         self.reserve_gcref_table(allgcrefs)
         startpos = self.mc.get_relative_pos()
 
+        self._update_at_exit(arglocs, inputargs, faildescr, regalloc)
+
         self._check_frame_depth(self.mc, regalloc.get_gcmap())
         frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
         codeendpos = self.mc.get_relative_pos()
-        self.write_pending_failure_recoveries()
+        self.write_pending_failure_recoveries(regalloc)
         fullsize = self.mc.get_relative_pos()
         #
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
@@ -928,7 +930,7 @@
         ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
         mc.store(r.SCRATCH.value, r.SPP.value, ofs)
 
-    def break_long_loop(self):
+    def break_long_loop(self, regalloc):
         # If the loop is too long, the guards in it will jump forward
         # more than 32 KB.  We use an approximate hack to know if we
         # should break the loop here with an unconditional "b" that
@@ -936,15 +938,19 @@
         jmp_pos = self.mc.currpos()
         self.mc.trap()
 
-        self.write_pending_failure_recoveries()
+        self.write_pending_failure_recoveries(regalloc)
 
         currpos = self.mc.currpos()
         pmc = OverwritingBuilder(self.mc, jmp_pos, 1)
         pmc.b(currpos - jmp_pos)
         pmc.overwrite()
 
-    def generate_quick_failure(self, guardtok):
+    def generate_quick_failure(self, guardtok, regalloc):
         startpos = self.mc.currpos()
+        #
+        self._update_at_exit(guardtok.fail_locs, guardtok.failargs,
+                             guardtok.faildescr, regalloc)
+        #
         faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
         assert target != 0
         self.mc.load_imm(r.r2, target)
@@ -957,13 +963,13 @@
         self.mc.trap()
         return startpos
 
-    def write_pending_failure_recoveries(self):
+    def write_pending_failure_recoveries(self, regalloc):
         # for each pending guard, generate the code of the recovery stub
         # at the end of self.mc.
         for i in range(self.pending_guard_tokens_recovered,
                        len(self.pending_guard_tokens)):
             tok = self.pending_guard_tokens[i]
-            tok.pos_recovery_stub = self.generate_quick_failure(tok)
+            tok.pos_recovery_stub = self.generate_quick_failure(tok, regalloc)
         self.pending_guard_tokens_recovered = len(self.pending_guard_tokens)
 
     def patch_pending_failure_recoveries(self, rawstart):
@@ -1358,6 +1364,60 @@
         self.mc.load_imm(r.SCRATCH, fail_index)
         self.mc.store(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
 
+    def stitch_bridge(self, faildescr, target):
+        """ Stitching means that one can enter a bridge with a complete different register
+            allocation. This needs remapping which is done here for both normal registers
+            and accumulation registers.
+        """
+        import pdb; pdb.set_trace()
+        asminfo, bridge_faildescr, version, looptoken = target
+        assert isinstance(bridge_faildescr, ResumeGuardDescr)
+        assert isinstance(faildescr, ResumeGuardDescr)
+        assert asminfo.rawstart != 0
+        self.mc = codebuf.MachineCodeBlockWrapper()
+        allblocks = self.get_asmmemmgr_blocks(looptoken)
+        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+                                                        allblocks)
+        frame_info = self.datablockwrapper.malloc_aligned(
+                jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
+
+        self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
+        # if accumulation is saved at the guard, we need to update it here!
+        guard_locs = self.rebuild_faillocs_from_descr(faildescr, version.inputargs)
+        bridge_locs = self.rebuild_faillocs_from_descr(bridge_faildescr, version.inputargs)
+        #import pdb; pdb.set_trace()
+        guard_accum_info = faildescr.rd_vector_info
+        # O(n**2), but usually you only have at most 1 fail argument
+        while guard_accum_info:
+            bridge_accum_info = bridge_faildescr.rd_vector_info
+            while bridge_accum_info:
+                if bridge_accum_info.failargs_pos == guard_accum_info.failargs_pos:
+                    # the mapping might be wrong!
+                    if bridge_accum_info.location is not guard_accum_info.location:
+                        self.mov(guard_accum_info.location, bridge_accum_info.location)
+                bridge_accum_info = bridge_accum_info.next()
+            guard_accum_info = guard_accum_info.next()
+
+        # register mapping is most likely NOT valid, thus remap it in this
+        # short piece of assembler
+        assert len(guard_locs) == len(bridge_locs)
+        for i,gloc in enumerate(guard_locs):
+            bloc = bridge_locs[i]
+            bstack = bloc.location_code() == 'b'
+            gstack = gloc.location_code() == 'b'
+            if bstack and gstack:
+                pass
+            elif gloc is not bloc:
+                self.mov(gloc, bloc)
+        offset = self.mc.get_relative_pos()
+        self.mc.JMP_l(0)
+        self.mc.writeimm32(0)
+        self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
+        rawstart = self.materialize_loop(looptoken)
+        # update the jump (above) to the real trace
+        self._patch_jump_to(rawstart + offset, asminfo.rawstart)
+        # update the guard to jump right to this custom piece of assembler
+        self.patch_jump_for_descr(faildescr, rawstart)
 
 def notimplemented_op(self, op, arglocs, regalloc):
     msg = '[PPC/asm] %s not implemented\n' % op.getopname()
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -10,7 +10,8 @@
 from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg, check_imm_box
 from rpython.jit.backend.ppc.helper import regalloc as helper
 from rpython.jit.metainterp.history import (Const, ConstInt, ConstFloat, ConstPtr,
-                                            INT, REF, FLOAT, VOID, VECTOR)
+                                            INT, REF, FLOAT, VOID, VECTOR,
+                                            AbstractFailDescr)
 from rpython.jit.metainterp.history import JitCellToken, TargetToken
 from rpython.jit.metainterp.resoperation import rop
 from rpython.jit.backend.ppc import locations
@@ -370,7 +371,7 @@
             self.vrm._check_invariants()
             self.ivrm._check_invariants()
             if self.assembler.mc.get_relative_pos() > self.limit_loop_break:
-                self.assembler.break_long_loop()
+                self.assembler.break_long_loop(self)
                 self.limit_loop_break = (self.assembler.mc.get_relative_pos() +
                                          LIMIT_LOOP_BREAK)
             i += 1
@@ -411,10 +412,16 @@
         return gcmap
 
     def loc(self, var):
-        if var.type == FLOAT:
-            return self.fprm.loc(var)
+        if var.is_vector():
+            if var.type == FLOAT:
+                return self.vrm.loc(var)
+            else:
+                return self.ivrm.loc(var)
         else:
-            return self.rm.loc(var)
+            if var.type == FLOAT:
+                return self.fprm.loc(var)
+            else:
+                return self.rm.loc(var)
 
     def next_instruction(self):
         self.rm.next_instruction()
@@ -607,11 +614,24 @@
                 args.append(self.loc(arg))
             else:
                 args.append(None)
-        self.possibly_free_vars(op.getfailargs())
         #
         # generate_quick_failure() produces up to 14 instructions per guard
         self.limit_loop_break -= 14 * 4
-        #
+        # specifically for vecopt
+        descr = op.getdescr()
+        if not descr:
+            return args
+        assert isinstance(descr, AbstractFailDescr)
+        if descr.rd_vector_info:
+            accuminfo = descr.rd_vector_info
+            while accuminfo:
+                i = accuminfo.getpos_in_failargs()+1
+                accuminfo.location = args[i]
+                loc = self.loc(accuminfo.getoriginal())
+                args[i] = loc
+                accuminfo = accuminfo.next()
+
+        self.possibly_free_vars(op.getfailargs())
         return args
 
     def load_condition_into_cc(self, box):
diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -51,7 +51,7 @@
         if detect_vsx():
             self.vector_ext = AltiVectorExt()
             self.vector_extension = True
-            # ??? self.vector_horizontal_operations = True
+            self.vector_horizontal_operations = True
             self.assembler.setup_once_vector()
 
     @rgc.no_release_gil
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -10,7 +10,7 @@
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper.lltypesystem import lltype
-from rpython.jit.backend.ppc.locations import imm
+from rpython.jit.backend.ppc.locations import imm, RegisterLocation
 from rpython.jit.backend.ppc.arch import IS_BIG_ENDIAN
 from rpython.jit.backend.llsupport.vector_ext import VectorExt
 from rpython.jit.backend.ppc.arch import PARAM_SAVE_AREA_OFFSET
@@ -105,7 +105,6 @@
             self.mc.vperm(resloc.value, Vhi, Vlo, Vp)
         else:
             self.mc.vperm(resloc.value, Vlo, Vhi, Vp)
-        #self.mc.trap()
 
     def _emit_vec_setitem(self, op, arglocs, regalloc):
         # prepares item scale (raw_store does not)
@@ -318,60 +317,57 @@
     #        index += 1
     #    self.mc.PBLENDW_xxi(loc.value, temp.value, select)
 
-    #def _update_at_exit(self, fail_locs, fail_args, faildescr, regalloc):
-    #    """ If accumulation is done in this loop, at the guard exit
-    #        some vector registers must be adjusted to yield the correct value
-    #    """
-    #    if not isinstance(faildescr, ResumeGuardDescr):
-    #        return
-    #    assert regalloc is not None
-    #    accum_info = faildescr.rd_vector_info
-    #    while accum_info:
-    #        pos = accum_info.getpos_in_failargs()
-    #        scalar_loc = fail_locs[pos]
-    #        vector_loc = accum_info.location
-    #        # the upper elements will be lost if saved to the stack!
-    #        scalar_arg = accum_info.getoriginal()
-    #        assert isinstance(vector_loc, RegLoc)
-    #        if not isinstance(scalar_loc, RegLoc):
-    #            scalar_loc = regalloc.force_allocate_reg(scalar_arg)
-    #        assert scalar_arg is not None
-    #        if accum_info.accum_operation == '+':
-    #            self._accum_reduce_sum(scalar_arg, vector_loc, scalar_loc)
-    #        elif accum_info.accum_operation == '*':
-    #            self._accum_reduce_mul(scalar_arg, vector_loc, scalar_loc)
-    #        else:
-    #            not_implemented("accum operator %s not implemented" %
-    #                            (accum_info.accum_operation))
-    #        accum_info = accum_info.next()
+    def _update_at_exit(self, fail_locs, fail_args, faildescr, regalloc):
+        """ If accumulation is done in this loop, at the guard exit
+            some vector registers must be adjusted to yield the correct value
+        """
+        if not isinstance(faildescr, ResumeGuardDescr):
+            return
+        accum_info = faildescr.rd_vector_info
+        while accum_info:
+            pos = accum_info.getpos_in_failargs()
+            scalar_loc = fail_locs[pos]
+            vector_loc = accum_info.location
+            # the upper elements will be lost if saved to the stack!
+            scalar_arg = accum_info.getoriginal()
+            if not scalar_loc.is_reg():
+                scalar_loc = regalloc.force_allocate_reg(scalar_arg)
+            assert scalar_arg is not None
+            if accum_info.accum_operation == '+':
+                self._accum_reduce_sum(scalar_arg, vector_loc, scalar_loc)
+            elif accum_info.accum_operation == '*':
+                self._accum_reduce_mul(scalar_arg, vector_loc, scalar_loc)
+            else:
+                not_implemented("accum operator %s not implemented" %
+                                (accum_info.accum_operation))
+            accum_info = accum_info.next()
 
-    #def _accum_reduce_mul(self, arg, accumloc, targetloc):
-    #    scratchloc = X86_64_XMM_SCRATCH_REG
-    #    self.mov(accumloc, scratchloc)
-    #    # swap the two elements
-    #    self.mc.SHUFPD_xxi(scratchloc.value, scratchloc.value, 0x01)
-    #    self.mc.MULSD(accumloc, scratchloc)
-    #    if accumloc is not targetloc:
-    #        self.mov(accumloc, targetloc)
+    def _accum_reduce_mul(self, arg, accumloc, targetloc):
+        notimplemented("[ppc reduce mul]")
+        #scratchloc = X86_64_XMM_SCRATCH_REG
+        #self.mov(accumloc, scratchloc)
+        ## swap the two elements
+        #self.mc.SHUFPD_xxi(scratchloc.value, scratchloc.value, 0x01)
+        #self.mc.MULSD(accumloc, scratchloc)
+        #if accumloc is not targetloc:
+        #    self.mov(accumloc, targetloc)
 
-    #def _accum_reduce_sum(self, arg, accumloc, targetloc):
-    #    # Currently the accumulator can ONLY be the biggest
-    #    # size for X86 -> 64 bit float/int
-    #    if arg.type == FLOAT:
-    #        # r = (r[0]+r[1],r[0]+r[1])
-    #        self.mc.HADDPD(accumloc, accumloc)
-    #        # upper bits (> 64) are dirty (but does not matter)
-    #        if accumloc is not targetloc:
-    #            self.mov(accumloc, targetloc)
-    #        return
-    #    elif arg.type == INT:
-    #        scratchloc = X86_64_SCRATCH_REG
-    #        self.mc.PEXTRQ_rxi(targetloc.value, accumloc.value, 0)
-    #        self.mc.PEXTRQ_rxi(scratchloc.value, accumloc.value, 1)
-    #        self.mc.ADD(targetloc, scratchloc)
-    #        return
+    def _accum_reduce_sum(self, arg, accumloc, targetloc):
+        # Currently the accumulator can ONLY be the biggest
+        # 64 bit float/int
+        tgt = targetloc.value
+        acc = accumloc.value
+        if arg.type == FLOAT:
+            # r = (r[0]+r[1],r[0]+r[1])
+            self.mc.xvmr(tgt, acc, acc)
+            if IS_BIG_ENDIAN:
+                self.mc.xxspltd(tgt, acc, acc, 0b00)
+            else:
+                self.mc.xxspltd(tgt, acc, acc, 0b01)
+            self.mc.xsadddp(tgt, tgt, acc)
+            return
-
-        not_implemented("reduce sum for %s not impl." % arg)
 
     def emit_vec_int_is_true(self, op, arglocs, regalloc):
         resloc, argloc, sizeloc = arglocs
@@ -408,6 +404,13 @@
         self.mc.lvx(resloc.value, off, r.SP.value)
         flush_vec_cc(self, regalloc, c.EQ, op.bytesize, resloc)
 
+    def emit_vec_float_xor(self, op, arglocs, regalloc):
+        resloc, l0, l1, sizeloc = arglocs
+        res = resloc.value
+        r0 = l0.value
+        r1 = l1.value
+        self.mc.xxlxor(res, r0, r1)
+
     def emit_vec_float_ne(self, op, arglocs, regalloc):
         resloc, loc1, loc2, sizeloc = arglocs
         size = sizeloc.value
@@ -565,61 +568,49 @@
 
     #genop_vec_unpack_i = genop_vec_pack_i
 
-    #def genop_vec_pack_f(self, op, arglocs, resultloc):
-    #    resloc, srcloc, residxloc, srcidxloc, countloc, sizeloc = arglocs
-    #    assert isinstance(resloc, RegLoc)
-    #    assert isinstance(srcloc, RegLoc)
-    #    count = countloc.value
-    #    residx = residxloc.value
-    #    srcidx = srcidxloc.value
-    #    size = sizeloc.value
-    #    if size == 4:
-    #        si = srcidx
-    #        ri = residx
-    #        k = count
-    #        while k > 0:
-    #            if resloc.is_xmm:
-    #                src = srcloc.value
-    #                if not srcloc.is_xmm:
-    #                    # if source is a normal register (unpack)
-    #                    assert count == 1
-    #                    assert si == 0
-    #                    self.mov(srcloc, X86_64_XMM_SCRATCH_REG)
-    #                    src = X86_64_XMM_SCRATCH_REG.value
-    #                select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
-    #                self.mc.INSERTPS_xxi(resloc.value, src, select)
-    #            else:
-    #                self.mc.PEXTRD_rxi(resloc.value, srcloc.value, si)
-    #            si += 1
-    #            ri += 1
-    #            k -= 1
-    #    elif size == 8:
-    #        assert resloc.is_xmm
-    #        if srcloc.is_xmm:
-    #            if srcidx == 0:
-    #                if residx == 0:
-    #                    # r = (s[0], r[1])
-    #                    self.mc.MOVSD(resloc, srcloc)
-    #                else:
-    #                    assert residx == 1
-    #                    # r = (r[0], s[0])
-    #                    self.mc.UNPCKLPD(resloc, srcloc)
-    #            else:
-    #                assert srcidx == 1
-    #                if residx == 0:
-    #                    # r = (s[1], r[1])
-    #                    if resloc != srcloc:
-    #                        self.mc.UNPCKHPD(resloc, srcloc)
-    #                    self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
-    #                else:
-    #                    assert residx == 1
-    #                    # r = (r[0], s[1])
-    #                    if resloc != srcloc:
-    #                        self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
-    #                        self.mc.UNPCKHPD(resloc, srcloc)
-    #                    # if they are equal nothing is to be done
+    def emit_vec_pack_f(self, op, arglocs, resultloc):
+        resloc, vloc, srcloc, residxloc, srcidxloc, countloc = arglocs
+        vec = vloc.value
+        res = resloc.value
+        src = srcloc.value
+        count = countloc.value
+        residx = residxloc.value
+        srcidx = srcidxloc.value
+        size = op.bytesize
+        assert size == 8
+        # srcloc is always a floating point register f, this means it is
+        # vsr[0] == valueof(f)
+        if srcidx == 0:
+            if residx == 0:
+                # r = (s[0], r[1])
+                if IS_BIG_ENDIAN:
+                    self.mc.xxspltd(res, src, vec, 0b10)
+                else:
+                    self.mc.xxspltd(res, src, vec, 0b01)
+            else:
+                assert residx == 1
+                # r = (r[0], s[0])
+                if IS_BIG_ENDIAN:
+                    self.mc.xxspltd(res, vec, src, 0b00)
+                else:
+                    self.mc.xxspltd(res, vec, src, 0b11)
+        else:
+            assert srcidx == 1
+            if residx == 0:
+                # r = (s[1], r[1])
+                if IS_BIG_ENDIAN:
+                    self.mc.xxspltd(res, src, vec, 0b11)
+                else:
+                    self.mc.xxspltd(res, src, vec, 0b00)
+            else:
+                assert residx == 1
+                # r = (r[0], s[1])
+                if IS_BIG_ENDIAN:
+                    self.mc.xxspltd(res, vec, src, 0b10)
+                else:
+                    self.mc.xxspltd(res, vec, src, 0b01)
 
-    #genop_vec_unpack_f = genop_vec_pack_f
+    emit_vec_unpack_f = emit_vec_pack_f
 
     # needed as soon as PPC's support_singlefloat is implemented!
     #def genop_vec_cast_float_to_int(self, op, arglocs, regalloc):
@@ -627,6 +618,10 @@
     #def genop_vec_cast_singlefloat_to_float(self, op, arglocs, regalloc):
     #    self.mc.CVTPS2PD(resloc, arglocs[0])
 
+    def emit_vec_f(self, op, arglocs, regalloc):
+        pass
+    emit_vec_i = emit_vec_f
+
 
 class VectorRegalloc(object):
     _mixin_ = True
@@ -709,9 +704,10 @@
 
     prepare_vec_int_xor = prepare_vec_arith
     prepare_vec_float_eq = prepare_vec_arith
-    prepare_vec_float_ne = prepare_vec_float_eq
-    prepare_vec_int_eq = prepare_vec_float_eq
-    prepare_vec_int_ne = prepare_vec_float_eq
+    prepare_vec_float_ne = prepare_vec_arith
+    prepare_vec_int_eq = prepare_vec_arith
+    prepare_vec_int_ne = prepare_vec_arith
+    prepare_vec_float_xor = prepare_vec_arith
 
     del prepare_vec_arith
 
@@ -751,24 +747,35 @@
     prepare_vec_float_abs = prepare_vec_arith_unary
     del prepare_vec_arith_unary
 
-    #def prepare_vec_pack_i(self, op):
-    #    # new_res = vec_pack_i(res, src, index, count)
-    #    assert isinstance(op, VectorOp)
-    #    arg = op.getarg(1)
-    #    index = op.getarg(2)
-    #    count = op.getarg(3)
-    #    assert isinstance(index, ConstInt)
-    #    assert isinstance(count, ConstInt)
-    #    args = op.getarglist()
-    #    srcloc = self.make_sure_var_in_reg(arg, args)
-    #    resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
-    #    residx = index.value # where to put it in result?
-    #    srcidx = 0
-    #    arglocs = [resloc, srcloc, imm(residx), imm(srcidx),
-    #               imm(count.value), imm(op.bytesize)]
-    #    self.perform(op, arglocs, resloc)
+    def prepare_vec_pack_i(self, op):
+        # new_res = vec_pack_i(res, src, index, count)
+        assert isinstance(op, VectorOp)
+        arg = op.getarg(1)
+        index = op.getarg(2)
+        count = op.getarg(3)
+        assert isinstance(index, ConstInt)
+        assert isinstance(count, ConstInt)
+        srcloc = self.ensure_vector_reg(arg)
+        resloc = self.force_allocate_vector_reg(op)
+        residx = index.value # where to put it in result?
+        srcidx = 0
+        return [resloc, srcloc, imm(residx), imm(srcidx), imm(count.value)]
 
-    #prepare_vec_pack_f = prepare_vec_pack_i
+    def prepare_vec_pack_f(self, op):
+        # new_res = vec_pack_i(res, src, index, count)
+        assert isinstance(op, VectorOp)
+        arg = op.getarg(1)
+        index = op.getarg(2)
+        count = op.getarg(3)
+        assert isinstance(index, ConstInt)
+        assert isinstance(count, ConstInt)
+        assert not arg.is_vector()
+        srcloc = self.ensure_reg(arg)
+        vloc = self.ensure_vector_reg(op.getarg(0))
+        resloc = self.force_allocate_vector_reg(op)
+        residx = index.value # where to put it in result?
+        srcidx = 0
+        return [resloc, vloc, srcloc, imm(residx), imm(srcidx), imm(count.value)]
 
     #def prepare_vec_unpack_i(self, op):
     #    assert isinstance(op, VectorOp)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -645,7 +645,7 @@
         bridge_locs = self.rebuild_faillocs_from_descr(bridge_faildescr, version.inputargs)
         #import pdb; pdb.set_trace()
         guard_accum_info = faildescr.rd_vector_info
-        # O(n^2), but usually you only have at most 1 fail argument
+        # O(n**2), but usually you only have at most 1 fail argument
         while guard_accum_info:
             bridge_accum_info = bridge_faildescr.rd_vector_info
             while bridge_accum_info:
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -793,7 +793,10 @@
         if pack.reduce_init() == 0:
             vecop = OpHelpers.create_vec(datatype, bytesize, signed, count)
             oplist.append(vecop)
-            vecop = VecOperation(rop.VEC_INT_XOR, [vecop, vecop],
+            opnum = rop.VEC_INT_XOR
+            if datatype == FLOAT:
+                opnum = rop.VEC_FLOAT_XOR
+            vecop = VecOperation(opnum, [vecop, vecop],
                                  vecop, count)
             oplist.append(vecop)
         elif pack.reduce_init() == 1:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -994,6 +994,7 @@
     '_VEC_ARITHMETIC_LAST',
     'VEC_FLOAT_EQ/2b/i',
    'VEC_FLOAT_NE/2b/i',
+    'VEC_FLOAT_XOR/2/f',
    'VEC_INT_IS_TRUE/1b/i',
    'VEC_INT_NE/2b/i',
    'VEC_INT_EQ/2b/i',
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -78,6 +78,7 @@
     enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
 
     def setup_method(self, method):
+        import pdb; pdb.set_trace()
         if not self.supports_vector_ext():
             py.test.skip("this cpu %s has no implemented vector backend" % CPU)
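
For readers who do not have the vectorizer in their head: the "reduction pattern" this changeset wires up means that a loop like acc = acc + a[i] keeps one partial sum per vector lane while the trace runs, and only collapses the lanes into a single scalar when the trace leaves through a guard (that is what _update_at_exit / _accum_reduce_sum do). The following is a plain-Python model of that idea, not PyPy/RPython API; every name in it is made up for illustration.

    # Plain-Python model of lane-wise accumulation plus the final reduction.
    def vectorized_sum(data, lanes=2):
        acc = [0.0] * lanes          # what VEC_FLOAT_XOR(v, v) yields: an all-zero vector
        i = 0
        while i + lanes <= len(data):
            for lane in range(lanes):        # one vector add per iteration in the real trace
                acc[lane] += data[i + lane]
            i += lanes
        total = sum(acc)             # the "reduction" performed at the guard exit
        for x in data[i:]:           # scalar tail that does not fill a whole vector
            total += x
        return total

    assert vectorized_sum([1.0, 2.0, 3.0, 4.0, 5.0]) == 15.0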
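The float reduction emitted by _accum_reduce_sum above is a two-step dance: a doubleword permute (xxspltd, built on xxpermdi in codebuilder.py) brings the second partial sum into the slot that the scalar add reads, and xsadddp then adds the two slots. The sketch below only models that intent; the Vector class, the lane numbering and the helper names are inventions of this example, not VSX semantics.

    # Rough model of the guard-exit float reduction (xxspltd + xsadddp).
    class Vector(object):
        def __init__(self, lo, hi):
            self.dw = [lo, hi]           # two 64-bit float lanes of one VSX register

    def permute_other_lane(v):
        # stands in for the xxspltd step: expose lane 1 where the scalar add reads
        return Vector(v.dw[1], v.dw[1])

    def scalar_add(a, b):
        # stands in for xsadddp: only the scalar slot (lane 0) of the result matters
        return Vector(a.dw[0] + b.dw[0], 0.0)

    acc = Vector(3.0, 4.0)               # partial sums accumulated lane-wise in the loop
    res = scalar_add(permute_other_lane(acc), acc)
    assert res.dw[0] == 7.0              # scalar value written back to the fail location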
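emit_vec_pack_f above inserts a scalar double into one lane of a vector purely with doubleword permutes, choosing different selector values for big and little endian. A Python model of "build a 2-lane result by taking one doubleword from each of two sources" looks like this; the selector encoding used here (high bit picks the lane taken from the first source, low bit the lane taken from the second) is an assumption made for the example, not a statement of the exact VSX bit layout.

    # Illustrative model of a doubleword permute between two sources.
    def permute_doublewords(a, b, sel):
        return [a[(sel >> 1) & 1], b[sel & 1]]

    vec = [1.5, 2.5]          # current vector register contents r = (r[0], r[1])
    src = [9.0, 9.0]          # scalar double, assumed to live in lane 0 of its register

    # r = (s[0], r[1]): lane 0 from the scalar source, lane 1 kept from the vector
    assert permute_doublewords(src, vec, 0b01) == [9.0, 2.5]
    # r = (r[0], s[0]): lane 0 kept from the vector, scalar inserted into lane 1
    assert permute_doublewords(vec, src, 0b00) == [1.5, 9.0]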
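stitch_bridge has to cope with the fact that a bridge may have been compiled with a completely different register allocation than the guard that jumps to it: every surviving value is moved from the location the guard left it in to the location the bridge expects, and stack-to-stack pairs are skipped because the frame slot is shared. A toy model of that remapping loop, with made-up location objects and a dict standing in for machine state:

    # Toy model of the location remapping done in stitch_bridge.
    class Loc(object):
        def __init__(self, kind, index):
            self.kind, self.index = kind, index      # kind: 'r' register, 'b' stack slot
        def location_code(self):
            return self.kind

    def remap(guard_locs, bridge_locs, state):
        assert len(guard_locs) == len(bridge_locs)
        for gloc, bloc in zip(guard_locs, bridge_locs):
            if gloc.location_code() == 'b' and bloc.location_code() == 'b':
                continue                             # both on the frame: nothing to emit
            if (gloc.kind, gloc.index) != (bloc.kind, bloc.index):
                state[(bloc.kind, bloc.index)] = state.pop((gloc.kind, gloc.index))

    state = {('r', 3): 42}                           # guard left the value in register 3
    remap([Loc('r', 3)], [Loc('r', 5)], state)       # bridge expects it in register 5
    assert state == {('r', 5): 42}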