Author: Richard Plangger <planri...@gmail.com> Branch: vecopt-merge Changeset: r79812:fd39e085206b Date: 2015-09-24 17:50 +0200 http://bitbucket.org/pypy/pypy/changeset/fd39e085206b/
Log: x86 assembler halfway through, accumulation leaves behind a fail descr that is tried to be stitched (but removed from the trace) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1787,8 +1787,7 @@ self.guard_success_cc = rx86.Conditions['E'] self.implement_guard(guard_token) - def genop_guard_guard_nonnull_class(self, ign_1, guard_op, - guard_token, locs, ign_2): + def genop_guard_guard_nonnull_class(self, guard_op, guard_token, locs, ign): self.mc.CMP(locs[0], imm1) # Patched below self.mc.J_il8(rx86.Conditions['B'], 0) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -63,8 +63,7 @@ save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10] class X86XMMRegisterManager(RegisterManager): - - box_types = [FLOAT, VECTOR] + box_types = [FLOAT, INT] # yes INT! 
all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7] # we never need lower byte I hope save_around_call_regs = all_regs @@ -203,7 +202,7 @@ return self.fm.get_frame_depth() def possibly_free_var(self, var): - if var.type == FLOAT or var.type == VECTOR: + if var.type == FLOAT or var.is_vector(): self.xrm.possibly_free_var(var) else: self.rm.possibly_free_var(var) @@ -223,7 +222,7 @@ def make_sure_var_in_reg(self, var, forbidden_vars=[], selected_reg=None, need_lower_byte=False): - if var.type == FLOAT or var.type == VECTOR: + if var.type == FLOAT or var.is_vector(): if isinstance(var, ConstFloat): return FloatImmedLoc(var.getfloatstorage()) return self.xrm.make_sure_var_in_reg(var, forbidden_vars, @@ -234,7 +233,7 @@ def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None, need_lower_byte=False): - if var.type == FLOAT or var.type == VECTOR: + if var.type == FLOAT or var.is_vector(): return self.xrm.force_allocate_reg(var, forbidden_vars, selected_reg, need_lower_byte) else: @@ -317,26 +316,15 @@ self.assembler.regalloc_perform_math(op, arglocs, result_loc) def locs_for_fail(self, guard_op): - faillocs = [] + faillocs = [self.loc(arg) for arg in guard_op.getfailargs()] descr = guard_op.getdescr() - for i,arg in enumerate(guard_op.getfailargs()): - if arg is None: - faillocs.append(None) - continue - if arg.is_vector() and arg.getaccum(): - # for an accumulator store the position of the original - # box and in llsupport/assembler save restore information - # on the descriptor - loc = self.loc(accum.getoriginalbox()) - faillocs.append(loc) - assert isinstance(descr, ResumeGuardDescr) - descr.rd_accum_list = AccumInfo(descr.rd_accum_list, - i, accum.operator, - accum.getoriginalbox(), - self.loc(arg)) - else: - faillocs.append(self.loc(arg)) - + if descr and descr.rd_accum_list: + accuminfo = descr.rd_accum_list + while accuminfo: + accuminfo.vector_loc = faillocs[accuminfo.getpos_in_failargs()] + loc = self.loc(accuminfo.getoriginal()) + 
faillocs[accuminfo.getpos_in_failargs()] = loc + accuminfo = accuminfo.next() return faillocs def perform_guard(self, guard_op, arglocs, result_loc): @@ -406,7 +394,7 @@ def loc(self, v): if v is None: # xxx kludgy return None - if v.type == FLOAT or v.type == VECTOR: + if v.type == FLOAT or v.is_vector(): return self.xrm.loc(v) return self.rm.loc(v) @@ -1392,7 +1380,7 @@ box = op.getarg(i) src_loc = self.loc(box) dst_loc = arglocs[i] - if box.type != FLOAT and box.type != VECTOR: + if box.type != FLOAT and not box.is_vector(): src_locations1.append(src_loc) dst_locations1.append(dst_loc) else: diff --git a/rpython/jit/backend/x86/test/test_x86vector.py b/rpython/jit/backend/x86/test/test_x86vector.py --- a/rpython/jit/backend/x86/test/test_x86vector.py +++ b/rpython/jit/backend/x86/test/test_x86vector.py @@ -14,9 +14,18 @@ class TestBasic(test_basic.Jit386Mixin, test_vector.VectorizeTests): # for the individual tests see # ====> ../../../metainterp/test/test_basic.py + def setup_method(self, method): + clazz = self.CPUClass + def init(*args, **kwargs): + cpu = clazz(*args, **kwargs) + cpu.supports_guard_gc_type = True + return cpu + self.CPUClass = init + enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll' class TestAssembler(BaseTestAssembler): + def imm_4_int32(self, a, b, c, d): adr = self.xrm.assembler.datablockwrapper.malloc_aligned(16, 16) ptr = rffi.cast(rffi.CArrayPtr(rffi.INT), adr) diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py --- a/rpython/jit/backend/x86/vector_ext.py +++ b/rpython/jit/backend/x86/vector_ext.py @@ -568,7 +568,7 @@ args = op.getarglist() base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args) ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args) - result_loc = self.force_allocate_reg(op.result) + result_loc = self.force_allocate_reg(op) self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs), imm(integer), imm(aligned)], result_loc) @@ -601,11 +601,10 
@@ def consider_vec_arith(self, op): lhs = op.getarg(0) - assert isinstance(lhs, BoxVector) - size = lhs.item_size + size = lhs.bytesize args = op.getarglist() loc1 = self.make_sure_var_in_reg(op.getarg(1), args) - loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) + loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args) self.perform(op, [loc0, loc1, imm(size)], loc0) consider_vec_int_add = consider_vec_arith @@ -622,7 +621,7 @@ assert isinstance(lhs, BoxVector) size = lhs.item_size args = op.getarglist() - res = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) + res = self.xrm.force_result_in_reg(op, op.getarg(0), args) self.perform(op, [res, imm(size)], res) consider_vec_float_neg = consider_vec_arith_unary @@ -631,19 +630,17 @@ def consider_vec_logic(self, op): lhs = op.getarg(0) - assert isinstance(lhs, BoxVector) - size = lhs.item_size args = op.getarglist() source = self.make_sure_var_in_reg(op.getarg(1), args) - result = self.force_result_in_reg(op.result, op.getarg(0), args) - self.perform(op, [source, imm(size)], result) + result = self.xrm.force_result_in_reg(op, op.getarg(0), args) + self.perform(op, [source, imm(lhs.bytesize)], result) def consider_vec_float_eq(self, op, guard_op): lhs = op.getarg(0) assert isinstance(lhs, BoxVector) size = lhs.item_size args = op.getarglist() - lhsloc = self.force_result_in_reg(op.result, op.getarg(0), args) + lhsloc = self.xrm.force_result_in_reg(op, op.getarg(0), args) rhsloc = self.make_sure_var_in_reg(op.getarg(1), args) if guard_op: self.perform_with_guard(op, guard_op, [lhsloc, rhsloc, imm(size)], None) @@ -668,12 +665,11 @@ assert isinstance(count, ConstInt) args = op.getarglist() srcloc = self.make_sure_var_in_reg(arg, args) - resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) + resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args) residx = index.value # where to put it in result? 
srcidx = 0 - assert isinstance(op.result, BoxVector) - size = op.result.getsize() - arglocs = [resloc, srcloc, imm(residx), imm(srcidx), imm(count.value), imm(size)] + arglocs = [resloc, srcloc, imm(residx), imm(srcidx), + imm(count.value), imm(op.bytesize)] self.perform(op, arglocs, resloc) consider_vec_pack_f = consider_vec_pack_i @@ -703,19 +699,15 @@ consider_vec_unpack_f = consider_vec_unpack_i def consider_vec_expand_f(self, op): - result = op.result - assert isinstance(result, BoxVector) arg = op.getarg(0) args = op.getarglist() if arg.is_constant(): - resloc = self.xrm.force_allocate_reg(result) - srcloc = self.xrm.expand_float(result.getsize(), arg) + resloc = self.xrm.force_allocate_reg(op) + srcloc = self.xrm.expand_float(op.bytesize, arg) else: - resloc = self.xrm.force_result_in_reg(op.result, arg, args) + resloc = self.xrm.force_result_in_reg(op, arg, args) srcloc = resloc - - size = op.result.getsize() - self.perform(op, [srcloc, imm(size)], resloc) + self.perform(op, [srcloc, imm(op.bytesize)], resloc) def consider_vec_expand_i(self, op): arg = op.getarg(0) @@ -724,21 +716,15 @@ srcloc = self.rm.convert_to_imm(arg) else: srcloc = self.make_sure_var_in_reg(arg, args) - resloc = self.xrm.force_allocate_reg(op.result, args) - assert isinstance(op.result, BoxVector) - size = op.result.getsize() - self.perform(op, [srcloc, imm(size)], resloc) + resloc = self.xrm.force_allocate_reg(op, args) + self.perform(op, [srcloc, imm(op.bytesize)], resloc) def consider_vec_int_signext(self, op): args = op.getarglist() - resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) - sizearg = op.getarg(0) - result = op.result - assert isinstance(sizearg, BoxVector) - assert isinstance(result, BoxVector) - size = sizearg.getsize() - tosize = result.getsize() - self.perform(op, [resloc, imm(size), imm(tosize)], resloc) + resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args) + size = op.cast_from_bytesize() + assert size > 0 + self.perform(op, [resloc, 
imm(size), imm(op.bytesize)], resloc) def consider_vec_int_is_true(self, op, guard_op): args = op.getarglist() @@ -753,8 +739,8 @@ def _consider_vec(self, op): # pseudo instruction, needed to create a new variable - self.xrm.force_allocate_reg(op.result) - + self.xrm.force_allocate_reg(op) + consider_vec_i = _consider_vec consider_vec_f = _consider_vec @@ -764,7 +750,7 @@ def consider_vec_cast_float_to_int(self, op): args = op.getarglist() srcloc = self.make_sure_var_in_reg(op.getarg(0), args) - resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) + resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args) self.perform(op, [srcloc], resloc) consider_vec_cast_int_to_float = consider_vec_cast_float_to_int diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ b/rpython/jit/metainterp/compile.py @@ -301,7 +301,8 @@ from rpython.jit.metainterp.optimizeopt.vector import optimize_vector loop_info, loop_ops = optimize_vector(metainterp_sd, jitdriver_sd, warmstate, - loop_info, loop_ops) + loop_info, loop_ops, + jitcell_token) # loop = create_empty_loop(metainterp) loop.original_jitcell_token = jitcell_token diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -122,7 +122,6 @@ break i -= 1 else: - print "insert at 0", target worklist.insert(0, target) node.clear_dependencies() node.emitted = True @@ -379,7 +378,6 @@ def unpack_from_vector(state, arg, index, count): """ Extract parts of the vector box into another vector box """ - #print "unpack i", index, "c", count, "v", arg assert count > 0 assert index + count <= arg.count args = [arg, ConstInt(index), ConstInt(count)] @@ -555,9 +553,7 @@ descr.rd_accum_list = AccumInfo(descr.rd_accum_list, i, accum.operator, arg, None) seed = accum.getleftmostseed() - print "pre", 
failargs[i], "=>", failargs[i] = self.renamer.rename_map.get(seed, seed) - print failargs[i] def profitable(self): return self.costmodel.profitable() @@ -613,7 +609,6 @@ if argument and not argument.is_constant(): arg = self.ensure_unpacked(i, argument) if argument is not arg: - print "exchange at", i, fail_args[i], "=", arg fail_args[i] = arg def ensure_unpacked(self, index, arg): @@ -780,12 +775,10 @@ vector register. """ before_count = len(packlist) - print "splitting pack", self pack = self while pack.pack_load(vec_reg_size) > Pack.FULL: pack.clear() oplist, newoplist = pack.slice_operations(vec_reg_size) - print " split of %dx, left: %d" % (len(oplist), len(newoplist)) pack.operations = oplist pack.update_pack_of_nodes() if not pack.leftmost().is_typecast(): @@ -801,7 +794,6 @@ newpack.clear() newpack.operations = [] break - print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]])) pack.update_pack_of_nodes() def slice_operations(self, vec_reg_size): diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -44,19 +44,22 @@ self.jump = jump assert self.jump.getopnum() == rop.JUMP - def finaloplist(self, jitcell_token=None, label=False): + def finaloplist(self, jitcell_token=None, reset_label_token=True, label=False): oplist = [] if jitcell_token: - token = TargetToken(jitcell_token) - token.original_jitcell_token = jitcell_token - jitcell_token.target_tokens.append(token) - self.label.setdescr(token) + if reset_label_token: + token = TargetToken(jitcell_token) + token.original_jitcell_token = jitcell_token + jitcell_token.target_tokens.append(token) + self.label.setdescr(token) if self.prefix_label: token = TargetToken(jitcell_token) token.original_jitcell_token = jitcell_token jitcell_token.target_tokens.append(token) 
self.prefix_label.setdescr(token) - self.jump.setdescr(token) + self.jump.setdescr(token) + if reset_label_token: + self.jump.setdescr(token) if self.prefix_label: oplist = self.prefix + [self.prefix_label] elif self.prefix: @@ -93,7 +96,8 @@ loop.prefix_label = prefix_label return loop -def optimize_vector(metainterp_sd, jitdriver_sd, warmstate, loop_info, loop_ops): +def optimize_vector(metainterp_sd, jitdriver_sd, warmstate, + loop_info, loop_ops, jitcell_token=None): """ Enter the world of SIMD. Bails if it cannot transform the trace. """ user_code = not jitdriver_sd.vec and warmstate.vec_all loop = VectorLoop(loop_info.label_op, loop_ops[1:-1], loop_ops[-1]) @@ -122,7 +126,7 @@ debug_stop("vec-opt-loop") # info.label_op = loop.label - return info, loop.finaloplist() + return info, loop.finaloplist(jitcell_token=jitcell_token, reset_label_token=False) except NotAVectorizeableLoop: debug_stop("vec-opt-loop") # vectorization is not possible diff --git a/rpython/jit/metainterp/optimizeopt/version.py b/rpython/jit/metainterp/optimizeopt/version.py --- a/rpython/jit/metainterp/optimizeopt/version.py +++ b/rpython/jit/metainterp/optimizeopt/version.py @@ -112,62 +112,10 @@ else: assert 0, "olddescr must be found" - def update_token(self, jitcell_token, all_target_tokens): - # this is only invoked for versioned loops! 
- # TODO - label_index = index_of_first(rop.LABEL, self.operations, 0) - label = self.operations[label_index] - jump = self.operations[-1] - # - assert jump.getopnum() == rop.JUMP - # - token = TargetToken(jitcell_token) - token.original_jitcell_token = jitcell_token - all_target_tokens.append(token) - if label.getdescr() is None or label.getdescr() is not jump.getdescr(): - label_index = index_of_first(rop.LABEL, self.operations, 1) - if label_index > 0: - second_label = self.operations[label_index] - # set the inner loop - second_label.setdescr(token) - jump.setdescr(token) - # set the first label - token = TargetToken(jitcell_token) - token.original_jitcell_token = jitcell_token - all_target_tokens.append(token) - label.setdescr(token) - return - label.setdescr(token) - jump.setdescr(token) - def create_backend_loop(self, metainterp, jitcell_token): vl = create_empty_loop(metainterp) - vl.operations = self.loop.finaloplist(jitcell_token,True) + vl.operations = self.loop.finaloplist(jitcell_token,True,True) vl.inputargs = self.loop.label.getarglist_copy() vl.original_jitcell_token = jitcell_token return vl - -#def index_of_first(opnum, operations, pass_by=0): -# """ returns the position of the first operation matching the opnum. 
-# Or -1 if non is found -# """ -# for i,op in enumerate(operations): -# if op.getopnum() == opnum: -# if pass_by == 0: -# return i -# else: -# pass_by -= 1 -# return -1 -# -#def find_first_index(self, opnum, pass_by=0): -# """ return the first index of the operation having the same opnum or -1 """ -# return index_of_first(opnum, self.operations, pass_by) -# -#def find_first(self, opnum, pass_by=0): -# index = self.find_first_index(opnum, pass_by) -# if index != -1: -# return self.operations[index] -# return None - - diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -1185,12 +1185,15 @@ _cast_ops = { 'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4, 2), + 'VEC_CAST_FLOAT_TO_INT': ('f', 8, 'i', 4, 2), 'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8, 2), + 'VEC_CAST_INT_TO_FLOAT': ('i', 4, 'f', 8, 2), 'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4, 2), + 'VEC_CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4, 2), 'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8, 2), + 'VEC_CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8, 2), 'INT_SIGNEXT': ('i', 0, 'i', 0, 0), - #'CAST_PTR_TO_INT': ('r', 0, 'i', 4), - #'CAST_INT_TO_PTR': ('i', 4, 'r', 0), + 'VEC_INT_SIGNEXT': ('i', 0, 'i', 0, 0), } # ____________________________________________________________ @@ -1292,7 +1295,7 @@ else: assert result_type == 'n' if name in _cast_ops: - if name == "INT_SIGNEXT": + if "INT_SIGNEXT" in name: mixins.append(SignExtOp) mixins.append(CastOp) @@ -1302,7 +1305,6 @@ return type(cls_name, bases, dic) setup(__name__ == '__main__') # print out the table when run directly -del _oplist _opboolinverse = { rop.INT_EQ: rop.INT_NE, diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py --- a/rpython/jit/metainterp/resume.py +++ b/rpython/jit/metainterp/resume.py @@ -57,6 +57,9 @@ self.scalar_box = box self.vector_loc = loc + def getoriginal(self): + return self.scalar_box + def 
getpos_in_failargs(self): return self.scalar_position _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit