Author: Richard Plangger <r...@pasra.at>
Branch: vecopt2
Changeset: r77120:0a2d06280172
Date: 2015-04-20 09:33 +0200
http://bitbucket.org/pypy/pypy/changeset/0a2d06280172/
Log: renamed guard_no_early_exit to guard_early_exit started modify the register allocator and the code generation to support the new boxes and instructions added a vectorize test in the backend for x86 diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py --- a/rpython/jit/backend/llgraph/runner.py +++ b/rpython/jit/backend/llgraph/runner.py @@ -855,8 +855,7 @@ argboxes = self.current_op.getarglist() self.do_renaming(argboxes, args) - def execute_guard_no_early_exit(self, descr): - # TODO + def execute_guard_early_exit(self, descr): pass def execute_guard_true(self, descr, arg): diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2414,6 +2414,16 @@ self.save_into_mem(addr, imm0, imm(current)) i += current + # vector operations + def genop_vec_raw_load(self, op, arglocs, resloc): + base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs + assert isinstance(ofs, ImmedLoc) + assert isinstance(size_loc, ImmedLoc) + scale = get_scale(size_loc.value) + src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale) + assert False + #self.load_from_mem(resloc, src_addr, size_loc, sign_loc) + genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST genop_list = [Assembler386.not_implemented_op] * rop._LAST diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -22,7 +22,7 @@ from rpython.jit.codewriter import longlong from rpython.jit.codewriter.effectinfo import EffectInfo from rpython.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr, - ConstFloat, BoxInt, BoxFloat, INT, REF, FLOAT, TargetToken) + ConstFloat, BoxInt, BoxFloat, INT, REF, FLOAT, VECTOR, TargetToken) from rpython.jit.metainterp.resoperation import rop, ResOperation from rpython.rlib import rgc from 
rpython.rlib.objectmodel import we_are_translated @@ -61,7 +61,7 @@ class X86XMMRegisterManager(RegisterManager): - box_types = [FLOAT] + box_types = [FLOAT, VECTOR] all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7] # we never need lower byte I hope save_around_call_regs = all_regs @@ -208,7 +208,7 @@ def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None, need_lower_byte=False): - if var.type == FLOAT: + if var.type == FLOAT or var.type == VECTOR: return self.xrm.force_allocate_reg(var, forbidden_vars, selected_reg, need_lower_byte) else: @@ -1457,6 +1457,23 @@ self.rm.possibly_free_var(length_box) self.rm.possibly_free_var(dstaddr_box) + # vector operations + def consider_vec_raw_load(self, op): + itemsize, ofs, sign = unpack_arraydescr(op.getdescr()) + args = op.getarglist() + base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args) + ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args) + result_loc = self.force_allocate_reg(op.result) + if sign: + sign_loc = imm1 + else: + sign_loc = imm0 + self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs), + sign_loc], result_loc) + + def consider_guard_early_exit(self, op): + pass + def not_implemented_op(self, op): not_implemented("not implemented operation: %s" % op.getopname()) diff --git a/rpython/jit/backend/x86/test/test_vectorize.py b/rpython/jit/backend/x86/test/test_vectorize.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/x86/test/test_vectorize.py @@ -0,0 +1,12 @@ +import py +from rpython.jit.backend.detect_cpu import getcpuclass +from rpython.jit.metainterp.warmspot import ll_meta_interp +from rpython.jit.metainterp.test import support, test_vectorize +from rpython.jit.backend.x86.test import test_basic +from rpython.rlib.jit import JitDriver + + +class TestBasic(test_basic.Jit386Mixin, test_vectorize.VectorizeLLtypeTests): + # for the individual tests see + # ====> ../../../metainterp/test/test_basic.py + pass diff --git 
a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py --- a/rpython/jit/metainterp/blackhole.py +++ b/rpython/jit/metainterp/blackhole.py @@ -1482,9 +1482,8 @@ # if opnum == rop.GUARD_FUTURE_CONDITION: pass - elif opnum == rop.GUARD_NO_EARLY_EXIT: + elif opnum == rop.GUARD_EARLY_EXIT: self.position = resumedescr.rd_frame_info_list.pc - pass elif opnum == rop.GUARD_TRUE: # Produced directly by some goto_if_not_xxx() opcode that did not # jump, but which must now jump. The pc is just after the opcode. diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ b/rpython/jit/metainterp/compile.py @@ -685,8 +685,8 @@ class ResumeAtPositionDescr(ResumeGuardDescr): guard_opnum = rop.GUARD_FUTURE_CONDITION -class ResumeAtEarylExitDescr(ResumeGuardDescr): - guard_opnum = rop.GUARD_NO_EARLY_EXIT +class ResumeAtLoopHeaderDescr(ResumeGuardDescr): + guard_opnum = rop.GUARD_EARLY_EXIT class AllVirtuals: llopaque = True @@ -775,8 +775,8 @@ resumedescr = ResumeGuardNotInvalidated() elif opnum == rop.GUARD_FUTURE_CONDITION: resumedescr = ResumeAtPositionDescr() - elif opnum == rop.GUARD_NO_EARLY_EXIT: - resumedescr = ResumeAtEarylExitDescr() + elif opnum == rop.GUARD_EARLY_EXIT: + resumedescr = ResumeAtLoopHeaderDescr() elif opnum == rop.GUARD_VALUE: resumedescr = ResumeGuardValueDescr() elif opnum == rop.GUARD_NONNULL: diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py --- a/rpython/jit/metainterp/history.py +++ b/rpython/jit/metainterp/history.py @@ -927,6 +927,8 @@ insns = {} for loop in self.get_all_loops(): insns = loop.summary(adding_insns=insns, omit_finish=omit_finish) + if 'guard_early_exit' in insns: # XXX + del insns['guard_early_exit'] return self._check_insns(insns, expected, check) def _check_insns(self, insns, expected, check): diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- 
a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -1,6 +1,6 @@ import py -from rpython.jit.metainterp.compile import ResumeAtEarylExitDescr +from rpython.jit.metainterp import compile from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method from rpython.jit.metainterp.resoperation import rop from rpython.jit.codewriter.effectinfo import EffectInfo @@ -105,13 +105,17 @@ def relax_guard_to(self, guard): """ Relaxes a guard operation to an earlier guard. """ assert self.op.is_guard() - assert guard.op.is_guard() + assert guard.is_guard() - my_op = self.getoperation() - op = guard.getoperation() - my_op.setdescr(ResumeAtEarylExitDescr()) - my_op.setfailargs(op.getfailargs()) - my_op.rd_snapshot = op.rd_snapshot + tgt_op = self.getoperation() + op = guard + #descr = compile.ResumeAtLoopHeaderDescr() + descr = compile.ResumeAtLoopHeaderDescr() + tgt_op.setdescr(descr) + if not we_are_translated(): + tgt_op.setfailargs(op.getfailargs()) + tgt_op.rd_snapshot = op.rd_snapshot + tgt_op.rd_frame_info_list = op.rd_frame_info_list def edge_to(self, to, arg=None, label=None): assert self != to @@ -138,7 +142,7 @@ self.adjacent_list_back = [] def is_guard_early_exit(self): - return self.op.getopnum() == rop.GUARD_NO_EARLY_EXIT + return self.op.getopnum() == rop.GUARD_EARLY_EXIT def loads_from_complex_object(self): return rop._ALWAYS_PURE_LAST <= self.op.getopnum() <= rop._MALLOC_FIRST diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -797,7 +797,7 @@ if op.getdescr() is not None: descr = op.getdescr() assert isinstance(descr, compile.ResumeAtPositionDescr) or \ - isinstance(descr, compile.ResumeAtEarylExitDescr) + isinstance(descr, compile.ResumeAtLoopHeaderDescr) else: descr = compile.invent_fail_descr_for_op(op.getopnum(), self) diff 
--git a/rpython/jit/metainterp/optimizeopt/simplify.py b/rpython/jit/metainterp/optimizeopt/simplify.py --- a/rpython/jit/metainterp/optimizeopt/simplify.py +++ b/rpython/jit/metainterp/optimizeopt/simplify.py @@ -65,8 +65,8 @@ def optimize_GUARD_FUTURE_CONDITION(self, op): pass - def optimize_GUARD_NO_EARLY_EXIT(self, op): - pass + #def optimize_GUARD_EARLY_EXIT(self, op): + # pass dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_', default=OptSimplify.emit_operation) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -186,13 +186,11 @@ it is unrolled 16 times. (it is the smallest type in the trace) """ ops = """ [p0,i0] - guard_no_early_exit() [] raw_load(p0,i0,descr=chararraydescr) jump(p0,i0) """ opt_ops = """ [p0,i0] - guard_no_early_exit() [] {} jump(p0,i0) """.format(('\n' + ' ' *8).join(['raw_load(p0,i0,descr=chararraydescr)'] * 16)) @@ -672,7 +670,7 @@ def test_packset_extend_load_modify_store(self): ops = """ [p0,i0] - guard_no_early_exit() [] + guard_early_exit() [] i1 = int_add(i0, 1) i2 = int_le(i1, 16) guard_true(i2) [p0, i0] @@ -823,7 +821,7 @@ def test_schedule_vector_operation(self, op, descr, stride): ops = """ [p0,p1,p2,i0] # 0 - guard_no_early_exit() [] + guard_early_exit() [] i10 = int_le(i0, 128) # 1, 8, 15, 22 guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23 i2 = getarrayitem_raw(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24 @@ -841,7 +839,6 @@ i11 = int_le(i1, 128) guard_true(i11) [] i12 = int_add(i1, {stride}) - guard_no_early_exit() [] v1 = vec_getarrayitem_raw(p0, i0, 2, descr={descr}arraydescr) v2 = vec_getarrayitem_raw(p1, i0, 2, descr={descr}arraydescr) v3 = {op}(v1,v2,2) @@ -876,7 +873,7 @@ pytest.skip("") ops = """ [i0, i1, i2, i3, i4, i5, i6, i7] - guard_no_early_exit() [] + guard_early_exit() [] i9 = int_mul(i0, 
8) i10 = raw_load(i3, i9, descr=intarraydescr) i11 = int_mul(i0, 8) @@ -895,7 +892,7 @@ def test_vschedule_trace_1(self): ops = """ [i0, i1, i2, i3, i4] - guard_no_early_exit() [] + guard_early_exit() [] i6 = int_mul(i0, 8) i7 = raw_load(i2, i6, descr=intarraydescr) i8 = raw_load(i3, i6, descr=intarraydescr) @@ -915,7 +912,6 @@ i13 = int_add(i11, 1) i18 = int_lt(i13, i1) guard_true(i18) [] - guard_no_early_exit() [] i6 = int_mul(i0, 8) v19 = vec_raw_load(i2, i6, 2, descr=intarraydescr) v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr) @@ -930,7 +926,7 @@ pytest.skip() ops = """ [i0, i1, i2, i3, i4, i5, i6, i7] - guard_no_early_exit() [] + guard_early_exit() [] i8 = raw_load(i3, i0, descr=intarraydescr) i9 = raw_load(i4, i0, descr=intarraydescr) i10 = int_add(i8, i9) @@ -952,7 +948,6 @@ i16 = int_add(i12, 8) i21 = int_lt(i16, i20) guard_true(i21) [] - guard_no_early_exit() [] v22 = vec_raw_load(i3, i0, 2, descr=intarraydescr) v23 = vec_raw_load(i4, i0, 2, descr=intarraydescr) v24 = vec_int_add(v22, v23) diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -1,5 +1,6 @@ import sys import py +from rpython.jit.metainterp.compile import ResumeAtLoopHeaderDescr from rpython.rtyper.lltypesystem import lltype, rffi from rpython.jit.metainterp.history import (ConstInt, VECTOR, BoxVector, TargetToken, JitCellToken) @@ -64,6 +65,8 @@ self.packset = None self.unroll_count = 0 self.smallest_type_bytes = 0 + self.early_exit = None + self.future_condition = None def propagate_all_forward(self): self.clear_newoperations() @@ -95,6 +98,8 @@ self.schedule() def emit_operation(self, op): + if op.getopnum() == rop.GUARD_EARLY_EXIT: + return self._last_emitted_op = op self._newoperations.append(op) @@ -115,19 +120,18 @@ assert label_op.getopnum() == rop.LABEL assert jump_op.is_final() - # XXX 
self.vec_info.track_memory_refs = True - self.emit_unrolled_operation(label_op) - - # TODO use the new optimizer structure (branch of fijal) - #label_op_args = [self.getvalue(box).get_key_box() for box in label_op.getarglist()] - #values = [self.getvalue(box) for box in label_op.getarglist()] + #guard_ee_op = ResOperation(rop.GUARD_EARLY_EXIT, [], None, ResumeAtLoopHeaderDescr()) + #guard_ee_op.rd_snapshot = Snapshot(None, loop.inputargs[:]) + #self.emit_unrolled_operation(guard_ee_op) operations = [] for i in range(1,op_count-1): + op = loop.operations[i].clone() if loop.operations[i].getopnum() == rop.GUARD_FUTURE_CONDITION: - continue - op = loop.operations[i].clone() + pass + if loop.operations[i].getopnum() == rop.GUARD_EARLY_EXIT: + self.future_condition = op operations.append(op) self.emit_unrolled_operation(op) @@ -146,7 +150,9 @@ rename_map[la] = ja # for op in operations: - if op.getopnum() in (rop.GUARD_NO_EARLY_EXIT, rop.GUARD_FUTURE_CONDITION): + if op.getopnum() == rop.GUARD_FUTURE_CONDITION: + continue # do not unroll this operation twice + if op.getopnum() == rop.GUARD_EARLY_EXIT: continue # do not unroll this operation twice copied_op = op.clone() if copied_op.result is not None: @@ -359,9 +365,11 @@ early_exit_idx = 1 label = self.dependency_graph.getnode(label_idx) ee_guard = self.dependency_graph.getnode(early_exit_idx) - if not ee_guard.getopnum() == rop.GUARD_NO_EARLY_EXIT: + if not ee_guard.is_guard_early_exit(): return # cannot relax + #self.early_exit = ee_guard + for guard_node in self.dependency_graph.guards: if guard_node == ee_guard: continue @@ -391,7 +399,7 @@ guard_node.edge_to(ee_guard, label='pullup') label.remove_edge_to(ee_guard) - guard_node.relax_guard_to(ee_guard) + guard_node.relax_guard_to(self.future_condition) def must_unpack_result_to_exec(op, target_op): # TODO either move to resop or util diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py --- a/rpython/jit/metainterp/pyjitpl.py +++ 
b/rpython/jit/metainterp/pyjitpl.py @@ -2133,7 +2133,10 @@ self.resumekey = compile.ResumeFromInterpDescr(original_greenkey) self.history.inputargs = original_boxes[num_green_args:] self.seen_loop_header_for_jdindex = -1 - self.generate_guard(rop.GUARD_NO_EARLY_EXIT) + # can only emit early exit if liveness is present + # TODO think of a better way later + if self.framestack[-1].jitcode.liveness.get(0): + self.generate_guard(rop.GUARD_EARLY_EXIT) try: self.interpret() except SwitchToBlackhole, stb: diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -418,7 +418,7 @@ 'GUARD_NOT_FORCED/0d', # may be called with an exception currently set 'GUARD_NOT_FORCED_2/0d', # same as GUARD_NOT_FORCED, but for finish() 'GUARD_NOT_INVALIDATED/0d', - 'GUARD_NO_EARLY_EXIT/0d', # is removable, may be patched by an optimization + 'GUARD_EARLY_EXIT/0d', 'GUARD_FUTURE_CONDITION/0d', # is removable, may be patched by an optimization '_GUARD_LAST', # ----- end of guard operations ----- diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py --- a/rpython/jit/metainterp/test/test_vectorize.py +++ b/rpython/jit/metainterp/test/test_vectorize.py @@ -12,7 +12,7 @@ from rpython.rlib.rawstorage import (alloc_raw_storage, raw_storage_setitem, free_raw_storage, raw_storage_getitem) -class VectorizeTests(object): +class VectorizeTests: enable_opts = 'all' def meta_interp(self, f, args, policy=None): @@ -115,5 +115,8 @@ if i > 4: self.check_trace_count(1) -class TestLLtype(VectorizeTests, LLJitMixin): +class VectorizeLLtypeTests(VectorizeTests): pass + +class TestLLtype(VectorizeLLtypeTests, LLJitMixin): + pass _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit