Author: Carl Friedrich Bolz <cfb...@gmx.de>
Branch: bridgeopt-improvements
Changeset: r92102:3ab4221d7876
Date: 2017-08-07 00:09 +0200
http://bitbucket.org/pypy/pypy/changeset/3ab4221d7876/
Log: experimental attempt to reduce the cost of call_loopinvariant in every bridge that calls a method. approach: pass call_loopinvariant results into failargs (a bit everywhere) and then reuse the result in the bridge. diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -2,6 +2,7 @@ optimizer of the bridge attached to a guard. """ from rpython.jit.metainterp import resumecode +from rpython.rlib.objectmodel import we_are_translated # adds the following sections at the end of the resume code: @@ -22,17 +23,22 @@ # (<box1> <index> <descr> <box2>) length times, if getarrayitem_gc(box1, index, descr) == box2 # both boxes should be in the liveboxes # +# <length> +# (<const> <descr> <box1>) length times, if call_loop_invariant(const, descr) == box1 +# the box should be in the liveboxes # ---- # maybe should be delegated to the optimization classes? 
-def tag_box(box, liveboxes_from_env, memo): +def tag_box(box, adder): from rpython.jit.metainterp.history import Const if isinstance(box, Const): - return memo.getconst(box) + return adder.memo.getconst(box) else: - return liveboxes_from_env[box] # has to exist + if box in adder.liveboxes_from_env: + return adder.liveboxes_from_env[box] + return adder.liveboxes[box] # has to exist def decode_box(resumestorage, tagged, liveboxes, cpu): from rpython.jit.metainterp.resume import untag, TAGCONST, TAGINT, TAGBOX @@ -54,10 +60,13 @@ raise AssertionError("unreachable") return box -def serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, liveboxes_from_env, memo): +def serialize_optimizer_knowledge(adder, numb_state, liveboxes): + optimizer = adder.optimizer + liveboxes_from_env = adder.liveboxes_from_env available_boxes = {} for box in liveboxes: - if box is not None and box in liveboxes_from_env: + if box is not None and ( + box in adder.liveboxes_from_env or box in adder.liveboxes): available_boxes[box] = None metainterp_sd = optimizer.metainterp_sd @@ -84,7 +93,6 @@ # heap knowledge: we store triples of known heap fields in non-virtual # structs - # XXX could be extended to arrays if optimizer.optheap: triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes) # can only encode descrs that have a known index into @@ -93,20 +101,32 @@ numb_state.append_int(len(triples_struct)) for box1, descr, box2 in triples_struct: descr_index = descr.descr_index - numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box1, adder)) numb_state.append_int(descr_index) - numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box2, adder)) numb_state.append_int(len(triples_array)) for box1, index, descr, box2 in triples_array: descr_index = descr.descr_index - numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box1, 
adder)) numb_state.append_int(index) numb_state.append_int(descr_index) - numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) + numb_state.append_short(tag_box(box2, adder)) else: numb_state.append_int(0) numb_state.append_int(0) + # loop invariant calls + if optimizer.optrewrite: + triples = optimizer.optrewrite.serialize_optrewrite(available_boxes) + numb_state.append_int(len(triples)) + for const, descr, box in triples: + descr_index = descr.descr_index + numb_state.append_short(tag_box(const, adder)) + numb_state.append_int(descr_index) + numb_state.append_short(tag_box(box, adder)) + else: + numb_state.append_int(0) + def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes): reader = resumecode.Reader(resumestorage.rd_numb) assert len(frontend_boxes) == len(liveboxes) @@ -115,6 +135,8 @@ # skip resume section startcount = reader.next_item() reader.jump(startcount - 1) + extracount = reader.next_item() + reader.jump(extracount) # class knowledge bitfield = 0 @@ -132,8 +154,6 @@ optimizer.make_constant_class(box, cls) # heap knowledge - if not optimizer.optheap: - return length = reader.next_item() result_struct = [] for i in range(length): @@ -155,4 +175,59 @@ tagged = reader.next_item() box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) result_array.append((box1, index, descr, box2)) - optimizer.optheap.deserialize_optheap(result_struct, result_array) + if result_struct or result_array: + optimizer.optheap.deserialize_optheap(result_struct, result_array) + + # loop_invariant knowledge + length = reader.next_item() + results = [] + for i in range(length): + tagged = reader.next_item() + box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + descr_index = reader.next_item() + descr = metainterp_sd.all_descrs[descr_index] + tagged = reader.next_item() + box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu) + results.append((box1, descr, box2)) + if results: + 
optimizer.optrewrite.deserialize_optrewrite(results) + +def consistency_checking_numbering(numb, liveboxes): + if we_are_translated(): + return + # very much a "does not crash" kind of affair + reader = resumecode.Reader(numb) + + # skip resume section + startcount = reader.next_item() + reader.jump(startcount - 1) + extracount = reader.next_item() + reader.jump(extracount) + + mask = 0 + for i, box in enumerate(liveboxes): + if box.type != "r": + continue + if not mask: + bitfield = reader.next_item() + mask = 0b100000 + mask >>= 1 + + length = reader.next_item() + for i in range(length): + tagged = reader.next_item() + descr_index = reader.next_item() + tagged = reader.next_item() + length = reader.next_item() + for i in range(length): + tagged = reader.next_item() + index = reader.next_item() + descr_index = reader.next_item() + tagged = reader.next_item() + + # loop_invariant knowledge + length = reader.next_item() + for i in range(length): + tagged = reader.next_item() + descr_index = reader.next_item() + tagged = reader.next_item() diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -620,7 +620,12 @@ del self.replaces_guard[orig_op] return else: - op = self.emit_guard_operation(op, pendingfields) + extra_liveboxes = [] + # hack, but probably a good one + if len(self.optrewrite.loop_invariant_results) == 1: + extra_liveboxes = [ + self.optrewrite.loop_invariant_results.values()[0][0].get_box_replacement()] + op = self.emit_guard_operation(op, pendingfields, extra_liveboxes) elif op.can_raise(): self.exception_might_have_happened = True opnum = op.opnum @@ -633,7 +638,7 @@ self._really_emitted_operation = op self._newoperations.append(op) - def emit_guard_operation(self, op, pendingfields): + def emit_guard_operation(self, op, pendingfields, extra_liveboxes): guard_op = op # 
self.replace_op_with(op, op.getopnum()) opnum = guard_op.getopnum() # If guard_(no)_exception is merged with another previous guard, then @@ -653,7 +658,8 @@ op = self._copy_resume_data_from(guard_op, self._last_guard_op) else: - op = self.store_final_boxes_in_guard(guard_op, pendingfields) + op = self.store_final_boxes_in_guard( + guard_op, pendingfields, extra_liveboxes) self._last_guard_op = op # for unrolling for farg in op.getfailargs(): @@ -723,7 +729,7 @@ new_descr.copy_all_attributes_from(old_descr) self._newoperations[old_op_pos] = new_op - def store_final_boxes_in_guard(self, op, pendingfields): + def store_final_boxes_in_guard(self, op, pendingfields, extra_liveboxes): assert pendingfields is not None if op.getdescr() is not None: descr = op.getdescr() @@ -736,7 +742,7 @@ modifier = resume.ResumeDataVirtualAdder(self, descr, op, self.trace, self.resumedata_memo) try: - newboxes = modifier.finish(pendingfields) + newboxes = modifier.finish(pendingfields, extra_liveboxes) if (newboxes is not None and len(newboxes) > self.metainterp_sd.options.failargs_limit): raise resume.TagOverflow diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py --- a/rpython/jit/metainterp/optimizeopt/rewrite.py +++ b/rpython/jit/metainterp/optimizeopt/rewrite.py @@ -28,7 +28,7 @@ def _callback(self, op, old_op): key = make_hashable_int(op.getarg(0).getint()) self.opt.loop_invariant_producer[key] = self.opt.optimizer.getlastop() - self.opt.loop_invariant_results[key] = old_op + self.opt.loop_invariant_results[key] = old_op, op.getarg(0), old_op.getdescr() class OptRewrite(Optimization): @@ -568,13 +568,15 @@ arg = op.getarg(0) # 'arg' must be a Const, because residual_call in codewriter # expects a compile-time constant + # XXX the descr is ignored! 
let's hope there are no different + # call_loop_invariant around assert isinstance(arg, Const) key = make_hashable_int(arg.getint()) - resvalue = self.loop_invariant_results.get(key, None) + resvalue, arg0, descr = self.loop_invariant_results.get(key, (None, None, None)) if resvalue is not None: resvalue = self.optimizer.force_op_from_preamble(resvalue) - self.loop_invariant_results[key] = resvalue + self.loop_invariant_results[key] = resvalue, arg0, descr self.make_equal_to(op, resvalue) self.last_emitted_operation = REMOVED return @@ -867,6 +869,18 @@ optimize_SAME_AS_R = optimize_SAME_AS_I optimize_SAME_AS_F = optimize_SAME_AS_I + def serialize_optrewrite(self, available_boxes): + triples = [] + for box, arg0, descr in self.loop_invariant_results.values(): + triples.append((arg0, descr, box.get_box_replacement())) + return triples + + def deserialize_optrewrite(self, triples): + for arg, descr, resvalue in triples: + assert isinstance(arg, Const) + key = make_hashable_int(arg.getint()) + self.loop_invariant_results[key] = resvalue, arg, descr + dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_', default=OptRewrite.emit) optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD') diff --git a/rpython/jit/metainterp/optimizeopt/shortpreamble.py b/rpython/jit/metainterp/optimizeopt/shortpreamble.py --- a/rpython/jit/metainterp/optimizeopt/shortpreamble.py +++ b/rpython/jit/metainterp/optimizeopt/shortpreamble.py @@ -154,8 +154,11 @@ return op = self.res key = make_hashable_int(op.getarg(0).getint()) - optrewrite.loop_invariant_results[key] = PreambleOp(op, preamble_op, - invented_name) + optrewrite.loop_invariant_results[key] = ( + PreambleOp(op, preamble_op, invented_name), + op.getarg(0), + op.getdescr() + ) def add_op_to_short(self, sb): op = self.res diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py --- a/rpython/jit/metainterp/resume.py +++ b/rpython/jit/metainterp/resume.py @@ -412,7 +412,8 @@ _, tagbits = 
untag(tagged) return tagbits == TAGVIRTUAL - def finish(self, pending_setfields=[]): + def finish(self, pending_setfields=[], extra_liveboxes=[]): + from rpython.jit.metainterp.optimizeopt.bridgeopt import consistency_checking_numbering optimizer = self.optimizer # compute the numbering storage = self.storage @@ -458,17 +459,31 @@ info = optimizer.getptrinfo(fieldbox) assert info is not None and info.is_virtual() info.visitor_walk_recursive(fieldbox, self, optimizer) + for box in extra_liveboxes: + box = optimizer.get_box_replacement(box) + self.register_box(box) + info = optimizer.getptrinfo(box) + assert info is None or not info.is_virtual() self._number_virtuals(liveboxes, optimizer, num_virtuals) self._add_pending_fields(optimizer, pending_setfields) numb_state.patch(1, len(liveboxes)) - self._add_optimizer_sections(numb_state, liveboxes, liveboxes_from_env) - storage.rd_numb = numb_state.create_numbering() + self._add_extra_box_section(extra_liveboxes, numb_state) + + self._add_optimizer_sections(numb_state, liveboxes) + rd_numb = numb_state.create_numbering() + consistency_checking_numbering(rd_numb, liveboxes) + storage.rd_numb = rd_numb storage.rd_consts = self.memo.consts return liveboxes[:] + def _add_extra_box_section(self, extra_liveboxes, numb_state): + numb_state.append_int(len(extra_liveboxes)) + for box in extra_liveboxes: + numb_state.append_short(self._gettagged(box.get_box_replacement())) + def _number_virtuals(self, liveboxes, optimizer, num_env_virtuals): from rpython.jit.metainterp.optimizeopt.info import AbstractVirtualPtrInfo @@ -584,11 +599,10 @@ return self.liveboxes_from_env[box] return self.liveboxes[box] - def _add_optimizer_sections(self, numb_state, liveboxes, liveboxes_from_env): + def _add_optimizer_sections(self, numb_state, liveboxes): # add extra information about things the optimizer learned from rpython.jit.metainterp.optimizeopt.bridgeopt import serialize_optimizer_knowledge - serialize_optimizer_knowledge( - self.optimizer, 
numb_state, liveboxes, liveboxes_from_env, self.memo) + serialize_optimizer_knowledge(self, numb_state, liveboxes) class AbstractVirtualInfo(object): kind = REF @@ -1067,6 +1081,7 @@ resumereader.consume_boxes(f.get_current_position_info(), f.registers_i, f.registers_r, f.registers_f) f.handle_rvmprof_enter_on_resume() + resumereader.consume_extra_boxes() return resumereader.liveboxes, virtualizable_boxes, virtualref_boxes @@ -1113,6 +1128,11 @@ virtualref_boxes = self.consume_virtualref_boxes() return virtualizable_boxes, virtualref_boxes + def consume_extra_boxes(self): + extra_boxes_size = self.resumecodereader.next_item() + for i in range(extra_boxes_size): + self.next_ref() # does nothing but read the box! + def allocate_with_vtable(self, descr=None): return self.metainterp.execute_new_with_vtable(descr=descr) diff --git a/rpython/jit/metainterp/resumecode.py b/rpython/jit/metainterp/resumecode.py --- a/rpython/jit/metainterp/resumecode.py +++ b/rpython/jit/metainterp/resumecode.py @@ -18,6 +18,8 @@ until the size of the resume section + [<length> <numb> <numb> ... 
<numb>] more boxes for the optimizer section + # ----- optimization section <more code> further sections according to bridgeopt.py """ diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py --- a/rpython/jit/metainterp/test/test_bridgeopt.py +++ b/rpython/jit/metainterp/test/test_bridgeopt.py @@ -27,6 +27,7 @@ class FakeOptimizer(object): metainterp_sd = None optheap = None + optrewrite = None def __init__(self, dct={}, cpu=None): self.dct = dct @@ -46,6 +47,13 @@ def __init__(self, numb): self.rd_numb = numb +class FakeAdder(object): + def __init__(self, optimizer, liveboxes_from_env, liveboxes, memo): + self.optimizer = optimizer + self.liveboxes_from_env = liveboxes_from_env + self.liveboxes = liveboxes + self.memo = memo + def test_known_classes(): box1 = InputArgRef() box2 = InputArgRef() @@ -57,11 +65,13 @@ numb_state = NumberingState(4) numb_state.append_int(1) # size of resume block + numb_state.append_int(0) # size of extra arg block liveboxes = [InputArgInt(), box2, box1, box3] + adder = FakeAdder(optimizer, {}, {}, None) - serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) + serialize_optimizer_knowledge(adder, numb_state, liveboxes) - assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0] + assert unpack_numbering(numb_state.create_numbering()) == [1, 0, 0b010000, 0, 0, 0] rbox1 = InputArgRef() rbox2 = InputArgRef() @@ -93,11 +103,14 @@ numb_state = NumberingState(1) numb_state.append_int(1) # size of resume block + numb_state.append_int(0) # size of extra arg block liveboxes = [box for (box, _) in boxes_known_classes] - serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None) + adder = FakeAdder(optimizer, {}, {}, None) - assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0) + serialize_optimizer_knowledge(adder, numb_state, liveboxes) + + assert len(numb_state.create_numbering().code) == 5 + 
math.ceil(len(refboxes) / 6.0) dct = {box: cls for box, known_class in boxes_known_classes @@ -140,6 +153,40 @@ self.check_trace_count(3) self.check_resops(guard_class=1) + def Xtest_bridge_guard_class_virtual(self): + myjitdriver = jit.JitDriver(greens=[], reds='auto') + class A(object): + def f(self): + return 1 + class B(A): + def f(self): + return 2 + class Box(object): + def __init__(self, a): + self.a = a + def f(x, y, n): + if x: + a = A() + else: + a = B() + a.x = 0 + box = Box(a) + res = 0 + while y > 0: + myjitdriver.jit_merge_point() + res += box.a.f() + a.x += 1 + if y > n: + res += 1 + res += box.a.f() + y -= 1 + box = Box(box.a) + return res + res = self.meta_interp(f, [6, 32, 16]) + assert res == f(6, 32, 16) + self.check_trace_count(3) + self.check_resops(guard_class=1) + def test_bridge_field_read(self): myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a']) class A(object): @@ -282,3 +329,33 @@ self.check_trace_count(3) self.check_resops(guard_value=1) self.check_resops(getarrayitem_gc_i=5) + + def test_loop_invariant_bridge(self): + myjitdriver = jit.JitDriver(greens = [], reds = ['x', 'res']) + class A(object): + pass + a = A() + a.current_a = A() + a.current_a.x = 12 + @jit.loop_invariant + def f(): + return a.current_a + + def g(x): + res = 0 + while x > 0: + myjitdriver.can_enter_jit(x=x, res=res) + myjitdriver.jit_merge_point(x=x, res=res) + res += jit.promote(f().x) + if x % 5 == 1: + res += 5 + res += jit.promote(f().x) + res += jit.promote(f().x) + x -= 1 + a.current_a = A() + a.current_a.x = 2 + return res + res = self.meta_interp(g, [21]) + assert res == g(21) + self.check_resops(call_r=1) + _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit