Author: Armin Rigo <ar...@tunes.org> Branch: shadowstack-perf-2 Changeset: r84827:95e7ce8adc6f Date: 2016-05-29 22:45 +0200 http://bitbucket.org/pypy/pypy/changeset/95e7ce8adc6f/
Log: Merge add_enter and add_leave_roots_frame into a single function which does hopefully the right thing (including avoiding all gc_enter/gc_leave on fast paths that don't need any gc_save/gc_restore) diff --git a/rpython/memory/gctransform/shadowcolor.py b/rpython/memory/gctransform/shadowcolor.py --- a/rpython/memory/gctransform/shadowcolor.py +++ b/rpython/memory/gctransform/shadowcolor.py @@ -453,7 +453,10 @@ block.operations = newops -def add_leave_roots_frame(graph, regalloc): +def add_enter_leave_roots_frame(graph, regalloc, c_gcdata): + # put 'gc_enter_roots_frame' as late as possible, but before the + # first 'gc_save_root' is reached. + # # put the 'gc_leave_roots_frame' operations as early as possible, # that is, just after the last 'gc_restore_root' reached. This is # done by putting it along a link, such that the previous block @@ -475,138 +478,126 @@ break # done + insert_empty_startblock(graph) entrymap = mkentrymap(graph) - flagged_blocks = set() # blocks with 'gc_restore_root' in them, - # or from which we can reach such a block + + # helpers + + def is_interesting_op(op): + if op.opname == 'gc_restore_root': + return True + if op.opname == 'gc_save_root': + # ignore saves that say "everything is free" + return not (isinstance(op.args[1], Constant) and + isinstance(op.args[1].value, int) and + op.args[1].value == bitmask_all_free) + return False + bitmask_all_free = (1 << regalloc.numcolors) - 1 + + def insert_along_link(link, opname, args, cache): + b2 = link.target + if b2 not in cache: + newblock = Block([v.copy() for v in b2.inputargs]) + newblock.operations.append( + SpaceOperation(opname, args, varoftype(lltype.Void))) + newblock.closeblock(Link(list(newblock.inputargs), b2)) + cache[b2] = newblock + link.target = cache[b2] + + # make a list of blocks with gc_save_root/gc_restore_root in them + interesting_blocks = [] for block in graph.iterblocks(): for op in block.operations: - if op.opname == 'gc_restore_root': - flagged_blocks.add(block) + if is_interesting_op(op): + assert block is not graph.startblock + assert block is not graph.returnblock + interesting_blocks.append(block) break # interrupt this block, go to the next one - links = list(graph.iterlinks()) - links.reverse() - - while True: - prev_length = len(flagged_blocks) - for link in links: - if link.target in flagged_blocks: - flagged_blocks.add(link.prevblock) - if len(flagged_blocks) == prev_length: - break - assert graph.returnblock not in flagged_blocks - assert graph.startblock in flagged_blocks - - extra_blocks = {} - for link in links: - block = link.target - if (link.prevblock in flagged_blocks and - block not in flagged_blocks): - # share the gc_leave_roots_frame if possible - if block not in extra_blocks: - newblock = Block([v.copy() for v in block.inputargs]) - newblock.operations.append( - SpaceOperation('gc_leave_roots_frame', [], - varoftype(lltype.Void))) - newblock.closeblock(Link(list(newblock.inputargs), block)) - extra_blocks[block] = newblock - link.target = extra_blocks[block] - - # check all blocks not in flagged_blocks: they might contain a - # gc_save_root() that writes the bitmask meaning "everything is - # free". Remove such gc_save_root(). - bitmask_all_free = (1 << regalloc.numcolors) - 1 - for block in graph.iterblocks(): - if block in flagged_blocks: - continue - newops = [] - for op in block.operations: - if op.opname == 'gc_save_root': - assert isinstance(op.args[1], Constant) - assert op.args[1].value == bitmask_all_free - else: - newops.append(op) - if len(newops) < len(block.operations): - block.operations = newops - - -def add_enter_roots_frame(graph, regalloc, c_gcdata): - # symmetrical operation from add_leave_roots_frame(): put - # 'gc_enter_roots_frame' as late as possible, but before the - # first 'gc_save_root' and not in any loop. - if regalloc is None: - return - - flagged_blocks = {} # blocks with 'gc_save_root' in them, - # or which can be reached from such a block - bitmask_all_free = (1 << regalloc.numcolors) - 1 - for block in graph.iterblocks(): - for i, op in enumerate(block.operations): - if op.opname == 'gc_save_root': - if (isinstance(op.args[1], Constant) and - isinstance(op.args[1].value, int) and - op.args[1].value == bitmask_all_free): - pass # ignore saves that say "everything is free" - else: - flagged_blocks[block] = i - break # interrupt this block, go to the next one - - pending = flagged_blocks.keys() + # compute the blocks such that 'gc_save_root/gc_restore_root' + # exist anywhere before the start of this block + before_blocks = set() + pending = list(interesting_blocks) + seen = set(pending) while pending: block = pending.pop() for link in block.exits: - if link.target not in flagged_blocks: + before_blocks.add(link.target) + if link.target not in seen: + seen.add(link.target) pending.append(link.target) - flagged_blocks[link.target] = -1 - #assert flagged_blocks[graph.returnblock] == -1, except if the - # returnblock is never reachable at all + assert graph.startblock not in before_blocks + # compute the blocks such that 'gc_save_root/gc_restore_root' + # exist anywhere after the start of this block + after_blocks = set(interesting_blocks) + pending = list(interesting_blocks) + while pending: + block = pending.pop() + for link in entrymap[block]: + if link.prevblock is not None: + if link.prevblock not in after_blocks: + after_blocks.add(link.prevblock) + pending.append(link.prevblock) + assert graph.returnblock not in after_blocks + + # this is the set of blocks such that, at the start of the block, + # we're "in frame", i.e. there are 'gc_save_root/gc_restore_root' + # both before and after the start of the block. + inside_blocks = before_blocks & after_blocks + inside_or_interesting_blocks = set(interesting_blocks) | inside_blocks + + # if a block contains gc_save_root/gc_restore_root but is not + # an "inside_block", then add gc_enter_roots_frame where needed c_num = Constant(regalloc.numcolors, lltype.Signed) - extra_blocks = {} - for link in list(graph.iterlinks()): - block = link.target - if (link.prevblock not in flagged_blocks and - block in flagged_blocks and - flagged_blocks[block] == -1): - # share the gc_enter_roots_frame if possible - if block not in extra_blocks: - newblock = Block([v.copy() for v in block.inputargs]) - newblock.operations.append( - SpaceOperation('gc_enter_roots_frame', [c_gcdata, c_num], - varoftype(lltype.Void))) - newblock.closeblock(Link(list(newblock.inputargs), block)) - extra_blocks[block] = newblock - link.target = extra_blocks[block] - - for block, i in flagged_blocks.items(): - if i >= 0: + for block in interesting_blocks: + if block not in inside_blocks: + i = 0 + while not is_interesting_op(block.operations[i]): + i += 1 block.operations.insert(i, SpaceOperation('gc_enter_roots_frame', [c_gcdata, c_num], varoftype(lltype.Void))) - # check all blocks not in flagged_blocks, or before the - # gc_enter_roots_frame: they might contain a gc_save_root() that writes - # the bitmask meaning "everything is free". Remove such gc_save_root(). - bitmask_all_free = (1 << regalloc.numcolors) - 1 + # If a link goes from a "non-inside, non-interesting block" + # straight to an "inside_block", insert a gc_enter_roots_frame + # along the link. Similarly, if a block is a "inside-or- + # interesting_block" and exits with a link going to a + # "non-inside_block", then insert a gc_leave_roots_frame along the + # link. + cache1 = {} + cache2 = {} + for block in list(graph.iterblocks()): + if block not in inside_or_interesting_blocks: + for link in block.exits: + if link.target in inside_blocks: + insert_along_link(link, 'gc_enter_roots_frame', + [c_gcdata, c_num], cache1) + else: + for link in block.exits: + if link.target not in inside_blocks: + insert_along_link(link, 'gc_leave_roots_frame', + [], cache2) + + # check all blocks not in "inside_block": they might contain a + # gc_save_root() that writes the bitmask meaning "everything is + # free". Look only before gc_enter_roots_frame, if there is one + # in that block. Remove these out-of-frame gc_save_root(). for block in graph.iterblocks(): - # 'operations-up-to-limit' are the operations that occur before - # gc_enter_roots_frame. If flagged_blocks contains -1, then none - # are; if flagged_blocks does not contain block, then all are. - limit = flagged_blocks.get(block, len(block.operations)) - if limit < 0: - continue - newops = [] - for op in block.operations[:limit]: - if op.opname == 'gc_save_root': - assert isinstance(op.args[1], Constant) - assert op.args[1].value == bitmask_all_free - else: - newops.append(op) - if len(newops) < limit: - block.operations = newops + block.operations[limit:] + if block not in inside_blocks: + newops = [] + for i, op in enumerate(block.operations): + if op.opname == 'gc_enter_roots_frame': + newops.extend(block.operations[i:]) + break + if op.opname == 'gc_save_root' and not is_interesting_op(op): + pass # don't add in newops + else: + newops.append(op) + if len(newops) < len(block.operations): + block.operations = newops - join_blocks(graph) # for the extra new blocks made in this function as - # well as in earlier functions + join_blocks(graph) # for the extra new blocks made in this function class GCBitmaskTooLong(Exception): @@ -686,7 +677,7 @@ locsaved[v] = frozenset() elif op.opname == 'gc_leave_roots_frame': if not currently_in_frame: - raise PostProcessCheckError(graph, block, op,'double leave') + raise PostProcessCheckError(graph, block, op, 'not entered') currently_in_frame = False elif is_trivial_rewrite(op) and currently_in_frame: locsaved[op.result] = locsaved[op.args[0]] @@ -731,8 +722,7 @@ expand_push_roots(graph, regalloc) move_pushes_earlier(graph, regalloc) expand_pop_roots(graph, regalloc) - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, c_gcdata) + add_enter_leave_roots_frame(graph, regalloc, c_gcdata) checkgraph(graph) postprocess_double_check(graph) return (regalloc is not None) diff --git a/rpython/memory/gctransform/test/test_shadowcolor.py b/rpython/memory/gctransform/test/test_shadowcolor.py --- a/rpython/memory/gctransform/test/test_shadowcolor.py +++ b/rpython/memory/gctransform/test/test_shadowcolor.py @@ -326,8 +326,7 @@ expand_push_roots(graph, regalloc) move_pushes_earlier(graph, regalloc) expand_pop_roots(graph, regalloc) - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) assert graphmodel.summary(graph) == { 'int_mul': 1, 'gc_enter_roots_frame': 1, @@ -370,8 +369,7 @@ 'int_sub': 1, 'direct_call': 2, } - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) postprocess_double_check(graph) def test_remove_intrablock_push_roots(): @@ -426,8 +424,7 @@ 'int_sub': 1, 'direct_call': 2, } - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) postprocess_double_check(graph) def test_move_pushes_earlier_rename_2(): @@ -458,8 +455,7 @@ 'int_sub': 1, 'direct_call': 2, } - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) postprocess_double_check(graph) def test_move_pushes_earlier_rename_3(): @@ -492,8 +488,7 @@ 'int_sub': 2, 'direct_call': 2, } - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) postprocess_double_check(graph) def test_move_pushes_earlier_rename_4(): @@ -534,8 +529,7 @@ 'int_sub': 3, 'direct_call': 2, } - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) postprocess_double_check(graph) def test_add_leave_roots_frame_1(): @@ -562,8 +556,7 @@ expand_push_roots(graph, regalloc) move_pushes_earlier(graph, regalloc) expand_pop_roots(graph, regalloc) - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) assert len(graph.startblock.exits) == 2 for link in graph.startblock.exits: assert [op.opname for op in link.target.operations] == [ @@ -595,8 +588,7 @@ expand_push_roots(graph, regalloc) move_pushes_earlier(graph, regalloc) expand_pop_roots(graph, regalloc) - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) assert [op.opname for op in graph.startblock.operations] == [ 'gc_enter_roots_frame', 'gc_save_root', @@ -676,8 +668,7 @@ expand_push_roots(graph, regalloc) move_pushes_earlier(graph, regalloc) expand_pop_roots(graph, regalloc) - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) postprocess_double_check(graph) def test_add_enter_roots_frame_remove_empty(): @@ -708,8 +699,7 @@ expand_push_roots(graph, regalloc) move_pushes_earlier(graph, regalloc) expand_pop_roots(graph, regalloc) - add_leave_roots_frame(graph, regalloc) - add_enter_roots_frame(graph, regalloc, Constant('fake gcdata')) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) assert [op.opname for op in graph.startblock.operations] == [ "direct_call", "gc_enter_roots_frame", @@ -722,6 +712,35 @@ ] postprocess_double_check(graph) +def test_add_enter_roots_frame_avoided(): + def g(x): + return x + def f(x, n): + if n > 100: + llop.gc_push_roots(lltype.Void, x) + g(x) + llop.gc_pop_roots(lltype.Void, x) + return x + + graph = make_graph(f, [llmemory.GCREF, int]) + regalloc = allocate_registers(graph) + expand_push_roots(graph, regalloc) + move_pushes_earlier(graph, regalloc) + expand_pop_roots(graph, regalloc) + add_enter_leave_roots_frame(graph, regalloc, Constant('fake gcdata')) + assert [op.opname for op in graph.startblock.operations] == [ + 'int_gt', 'same_as'] + [fastpath, slowpath] = graph.startblock.exits + assert fastpath.target is graph.returnblock + block2 = slowpath.target + assert [op.opname for op in block2.operations] == [ + 'gc_enter_roots_frame', + 'gc_save_root', + 'direct_call', + 'gc_restore_root', + 'gc_leave_roots_frame'] + postprocess_double_check(graph) + def test_fix_graph_after_inlining(): # the graph of f looks like it inlined another graph, which itself # would be "if x > 100: foobar()". The foobar() function is supposed diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py --- a/rpython/translator/c/funcgen.py +++ b/rpython/translator/c/funcgen.py @@ -172,11 +172,19 @@ def cfunction_body(self): graph = self.graph - if (len(graph.startblock.operations) >= 1 and - graph.startblock.operations[0].opname == 'gc_enter_roots_frame'): - for line in self.gcpolicy.enter_roots_frame(self, - graph.startblock.operations[0]): + + # ----- for gc_enter_roots_frame + _seen = set() + for block in graph.iterblocks(): + for op in block.operations: + if op.opname == 'gc_enter_roots_frame': + _seen.add(tuple(op.args)) + if _seen: + assert len(_seen) == 1, ( + "multiple different gc_enter_roots_frame in %r" % (graph,)) + for line in self.gcpolicy.enter_roots_frame(self, list(_seen)[0]): yield line + # ----- done yield 'goto block0;' # to avoid a warning "this label is not used" diff --git a/rpython/translator/c/gc.py b/rpython/translator/c/gc.py --- a/rpython/translator/c/gc.py +++ b/rpython/translator/c/gc.py @@ -397,9 +397,8 @@ from rpython.memory.gctransform import shadowstack return shadowstack.ShadowStackFrameworkGCTransformer(translator) - def enter_roots_frame(self, funcgen, op): - numcolors = op.args[1].value - c_gcdata = op.args[0] + def enter_roots_frame(self, funcgen, (c_gcdata, c_numcolors)): + numcolors = c_numcolors.value # XXX hard-code the field name here gcpol_ss = '%s->gcd_inst_root_stack_top' % funcgen.expr(c_gcdata) # @@ -415,8 +414,6 @@ raise Exception("gc_pop_roots should be removed by postprocess_graph") def OP_GC_ENTER_ROOTS_FRAME(self, funcgen, op): - if op is not funcgen.graph.startblock.operations[0]: - raise Exception("gc_enter_roots_frame as a non-initial instruction") return '%s = (void *)(ss+1);' % funcgen.gcpol_ss def OP_GC_LEAVE_ROOTS_FRAME(self, funcgen, op): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit