Bernd Schmidt <ber...@codesourcery.com> writes: > +/* A for_each_rtx subroutine of record_hard_reg_sets. */ > +static int > +record_hard_reg_uses_1 (rtx *px, void *data) > +{ > + rtx x = *px; > + HARD_REG_SET *pused = (HARD_REG_SET *)data; > + > + if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER) > + { > + int nregs = hard_regno_nregs[REGNO (x)][GET_MODE (x)]; > + while (nregs-- > 0) > + SET_HARD_REG_BIT (*pused, REGNO (x) + nregs); > + }
add_to_hard_reg_set (pused, GET_MODE (x), REGNO (x)); > +/* A subroutine of requires_stack_frame_p, called via for_each_rtx. > + If any change is made, set CHANGED > + to true. */ Strange line break, and comment doesn't match code (no "changed" variable). Probably moot though because: > +/* Return true if INSN requires the stack frame to be set up. > + PROLOGUE_USED contains the hard registers used in the function > + prologue. */ > +static bool > +requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used) > +{ >[...] > + if (for_each_rtx (&PATTERN (insn), frame_required_for_rtx, NULL)) > + return true; > + CLEAR_HARD_REG_SET (hardregs); > + note_stores (PATTERN (insn), record_hard_reg_sets, &hardregs); > + if (hard_reg_set_intersect_p (hardregs, prologue_used)) > + return true; > + AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); > + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) > + if (TEST_HARD_REG_BIT (hardregs, regno) > + && df_regs_ever_live_p (regno)) > + return true; ...I suppose this ought to use DF instead. Does something guarantee that non-local gotos are marked as needing a frame? Why do we need to check specifically for pic_offset_table_rtx? Isn't it handled by the generic "live registers set by the prologue" test? Reason for asking is that pic_offset_table_rtx can be a global value, such as for mips*-elf. > -/* Insert gen_return at the end of block BB. This also means updating > - block_for_insn appropriately. */ > + > +static rtx > +gen_return_pattern (bool simple_p) > +{ > +#ifdef HAVE_simple_return > + return simple_p ? gen_simple_return () : gen_return (); > +#else > + gcc_assert (!simple_p); > + return gen_return (); > +#endif > +} Missing function comment. > + > +/* Insert an appropriate return pattern at the end of block BB. This > + also means updating block_for_insn appropriately. */ > > static void > -emit_return_into_block (basic_block bb) > +emit_return_into_block (bool simple_p, basic_block bb) Pedantic, but the comment should reference SIMPLE_P. > +#ifdef HAVE_simple_return > + /* Try to perform a kind of shrink-wrapping, making sure the > + prologue/epilogue is emitted only around those parts of the > + function that require it. */ > + > + if (flag_shrink_wrap && HAVE_simple_return && !flag_non_call_exceptions > + && HAVE_prologue && !crtl->calls_eh_return) > + { Maybe it should be obvious, but could you add a comment explaining why flag_non_call_exceptions and crtl->calls_eh_return need to be checked explicitly? I can see why we don't want to optimise functions that call eh_return, but I was curious why it wasn't handled by the general insn-level analysis. Would checking prologue_seq != NULL be better than HAVE_prologue? > + HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge; > + rtx p_insn; > + > + VEC(basic_block, heap) *vec; > + basic_block bb; > + bitmap_head bb_antic_flags; > + bitmap_head bb_on_list; > + > + /* Compute the registers set and used in the prologue. */ > + CLEAR_HARD_REG_SET (prologue_clobbered); > + CLEAR_HARD_REG_SET (prologue_used); > + for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn)) > + { > + HARD_REG_SET this_used; > + if (!NONDEBUG_INSN_P (p_insn)) > + continue; > + > + CLEAR_HARD_REG_SET (this_used); > + note_uses (&PATTERN (p_insn), record_hard_reg_uses, > + &this_used); > + AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered); > + IOR_HARD_REG_SET (prologue_used, this_used); > + note_stores (PATTERN (p_insn), record_hard_reg_sets, > + &prologue_clobbered); > + } Should this iterate over split_prologue_seq as well? Could we combine prologue_seq and split_prologue_seq into a single sequence? > + bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack); > + bitmap_initialize (&bb_on_list, &bitmap_default_obstack); > + > + vec = VEC_alloc (basic_block, heap, n_basic_blocks); > + > + FOR_EACH_BB (bb) > + { > + rtx insn; > + FOR_BB_INSNS (bb, insn) > + { > + if (requires_stack_frame_p (insn, prologue_used)) > + { > + bitmap_set_bit (&bb_flags, bb->index); > + VEC_quick_push (basic_block, vec, bb); > + break; > + } > + } Excess { and } in for loop. > + } > + > + /* For every basic block that needs a prologue, mark all blocks > + reachable from it, so as to ensure they are also seen as > + requiring a prologue. */ > + while (!VEC_empty (basic_block, vec)) > + { > + basic_block tmp_bb = VEC_pop (basic_block, vec); > + edge e; > + edge_iterator ei; > + FOR_EACH_EDGE (e, ei, tmp_bb->succs) > + { > + if (e->dest == EXIT_BLOCK_PTR > + || bitmap_bit_p (&bb_flags, e->dest->index)) > + continue; > + bitmap_set_bit (&bb_flags, e->dest->index); > + VEC_quick_push (basic_block, vec, e->dest); > + } Don't know which is the preferred style, but: if (e->dest != EXIT_BLOCK_PTR && bitmap_set_bit (&bb_flags, e->dest->index)) VEC_quick_push (basic_block, vec, e->dest); should be a little more efficient. > + /* Now walk backwards from every block that is marked as needing > + a prologue to compute the bb_antic_flags bitmap. */ > + bitmap_copy (&bb_antic_flags, &bb_flags); > + FOR_EACH_BB (bb) > + { > + edge e; > + edge_iterator ei; > + if (!bitmap_bit_p (&bb_flags, bb->index)) > + continue; > + FOR_EACH_EDGE (e, ei, bb->preds) > + if (!bitmap_bit_p (&bb_antic_flags, e->src->index)) > + { > + VEC_quick_push (basic_block, vec, e->src); > + bitmap_set_bit (&bb_on_list, e->src->index); > + } Here too I think we want: FOR_EACH_EDGE (e, ei, bb->preds) if (!bitmap_bit_p (&bb_antic_flags, e->src->index) && bitmap_set_bit (&bb_on_list, e->src->index)) VEC_quick_push (basic_block, vec, e->src); in this case to avoid pushing the same thing twice. > + bitmap_clear_bit (&bb_on_list, tmp_bb->index); > + FOR_EACH_EDGE (e, ei, tmp_bb->succs) > + { > + if (!bitmap_bit_p (&bb_antic_flags, e->dest->index)) > + { > + all_set = false; > + break; > + } > + } Excess { and } in for loop. > + if (all_set) > + { > + bitmap_set_bit (&bb_antic_flags, tmp_bb->index); > + FOR_EACH_EDGE (e, ei, tmp_bb->preds) > + if (!bitmap_bit_p (&bb_antic_flags, e->src->index)) > + { > + VEC_quick_push (basic_block, vec, e->src); > + bitmap_set_bit (&bb_on_list, e->src->index); > + } > + } Same: FOR_EACH_EDGE (e, ei, tmp_bb->preds) if (!bitmap_bit_p (&bb_antic_flags, e->src->index) && bitmap_set_bit (&bb_on_list, e->src->index)) VEC_quick_push (basic_block, vec, e->src); comment as above. > + /* Test whether the prologue is known to clobber any register > + (other than FP or SP) which are live on the edge. */ > + CLEAR_HARD_REG_SET (prologue_clobbered); > + for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn)) > + if (NONDEBUG_INSN_P (p_insn)) > + note_stores (PATTERN (p_insn), record_hard_reg_sets, > + &prologue_clobbered); > + for (p_insn = split_prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn)) > + if (NONDEBUG_INSN_P (p_insn)) > + note_stores (PATTERN (p_insn), record_hard_reg_sets, > + &prologue_clobbered); > + CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); > + if (frame_pointer_needed) > + CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM); Seems like we should be able to reuse the insn walk from the beginning of the enclosing if statement. > +#ifdef HAVE_simple_return > + simple_p = (entry_edge != orig_entry_edge > + ? !bitmap_bit_p (&bb_flags, bb->index) : false); > +#else > + simple_p = false; > +#endif Why is this bb_flags rather than bb_antic_flags? simple_p = (entry_edge != orig_entry_edge && !bitmap_bit_p (&bb_flags, bb->index)); seems more readable, but I suppose that's personal taste. > + gcc_assert (simple_p); > + new_bb = split_edge (e); > + emit_barrier_after (BB_END (new_bb)); > + emit_return_into_block (simple_p, new_bb); > +#ifdef HAVE_simple_return > + if (simple_p) Check is redundant given the gcc_assert. > + /* If this block has only one successor, it both jumps > + and falls through to the fallthru block, so we can't > + delete the edge. */ > + if (single_succ_p (bb)) > + continue; Seems odd that this could happen in optimised code, but if it did, wouldn't it invalidate the simple_p transformation? Seems like the non-fallthrough edge would use a simple return but the fallthrough one wouldn't. > - start_sequence (); > - emit_note (NOTE_INSN_EPILOGUE_BEG); > - emit_insn (gen_sibcall_epilogue ()); > - seq = get_insns (); > - end_sequence (); > + ep_seq = gen_sibcall_epilogue (); > + if (ep_seq) > + { Why the new null check? Richard