Agree. Let't stop at first insn after entry block notes. As for the test it looks like mcount is general i?86 name.
Bootstrap and make check are in progress. 2014-10-28 Evgeny Stupachenko <evstu...@gmail.com> gcc/testsuite PR target/63534 * gcc.target/i386/mcount_pic.c: New. gcc/ PR target/63534 * config/i386/i386.c (ix86_init_pic_reg): Emit SET_GOT to REAL_PIC_OFFSET_TABLE_REGNUM for mcount profiling. (ix86_save_reg): Save REAL_PIC_OFFSET_TABLE_REGNUM when profiling using mcount in 32bit PIC mode. (ix86_elim_entry_set_got): New. (ix86_expand_prologue): For the mcount profiling emit new SET_GOT in PROLOGUE, delete initial if possible. 2014-10-22 Evgeny Stupachenko <evstu...@gmail.com> PR rtl-optimization/63618 * gcc.target/i386/pr63618.c: New. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6235c4f..a38bc33 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -6190,8 +6190,15 @@ ix86_init_pic_reg (void) } else { - rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx)); + /* If there is future mcount call in the function it is more profitable + to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */ + rtx reg = crtl->profile + ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) + : pic_offset_table_rtx; + rtx insn = emit_insn (gen_set_got (reg)); RTX_FRAME_RELATED_P (insn) = 1; + if (crtl->profile) + emit_move_insn (pic_offset_table_rtx, reg); add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); } @@ -9471,15 +9478,23 @@ ix86_select_alt_pic_regnum (void) static bool ix86_save_reg (unsigned int regno, bool maybe_eh_return) { - if (pic_offset_table_rtx - && !ix86_use_pseudo_pic_reg () - && regno == REAL_PIC_OFFSET_TABLE_REGNUM - && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) - || crtl->profile - || crtl->calls_eh_return - || crtl->uses_const_pool - || cfun->has_nonlocal_label)) - return ix86_select_alt_pic_regnum () == INVALID_REGNUM; + if (regno == REAL_PIC_OFFSET_TABLE_REGNUM + && pic_offset_table_rtx) + { + if (ix86_use_pseudo_pic_reg ()) + { + /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to + _mcount in prologue. */ + if (!TARGET_64BIT && flag_pic && crtl->profile) + return true; + } + else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) + || crtl->profile + || crtl->calls_eh_return + || crtl->uses_const_pool + || cfun->has_nonlocal_label) + return ix86_select_alt_pic_regnum () == INVALID_REGNUM; + } if (crtl->calls_eh_return && maybe_eh_return) { @@ -10818,6 +10833,30 @@ ix86_finalize_stack_realign_flags (void) crtl->stack_realign_finalized = true; } +/* Delete SET_GOT right after entry block if it is allocated to reg. */ + +static void +ix86_elim_entry_set_got (rtx reg) +{ + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; + rtx_insn *c_insn; + FOR_BB_INSNS (bb, c_insn) + if (GET_CODE (c_insn) != NOTE) + break; + if (NONJUMP_INSN_P (c_insn)) + { + rtx pat = PATTERN (c_insn); + if (GET_CODE (pat) == PARALLEL) + { + rtx vec = XVECEXP (pat, 0, 0); + if (GET_CODE (vec) == SET + && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT + && REGNO (XEXP (vec, 0)) == REGNO (reg)) + delete_insn (c_insn); + } + } +} + /* Expand the prologue into a bunch of separate insns. */ void @@ -11271,6 +11310,20 @@ ix86_expand_prologue (void) if (!sse_registers_saved) ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); + /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT + in PROLOGUE. */ + if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) + { + rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); + insn = emit_insn (gen_set_got (pic)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); + emit_insn (gen_prologue_use (pic)); + /* Deleting already emmitted SET_GOT if exist and allocated to + REAL_PIC_OFFSET_TABLE_REGNUM. */ + ix86_elim_entry_set_got (pic); + } + if (crtl->drap_reg && !crtl->stack_realign_needed) { /* vDRAP is setup but after reload it turns out stack realign diff --git a/gcc/testsuite/gcc.target/i386/mcount_pic.c b/gcc/testsuite/gcc.target/i386/mcount_pic.c new file mode 100644 index 0000000..08ecd07 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mcount_pic.c @@ -0,0 +1,13 @@ +/* Test check correct mcount generation. */ +/* { dg-do run } */ +/* { dg-require-effective-target ia32 } */ +/* { dg-options "-O2 -fpic -p -save-temps" } */ + +int main () +{ + return 0; +} + +/* { dg-final { scan-assembler "mcount" } } */ +/* { dg-final { scan-assembler "get_pc_thunk" } } */ +/* { dg-final { cleanup-saved-temps } } */ On Tue, Oct 28, 2014 at 4:36 PM, Jakub Jelinek <ja...@redhat.com> wrote: > On Tue, Oct 28, 2014 at 04:10:12PM +0300, Evgeny Stupachenko wrote: >> Thank you, Jakub. >> >> The following patch passed bootstrap, gcc make check and spec2000 with >> "-p -m32 -fPIC". >> Is it ok? >> >> ChangeLog: >> >> 2014-10-28 Evgeny Stupachenko <evstu...@gmail.com> >> >> gcc/testsuite >> * gcc.target/i386/mcount_pic.c: New. >> >> gcc/ > > Please mention > PR target/63534 > in the ChangeLog entry. > >> @@ -10818,6 +10833,36 @@ ix86_finalize_stack_realign_flags (void) >> crtl->stack_realign_finalized = true; >> } >> >> +/* Delete first SET_GOT allocated to reg. */ >> + >> +static void >> +ix86_elim_set_got (rtx reg) >> +{ >> + basic_block bb; >> + FOR_EACH_BB_FN (bb, cfun) >> + { >> + rtx_insn *c_insn; >> + FOR_BB_INSNS (bb, c_insn) >> + { >> + if (GET_CODE (c_insn) == INSN) >> + { >> + rtx pat = PATTERN (c_insn); >> + if (GET_CODE (pat) == PARALLEL) >> + { >> + rtx vec = XVECEXP (pat, 0, 0); >> + if (GET_CODE (vec) == SET >> + && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT >> + && REGNO (XEXP (vec, 0)) == REGNO (reg)) >> + { >> + delete_insn (c_insn); >> + return; >> + } >> + } >> + } >> + } >> + } >> +} >> + > > This is unsafe. What I meant is to just remove set_got insn > that is at the beginning of the first basic block (successor of > entry bb), perhaps after some notes. You can perhaps generalize > that a little bit and look at other insns in the first bb, as long > as no NONDEBUG_INSN_P insn preceeding satisfy > modified_in_p (reg, c_insn) or reg_referenced_p (reg, PATTERN (c_insn)) > or so. But certainly stop at the end of the first bb or at any > real insn that might clobber/set %ebx or use it. > > Also, instead of GET_CODE (c_insn) == INSN please use > NONJUMP_INSN_P (c_insn). > >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/i386/mcount_pic.c >> @@ -0,0 +1,12 @@ >> +/* Test check correct mcount generation. */ >> +/* { dg-do run } */ >> +/* { dg-require-effective-target ia32 } */ >> +/* { dg-options "-O2 -fpic -p -save-temps" } */ >> + >> +int main() >> +{ >> + return 0; >> +} >> + >> +/* { dg-final { scan-assembler "mcount" } } */ >> +/* { dg-final { scan-assembler "get_pc_thunk" } } */ > > Missing cleanup-save-temps. Is _mcount the name of the profiling routine > on all i?86 targets? > > Jakub