Andi Kleen <a...@linux.intel.com> writes: Ping!^2
> Andi Kleen <a...@firstfloor.org> writes: > > Ping! > >> From: Andi Kleen <a...@linux.intel.com> >> >> When instrumenting programs using __fentry__ it is often useful >> to instrument the function return too. Traditionally this >> has been done by patching the return address on the stack >> frame on entry. However this is fairly complicated (trace >> function has to emulate a stack) and also slow because >> it causes a branch misprediction on every return. >> >> Add an option to generate call or nop instrumentation for >> every return instead, including patch sections. >> >> This will increase the program size slightly, but can be a >> lot faster and simpler. >> >> This version only instruments true returns, not sibling >> calls or tail recursion. This matches the semantics of the >> original stack. >> >> gcc/: >> >> 2018-11-04 Andi Kleen <a...@linux.intel.com> >> >> * config/i386/i386-opts.h (enum instrument_return): Add. >> * config/i386/i386.c (output_return_instrumentation): Add. >> (ix86_output_function_return): Call output_return_instrumentation. >> (ix86_output_call_insn): Call output_return_instrumentation. >> * config/i386/i386.opt: Add -minstrument-return=. >> * doc/invoke.texi (-minstrument-return): Document. >> >> gcc/testsuite/: >> >> 2018-11-04 Andi Kleen <a...@linux.intel.com> >> >> * gcc.target/i386/returninst1.c: New test. >> * gcc.target/i386/returninst2.c: New test. >> * gcc.target/i386/returninst3.c: New test. >> --- >> gcc/config/i386/i386-opts.h | 6 ++++ >> gcc/config/i386/i386.c | 36 +++++++++++++++++++++ >> gcc/config/i386/i386.opt | 21 ++++++++++++ >> gcc/doc/invoke.texi | 14 ++++++++ >> gcc/testsuite/gcc.target/i386/returninst1.c | 14 ++++++++ >> gcc/testsuite/gcc.target/i386/returninst2.c | 21 ++++++++++++ >> gcc/testsuite/gcc.target/i386/returninst3.c | 9 ++++++ >> 7 files changed, 121 insertions(+) >> create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c >> create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c >> create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c >> >> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h >> index 46366cbfa72..35e9413100e 100644 >> --- a/gcc/config/i386/i386-opts.h >> +++ b/gcc/config/i386/i386-opts.h >> @@ -119,4 +119,10 @@ enum indirect_branch { >> indirect_branch_thunk_extern >> }; >> >> +enum instrument_return { >> + instrument_return_none = 0, >> + instrument_return_call, >> + instrument_return_nop5 >> +}; >> + >> #endif >> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >> index f9ef0b4445b..f7cd94a8139 100644 >> --- a/gcc/config/i386/i386.c >> +++ b/gcc/config/i386/i386.c >> @@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op) >> return "%!jmp\t%A0"; >> } >> >> +/* Output return instrumentation for current function if needed. */ >> + >> +static void >> +output_return_instrumentation (void) >> +{ >> + if (ix86_instrument_return != instrument_return_none >> + && flag_fentry >> + && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) >> + { >> + if (ix86_flag_record_return) >> + fprintf (asm_out_file, "1:\n"); >> + switch (ix86_instrument_return) >> + { >> + case instrument_return_call: >> + fprintf (asm_out_file, "\tcall\t__return__\n"); >> + break; >> + case instrument_return_nop5: >> + /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ >> + fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); >> + break; >> + case instrument_return_none: >> + break; >> + } >> + >> + if (ix86_flag_record_return) >> + { >> + fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); >> + fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); >> + fprintf (asm_out_file, "\t.previous\n"); >> + } >> + } >> +} >> + >> /* Output function return. CALL_OP is the jump target. Add a REP >> prefix to RET if LONG_P is true and function return is kept. */ >> >> const char * >> ix86_output_function_return (bool long_p) >> { >> + output_return_instrumentation (); >> + >> if (cfun->machine->function_return_type != indirect_branch_keep) >> { >> char thunk_name[32]; >> @@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op) >> >> if (SIBLING_CALL_P (insn)) >> { >> + output_return_instrumentation (); >> if (direct_p) >> { >> if (ix86_nopic_noplt_attribute_p (call_op)) >> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt >> index e7fbf9b6f99..5925b75244f 100644 >> --- a/gcc/config/i386/i386.opt >> +++ b/gcc/config/i386/i386.opt >> @@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code >> generation. >> mcldemote >> Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save >> Support CLDEMOTE built-in functions and code generation. >> + >> +minstrument-return= >> +Target Report RejectNegative Joined Enum(instrument_return) >> Var(ix86_instrument_return) Init(instrument_return_none) >> +Instrument function exit in instrumented functions with __fentry__. >> + >> +Enum >> +Name(instrument_return) Type(enum instrument_return) >> +Known choices for return instrumentation with -minstrument-return= >> + >> +EnumValue >> +Enum(instrument_return) String(none) Value(instrument_return_none) >> + >> +EnumValue >> +Enum(instrument_return) String(call) Value(instrument_return_call) >> + >> +EnumValue >> +Enum(instrument_return) String(nop5) Value(instrument_return_nop5) >> + >> +mrecord-return >> +Target Report Var(ix86_flag_record_return) Init(0) >> +Generate a __return_loc section pointing to all return instrumentation code. >> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi >> index 1743c64582e..939be3e251b 100644 >> --- a/gcc/doc/invoke.texi >> +++ b/gcc/doc/invoke.texi >> @@ -1301,6 +1301,7 @@ See RS/6000 and PowerPC Options. >> -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol >> -m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol >> -msse2avx -mfentry -mrecord-mcount -mnop-mcount -m8bit-idiv @gol >> +-minstrument-return=@var{type} @gol >> -mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol >> -malign-data=@var{type} -mstack-protector-guard=@var{guard} @gol >> -mstack-protector-guard-reg=@var{reg} @gol >> @@ -28442,6 +28443,19 @@ the profiling functions as NOPs. This is useful >> when they >> should be patched in later dynamically. This is likely only >> useful together with @option{-mrecord-mcount}. >> >> +@item -minstrument-return=@var{type} >> +@opindex minstrument-return >> +Instrument function exit in -pg -mfentry instrumented functions with >> +call to specified function. This only instruments true returns ending >> +with ret, but not sibling calls ending with jump. Valid types >> +are @var{none} to not instrument, @var{call} to generate a call to >> __return__, >> +or @var{nop5} to generate a 5 byte nop. >> + >> +@item -mrecord-return >> +@itemx -mno-record-return >> +@opindex mrecord-return >> +Generate a __return_loc section pointing to all return instrumentation code. >> + >> @item -mskip-rax-setup >> @itemx -mno-skip-rax-setup >> @opindex mskip-rax-setup >> diff --git a/gcc/testsuite/gcc.target/i386/returninst1.c >> b/gcc/testsuite/gcc.target/i386/returninst1.c >> new file mode 100644 >> index 00000000000..f970e75a774 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/i386/returninst1.c >> @@ -0,0 +1,14 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-pg -mfentry -minstrument-return=call -mrecord-return" } */ >> +/* { dg-final { scan-assembler "call.*__return__" } } */ >> +/* { dg-final { scan-assembler "section.*return_loc" } } */ >> + >> +int func(int a) >> +{ >> + return a+1; >> +} >> + >> +int func2(int a) >> +{ >> + return a+1; >> +} >> diff --git a/gcc/testsuite/gcc.target/i386/returninst2.c >> b/gcc/testsuite/gcc.target/i386/returninst2.c >> new file mode 100644 >> index 00000000000..716b38556dd >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/i386/returninst2.c >> @@ -0,0 +1,21 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-pg -mfentry -minstrument-return=nop5 -mrecord-return" } */ >> +/* { dg-final { scan-assembler-times "0x0f, 0x1f, 0x44, 0x00, 0x00" 3 } } */ >> +/* { dg-final { scan-assembler "section.*return_loc" } } */ >> + >> +int func(int a) >> +{ >> + return a+1; >> +} >> + >> +int func2(int a) >> +{ >> + return a+1; >> +} >> + >> +extern void func4(int); >> + >> +int func3(int a) >> +{ >> + func4(a + 1); >> +} >> diff --git a/gcc/testsuite/gcc.target/i386/returninst3.c >> b/gcc/testsuite/gcc.target/i386/returninst3.c >> new file mode 100644 >> index 00000000000..5bbc60e8bd4 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/i386/returninst3.c >> @@ -0,0 +1,9 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-pg -mfentry -minstrument-return=call" } */ >> +/* { dg-final { scan-assembler-not "call.*__return__" } } */ >> + >> +__attribute__((no_instrument_function)) >> +int func(int a) >> +{ >> + return a+1; >> +}