Andi Kleen <a...@firstfloor.org> writes: Ping!
> From: Andi Kleen <a...@linux.intel.com> > > The Linux kernel dynamically patches __fentry__ calls in and > out at runtime. This allows using function tracing for debugging > in production kernels without (significant) performance penalty. > > For this it needs a table pointing to each __fentry__ call. > > The way it is currently implemented is that a special > perl script scans the object file, generates the table in a special > section. When the kernel boots up it nops the calls, and > then later patches in the calls again as needed. > > The recordmcount.pl script in the kernel works, but it seems > cleaner and faster to support the code generation of the patch table > directly in gcc. > > This also allows nopping the calls directly at code generation > time, which allows skipping a patching step at kernel boot. > I also expect that a patchable production tracing facility is also useful > for other applications. > > For example it could be used in ftracer > (https://github.com/andikleen/ftracer) > > Having a nop area at the beginning of each function can also be > useful for other things. For example it can be used to patch > functions at runtime to point to different functions, to do > binary updates without restarting the program (like ksplice or > similar) > > This patch implements two new options for the i386 target: > > -mrecord-mcount > Generate a __mcount_loc section entry for each __fentry__ or mcount > call. The section is compatible with the kernel convention > and the data is put into a section loaded at runtime. > > -mnop-mcount > Generate the mcount/__fentry__ call as a 5 byte nop that can be > patched in later. The nop is generated as a single instruction, > as the Linux kernel run time patching relies on this. > > Limitations: > - I didn't implement -mnop-mcount for -fPIC. This > would need a good single instruction 6 byte NOP and it seems a > bit pointless, as the patching would prevent text sharing. 
> - I didn't implement nopping for targets that pass a variable > to mcount. > - The facility could be useful on other architectures too. Currently > the mcount code is target specific, so I made it an i386 option. > > Passes bootstrap and testing on x86_64-linux. > > Cc: rost...@goodmis.org > > gcc/: > > 2014-09-01 Andi Kleen <a...@linux.intel.com> > > * config/i386/i386.c (x86_print_call_or_nop): New function. > (x86_function_profiler): Support -mnop-mcount and > -mrecord-mcount. > * config/i386/i386.opt (-mnop-mcount, -mrecord-mcount): Add > * doc/invoke.texi: Document -mnop-mcount, -mrecord-mcount > * testsuite/gcc/gcc.target/i386/nop-mcount.c: New file. > * testsuite/gcc/gcc.target/i386/record-mcount.c: New file. > --- > gcc/config/i386/i386.c | 34 > +++++++++++++++++++++++---- > gcc/config/i386/i386.opt | 9 +++++++ > gcc/doc/invoke.texi | 17 +++++++++++++- > gcc/testsuite/gcc.target/i386/nop-mcount.c | 24 +++++++++++++++++++ > gcc/testsuite/gcc.target/i386/record-mcount.c | 24 +++++++++++++++++++ > 5 files changed, 102 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/nop-mcount.c > create mode 100644 gcc/testsuite/gcc.target/i386/record-mcount.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 61b33782..a651aa1 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -3974,6 +3974,13 @@ ix86_option_override_internal (bool main_args_p, > } > } > > +#ifndef NO_PROFILE_COUNTERS > + if (flag_nop_mcount) > + error ("-mnop-mcount is not compatible with this target"); > +#endif > + if (flag_nop_mcount && flag_pic) > + error ("-mnop-mcount is not implemented for -fPIC"); > + > /* Accept -msseregparm only if at least SSE support is enabled. */ > if (TARGET_SSEREGPARM_P (opts->x_target_flags) > && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) > @@ -39042,6 +39049,17 @@ x86_field_alignment (tree field, int computed) > return computed; > } > > +/* Print call to TARGET to FILE. 
*/ > + > +static void > +x86_print_call_or_nop (FILE *file, const char *target) > +{ > + if (flag_nop_mcount) > + fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */ > + else > + fprintf (file, "1:\tcall\t%s\n", target); > +} > + > /* Output assembler code to FILE to increment profiler label # LABELNO > for profiling a function entry. */ > void > @@ -39049,7 +39067,6 @@ x86_function_profiler (FILE *file, int labelno > ATTRIBUTE_UNUSED) > { > const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE > : MCOUNT_NAME); > - > if (TARGET_64BIT) > { > #ifndef NO_PROFILE_COUNTERS > @@ -39057,9 +39074,9 @@ x86_function_profiler (FILE *file, int labelno > ATTRIBUTE_UNUSED) > #endif > > if (!TARGET_PECOFF && flag_pic) > - fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); > + fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); > else > - fprintf (file, "\tcall\t%s\n", mcount_name); > + x86_print_call_or_nop (file, mcount_name); > } > else if (flag_pic) > { > @@ -39067,7 +39084,7 @@ x86_function_profiler (FILE *file, int labelno > ATTRIBUTE_UNUSED) > fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER > "\n", > LPREFIX, labelno); > #endif > - fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name); > + fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); > } > else > { > @@ -39075,7 +39092,14 @@ x86_function_profiler (FILE *file, int labelno > ATTRIBUTE_UNUSED) > fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", > LPREFIX, labelno); > #endif > - fprintf (file, "\tcall\t%s\n", mcount_name); > + x86_print_call_or_nop (file, mcount_name); > + } > + > + if (flag_record_mcount) > + { > + fprintf (file, "\t.section __mcount_loc, \"r\"\n"); > + fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? 
"quad" : "long"); > + fprintf (file, "\t.previous\n"); > } > } > > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index 9208b76..acf6b37 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -789,6 +789,15 @@ mfentry > Target Report Var(flag_fentry) Init(-1) > Emit profiling counter call at function entry before prologue. > > +mrecord-mcount > +Target Report Var(flag_record_mcount) Init(0) > +Generate __mcount_loc section with all mcount or __fentry__ calls. > + > +mnop-mcount > +Target Report Var(flag_nop_mcount) Init(0) > +Generate mcount/__fentry__ calls as nops. To activate they need to be > +patched in. > + > m8bit-idiv > Target Report Mask(USE_8BIT_IDIV) Save > Expand 32bit/64bit integer divide into 8bit unsigned integer divide with > run-time check > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index d15d4a9..43fd6b7 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -691,7 +691,7 @@ Objective-C and Objective-C++ Dialects}. > -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol > -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol > -m32 -m64 -mx32 -m16 -mlarge-data-threshold=@var{num} @gol > --msse2avx -mfentry -m8bit-idiv @gol > +-msse2avx -mfentry -mrecord-mcount -mnop-mcount -m8bit-idiv @gol > -mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol > -mstack-protector-guard=@var{guard}} > > @@ -15954,6 +15954,21 @@ counter call before the prologue. > Note: On x86 architectures the attribute @code{ms_hook_prologue} > isn't possible at the moment for @option{-mfentry} and @option{-pg}. > > +@item -mrecord-mcount > +@itemx -mno-record-mcount > +@opindex mrecord-mcount > +If profiling is active (@option{-pg}), generate a __mcount_loc section > +that contains pointers to each profiling call. This is useful for > +automatically patching and out calls. 
> + > +@item -mnop-mcount > +@itemx -mno-nop-mcount > +@opindex mnop-mcount > +If profiling is active (@option{-pg}), generate the calls to > +the profiling functions as nops. This is useful when they > +should be patched in later dynamically. This is likely only > +useful together with @option{-mrecord-mcount}. > + > @item -m8bit-idiv > @itemx -mno-8bit-idiv > @opindex 8bit-idiv > diff --git a/gcc/testsuite/gcc.target/i386/nop-mcount.c > b/gcc/testsuite/gcc.target/i386/nop-mcount.c > new file mode 100644 > index 0000000..2592231 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/nop-mcount.c > @@ -0,0 +1,24 @@ > +/* Test -mnop-mcount */ > +/* { dg-do compile } */ > +/* { dg-options "-pg -mfentry -mrecord-mcount -mnop-mcount" } */ > +/* { dg-final { scan-assembler-not "__fentry__" } } */ > +/* Origin: Andi Kleen */ > +extern void foobar(char *); > + > +void func(void) > +{ > + foobar ("Hello world\n"); > +} > + > +void func2(void) > +{ > + int i; > + for (i = 0; i < 10; i++) > + foobar ("Hello world"); > +} > + > +void func3(a) > +char *a; > +{ > + foobar("Hello world"); > +} > diff --git a/gcc/testsuite/gcc.target/i386/record-mcount.c > b/gcc/testsuite/gcc.target/i386/record-mcount.c > new file mode 100644 > index 0000000..dae413e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/record-mcount.c > @@ -0,0 +1,24 @@ > +/* Test -mrecord-mcount */ > +/* { dg-do compile } */ > +/* { dg-options "-pg -mrecord-mcount" } */ > +/* { dg-final { scan-assembler "mcount_loc" } } */ > +/* Origin: Andi Kleen */ > +extern void foobar(char *); > + > +void func(void) > +{ > + foobar ("Hello world\n"); > +} > + > +void func2(void) > +{ > + int i; > + for (i = 0; i < 10; i++) > + foobar ("Hello world"); > +} > + > +void func3(a) > +char *a; > +{ > + foobar("Hello world"); > +} -- a...@linux.intel.com -- Speaking for myself only