Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Jan Kiszka
Steven Rostedt wrote:
> Index: linux-compile-i386.git/Makefile
> ===
> --- linux-compile-i386.git.orig/Makefile  2008-01-09 14:09:36.0 
> -0500
> +++ linux-compile-i386.git/Makefile   2008-01-09 14:10:07.0 -0500
> @@ -509,6 +509,10 @@ endif
>  
>  include $(srctree)/arch/$(SRCARCH)/Makefile
>  
> +# MCOUNT expects frame pointer

This comment looks stray.

> +ifdef CONFIG_MCOUNT
> +KBUILD_CFLAGS+= -pg
> +endif
>  ifdef CONFIG_FRAME_POINTER
>  KBUILD_CFLAGS+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
>  else
> Index: linux-compile-i386.git/arch/x86/Kconfig
> ===
> --- linux-compile-i386.git.orig/arch/x86/Kconfig  2008-01-09 
> 14:09:36.0 -0500
> +++ linux-compile-i386.git/arch/x86/Kconfig   2008-01-09 14:10:07.0 
> -0500
> @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
>   bool
>   default y
>  
> +config ARCH_HAS_MCOUNT
> +   bool
> +   default y
> +
>  config CLOCKSOURCE_WATCHDOG
>   bool
>   default y
> Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32
> ===
> --- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32   2008-01-09 
> 14:09:36.0 -0500
> +++ linux-compile-i386.git/arch/x86/kernel/Makefile_322008-01-09 
> 14:10:07.0 -0500
> @@ -23,6 +23,7 @@ obj-$(CONFIG_APM)   += apm_32.o
>  obj-$(CONFIG_X86_SMP)+= smp_32.o smpboot_32.o tsc_sync.o
>  obj-$(CONFIG_SMP)+= smpcommon_32.o
>  obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o
> +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o

So far the code organization is different for 32 and 64 bit. I would
suggest to either

 o move both trampolines into entry_*.S or
 o put them in something like mcount-wrapper_32/64.S.

>  obj-$(CONFIG_X86_MPPARSE)+= mpparse_32.o
>  obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o
>  obj-$(CONFIG_X86_IO_APIC)+= io_apic_32.o
> Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S
> ===
> --- /dev/null 1970-01-01 00:00:00.0 +
> +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S   2008-01-09 
> 14:10:07.0 -0500
> @@ -0,0 +1,25 @@
> +/*
> + *  linux/arch/x86/mcount-wrapper.S
> + *
> + *  Copyright (C) 2004 Ingo Molnar
> + */
> +
> +.globl mcount
> +mcount:
> + cmpl $0, mcount_enabled
> + jz out
> +
> + push %ebp
> + mov %esp, %ebp

What is the benefit of having a call frame in this trampoline? We used
to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was
derived from the -rt code), but I just successfully tested a removal
patch. Also glibc [1] doesn't include it.

> + pushl %eax
> + pushl %ecx
> + pushl %edx
> +
> + call __mcount

I think this indirection should be avoided, just like the 64-bit version
and glibc do.

> +
> + popl %edx
> + popl %ecx
> + popl %eax
> + popl %ebp
> +out:
> + ret

...

> Index: linux-compile-i386.git/lib/tracing/mcount.c
> ===
> --- /dev/null 1970-01-01 00:00:00.0 +
> +++ linux-compile-i386.git/lib/tracing/mcount.c   2008-01-09 
> 14:10:07.0 -0500
> @@ -0,0 +1,77 @@
> +/*
> + * Infrastructure for profiling code inserted by 'gcc -pg'.
> + *
> + * Copyright (C) 2007 Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
> + *
> + * Converted to be more generic:
> + *   Copyright (C) 2007-2008 Steven Rostedt <[EMAIL PROTECTED]>
> + *
> + * From code in the latency_tracer, that is:
> + *
> + *  Copyright (C) 2004-2006 Ingo Molnar
> + *  Copyright (C) 2004 William Lee Irwin III
> + */
> +
> +#include 
> +#include 
> +
> +/*
> + * Since we have nothing protecting between the test of
> + * mcount_trace_function and the call to it, we can't
> + * set it to NULL without risking a race that will have
> + * the kernel call the NULL pointer. Instead, we just
> + * set the function pointer to a dummy function.
> + */
> +notrace void dummy_mcount_tracer(unsigned long ip,
> +  unsigned long parent_ip)
> +{
> + /* do nothing */
> +}
> +
> +mcount_func_t mcount_trace_function __read_mostly = dummy_mcount_tracer;
> +int mcount_enabled __read_mostly;
> +
> +/** __mcount - hook for profiling
> + *
> + * This routine is called from the arch specific mcount routine, that in 
> turn is
> + * called from code inserted by gcc -pg.
> + */
> +notrace void __mcount(void)
> +{
> + mcount_trace_function(CALLER_ADDR1, CALLER_ADDR2);
> +}

mcount_trace_function should always be called from the assembly
trampoline, IMO.

> +EXPORT_SYMBOL_GPL(mcount);
> +/*
> + * The above EXPORT_SYMBOL is for the gcc call of mcount and not the
> + * function __mcount that it is underneath. I put the export there
> + * 

Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Steven Rostedt
On Thu, 10 Jan 2008, Jan Kiszka wrote:
> > ===
> > --- /dev/null   1970-01-01 00:00:00.0 +
> > +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 
> > 14:10:07.0 -0500
> > @@ -0,0 +1,25 @@
> > +/*
> > + *  linux/arch/x86/mcount-wrapper.S
> > + *
> > + *  Copyright (C) 2004 Ingo Molnar
> > + */
> > +
> > +.globl mcount
> > +mcount:
> > +   cmpl $0, mcount_enabled
> > +   jz out
> > +
> > +   push %ebp
> > +   mov %esp, %ebp
>
> What is the benefit of having a call frame in this trampoline? We used
> to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was
> derived from the -rt code), but I just successfully tested a removal
> patch. Also glibc [1] doesn't include it.

OK, I just tried this out on i386, and it works fine.

>
> > +   pushl %eax
> > +   pushl %ecx
> > +   pushl %edx
> > +
> > +   call __mcount
>
> I think this indirection should be avoided, just like the 64-bit version
> and glibc do.

I also did this too.

>
> > +
> > +   popl %edx
> > +   popl %ecx
> > +   popl %eax
> > +   popl %ebp
> > +out:
> > +   ret
>

I'll go try the updates on x86_64 now.

Thanks for the tips!

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Steven Rostedt

On Thu, 10 Jan 2008, Jan Kiszka wrote:

> Steven Rostedt wrote:
> > Index: linux-compile-i386.git/Makefile
> > ===
> > --- linux-compile-i386.git.orig/Makefile2008-01-09 14:09:36.0 
> > -0500
> > +++ linux-compile-i386.git/Makefile 2008-01-09 14:10:07.0 -0500
> > @@ -509,6 +509,10 @@ endif
> >
> >  include $(srctree)/arch/$(SRCARCH)/Makefile
> >
> > +# MCOUNT expects frame pointer
>
> This comment looks stray.

Actually it's not ;-)

The original code had something like this:

#if CONFIG_MCOUNT
KBUILD_CFLAGS += ...
#else
#if CONFIG_FRAME_POINTER
KBUILD_CFLAGS += ...
#else
KBUILD_CFLAGS += ...
#endif
#endif

And Sam Ravnborg suggested to put that logic into the Kbuild system. For
which I did, but I put that comment there to just let others know that
MCOUNT expects the flags of FRAME_POINTER. But, I guess we can nuke that
comment anyway. It just leads to confusion.

>
> > +ifdef CONFIG_MCOUNT
> > +KBUILD_CFLAGS  += -pg
> > +endif
> >  ifdef CONFIG_FRAME_POINTER
> >  KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
> >  else
> > Index: linux-compile-i386.git/arch/x86/Kconfig
> > ===
> > --- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 
> > 14:09:36.0 -0500
> > +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 
> > -0500
> > @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
> > bool
> > default y
> >
> > +config ARCH_HAS_MCOUNT
> > +   bool
> > +   default y
> > +
> >  config CLOCKSOURCE_WATCHDOG
> > bool
> > default y
> > Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32
> > ===
> > --- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32 2008-01-09 
> > 14:09:36.0 -0500
> > +++ linux-compile-i386.git/arch/x86/kernel/Makefile_32  2008-01-09 
> > 14:10:07.0 -0500
> > @@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o
> >  obj-$(CONFIG_X86_SMP)  += smp_32.o smpboot_32.o tsc_sync.o
> >  obj-$(CONFIG_SMP)  += smpcommon_32.o
> >  obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline_32.o
> > +obj-$(CONFIG_MCOUNT)   += mcount-wrapper.o
>
> So far the code organization is different for 32 and 64 bit. I would
> suggest to either
>
>  o move both trampolines into entry_*.S or
>  o put them in something like mcount-wrapper_32/64.S.

Yeah, that's a relic from -rt. I never liked that, but I was just too lazy
to change it. I think I'll move the mcount_wrapper into entry_32.S

>
> >  obj-$(CONFIG_X86_MPPARSE)  += mpparse_32.o
> >  obj-$(CONFIG_X86_LOCAL_APIC)   += apic_32.o nmi_32.o
> >  obj-$(CONFIG_X86_IO_APIC)  += io_apic_32.o
> > Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S
> > ===
> > --- /dev/null   1970-01-01 00:00:00.0 +
> > +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 
> > 14:10:07.0 -0500
> > @@ -0,0 +1,25 @@
> > +/*
> > + *  linux/arch/x86/mcount-wrapper.S
> > + *
> > + *  Copyright (C) 2004 Ingo Molnar
> > + */
> > +
> > +.globl mcount
> > +mcount:
> > +   cmpl $0, mcount_enabled
> > +   jz out
> > +
> > +   push %ebp
> > +   mov %esp, %ebp
>
> What is the benefit of having a call frame in this trampoline? We used
> to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was
> derived from the -rt code), but I just successfully tested a removal
> patch. Also glibc [1] doesn't include it.

Hmm, what about having frame pointers on? Isn't that a requirement?

>
> > +   pushl %eax
> > +   pushl %ecx
> > +   pushl %edx
> > +
> > +   call __mcount
>
> I think this indirection should be avoided, just like the 64-bit version
> and glibc do.

I thought about that too, but didn't have the time to look into the
calling convention for that. 

# objdump --start-address 0x`nm /lib/libc-2.7.so | sed -ne '/ 
mcount$/s/^\([0-9a-f]*\).*/\1/p'` -D /lib/libc-2.7.so |head -28 |tail -12
49201cd0 <_mcount>:
49201cd0:   50  push   %eax
49201cd1:   51  push   %ecx
49201cd2:   52  push   %edx
49201cd3:   8b 54 24 0c mov0xc(%esp),%edx
49201cd7:   8b 45 04mov0x4(%ebp),%eax
49201cda:   e8 91 f4 ff ff  call   49201170
<__mcount_internal>
49201cdf:   5a  pop%edx
49201ce0:   59  pop%ecx
49201ce1:   58  pop%eax
49201ce2:   c3  ret
49201ce3:   90  nop

Until I found out about the frame pointers, I'll leave in the ebp copy.

>
> > +
> > +   popl %edx
> > +   popl %ecx
> > +   popl %eax
> > +   popl %ebp
> > +out:
> > +   ret
>
> 

[...]

> > +/** __mcount - hook for profiling
> > 

Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Steven Rostedt

Hi Sam,

On Thu, 10 Jan 2008, Sam Ravnborg wrote:

> Hi Steven.
>
> > Index: linux-compile-i386.git/arch/x86/Kconfig
> > ===
> > --- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 
> > 14:09:36.0 -0500
> > +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 
> > -0500
> > @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
> > bool
> > default y
> >
> > +config ARCH_HAS_MCOUNT
> > +   bool
> > +   default y
> > +
>
> Please use the following scheme:
>
> arch/x86/Kconfig:
>  config X86
> + select HAVE_MCOUNT
>
> lib/tracing/Kconfig
>
> + # ARCH shall select HAVE_MCOUNT if they provide this function
> + config HAVE_MCOUNT
> + bool
> +
> + config MCOUNT
> + bool
> + select FRAME_POINTER
>
> And then in your later patches:
> +config MCOUNT_TRACER
> +   bool "Profiler instrumentation based tracer"
> +   depends on DEBUG_KERNEL && HAVE_MCOUNT
> +   select MCOUNT
> +   help
> + Use profiler

Thanks, this does look like a cleaner approach. I'll implement it into
my next series.

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Sam Ravnborg
Hi Steven.

> Index: linux-compile-i386.git/arch/x86/Kconfig
> ===
> --- linux-compile-i386.git.orig/arch/x86/Kconfig  2008-01-09 
> 14:09:36.0 -0500
> +++ linux-compile-i386.git/arch/x86/Kconfig   2008-01-09 14:10:07.0 
> -0500
> @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
>   bool
>   default y
>  
> +config ARCH_HAS_MCOUNT
> +   bool
> +   default y
> +

Please use the following scheme:

arch/x86/Kconfig:
 config X86
+   select HAVE_MCOUNT

lib/tracing/Kconfig

+ # ARCH shall select HAVE_MCOUNT if they provide this function
+ config HAVE_MCOUNT
+   bool
+
+ config MCOUNT
+   bool
+   select FRAME_POINTER

And then in your later patches:
+config MCOUNT_TRACER
+   bool "Profiler instrumentation based tracer"
+   depends on DEBUG_KERNEL && HAVE_MCOUNT
+   select MCOUNT
+   help
+ Use profiler

The "default n" is a noop since this is the default.
And note that the depends on is removed from MCOUNT
because you use it as a select target (so dependencies
are not checked anyway).

With this scheme implmented you:
- Use new naming convention (HAVE_*)
- Avoid defining one config variable per arch
- Do not have dependencies on selected symbols
- More compact representation in arch Kconfig files

Sam
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Sam Ravnborg
Hi Steven.

 Index: linux-compile-i386.git/arch/x86/Kconfig
 ===
 --- linux-compile-i386.git.orig/arch/x86/Kconfig  2008-01-09 
 14:09:36.0 -0500
 +++ linux-compile-i386.git/arch/x86/Kconfig   2008-01-09 14:10:07.0 
 -0500
 @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
   bool
   default y
  
 +config ARCH_HAS_MCOUNT
 +   bool
 +   default y
 +

Please use the following scheme:

arch/x86/Kconfig:
 config X86
+   select HAVE_MCOUNT

lib/tracing/Kconfig

+ # ARCH shall select HAVE_MCOUNT if they provide this function
+ config HAVE_MCOUNT
+   bool
+
+ config MCOUNT
+   bool
+   select FRAME_POINTER

And then in your later patches:
+config MCOUNT_TRACER
+   bool Profiler instrumentation based tracer
+   depends on DEBUG_KERNEL  HAVE_MCOUNT
+   select MCOUNT
+   help
+ Use profiler

The default n is a noop since this is the default.
And note that the depends on is removed from MCOUNT
because you use it as a select target (so dependencies
are not checked anyway).

With this scheme implmented you:
- Use new naming convention (HAVE_*)
- Avoid defining one config variable per arch
- Do not have dependencies on selected symbols
- More compact representation in arch Kconfig files

Sam
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Steven Rostedt

Hi Sam,

On Thu, 10 Jan 2008, Sam Ravnborg wrote:

 Hi Steven.

  Index: linux-compile-i386.git/arch/x86/Kconfig
  ===
  --- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 
  14:09:36.0 -0500
  +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 
  -0500
  @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
  bool
  default y
 
  +config ARCH_HAS_MCOUNT
  +   bool
  +   default y
  +

 Please use the following scheme:

 arch/x86/Kconfig:
  config X86
 + select HAVE_MCOUNT

 lib/tracing/Kconfig

 + # ARCH shall select HAVE_MCOUNT if they provide this function
 + config HAVE_MCOUNT
 + bool
 +
 + config MCOUNT
 + bool
 + select FRAME_POINTER

 And then in your later patches:
 +config MCOUNT_TRACER
 +   bool Profiler instrumentation based tracer
 +   depends on DEBUG_KERNEL  HAVE_MCOUNT
 +   select MCOUNT
 +   help
 + Use profiler

Thanks, this does look like a cleaner approach. I'll implement it into
my next series.

-- Steve

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Steven Rostedt

On Thu, 10 Jan 2008, Jan Kiszka wrote:

 Steven Rostedt wrote:
  Index: linux-compile-i386.git/Makefile
  ===
  --- linux-compile-i386.git.orig/Makefile2008-01-09 14:09:36.0 
  -0500
  +++ linux-compile-i386.git/Makefile 2008-01-09 14:10:07.0 -0500
  @@ -509,6 +509,10 @@ endif
 
   include $(srctree)/arch/$(SRCARCH)/Makefile
 
  +# MCOUNT expects frame pointer

 This comment looks stray.

Actually it's not ;-)

The original code had something like this:

#if CONFIG_MCOUNT
KBUILD_CFLAGS += ...
#else
#if CONFIG_FRAME_POINTER
KBUILD_CFLAGS += ...
#else
KBUILD_CFLAGS += ...
#endif
#endif

And Sam Ravnborg suggested to put that logic into the Kbuild system. For
which I did, but I put that comment there to just let others know that
MCOUNT expects the flags of FRAME_POINTER. But, I guess we can nuke that
comment anyway. It just leads to confusion.


  +ifdef CONFIG_MCOUNT
  +KBUILD_CFLAGS  += -pg
  +endif
   ifdef CONFIG_FRAME_POINTER
   KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
   else
  Index: linux-compile-i386.git/arch/x86/Kconfig
  ===
  --- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 
  14:09:36.0 -0500
  +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 
  -0500
  @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
  bool
  default y
 
  +config ARCH_HAS_MCOUNT
  +   bool
  +   default y
  +
   config CLOCKSOURCE_WATCHDOG
  bool
  default y
  Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32
  ===
  --- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32 2008-01-09 
  14:09:36.0 -0500
  +++ linux-compile-i386.git/arch/x86/kernel/Makefile_32  2008-01-09 
  14:10:07.0 -0500
  @@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o
   obj-$(CONFIG_X86_SMP)  += smp_32.o smpboot_32.o tsc_sync.o
   obj-$(CONFIG_SMP)  += smpcommon_32.o
   obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline_32.o
  +obj-$(CONFIG_MCOUNT)   += mcount-wrapper.o

 So far the code organization is different for 32 and 64 bit. I would
 suggest to either

  o move both trampolines into entry_*.S or
  o put them in something like mcount-wrapper_32/64.S.

Yeah, that's a relic from -rt. I never liked that, but I was just too lazy
to change it. I think I'll move the mcount_wrapper into entry_32.S


   obj-$(CONFIG_X86_MPPARSE)  += mpparse_32.o
   obj-$(CONFIG_X86_LOCAL_APIC)   += apic_32.o nmi_32.o
   obj-$(CONFIG_X86_IO_APIC)  += io_apic_32.o
  Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S
  ===
  --- /dev/null   1970-01-01 00:00:00.0 +
  +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 
  14:10:07.0 -0500
  @@ -0,0 +1,25 @@
  +/*
  + *  linux/arch/x86/mcount-wrapper.S
  + *
  + *  Copyright (C) 2004 Ingo Molnar
  + */
  +
  +.globl mcount
  +mcount:
  +   cmpl $0, mcount_enabled
  +   jz out
  +
  +   push %ebp
  +   mov %esp, %ebp

 What is the benefit of having a call frame in this trampoline? We used
 to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was
 derived from the -rt code), but I just successfully tested a removal
 patch. Also glibc [1] doesn't include it.

Hmm, what about having frame pointers on? Isn't that a requirement?


  +   pushl %eax
  +   pushl %ecx
  +   pushl %edx
  +
  +   call __mcount

 I think this indirection should be avoided, just like the 64-bit version
 and glibc do.

I thought about that too, but didn't have the time to look into the
calling convention for that. does a quick look at glibc

# objdump --start-address 0x`nm /lib/libc-2.7.so | sed -ne '/ 
mcount$/s/^\([0-9a-f]*\).*/\1/p'` -D /lib/libc-2.7.so |head -28 |tail -12
49201cd0 _mcount:
49201cd0:   50  push   %eax
49201cd1:   51  push   %ecx
49201cd2:   52  push   %edx
49201cd3:   8b 54 24 0c mov0xc(%esp),%edx
49201cd7:   8b 45 04mov0x4(%ebp),%eax
49201cda:   e8 91 f4 ff ff  call   49201170
__mcount_internal
49201cdf:   5a  pop%edx
49201ce0:   59  pop%ecx
49201ce1:   58  pop%eax
49201ce2:   c3  ret
49201ce3:   90  nop

Until I found out about the frame pointers, I'll leave in the ebp copy.


  +
  +   popl %edx
  +   popl %ecx
  +   popl %eax
  +   popl %ebp
  +out:
  +   ret

 

[...]

  +/** __mcount - hook for profiling
  + *
  + * This routine is called from the arch specific mcount routine, that in 
  turn is
  + * called from code inserted by gcc -pg.
  + */
  +notrace void 

Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Steven Rostedt
On Thu, 10 Jan 2008, Jan Kiszka wrote:
  ===
  --- /dev/null   1970-01-01 00:00:00.0 +
  +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 
  14:10:07.0 -0500
  @@ -0,0 +1,25 @@
  +/*
  + *  linux/arch/x86/mcount-wrapper.S
  + *
  + *  Copyright (C) 2004 Ingo Molnar
  + */
  +
  +.globl mcount
  +mcount:
  +   cmpl $0, mcount_enabled
  +   jz out
  +
  +   push %ebp
  +   mov %esp, %ebp

 What is the benefit of having a call frame in this trampoline? We used
 to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was
 derived from the -rt code), but I just successfully tested a removal
 patch. Also glibc [1] doesn't include it.

OK, I just tried this out on i386, and it works fine.


  +   pushl %eax
  +   pushl %ecx
  +   pushl %edx
  +
  +   call __mcount

 I think this indirection should be avoided, just like the 64-bit version
 and glibc do.

I also did this too.


  +
  +   popl %edx
  +   popl %ecx
  +   popl %eax
  +   popl %ebp
  +out:
  +   ret


I'll go try the updates on x86_64 now.

Thanks for the tips!

-- Steve

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-10 Thread Jan Kiszka
Steven Rostedt wrote:
 Index: linux-compile-i386.git/Makefile
 ===
 --- linux-compile-i386.git.orig/Makefile  2008-01-09 14:09:36.0 
 -0500
 +++ linux-compile-i386.git/Makefile   2008-01-09 14:10:07.0 -0500
 @@ -509,6 +509,10 @@ endif
  
  include $(srctree)/arch/$(SRCARCH)/Makefile
  
 +# MCOUNT expects frame pointer

This comment looks stray.

 +ifdef CONFIG_MCOUNT
 +KBUILD_CFLAGS+= -pg
 +endif
  ifdef CONFIG_FRAME_POINTER
  KBUILD_CFLAGS+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
  else
 Index: linux-compile-i386.git/arch/x86/Kconfig
 ===
 --- linux-compile-i386.git.orig/arch/x86/Kconfig  2008-01-09 
 14:09:36.0 -0500
 +++ linux-compile-i386.git/arch/x86/Kconfig   2008-01-09 14:10:07.0 
 -0500
 @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
   bool
   default y
  
 +config ARCH_HAS_MCOUNT
 +   bool
 +   default y
 +
  config CLOCKSOURCE_WATCHDOG
   bool
   default y
 Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32
 ===
 --- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32   2008-01-09 
 14:09:36.0 -0500
 +++ linux-compile-i386.git/arch/x86/kernel/Makefile_322008-01-09 
 14:10:07.0 -0500
 @@ -23,6 +23,7 @@ obj-$(CONFIG_APM)   += apm_32.o
  obj-$(CONFIG_X86_SMP)+= smp_32.o smpboot_32.o tsc_sync.o
  obj-$(CONFIG_SMP)+= smpcommon_32.o
  obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o
 +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o

So far the code organization is different for 32 and 64 bit. I would
suggest to either

 o move both trampolines into entry_*.S or
 o put them in something like mcount-wrapper_32/64.S.

  obj-$(CONFIG_X86_MPPARSE)+= mpparse_32.o
  obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o
  obj-$(CONFIG_X86_IO_APIC)+= io_apic_32.o
 Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S
 ===
 --- /dev/null 1970-01-01 00:00:00.0 +
 +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S   2008-01-09 
 14:10:07.0 -0500
 @@ -0,0 +1,25 @@
 +/*
 + *  linux/arch/x86/mcount-wrapper.S
 + *
 + *  Copyright (C) 2004 Ingo Molnar
 + */
 +
 +.globl mcount
 +mcount:
 + cmpl $0, mcount_enabled
 + jz out
 +
 + push %ebp
 + mov %esp, %ebp

What is the benefit of having a call frame in this trampoline? We used
to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was
derived from the -rt code), but I just successfully tested a removal
patch. Also glibc [1] doesn't include it.

 + pushl %eax
 + pushl %ecx
 + pushl %edx
 +
 + call __mcount

I think this indirection should be avoided, just like the 64-bit version
and glibc do.

 +
 + popl %edx
 + popl %ecx
 + popl %eax
 + popl %ebp
 +out:
 + ret

...

 Index: linux-compile-i386.git/lib/tracing/mcount.c
 ===
 --- /dev/null 1970-01-01 00:00:00.0 +
 +++ linux-compile-i386.git/lib/tracing/mcount.c   2008-01-09 
 14:10:07.0 -0500
 @@ -0,0 +1,77 @@
 +/*
 + * Infrastructure for profiling code inserted by 'gcc -pg'.
 + *
 + * Copyright (C) 2007 Arnaldo Carvalho de Melo [EMAIL PROTECTED]
 + *
 + * Converted to be more generic:
 + *   Copyright (C) 2007-2008 Steven Rostedt [EMAIL PROTECTED]
 + *
 + * From code in the latency_tracer, that is:
 + *
 + *  Copyright (C) 2004-2006 Ingo Molnar
 + *  Copyright (C) 2004 William Lee Irwin III
 + */
 +
 +#include linux/module.h
 +#include linux/mcount.h
 +
 +/*
 + * Since we have nothing protecting between the test of
 + * mcount_trace_function and the call to it, we can't
 + * set it to NULL without risking a race that will have
 + * the kernel call the NULL pointer. Instead, we just
 + * set the function pointer to a dummy function.
 + */
 +notrace void dummy_mcount_tracer(unsigned long ip,
 +  unsigned long parent_ip)
 +{
 + /* do nothing */
 +}
 +
 +mcount_func_t mcount_trace_function __read_mostly = dummy_mcount_tracer;
 +int mcount_enabled __read_mostly;
 +
 +/** __mcount - hook for profiling
 + *
 + * This routine is called from the arch specific mcount routine, that in 
 turn is
 + * called from code inserted by gcc -pg.
 + */
 +notrace void __mcount(void)
 +{
 + mcount_trace_function(CALLER_ADDR1, CALLER_ADDR2);
 +}

mcount_trace_function should always be called from the assembly
trampoline, IMO.

 +EXPORT_SYMBOL_GPL(mcount);
 +/*
 + * The above EXPORT_SYMBOL is for the gcc call of mcount and not the
 + * function __mcount that it is underneath. I put the export there
 + * to fool checkpatch.pl. It wants that export to be with the
 + * function, but that function happens to be 

[RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-09 Thread Steven Rostedt
If CONFIG_MCOUNT is selected and /proc/sys/kernel/mcount_enabled is set to a
non-zero value the mcount routine will be called everytime we enter a kernel
function that is not marked with the "notrace" attribute.

The mcount routine will then call a registered function if a function
happens to be registered.

[This code has been highly hacked by Steven Rostedt, so don't
 blame Arnaldo for all of this ;-) ]

Signed-off-by: Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 Makefile |4 ++
 arch/x86/Kconfig |4 ++
 arch/x86/kernel/Makefile_32  |1 
 arch/x86/kernel/entry_64.S   |   40 
 arch/x86/kernel/mcount-wrapper.S |   25 
 include/linux/linkage.h  |2 +
 include/linux/mcount.h   |   21 ++
 kernel/sysctl.c  |   11 +
 lib/Kconfig.debug|2 +
 lib/Makefile |2 +
 lib/tracing/Kconfig  |7 +++
 lib/tracing/Makefile |3 +
 lib/tracing/mcount.c |   77 +++
 13 files changed, 199 insertions(+)
 create mode 100644 arch/i386/kernel/mcount-wrapper.S
 create mode 100644 lib/tracing/Kconfig
 create mode 100644 lib/tracing/Makefile
 create mode 100644 lib/tracing/mcount.c
 create mode 100644 lib/tracing/mcount.h

Index: linux-compile-i386.git/Makefile
===
--- linux-compile-i386.git.orig/Makefile2008-01-09 14:09:36.0 
-0500
+++ linux-compile-i386.git/Makefile 2008-01-09 14:10:07.0 -0500
@@ -509,6 +509,10 @@ endif
 
 include $(srctree)/arch/$(SRCARCH)/Makefile
 
+# MCOUNT expects frame pointer
+ifdef CONFIG_MCOUNT
+KBUILD_CFLAGS  += -pg
+endif
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
Index: linux-compile-i386.git/arch/x86/Kconfig
===
--- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 
14:09:36.0 -0500
+++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 
-0500
@@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
bool
default y
 
+config ARCH_HAS_MCOUNT
+   bool
+   default y
+
 config CLOCKSOURCE_WATCHDOG
bool
default y
Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32
===
--- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32 2008-01-09 
14:09:36.0 -0500
+++ linux-compile-i386.git/arch/x86/kernel/Makefile_32  2008-01-09 
14:10:07.0 -0500
@@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o
 obj-$(CONFIG_X86_SMP)  += smp_32.o smpboot_32.o tsc_sync.o
 obj-$(CONFIG_SMP)  += smpcommon_32.o
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline_32.o
+obj-$(CONFIG_MCOUNT)   += mcount-wrapper.o
 obj-$(CONFIG_X86_MPPARSE)  += mpparse_32.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic_32.o nmi_32.o
 obj-$(CONFIG_X86_IO_APIC)  += io_apic_32.o
Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 
14:10:07.0 -0500
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/x86/mcount-wrapper.S
+ *
+ *  Copyright (C) 2004 Ingo Molnar
+ */
+
+.globl mcount
+mcount:
+   cmpl $0, mcount_enabled
+   jz out
+
+   push %ebp
+   mov %esp, %ebp
+   pushl %eax
+   pushl %ecx
+   pushl %edx
+
+   call __mcount
+
+   popl %edx
+   popl %ecx
+   popl %eax
+   popl %ebp
+out:
+   ret
Index: linux-compile-i386.git/include/linux/linkage.h
===
--- linux-compile-i386.git.orig/include/linux/linkage.h 2008-01-09 
14:09:36.0 -0500
+++ linux-compile-i386.git/include/linux/linkage.h  2008-01-09 
14:10:07.0 -0500
@@ -3,6 +3,8 @@
 
 #include 
 
+#define notrace __attribute__((no_instrument_function))
+
 #ifdef __cplusplus
 #define CPP_ASMLINKAGE extern "C"
 #else
Index: linux-compile-i386.git/kernel/sysctl.c
===
--- linux-compile-i386.git.orig/kernel/sysctl.c 2008-01-09 14:09:36.0 
-0500
+++ linux-compile-i386.git/kernel/sysctl.c  2008-01-09 14:10:07.0 
-0500
@@ -46,6 +46,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -470,6 +471,16 @@ static struct ctl_table kern_table[] = {
.mode   = 0644,
.proc_handler   = _dointvec,
},
+#ifdef CONFIG_MCOUNT
+   {
+   .ctl_name   = CTL_UNNUMBERED,
+   .procname   = "mcount_enabled",
+  

[RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation

2008-01-09 Thread Steven Rostedt
If CONFIG_MCOUNT is selected and /proc/sys/kernel/mcount_enabled is set to a
non-zero value the mcount routine will be called everytime we enter a kernel
function that is not marked with the notrace attribute.

The mcount routine will then call a registered function if a function
happens to be registered.

[This code has been highly hacked by Steven Rostedt, so don't
 blame Arnaldo for all of this ;-) ]

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
Signed-off-by: Steven Rostedt [EMAIL PROTECTED]
---
 Makefile |4 ++
 arch/x86/Kconfig |4 ++
 arch/x86/kernel/Makefile_32  |1 
 arch/x86/kernel/entry_64.S   |   40 
 arch/x86/kernel/mcount-wrapper.S |   25 
 include/linux/linkage.h  |2 +
 include/linux/mcount.h   |   21 ++
 kernel/sysctl.c  |   11 +
 lib/Kconfig.debug|2 +
 lib/Makefile |2 +
 lib/tracing/Kconfig  |7 +++
 lib/tracing/Makefile |3 +
 lib/tracing/mcount.c |   77 +++
 13 files changed, 199 insertions(+)
 create mode 100644 arch/i386/kernel/mcount-wrapper.S
 create mode 100644 lib/tracing/Kconfig
 create mode 100644 lib/tracing/Makefile
 create mode 100644 lib/tracing/mcount.c
 create mode 100644 lib/tracing/mcount.h

Index: linux-compile-i386.git/Makefile
===
--- linux-compile-i386.git.orig/Makefile2008-01-09 14:09:36.0 
-0500
+++ linux-compile-i386.git/Makefile 2008-01-09 14:10:07.0 -0500
@@ -509,6 +509,10 @@ endif
 
 include $(srctree)/arch/$(SRCARCH)/Makefile
 
+# MCOUNT expects frame pointer
+ifdef CONFIG_MCOUNT
+KBUILD_CFLAGS  += -pg
+endif
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
Index: linux-compile-i386.git/arch/x86/Kconfig
===
--- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 
14:09:36.0 -0500
+++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 
-0500
@@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE
bool
default y
 
+config ARCH_HAS_MCOUNT
+   bool
+   default y
+
 config CLOCKSOURCE_WATCHDOG
bool
default y
Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32
===
--- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32 2008-01-09 
14:09:36.0 -0500
+++ linux-compile-i386.git/arch/x86/kernel/Makefile_32  2008-01-09 
14:10:07.0 -0500
@@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o
 obj-$(CONFIG_X86_SMP)  += smp_32.o smpboot_32.o tsc_sync.o
 obj-$(CONFIG_SMP)  += smpcommon_32.o
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline_32.o
+obj-$(CONFIG_MCOUNT)   += mcount-wrapper.o
 obj-$(CONFIG_X86_MPPARSE)  += mpparse_32.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic_32.o nmi_32.o
 obj-$(CONFIG_X86_IO_APIC)  += io_apic_32.o
Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 
14:10:07.0 -0500
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/x86/mcount-wrapper.S
+ *
+ *  Copyright (C) 2004 Ingo Molnar
+ */
+
+.globl mcount
+mcount:
+   cmpl $0, mcount_enabled
+   jz out
+
+   push %ebp
+   mov %esp, %ebp
+   pushl %eax
+   pushl %ecx
+   pushl %edx
+
+   call __mcount
+
+   popl %edx
+   popl %ecx
+   popl %eax
+   popl %ebp
+out:
+   ret
Index: linux-compile-i386.git/include/linux/linkage.h
===
--- linux-compile-i386.git.orig/include/linux/linkage.h 2008-01-09 
14:09:36.0 -0500
+++ linux-compile-i386.git/include/linux/linkage.h  2008-01-09 
14:10:07.0 -0500
@@ -3,6 +3,8 @@
 
 #include asm/linkage.h
 
+#define notrace __attribute__((no_instrument_function))
+
 #ifdef __cplusplus
 #define CPP_ASMLINKAGE extern C
 #else
Index: linux-compile-i386.git/kernel/sysctl.c
===
--- linux-compile-i386.git.orig/kernel/sysctl.c 2008-01-09 14:09:36.0 
-0500
+++ linux-compile-i386.git/kernel/sysctl.c  2008-01-09 14:10:07.0 
-0500
@@ -46,6 +46,7 @@
 #include linux/nfs_fs.h
 #include linux/acpi.h
 #include linux/reboot.h
+#include linux/mcount.h
 
 #include asm/uaccess.h
 #include asm/processor.h
@@ -470,6 +471,16 @@ static struct ctl_table kern_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec,
},
+#ifdef CONFIG_MCOUNT
+   {
+