Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-18 Thread Sergey Senozhatsky
On (02/19/16 10:39), Joonsoo Kim wrote:
[..]
> > not sure if it's worth mentioning in the comment, but the other
> > concern here is the performance impact of an extra function call,
> > I believe. otherwise, Joonsoo would just do:
> 
> It's very natural thing so I'm not sure it is worth mentioning.

agree.

> > and in mm/debug_page_ref.c
> >
> > void __page_ref_set(struct page *page, int v)
> > {
> > if (trace_page_ref_set_enabled())
> > trace_page_ref_set(page, v);
> > }
> > EXPORT_SYMBOL(__page_ref_set);
> > EXPORT_TRACEPOINT_SYMBOL(page_ref_set);
> 
> It is what I did in v1.

ah... indeed. well, "That was a year ago, how am I suppose to remember"

-ss


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-18 Thread Steven Rostedt
On Fri, 19 Feb 2016 10:39:10 +0900
Joonsoo Kim  wrote:


> > not sure if it's worth mentioning in the comment, but the other
> > concern here is the performance impact of an extra function call,
> > I believe. otherwise, Joonsoo would just do:  
> 
> It's very natural thing so I'm not sure it is worth mentioning.
> 

Agreed.

-- Steve


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-18 Thread Joonsoo Kim
2016-02-19 9:34 GMT+09:00 Sergey Senozhatsky
:
> On (02/18/16 09:29), Steven Rostedt wrote:
>> > diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
>> > index 534249c..fd6d9a5 100644
>> > --- a/include/linux/page_ref.h
>> > +++ b/include/linux/page_ref.h
>> > @@ -1,6 +1,54 @@
>> >  #include 
>> >  #include 
>> >  #include 
>> > +#include 
>> > +
>> > +extern struct tracepoint __tracepoint_page_ref_set;
>> > +extern struct tracepoint __tracepoint_page_ref_mod;
>> > +extern struct tracepoint __tracepoint_page_ref_mod_and_test;
>> > +extern struct tracepoint __tracepoint_page_ref_mod_and_return;
>> > +extern struct tracepoint __tracepoint_page_ref_mod_unless;
>> > +extern struct tracepoint __tracepoint_page_ref_freeze;
>> > +extern struct tracepoint __tracepoint_page_ref_unfreeze;
>> > +
>> > +#ifdef CONFIG_DEBUG_PAGE_REF
>>
>> Please add a comment here. Something to the effect of:
>>
>> /*
>>  * Ideally we would want to use the trace__enabled() helper
>>  * functions. But due to include header file issues, that is not
>>  * feasible. Instead we have to open code the static key functions.
>>  *
>>  * See trace_##name##_enabled(void) in include/linux/tracepoint.h
>>  */
>>
>
> not sure if it's worth mentioning in the comment, but the other
> concern here is the performance impact of an extra function call,
> I believe. otherwise, Joonsoo would just do:

It's very natural thing so I'm not sure it is worth mentioning.

> in include/linux/page_ref.h
>
> static inline void set_page_count(struct page *page, int v)
> {
> atomic_set(&page->_count, v);
> __page_ref_set(page, v);
> }
> ...
>
>
>
> and in mm/debug_page_ref.c
>
> void __page_ref_set(struct page *page, int v)
> {
> if (trace_page_ref_set_enabled())
> trace_page_ref_set(page, v);
> }
> EXPORT_SYMBOL(__page_ref_set);
> EXPORT_TRACEPOINT_SYMBOL(page_ref_set);

It is what I did in v1.

Thanks.


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-18 Thread Joonsoo Kim
2016-02-18 23:29 GMT+09:00 Steven Rostedt :
> On Mon, 15 Feb 2016 12:04:50 +0900
> js1...@gmail.com wrote:
>
>
>> diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
>> index 534249c..fd6d9a5 100644
>> --- a/include/linux/page_ref.h
>> +++ b/include/linux/page_ref.h
>> @@ -1,6 +1,54 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +
>> +extern struct tracepoint __tracepoint_page_ref_set;
>> +extern struct tracepoint __tracepoint_page_ref_mod;
>> +extern struct tracepoint __tracepoint_page_ref_mod_and_test;
>> +extern struct tracepoint __tracepoint_page_ref_mod_and_return;
>> +extern struct tracepoint __tracepoint_page_ref_mod_unless;
>> +extern struct tracepoint __tracepoint_page_ref_freeze;
>> +extern struct tracepoint __tracepoint_page_ref_unfreeze;
>> +
>> +#ifdef CONFIG_DEBUG_PAGE_REF
>
> Please add a comment here. Something to the effect of:

Okay!

> /*
>  * Ideally we would want to use the trace__enabled() helper
>  * functions. But due to include header file issues, that is not
>  * feasible. Instead we have to open code the static key functions.
>  *
>  * See trace_##name##_enabled(void) in include/linux/tracepoint.h
>  */
>
> I may have to work on something that lets these helpers be defined in
> headers. I have some ideas on how to do that. But for now, this
> solution is fine.

Okay.

Thanks.


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-18 Thread Sergey Senozhatsky
On (02/18/16 09:29), Steven Rostedt wrote:
> > diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
> > index 534249c..fd6d9a5 100644
> > --- a/include/linux/page_ref.h
> > +++ b/include/linux/page_ref.h
> > @@ -1,6 +1,54 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +
> > +extern struct tracepoint __tracepoint_page_ref_set;
> > +extern struct tracepoint __tracepoint_page_ref_mod;
> > +extern struct tracepoint __tracepoint_page_ref_mod_and_test;
> > +extern struct tracepoint __tracepoint_page_ref_mod_and_return;
> > +extern struct tracepoint __tracepoint_page_ref_mod_unless;
> > +extern struct tracepoint __tracepoint_page_ref_freeze;
> > +extern struct tracepoint __tracepoint_page_ref_unfreeze;
> > +
> > +#ifdef CONFIG_DEBUG_PAGE_REF
> 
> Please add a comment here. Something to the effect of:
> 
> /*
>  * Ideally we would want to use the trace__enabled() helper
>  * functions. But due to include header file issues, that is not
>  * feasible. Instead we have to open code the static key functions.
>  *
>  * See trace_##name##_enabled(void) in include/linux/tracepoint.h
>  */
> 

not sure if it's worth mentioning in the comment, but the other
concern here is the performance impact of an extra function call,
I believe. otherwise, Joonsoo would just do:

in include/linux/page_ref.h

static inline void set_page_count(struct page *page, int v)
{
atomic_set(&page->_count, v);
__page_ref_set(page, v);
}
...



and in mm/debug_page_ref.c

void __page_ref_set(struct page *page, int v)
{
if (trace_page_ref_set_enabled())
trace_page_ref_set(page, v);
}
EXPORT_SYMBOL(__page_ref_set);
EXPORT_TRACEPOINT_SYMBOL(page_ref_set);
...

-ss


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-18 Thread Steven Rostedt
On Mon, 15 Feb 2016 12:04:50 +0900
js1...@gmail.com wrote:


> diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
> index 534249c..fd6d9a5 100644
> --- a/include/linux/page_ref.h
> +++ b/include/linux/page_ref.h
> @@ -1,6 +1,54 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +
> +extern struct tracepoint __tracepoint_page_ref_set;
> +extern struct tracepoint __tracepoint_page_ref_mod;
> +extern struct tracepoint __tracepoint_page_ref_mod_and_test;
> +extern struct tracepoint __tracepoint_page_ref_mod_and_return;
> +extern struct tracepoint __tracepoint_page_ref_mod_unless;
> +extern struct tracepoint __tracepoint_page_ref_freeze;
> +extern struct tracepoint __tracepoint_page_ref_unfreeze;
> +
> +#ifdef CONFIG_DEBUG_PAGE_REF

Please add a comment here. Something to the effect of:

/*
 * Ideally we would want to use the trace__enabled() helper
 * functions. But due to include header file issues, that is not
 * feasible. Instead we have to open code the static key functions.
 *
 * See trace_##name##_enabled(void) in include/linux/tracepoint.h
 */

I may have to work on something that lets these helpers be defined in
headers. I have some ideas on how to do that. But for now, this
solution is fine.

-- Steve


> +#define page_ref_tracepoint_active(t) static_key_false(&(t).key)
> +
> +extern void __page_ref_set(struct page *page, int v);
> +extern void __page_ref_mod(struct page *page, int v);
> +extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
> +extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
> +extern void __page_ref_mod_unless(struct page *page, int v, int u);
> +extern void __page_ref_freeze(struct page *page, int v, int ret);
> +extern void __page_ref_unfreeze(struct page *page, int v);
> +
> +#else
> +
> +#define page_ref_tracepoint_active(t) false
> +
> +static inline void __page_ref_set(struct page *page, int v)
> +{
> +}
> +static inline void __page_ref_mod(struct page *page, int v)
> +{
> +}
> +static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
> +{
> +}
> +static inline void __page_ref_mod_and_return(struct page *page, int v, int 
> ret)
> +{
> +}
> +static inline void __page_ref_mod_unless(struct page *page, int v, int u)
> +{
> +}
> +static inline void __page_ref_freeze(struct page *page, int v, int ret)
> +{
> +}
> +static inline void __page_ref_unfreeze(struct page *page, int v)
> +{
> +}
> +
> +#endif
>  
>


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-18 Thread Steven Rostedt
On Thu, 18 Feb 2016 16:46:08 +0900
Joonsoo Kim  wrote:

> 2016-02-16 10:16 GMT+09:00 Steven Rostedt :
> > On Tue, 16 Feb 2016 09:47:20 +0900
> > Joonsoo Kim  wrote:
> >  
> >> > They return true when CONFIG_TRACEPOINTS is configured in and the
> >> > tracepoint is enabled, and false otherwise.  
> >>
> >> This implementation is what you proposed before. Please refer below
> >> link and source.
> >>
> >> https://lkml.org/lkml/2015/12/9/699
> >> arch/x86/include/asm/msr.h  
> >
> > That was a year ago, how am I suppose to remember ;-)  
> 
> I think you are smart enough to remember. :)
> I will add it on commit description on next spin.
> 
>

Better yet, add it to the code. I'll reply to the patch.

-- Steve


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-17 Thread Joonsoo Kim
2016-02-16 10:16 GMT+09:00 Steven Rostedt :
> On Tue, 16 Feb 2016 09:47:20 +0900
> Joonsoo Kim  wrote:
>
>> > They return true when CONFIG_TRACEPOINTS is configured in and the
>> > tracepoint is enabled, and false otherwise.
>>
>> This implementation is what you proposed before. Please refer below
>> link and source.
>>
>> https://lkml.org/lkml/2015/12/9/699
>> arch/x86/include/asm/msr.h
>
> That was a year ago, how am I suppose to remember ;-)

I think you are smart enough to remember. :)
I will add it on commit description on next spin.

>>
>> There is header file dependency problem between mm.h and tracepoint.h.
>> page_ref.h should be included in mm.h and tracepoint.h cannot
>> be included in this case.
>
> Ah, OK, I forgot about that. I'll take another look at it again.
>
> A lot happened since then, that's all a fog to me.

Okay. Please let me know result of another look.

Thanks.


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-15 Thread Steven Rostedt
On Tue, 16 Feb 2016 09:47:20 +0900
Joonsoo Kim  wrote:

> > They return true when CONFIG_TRACEPOINTS is configured in and the
> > tracepoint is enabled, and false otherwise.  
> 
> This implementation is what you proposed before. Please refer below
> link and source.
> 
> https://lkml.org/lkml/2015/12/9/699
> arch/x86/include/asm/msr.h

That was a year ago, how am I suppose to remember ;-)

> 
> There is header file dependency problem between mm.h and tracepoint.h.
> page_ref.h should be included in mm.h and tracepoint.h cannot
> be included in this case.

Ah, OK, I forgot about that. I'll take another look at it again.

A lot happened since then, that's all a fog to me.

-- Steve


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-15 Thread Joonsoo Kim
On Mon, Feb 15, 2016 at 11:07:41AM -0500, Steven Rostedt wrote:
> On Mon, 15 Feb 2016 12:04:50 +0900
> js1...@gmail.com wrote:
> 
> > From: Joonsoo Kim 
> > 
> > CMA allocation should be guaranteed to succeed by definition, but,
> > unfortunately, it would be failed sometimes. It is hard to track down
> > the problem, because it is related to page reference manipulation and
> > we don't have any facility to analyze it.
> > 
> > This patch adds tracepoints to track down page reference manipulation.
> > With it, we can find exact reason of failure and can fix the problem.
> > Following is an example of tracepoint output.
> > 
> > <...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
> > count=1 mapcount=0 mapping=(nil) mt=4 val=1
> > <...>-9018  [004]92.678378: kernel_stack:
> >  => get_page_from_freelist (81176659)
> >  => __alloc_pages_nodemask (81176d22)
> >  => alloc_pages_vma (811bf675)
> >  => handle_mm_fault (8119e693)
> >  => __do_page_fault (810631ea)
> >  => trace_do_page_fault (81063543)
> >  => do_async_page_fault (8105c40a)
> >  => async_page_fault (817581d8)  
> > [snip]
> > <...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 
> > flags=0x40048 count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
> > [snip]
> > ...
> > ...
> > <...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
> > end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
> > [snip]
> > <...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
> > flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 
> > ret=1
> >  => release_pages (8117c9e4)
> >  => free_pages_and_swap_cache (811b0697)
> >  => tlb_flush_mmu_free (81199616)
> >  => tlb_finish_mmu (8119a62c)
> >  => exit_mmap (811a53f7)
> >  => mmput (81073f47)
> >  => do_exit (810794e9)
> >  => do_group_exit (81079def)
> >  => SyS_exit_group (81079e74)
> >  => entry_SYSCALL_64_fastpath (817560b6)  
> > 
> > This output shows that problem comes from exit path. In exit path,
> > to improve performance, pages are not freed immediately. They are gathered
> > and processed by batch. During this process, migration cannot be possible
> > and CMA allocation is failed. This problem is hard to find without this
> > page reference tracepoint facility.
> > 
> > Enabling this feature bloat kernel text 30 KB in my configuration.
> > 
> >textdata bss dec hex filename
> > 121273272243616 1507328 15878271 f2487f vmlinux_disabled
> > 121572082258880 1507328 15923416 f2f8d8 vmlinux_enabled
> > 
> > v2:
> > o Use static key of each tracepoints to avoid function call overhead
> > when tracepoints are disabled.
> > o Print human-readable page flag through show_page_flags()
> > o Add more description to Kconfig.debug.
> > 
> > Acked-by: Michal Nazarewicz 
> > Signed-off-by: Joonsoo Kim 
> > ---
> >  include/linux/page_ref.h|  90 +--
> >  include/trace/events/page_ref.h | 133 
> > 
> >  mm/Kconfig.debug|  13 
> >  mm/Makefile |   1 +
> >  mm/debug_page_ref.c |  53 
> >  5 files changed, 285 insertions(+), 5 deletions(-)
> >  create mode 100644 include/trace/events/page_ref.h
> >  create mode 100644 mm/debug_page_ref.c
> > 
> > diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
> > index 534249c..fd6d9a5 100644
> > --- a/include/linux/page_ref.h
> > +++ b/include/linux/page_ref.h
> > @@ -1,6 +1,54 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +
> > +extern struct tracepoint __tracepoint_page_ref_set;
> > +extern struct tracepoint __tracepoint_page_ref_mod;
> > +extern struct tracepoint __tracepoint_page_ref_mod_and_test;
> > +extern struct tracepoint __tracepoint_page_ref_mod_and_return;
> > +extern struct tracepoint __tracepoint_page_ref_mod_unless;
> > +extern struct tracepoint __tracepoint_page_ref_freeze;
> > +extern struct tracepoint __tracepoint_page_ref_unfreeze;
> > +
> > +#ifdef CONFIG_DEBUG_PAGE_REF
> > +#define page_ref_tracepoint_active(t) static_key_false(&(t).key)
> 
> Please don't open code this. Use the following instead:
> 
>   trace_page_ref_set_enabled()
>   trace_page_ref_mod_enabled()
>   trace_page_ref_mod_and_test_enabled()
>   trace_page_ref_mod_and_return_enabled()
>   trace_page_ref_mod_unless_enabled()
>   trace_page_ref_freeze_enabled()
>   trace_page_ref_unfreeze_enabled()
> 
> They return true when CONFIG_TRACEPOINTS is configured in and the
> tracepoint is enabled, and false otherwise.

This implementation is what you proposed before. Please refer below
link and source.

https://lkml.org/lkml/2015/12/9/699
arch/x86/include/asm/msr.h

There is header file dependency problem between mm.h and tracepoint.h.
page_ref.h should be include

Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-15 Thread Steven Rostedt
On Mon, 15 Feb 2016 12:04:50 +0900
js1...@gmail.com wrote:

> From: Joonsoo Kim 
> 
> CMA allocation should be guaranteed to succeed by definition, but,
> unfortunately, it would be failed sometimes. It is hard to track down
> the problem, because it is related to page reference manipulation and
> we don't have any facility to analyze it.
> 
> This patch adds tracepoints to track down page reference manipulation.
> With it, we can find exact reason of failure and can fix the problem.
> Following is an example of tracepoint output.
> 
> <...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
> count=1 mapcount=0 mapping=(nil) mt=4 val=1
> <...>-9018  [004]92.678378: kernel_stack:
>  => get_page_from_freelist (81176659)
>  => __alloc_pages_nodemask (81176d22)
>  => alloc_pages_vma (811bf675)
>  => handle_mm_fault (8119e693)
>  => __do_page_fault (810631ea)
>  => trace_do_page_fault (81063543)
>  => do_async_page_fault (8105c40a)
>  => async_page_fault (817581d8)  
> [snip]
> <...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 
> flags=0x40048 count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
> [snip]
> ...
> ...
> <...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
> end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
> [snip]
> <...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
> flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 ret=1
>  => release_pages (8117c9e4)
>  => free_pages_and_swap_cache (811b0697)
>  => tlb_flush_mmu_free (81199616)
>  => tlb_finish_mmu (8119a62c)
>  => exit_mmap (811a53f7)
>  => mmput (81073f47)
>  => do_exit (810794e9)
>  => do_group_exit (81079def)
>  => SyS_exit_group (81079e74)
>  => entry_SYSCALL_64_fastpath (817560b6)  
> 
> This output shows that problem comes from exit path. In exit path,
> to improve performance, pages are not freed immediately. They are gathered
> and processed by batch. During this process, migration cannot be possible
> and CMA allocation is failed. This problem is hard to find without this
> page reference tracepoint facility.
> 
> Enabling this feature bloat kernel text 30 KB in my configuration.
> 
>textdata bss dec hex filename
> 121273272243616 1507328 15878271 f2487f vmlinux_disabled
> 121572082258880 1507328 15923416 f2f8d8 vmlinux_enabled
> 
> v2:
> o Use static key of each tracepoints to avoid function call overhead
> when tracepoints are disabled.
> o Print human-readable page flag through show_page_flags()
> o Add more description to Kconfig.debug.
> 
> Acked-by: Michal Nazarewicz 
> Signed-off-by: Joonsoo Kim 
> ---
>  include/linux/page_ref.h|  90 +--
>  include/trace/events/page_ref.h | 133 
> 
>  mm/Kconfig.debug|  13 
>  mm/Makefile |   1 +
>  mm/debug_page_ref.c |  53 
>  5 files changed, 285 insertions(+), 5 deletions(-)
>  create mode 100644 include/trace/events/page_ref.h
>  create mode 100644 mm/debug_page_ref.c
> 
> diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
> index 534249c..fd6d9a5 100644
> --- a/include/linux/page_ref.h
> +++ b/include/linux/page_ref.h
> @@ -1,6 +1,54 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +
> +extern struct tracepoint __tracepoint_page_ref_set;
> +extern struct tracepoint __tracepoint_page_ref_mod;
> +extern struct tracepoint __tracepoint_page_ref_mod_and_test;
> +extern struct tracepoint __tracepoint_page_ref_mod_and_return;
> +extern struct tracepoint __tracepoint_page_ref_mod_unless;
> +extern struct tracepoint __tracepoint_page_ref_freeze;
> +extern struct tracepoint __tracepoint_page_ref_unfreeze;
> +
> +#ifdef CONFIG_DEBUG_PAGE_REF
> +#define page_ref_tracepoint_active(t) static_key_false(&(t).key)

Please don't open code this. Use the following instead:

  trace_page_ref_set_enabled()
  trace_page_ref_mod_enabled()
  trace_page_ref_mod_and_test_enabled()
  trace_page_ref_mod_and_return_enabled()
  trace_page_ref_mod_unless_enabled()
  trace_page_ref_freeze_enabled()
  trace_page_ref_unfreeze_enabled()

They return true when CONFIG_TRACEPOINTS is configured in and the
tracepoint is enabled, and false otherwise.

-- Steve


> +
> +extern void __page_ref_set(struct page *page, int v);
> +extern void __page_ref_mod(struct page *page, int v);
> +extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
> +extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
> +extern void __page_ref_mod_unless(struct page *page, int v, int u);
> +extern void __page_ref_freeze(struct page *page, int v, int ret);
> +extern void __page_ref_unfreeze(struct page *page, int v);
> +
> +#else
> +
>


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-15 Thread Joonsoo Kim
2016-02-15 14:28 GMT+09:00 Sergey Senozhatsky
:
> On (02/15/16 14:08), Sergey Senozhatsky wrote:
>>
>> will this compile with !CONFIG_TRACEPOINTS config?
>>

Yes, even if !CONFIG_TRACEPOINTS, it is compiled well.

> uh.. sorry, was composed in email client. seems the correct way to do it is
>
> +#if defined CONFIG_DEBUG_PAGE_REF && defined CONFIG_TRACEPOINTS
>
>  #include 
>
>  #define page_ref_tracepoint_active(t) static_key_false(&(t).key)
>
>  extern struct tracepoint __tracepoint_page_ref_set;
>  ...
>
>  extern void __page_ref_set(struct page *page, int v);
>  ...
>
> #else
>
>  #define page_ref_tracepoint_active(t) false
>
>  static inline void __page_ref_set(struct page *page, int v)
>  {
>  }
>  ...
>
> #endif
>
>
>
> or add a dependency of PAGE_REF on CONFIG_TRACEPOINTS in Kconfig.

Thanks for catching it.
I will add "depends on CONFIG_TRACEPOINTS" to Kconfig because
this feature has no meaning if !CONFIG_TRACEPOINTS.


Thanks.


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-14 Thread Sergey Senozhatsky
On (02/15/16 14:08), Sergey Senozhatsky wrote:
> 
> will this compile with !CONFIG_TRACEPOINTS config?
> 

uh.. sorry, was composed in email client. seems the correct way to do it is

+#if defined CONFIG_DEBUG_PAGE_REF && defined CONFIG_TRACEPOINTS

 #include 

 #define page_ref_tracepoint_active(t) static_key_false(&(t).key)

 extern struct tracepoint __tracepoint_page_ref_set;
 ...

 extern void __page_ref_set(struct page *page, int v);
 ...

#else

 #define page_ref_tracepoint_active(t) false

 static inline void __page_ref_set(struct page *page, int v)
 {
 }
 ...

#endif



or add a dependency of PAGE_REF on CONFIG_TRACEPOINTS in Kconfig.

-ss


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-14 Thread Sergey Senozhatsky
Hello Joonsoo,

On (02/15/16 12:04), js1...@gmail.com wrote:
[..]
> <...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
> count=1 mapcount=0 mapping=(nil) mt=4 val=1
> <...>-9018  [004]92.678378: kernel_stack:
>  => get_page_from_freelist (81176659)
>  => __alloc_pages_nodemask (81176d22)
>  => alloc_pages_vma (811bf675)
>  => handle_mm_fault (8119e693)
>  => __do_page_fault (810631ea)
>  => trace_do_page_fault (81063543)
>  => do_async_page_fault (8105c40a)
>  => async_page_fault (817581d8)
> [snip]
> <...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 
> flags=0x40048 count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
> [snip]
[..]
> o Print human-readable page flag through show_page_flags()

not even a nitpick, just for note, the examples don't use show_page_flags().


[..]
> diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
> index 534249c..fd6d9a5 100644
> --- a/include/linux/page_ref.h
> +++ b/include/linux/page_ref.h
> @@ -1,6 +1,54 @@
>  #include 
>  #include 
>  #include 

will this compile with !CONFIG_TRACEPOINTS config?

+#ifdef CONFIG_TRACEPOINTS
 #include 

 extern struct tracepoint __tracepoint_page_ref_set;
 extern struct tracepoint __tracepoint_page_ref_mod;
 extern struct tracepoint __tracepoint_page_ref_mod_and_test;
 extern struct tracepoint __tracepoint_page_ref_mod_and_return;
 extern struct tracepoint __tracepoint_page_ref_mod_unless;
 extern struct tracepoint __tracepoint_page_ref_freeze;
 extern struct tracepoint __tracepoint_page_ref_unfreeze;

 #ifdef CONFIG_DEBUG_PAGE_REF
 #define page_ref_tracepoint_active(t) static_key_false(&(t).key)

 extern void __page_ref_set(struct page *page, int v);
 extern void __page_ref_mod(struct page *page, int v);
 extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
 extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
 extern void __page_ref_mod_unless(struct page *page, int v, int u);
 extern void __page_ref_freeze(struct page *page, int v, int ret);
 extern void __page_ref_unfreeze(struct page *page, int v);

 #else

 #define page_ref_tracepoint_active(t) false

 static inline void __page_ref_set(struct page *page, int v)
 {
 }
 static inline void __page_ref_mod(struct page *page, int v)
 {
 }
 static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
 {
 }
 static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
 {
 }
 static inline void __page_ref_mod_unless(struct page *page, int v, int u)
 {
 }
 static inline void __page_ref_freeze(struct page *page, int v, int ret)
 {
 }
 static inline void __page_ref_unfreeze(struct page *page, int v)
 {
 }

 #endif /* CONFIG_DEBUG_PAGE_REF */

+#endif /* CONFIG_TRACEPOINTS */

-ss


[PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-02-14 Thread js1304
From: Joonsoo Kim 

CMA allocation should be guaranteed to succeed by definition, but,
unfortunately, it would be failed sometimes. It is hard to track down
the problem, because it is related to page reference manipulation and
we don't have any facility to analyze it.

This patch adds tracepoints to track down page reference manipulation.
With it, we can find exact reason of failure and can fix the problem.
Following is an example of tracepoint output.

<...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
count=1 mapcount=0 mapping=(nil) mt=4 val=1
<...>-9018  [004]92.678378: kernel_stack:
 => get_page_from_freelist (81176659)
 => __alloc_pages_nodemask (81176d22)
 => alloc_pages_vma (811bf675)
 => handle_mm_fault (8119e693)
 => __do_page_fault (810631ea)
 => trace_do_page_fault (81063543)
 => do_async_page_fault (8105c40a)
 => async_page_fault (817581d8)
[snip]
<...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 flags=0x40048 
count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
[snip]
...
...
<...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
[snip]
<...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 ret=1
 => release_pages (8117c9e4)
 => free_pages_and_swap_cache (811b0697)
 => tlb_flush_mmu_free (81199616)
 => tlb_finish_mmu (8119a62c)
 => exit_mmap (811a53f7)
 => mmput (81073f47)
 => do_exit (810794e9)
 => do_group_exit (81079def)
 => SyS_exit_group (81079e74)
 => entry_SYSCALL_64_fastpath (817560b6)

This output shows that problem comes from exit path. In exit path,
to improve performance, pages are not freed immediately. They are gathered
and processed by batch. During this process, migration cannot be possible
and CMA allocation is failed. This problem is hard to find without this
page reference tracepoint facility.

Enabling this feature bloat kernel text 30 KB in my configuration.

   textdata bss dec hex filename
121273272243616 1507328 15878271 f2487f vmlinux_disabled
121572082258880 1507328 15923416 f2f8d8 vmlinux_enabled

v2:
o Use static key of each tracepoints to avoid function call overhead
when tracepoints are disabled.
o Print human-readable page flag through show_page_flags()
o Add more description to Kconfig.debug.

Acked-by: Michal Nazarewicz 
Signed-off-by: Joonsoo Kim 
---
 include/linux/page_ref.h|  90 +--
 include/trace/events/page_ref.h | 133 
 mm/Kconfig.debug|  13 
 mm/Makefile |   1 +
 mm/debug_page_ref.c |  53 
 5 files changed, 285 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/events/page_ref.h
 create mode 100644 mm/debug_page_ref.c

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 534249c..fd6d9a5 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -1,6 +1,54 @@
 #include 
 #include 
 #include 
+#include 
+
+extern struct tracepoint __tracepoint_page_ref_set;
+extern struct tracepoint __tracepoint_page_ref_mod;
+extern struct tracepoint __tracepoint_page_ref_mod_and_test;
+extern struct tracepoint __tracepoint_page_ref_mod_and_return;
+extern struct tracepoint __tracepoint_page_ref_mod_unless;
+extern struct tracepoint __tracepoint_page_ref_freeze;
+extern struct tracepoint __tracepoint_page_ref_unfreeze;
+
+#ifdef CONFIG_DEBUG_PAGE_REF
+#define page_ref_tracepoint_active(t) static_key_false(&(t).key)
+
+extern void __page_ref_set(struct page *page, int v);
+extern void __page_ref_mod(struct page *page, int v);
+extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
+extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
+extern void __page_ref_mod_unless(struct page *page, int v, int u);
+extern void __page_ref_freeze(struct page *page, int v, int ret);
+extern void __page_ref_unfreeze(struct page *page, int v);
+
+#else
+
+#define page_ref_tracepoint_active(t) false
+
+static inline void __page_ref_set(struct page *page, int v)
+{
+}
+static inline void __page_ref_mod(struct page *page, int v)
+{
+}
+static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
+{
+}
+static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
+{
+}
+static inline void __page_ref_mod_unless(struct page *page, int v, int u)
+{
+}
+static inline void __page_ref_freeze(struct page *page, int v, int ret)
+{
+}
+static inline void __page_ref_unfreeze(struct page *page, int v)
+{
+}
+
+#endif
 
 static inline int page_count(struct page *page)
 {
@@ -10,6 +58,8 @@ static inline int page_count(struct page *page)
 static inline void set_page_count(

Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-12-09 Thread Joonsoo Kim
On Wed, Dec 09, 2015 at 10:36:48PM -0500, Steven Rostedt wrote:
> On Thu, 10 Dec 2015 11:50:15 +0900
> Joonsoo Kim  wrote:
> 
> > Output of cpu 3, 7 are mixed and it's not easy to analyze it.
> > 
> > I think that it'd be better not to sort stack trace. How do
> > you think about it? Could you fix it, please?
> 
> It may not be that easy to fix because of the sorting algorithm. That
> would require looking going ahead one more event each time and then
> checking if its a stacktrace. I may look at it and see if I can come up
> with something that's not too invasive in the algorithms.

Okay.

> That said, for now you can use the --cpu option. I'm not sure I ever
> documented it as it was originally added for debugging, but I use it
> enough that it may be worth while to officially support it.
> 
>  trace-cmd report --cpu 3
> 
> Will show you just cpu 3 and nothing else. Which is what I use a lot.

Thanks for the input. It works but it's not sufficient to me.
Page reference is manipulated by multiple cpus so it's better to
analyze unified output.

> 
> But doing the stack trace thing may be something to fix as well. I'll
> see what I can do, but no guarantees.

Okay. Don't be hurry. :)
trace-cmd is excellent and works well for me as it is.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-12-09 Thread Steven Rostedt
On Thu, 10 Dec 2015 11:50:15 +0900
Joonsoo Kim  wrote:

> Output of cpu 3, 7 are mixed and it's not easy to analyze it.
> 
> I think that it'd be better not to sort stack trace. How do
> you think about it? Could you fix it, please?

It may not be that easy to fix because of the sorting algorithm. That
would require looking going ahead one more event each time and then
checking if its a stacktrace. I may look at it and see if I can come up
with something that's not too invasive in the algorithms.

That said, for now you can use the --cpu option. I'm not sure I ever
documented it as it was originally added for debugging, but I use it
enough that it may be worth while to officially support it.

 trace-cmd report --cpu 3

Will show you just cpu 3 and nothing else. Which is what I use a lot.

But doing the stack trace thing may be something to fix as well. I'll
see what I can do, but no guarantees.

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-12-09 Thread Joonsoo Kim
On Wed, Dec 09, 2015 at 03:01:54PM -0500, Steven Rostedt wrote:
> On Thu, 3 Dec 2015 13:16:58 +0900
> Joonsoo Kim  wrote:
> 
> > On Tue, Nov 24, 2015 at 10:45:28AM +0900, Joonsoo Kim wrote:
> > > On Mon, Nov 23, 2015 at 09:26:04AM -0500, Steven Rostedt wrote:  
> > > > On Mon, 23 Nov 2015 17:28:05 +0900
> > > > Joonsoo Kim  wrote:
> > > >   
> > > > > On Fri, Nov 20, 2015 at 11:42:25AM -0500, Steven Rostedt wrote:  
> > > > > > On Fri, 20 Nov 2015 15:33:25 +0900
> > > > > > Joonsoo Kim  wrote:
> > > > > > 
> > > > > > 
> > > > > > > Steven, is it possible to add tracepoint to inlined fucntion such 
> > > > > > > as
> > > > > > > get_page() in include/linux/mm.h?
> > > > > > 
> > > > > > I highly recommend against it. The tracepoint code adds a bit of 
> > > > > > bloat,
> > > > > > and if you inline it, you add that bloat to every use case. Also, 
> > > > > > it
> > > > > 
> > > > > Is it worse than adding function call to my own stub function into
> > > > > inlined function such as get_page(). I implemented it as following.
> > > > > 
> > > > > get_page()
> > > > > {
> > > > > atomic_inc()
> > > > > stub_get_page()
> > > > > }
> > > > > 
> > > > > stub_get_page() in foo.c
> > > > > {
> > > > > trace_page_ref_get_page()
> > > > > }  
> > > > 
> > > > Now you just slowed down the fast path. But what you could do is:
> > > > 
> > > > get_page()
> > > > {
> > > > atomic_inc();
> > > > if (trace_page_ref_get_page_enabled())
> > > > stub_get_page();
> > > > }
> > > > 
> > > > Now that "trace_page_ref_get_page_enabled()" will turn into:
> > > > 
> > > > if (static_key_false(&__tracepoint_page_ref_get_page.key)) {
> > > > 
> > > > which is a jump label (nop when disabled, a jmp when enabled). That's
> > > > less bloat but doesn't solve the include problem. You still need to add
> > > > the include of that will cause havoc with other tracepoints.  
> > > 
> > > Yes, It also has a include dependency problem so I can't use
> > > trace_page_ref_get_page_enabled() in mm.h. BTW, I tested following
> > > implementation and it works fine.
> > > 
> > > extern struct tracepoint __tracepoint_page_ref_get_page;
> > > 
> > > get_page()
> > > {
> > > atomic_inc()
> > > if (static_key_false(&__tracepoint_page_ref_get_page.key))
> > > stub_get_page()
> > > }
> > > 
> > > This would not slow down fast path although it can't prevent bloat.
> > > I know that it isn't good code practice, but, this page reference
> > > handling functions have complex include dependency so I'm not sure
> > > I can solve it completely. For this special case, can I use
> > > this raw data structure?
> > >   
> > 
> > Steven, any comment?
> 
> Sorry for the later reply, I was going to reply but then got called off
> to do something else, and then forgot about this message :-/

No problem. :)

> 
> I wanted you to look at what Andi has done here:
> 
> http://lkml.kernel.org/r/1449018060-1742-2-git-send-email-a...@firstfloor.org
> 
>  and here
> 
> http://lkml.kernel.org/r/1449018060-1742-3-git-send-email-a...@firstfloor.org

Wow...They look like what I'm looking for. Nice!
Thanks for the pointer!

I have one more question about trace-cmd.
'trace-cmd report' shows time-sorted output even stack trace. See
following example.

trace-cmd-6338  [003]54.046508: page_ref_mod:...
trace-cmd-6583  [007]54.046509: page_ref_mod:...
trace-cmd-6338  [003]54.046515: kernel_stack: 
=> do_wp_page (811a0c6f)

  
=> handle_mm_fault (811a34e2)
=> __do_page_fault (810632da)
=> trace_do_page_fault (81063633)
=> do_async_page_fault (8105c3ea) 
=> async_page_fault (817733f8)  

 
trace-cmd-6583  [007]54.046515: kernel_stack:  

  
=> do_wp_page (811a0c6f)
=> handle_mm_fault (811a34e2)
...

Output of cpu 3, 7 are mixed and it's not easy to analyze it.

I think that it'd be better not to sort stack trace. How do
you think about it? Could you fix it, please?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-12-09 Thread Steven Rostedt
On Thu, 3 Dec 2015 13:16:58 +0900
Joonsoo Kim  wrote:

> On Tue, Nov 24, 2015 at 10:45:28AM +0900, Joonsoo Kim wrote:
> > On Mon, Nov 23, 2015 at 09:26:04AM -0500, Steven Rostedt wrote:  
> > > On Mon, 23 Nov 2015 17:28:05 +0900
> > > Joonsoo Kim  wrote:
> > >   
> > > > On Fri, Nov 20, 2015 at 11:42:25AM -0500, Steven Rostedt wrote:  
> > > > > On Fri, 20 Nov 2015 15:33:25 +0900
> > > > > Joonsoo Kim  wrote:
> > > > > 
> > > > > 
> > > > > > Steven, is it possible to add tracepoint to inlined fucntion such as
> > > > > > get_page() in include/linux/mm.h?
> > > > > 
> > > > > I highly recommend against it. The tracepoint code adds a bit of 
> > > > > bloat,
> > > > > and if you inline it, you add that bloat to every use case. Also, it  
> > > > >   
> > > > 
> > > > Is it worse than adding function call to my own stub function into
> > > > inlined function such as get_page(). I implemented it as following.
> > > > 
> > > > get_page()
> > > > {
> > > > atomic_inc()
> > > > stub_get_page()
> > > > }
> > > > 
> > > > stub_get_page() in foo.c
> > > > {
> > > > trace_page_ref_get_page()
> > > > }  
> > > 
> > > Now you just slowed down the fast path. But what you could do is:
> > > 
> > > get_page()
> > > {
> > >   atomic_inc();
> > >   if (trace_page_ref_get_page_enabled())
> > >   stub_get_page();
> > > }
> > > 
> > > Now that "trace_page_ref_get_page_enabled()" will turn into:
> > > 
> > >   if (static_key_false(&__tracepoint_page_ref_get_page.key)) {
> > > 
> > > which is a jump label (nop when disabled, a jmp when enabled). That's
> > > less bloat but doesn't solve the include problem. You still need to add
> > > the include of that will cause havoc with other tracepoints.  
> > 
> > Yes, It also has a include dependency problem so I can't use
> > trace_page_ref_get_page_enabled() in mm.h. BTW, I tested following
> > implementation and it works fine.
> > 
> > extern struct tracepoint __tracepoint_page_ref_get_page;
> > 
> > get_page()
> > {
> > atomic_inc()
> > if (static_key_false(&__tracepoint_page_ref_get_page.key))
> > stub_get_page()
> > }
> > 
> > This would not slow down fast path although it can't prevent bloat.
> > I know that it isn't good code practice, but, this page reference
> > handling functions have complex include dependency so I'm not sure
> > I can solve it completely. For this special case, can I use
> > this raw data structure?
> >   
> 
> Steven, any comment?

Sorry for the later reply, I was going to reply but then got called off
to do something else, and then forgot about this message :-/

I wanted you to look at what Andi has done here:

http://lkml.kernel.org/r/1449018060-1742-2-git-send-email-a...@firstfloor.org

 and here

http://lkml.kernel.org/r/1449018060-1742-3-git-send-email-a...@firstfloor.org

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-12-02 Thread Joonsoo Kim
On Tue, Nov 24, 2015 at 10:45:28AM +0900, Joonsoo Kim wrote:
> On Mon, Nov 23, 2015 at 09:26:04AM -0500, Steven Rostedt wrote:
> > On Mon, 23 Nov 2015 17:28:05 +0900
> > Joonsoo Kim  wrote:
> > 
> > > On Fri, Nov 20, 2015 at 11:42:25AM -0500, Steven Rostedt wrote:
> > > > On Fri, 20 Nov 2015 15:33:25 +0900
> > > > Joonsoo Kim  wrote:
> > > > 
> > > >   
> > > > > Steven, is it possible to add tracepoint to inlined fucntion such as
> > > > > get_page() in include/linux/mm.h?  
> > > > 
> > > > I highly recommend against it. The tracepoint code adds a bit of bloat,
> > > > and if you inline it, you add that bloat to every use case. Also, it  
> > > 
> > > Is it worse than adding function call to my own stub function into
> > > inlined function such as get_page(). I implemented it as following.
> > > 
> > > get_page()
> > > {
> > > atomic_inc()
> > > stub_get_page()
> > > }
> > > 
> > > stub_get_page() in foo.c
> > > {
> > > trace_page_ref_get_page()
> > > }
> > 
> > Now you just slowed down the fast path. But what you could do is:
> > 
> > get_page()
> > {
> > atomic_inc();
> > if (trace_page_ref_get_page_enabled())
> > stub_get_page();
> > }
> > 
> > Now that "trace_page_ref_get_page_enabled()" will turn into:
> > 
> > if (static_key_false(&__tracepoint_page_ref_get_page.key)) {
> > 
> > which is a jump label (nop when disabled, a jmp when enabled). That's
> > less bloat but doesn't solve the include problem. You still need to add
> > the include of that will cause havoc with other tracepoints.
> 
> Yes, It also has a include dependency problem so I can't use
> trace_page_ref_get_page_enabled() in mm.h. BTW, I tested following
> implementation and it works fine.
> 
> extern struct tracepoint __tracepoint_page_ref_get_page;
> 
> get_page()
> {
> atomic_inc()
> if (static_key_false(&__tracepoint_page_ref_get_page.key))
> stub_get_page()
> }
> 
> This would not slow down fast path although it can't prevent bloat.
> I know that it isn't good code practice, but, this page reference
> handling functions have complex include dependency so I'm not sure
> I can solve it completely. For this special case, can I use
> this raw data structure?
> 

Steven, any comment?

Thanks.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-23 Thread Joonsoo Kim
On Mon, Nov 23, 2015 at 09:26:04AM -0500, Steven Rostedt wrote:
> On Mon, 23 Nov 2015 17:28:05 +0900
> Joonsoo Kim  wrote:
> 
> > On Fri, Nov 20, 2015 at 11:42:25AM -0500, Steven Rostedt wrote:
> > > On Fri, 20 Nov 2015 15:33:25 +0900
> > > Joonsoo Kim  wrote:
> > > 
> > >   
> > > > Steven, is it possible to add tracepoint to inlined fucntion such as
> > > > get_page() in include/linux/mm.h?  
> > > 
> > > I highly recommend against it. The tracepoint code adds a bit of bloat,
> > > and if you inline it, you add that bloat to every use case. Also, it  
> > 
> > Is it worse than adding function call to my own stub function into
> > inlined function such as get_page(). I implemented it as following.
> > 
> > get_page()
> > {
> > atomic_inc()
> > stub_get_page()
> > }
> > 
> > stub_get_page() in foo.c
> > {
> > trace_page_ref_get_page()
> > }
> 
> Now you just slowed down the fast path. But what you could do is:
> 
> get_page()
> {
>   atomic_inc();
>   if (trace_page_ref_get_page_enabled())
>   stub_get_page();
> }
> 
> Now that "trace_page_ref_get_page_enabled()" will turn into:
> 
>   if (static_key_false(&__tracepoint_page_ref_get_page.key)) {
> 
> which is a jump label (nop when disabled, a jmp when enabled). That's
> less bloat but doesn't solve the include problem. You still need to add
> the include of that will cause havoc with other tracepoints.

Yes, It also has a include dependency problem so I can't use
trace_page_ref_get_page_enabled() in mm.h. BTW, I tested following
implementation and it works fine.

extern struct tracepoint __tracepoint_page_ref_get_page;

get_page()
{
atomic_inc()
if (static_key_false(&__tracepoint_page_ref_get_page.key))
stub_get_page()
}

This would not slow down fast path although it can't prevent bloat.
I know that it isn't good code practice, but, this page reference
handling functions have complex include dependency so I'm not sure
I can solve it completely. For this special case, can I use
this raw data structure?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-23 Thread Joonsoo Kim
On Mon, Nov 23, 2015 at 09:26:04AM -0500, Steven Rostedt wrote:
> On Mon, 23 Nov 2015 17:28:05 +0900
> Joonsoo Kim  wrote:
> 
> > On Fri, Nov 20, 2015 at 11:42:25AM -0500, Steven Rostedt wrote:
> > > On Fri, 20 Nov 2015 15:33:25 +0900
> > > Joonsoo Kim  wrote:
> > > 
> > >   
> > > > Steven, is it possible to add tracepoint to inlined fucntion such as
> > > > get_page() in include/linux/mm.h?  
> > > 
> > > I highly recommend against it. The tracepoint code adds a bit of bloat,
> > > and if you inline it, you add that bloat to every use case. Also, it  
> > 
> > Is it worse than adding function call to my own stub function into
> > inlined function such as get_page(). I implemented it as following.
> > 
> > get_page()
> > {
> > atomic_inc()
> > stub_get_page()
> > }
> > 
> > stub_get_page() in foo.c
> > {
> > trace_page_ref_get_page()
> > }
> 
> Now you just slowed down the fast path. But what you could do is:
> 
> get_page()
> {
>   atomic_inc();
>   if (trace_page_ref_get_page_enabled())
>   stub_get_page();
> }
> 
> Now that "trace_page_ref_get_page_enabled()" will turn into:
> 
>   if (static_key_false(&__tracepoint_page_ref_get_page.key)) {
> 
> which is a jump label (nop when disabled, a jmp when enabled). That's
> less bloat but doesn't solve the include problem. You still need to add
> the include of that will cause havoc with other tracepoints.

Yes, it also has include dependency problem so I can't use
trace_page_ref_get_page_enabled() in mm.h.

BTW, I try to open code trace_page_ref_get_page_enabled() in
get_page() as following and it works fine.

extern struct tracepoint __tracepoint_page_ref_get_page;

get_page()
{
atomic_inc()
if (static_key_false(&__tracepoint_page_ref_get_page.key))
stub_get_page()
}

I know that it's not good coding practice to use raw data structure,
but, page reference management functions has complex dependency
so I'm not sure I can solve it completely. For this special case, can
I use it?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-23 Thread Steven Rostedt
On Mon, 23 Nov 2015 17:28:05 +0900
Joonsoo Kim  wrote:

> On Fri, Nov 20, 2015 at 11:42:25AM -0500, Steven Rostedt wrote:
> > On Fri, 20 Nov 2015 15:33:25 +0900
> > Joonsoo Kim  wrote:
> > 
> >   
> > > Steven, is it possible to add tracepoint to inlined fucntion such as
> > > get_page() in include/linux/mm.h?  
> > 
> > I highly recommend against it. The tracepoint code adds a bit of bloat,
> > and if you inline it, you add that bloat to every use case. Also, it  
> 
> Is it worse than adding function call to my own stub function into
> inlined function such as get_page(). I implemented it as following.
> 
> get_page()
> {
> atomic_inc()
> stub_get_page()
> }
> 
> stub_get_page() in foo.c
> {
> trace_page_ref_get_page()
> }

Now you just slowed down the fast path. But what you could do is:

get_page()
{
atomic_inc();
if (trace_page_ref_get_page_enabled())
stub_get_page();
}

Now that "trace_page_ref_get_page_enabled()" will turn into:

if (static_key_false(&__tracepoint_page_ref_get_page.key)) {

which is a jump label (nop when disabled, a jmp when enabled). That's
less bloat but doesn't solve the include problem. You still need to add
the include of that will cause havoc with other tracepoints.

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-23 Thread Joonsoo Kim
On Fri, Nov 20, 2015 at 11:42:25AM -0500, Steven Rostedt wrote:
> On Fri, 20 Nov 2015 15:33:25 +0900
> Joonsoo Kim  wrote:
> 
> 
> > Steven, is it possible to add tracepoint to inlined fucntion such as
> > get_page() in include/linux/mm.h?
> 
> I highly recommend against it. The tracepoint code adds a bit of bloat,
> and if you inline it, you add that bloat to every use case. Also, it

Is it worse than adding function call to my own stub function into
inlined function such as get_page(). I implemented it as following.

get_page()
{
atomic_inc()
stub_get_page()
}

stub_get_page() in foo.c
{
trace_page_ref_get_page()
}

> makes things difficult if this file is included in other files that
> create tracepoints, which I could easily imagine would be the case.
> That is, if a tracepoint file in include/trace/events/foo.h needs to
> include include/linux/mm.h, when you do CREATE_TRACEPOINTS for foo.h,
> it will create tracepoints for mm.h as to use tracepoints there you
> would need to include the include/trace/events/mm.h (or whatever its
> name is), and that has caused issues in the past.
> 
> Now, if you still want to have these tracepoints in the inlined
> function, it would be best to add a new file mm_trace.h? or something
> that would include it, and then have only the .c files include that
> directly. Do not put it into mm.h as that would definitely cause
> tracepoint include troubles.

Okay. If I choose this way, I have to change too many places and churn
the code. If bloat of my implementation is similar with this suggestion,
I prefer my implementation.

Thanks for good advice.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-20 Thread Steven Rostedt
On Fri, 20 Nov 2015 15:33:25 +0900
Joonsoo Kim  wrote:


> Steven, is it possible to add tracepoint to inlined fucntion such as
> get_page() in include/linux/mm.h?

I highly recommend against it. The tracepoint code adds a bit of bloat,
and if you inline it, you add that bloat to every use case. Also, it
makes things difficult if this file is included in other files that
create tracepoints, which I could easily imagine would be the case.
That is, if a tracepoint file in include/trace/events/foo.h needs to
include include/linux/mm.h, when you do CREATE_TRACEPOINTS for foo.h,
it will create tracepoints for mm.h as to use tracepoints there you
would need to include the include/trace/events/mm.h (or whatever its
name is), and that has caused issues in the past.

Now, if you still want to have these tracepoints in the inlined
function, it would be best to add a new file mm_trace.h? or something
that would include it, and then have only the .c files include that
directly. Do not put it into mm.h as that would definitely cause
tracepoint include troubles.

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-19 Thread Joonsoo Kim
Ccing Steven.

Hello,

On Wed, Nov 18, 2015 at 04:34:30PM +0100, Vlastimil Babka wrote:
> On 11/09/2015 08:23 AM, Joonsoo Kim wrote:
> > CMA allocation should be guaranteed to succeed by definition, but,
> > unfortunately, it would be failed sometimes. It is hard to track down
> > the problem, because it is related to page reference manipulation and
> > we don't have any facility to analyze it.
> 
> Reminds me of the PeterZ's VM_PINNED patchset. What happened to it?
> https://lwn.net/Articles/600502/
> 
> > This patch adds tracepoints to track down page reference manipulation.
> > With it, we can find exact reason of failure and can fix the problem.
> > Following is an example of tracepoint output.
> > 
> > <...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
> > count=1 mapcount=0 mapping=(nil) mt=4 val=1
> > <...>-9018  [004]92.678378: kernel_stack:
> >  => get_page_from_freelist (81176659)
> >  => __alloc_pages_nodemask (81176d22)
> >  => alloc_pages_vma (811bf675)
> >  => handle_mm_fault (8119e693)
> >  => __do_page_fault (810631ea)
> >  => trace_do_page_fault (81063543)
> >  => do_async_page_fault (8105c40a)
> >  => async_page_fault (817581d8)
> > [snip]
> > <...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 
> > flags=0x40048 count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
> > [snip]
> > ...
> > ...
> > <...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
> > end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
> > [snip]
> > <...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
> > flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 
> > ret=1
> >  => release_pages (8117c9e4)
> >  => free_pages_and_swap_cache (811b0697)
> >  => tlb_flush_mmu_free (81199616)
> >  => tlb_finish_mmu (8119a62c)
> >  => exit_mmap (811a53f7)
> >  => mmput (81073f47)
> >  => do_exit (810794e9)
> >  => do_group_exit (81079def)
> >  => SyS_exit_group (81079e74)
> >  => entry_SYSCALL_64_fastpath (817560b6)
> > 
> > This output shows that problem comes from exit path. In exit path,
> > to improve performance, pages are not freed immediately. They are gathered
> > and processed by batch. During this process, migration cannot be possible
> > and CMA allocation is failed. This problem is hard to find without this
> > page reference tracepoint facility.
> 
> Yeah but when you realized it was this problem, what was the fix? Probably not
> remove batching from exit path? Shouldn't CMA in this case just try waiting 
> for
> the pins to go away, which would eventually happen? And for long-term pins,
> VM_PINNED would make sure the pages are migrated away from CMA pageblocks 
> first?
> 
> So I'm worried that this is quite nontrivial change for a very specific 
> usecase.

Minchan already answered it. Thanks, Minchan.
This also can be used for memory-offlining and compaction.

> > Enabling this feature bloat kernel text 20 KB in my configuration.
> 
> It's not just that, see below.
> 
> [...]
> 
> 
> >  static inline int page_ref_freeze(struct page *page, int count)
> >  {
> > -   return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
> > +   int ret = likely(atomic_cmpxchg(&page->_count, count, 0) == count);
> 
> The "likely" mean makes no sense anymore, doe it?

I don't know how compiler uses this hint exactly but it will have same
effect as before. Why do you think it makes no sense now?

> 
> > diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
> > index 957d3da..71d2399 100644
> > --- a/mm/Kconfig.debug
> > +++ b/mm/Kconfig.debug
> > @@ -28,3 +28,7 @@ config DEBUG_PAGEALLOC
> >  
> >  config PAGE_POISONING
> > bool
> > +
> > +config DEBUG_PAGE_REF
> > +   bool "Enable tracepoint to track down page reference manipulation"
> 
> So you should probably state the costs. Which is the extra memory, and also 
> that
> all the page ref manipulations are now turned to function calls, even if the
> tracepoints are disabled.

Yes, will do.

> Patch 1 didn't change that many callsites, so maybe it
> would be feasible to have the tracepoints inline, where being disabled has
> near-zero overhead?

Hmm...Although I changed just one get_page() in previous patch, it would
be used in many places. So, it's not feasible to inline tracepoints to
each callsites directly.

In fact, I tried to inline tracepoint into wrapper function directly,
but, it fails due to (maybe) header file dependency.

Steven, is it possible to add tracepoint to inlined fucntion such as
get_page() in include/linux/mm.h?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-18 Thread Minchan Kim
On Wed, Nov 18, 2015 at 04:34:30PM +0100, Vlastimil Babka wrote:
> On 11/09/2015 08:23 AM, Joonsoo Kim wrote:
> > CMA allocation should be guaranteed to succeed by definition, but,
> > unfortunately, it would be failed sometimes. It is hard to track down
> > the problem, because it is related to page reference manipulation and
> > we don't have any facility to analyze it.
> 
> Reminds me of the PeterZ's VM_PINNED patchset. What happened to it?
> https://lwn.net/Articles/600502/
> 
> > This patch adds tracepoints to track down page reference manipulation.
> > With it, we can find exact reason of failure and can fix the problem.
> > Following is an example of tracepoint output.
> > 
> > <...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
> > count=1 mapcount=0 mapping=(nil) mt=4 val=1
> > <...>-9018  [004]92.678378: kernel_stack:
> >  => get_page_from_freelist (81176659)
> >  => __alloc_pages_nodemask (81176d22)
> >  => alloc_pages_vma (811bf675)
> >  => handle_mm_fault (8119e693)
> >  => __do_page_fault (810631ea)
> >  => trace_do_page_fault (81063543)
> >  => do_async_page_fault (8105c40a)
> >  => async_page_fault (817581d8)
> > [snip]
> > <...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 
> > flags=0x40048 count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
> > [snip]
> > ...
> > ...
> > <...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
> > end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
> > [snip]
> > <...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
> > flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 
> > ret=1
> >  => release_pages (8117c9e4)
> >  => free_pages_and_swap_cache (811b0697)
> >  => tlb_flush_mmu_free (81199616)
> >  => tlb_finish_mmu (8119a62c)
> >  => exit_mmap (811a53f7)
> >  => mmput (81073f47)
> >  => do_exit (810794e9)
> >  => do_group_exit (81079def)
> >  => SyS_exit_group (81079e74)
> >  => entry_SYSCALL_64_fastpath (817560b6)
> > 
> > This output shows that problem comes from exit path. In exit path,
> > to improve performance, pages are not freed immediately. They are gathered
> > and processed by batch. During this process, migration cannot be possible
> > and CMA allocation is failed. This problem is hard to find without this
> > page reference tracepoint facility.
> 
> Yeah but when you realized it was this problem, what was the fix? Probably not
> remove batching from exit path? Shouldn't CMA in this case just try waiting 
> for
> the pins to go away, which would eventually happen? And for long-term pins,
> VM_PINNED would make sure the pages are migrated away from CMA pageblocks 
> first?
> 
> So I'm worried that this is quite nontrivial change for a very specific 
> usecase.

This patch is not to solve the problem but just to expose what is culprit.

For using VM_PINNED, firstly, we should know where are long-term pins.
There are obviously clear places which will be first target if we use
VM_PINNED but somewhere are vague. For vague places, this patch will
help finding there. Even we don't use VM_PINNED, this patch will expose
current obstacle which can help to understand current problems.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-18 Thread Vlastimil Babka
On 11/09/2015 08:23 AM, Joonsoo Kim wrote:
> CMA allocation should be guaranteed to succeed by definition, but,
> unfortunately, it would be failed sometimes. It is hard to track down
> the problem, because it is related to page reference manipulation and
> we don't have any facility to analyze it.

Reminds me of the PeterZ's VM_PINNED patchset. What happened to it?
https://lwn.net/Articles/600502/

> This patch adds tracepoints to track down page reference manipulation.
> With it, we can find exact reason of failure and can fix the problem.
> Following is an example of tracepoint output.
> 
> <...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
> count=1 mapcount=0 mapping=(nil) mt=4 val=1
> <...>-9018  [004]92.678378: kernel_stack:
>  => get_page_from_freelist (81176659)
>  => __alloc_pages_nodemask (81176d22)
>  => alloc_pages_vma (811bf675)
>  => handle_mm_fault (8119e693)
>  => __do_page_fault (810631ea)
>  => trace_do_page_fault (81063543)
>  => do_async_page_fault (8105c40a)
>  => async_page_fault (817581d8)
> [snip]
> <...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 
> flags=0x40048 count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
> [snip]
> ...
> ...
> <...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
> end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
> [snip]
> <...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
> flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 ret=1
>  => release_pages (8117c9e4)
>  => free_pages_and_swap_cache (811b0697)
>  => tlb_flush_mmu_free (81199616)
>  => tlb_finish_mmu (8119a62c)
>  => exit_mmap (811a53f7)
>  => mmput (81073f47)
>  => do_exit (810794e9)
>  => do_group_exit (81079def)
>  => SyS_exit_group (81079e74)
>  => entry_SYSCALL_64_fastpath (817560b6)
> 
> This output shows that problem comes from exit path. In exit path,
> to improve performance, pages are not freed immediately. They are gathered
> and processed by batch. During this process, migration cannot be possible
> and CMA allocation is failed. This problem is hard to find without this
> page reference tracepoint facility.

Yeah but when you realized it was this problem, what was the fix? Probably not
remove batching from exit path? Shouldn't CMA in this case just try waiting for
the pins to go away, which would eventually happen? And for long-term pins,
VM_PINNED would make sure the pages are migrated away from CMA pageblocks first?

So I'm worried that this is quite nontrivial change for a very specific usecase.

> Enabling this feature bloat kernel text 20 KB in my configuration.

It's not just that, see below.

[...]


>  static inline int page_ref_freeze(struct page *page, int count)
>  {
> - return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
> + int ret = likely(atomic_cmpxchg(&page->_count, count, 0) == count);

The "likely" mean makes no sense anymore, doe it?

> diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
> index 957d3da..71d2399 100644
> --- a/mm/Kconfig.debug
> +++ b/mm/Kconfig.debug
> @@ -28,3 +28,7 @@ config DEBUG_PAGEALLOC
>  
>  config PAGE_POISONING
>   bool
> +
> +config DEBUG_PAGE_REF
> + bool "Enable tracepoint to track down page reference manipulation"

So you should probably state the costs. Which is the extra memory, and also that
all the page ref manipulations are now turned to function calls, even if the
tracepoints are disabled. Patch 1 didn't change that many callsites, so maybe it
would be feasible to have the tracepoints inline, where being disabled has
near-zero overhead?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-10 Thread Michal Nazarewicz
On Mon, Nov 09 2015, Joonsoo Kim wrote:
> CMA allocation should be guaranteed to succeed by definition, 

Uh?  That’s a peculiar statement.  Which is to say that it’s not true.

> but,
> unfortunately, it would be failed sometimes. It is hard to track down
> the problem, because it is related to page reference manipulation and
> we don't have any facility to analyze it.
>
> This patch adds tracepoints to track down page reference manipulation.
> With it, we can find exact reason of failure and can fix the problem.
> Following is an example of tracepoint output.
>
> <...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
> count=1 mapcount=0 mapping=(nil) mt=4 val=1
> <...>-9018  [004]92.678378: kernel_stack:
>  => get_page_from_freelist (81176659)
>  => __alloc_pages_nodemask (81176d22)
>  => alloc_pages_vma (811bf675)
>  => handle_mm_fault (8119e693)
>  => __do_page_fault (810631ea)
>  => trace_do_page_fault (81063543)
>  => do_async_page_fault (8105c40a)
>  => async_page_fault (817581d8)
> [snip]
> <...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 
> flags=0x40048 count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
> [snip]
> ...
> ...
> <...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
> end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
> [snip]
> <...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
> flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 ret=1
>  => release_pages (8117c9e4)
>  => free_pages_and_swap_cache (811b0697)
>  => tlb_flush_mmu_free (81199616)
>  => tlb_finish_mmu (8119a62c)
>  => exit_mmap (811a53f7)
>  => mmput (81073f47)
>  => do_exit (810794e9)
>  => do_group_exit (81079def)
>  => SyS_exit_group (81079e74)
>  => entry_SYSCALL_64_fastpath (817560b6)
>
> This output shows that problem comes from exit path. In exit path,
> to improve performance, pages are not freed immediately. They are gathered
> and processed by batch. During this process, migration cannot be possible
> and CMA allocation is failed. This problem is hard to find without this
> page reference tracepoint facility.
>
> Enabling this feature bloat kernel text 20 KB in my configuration.
>
>textdata bss dec hex filename
> 120412722223424 1507328 15772024 f0a978 vmlinux_disabled
> 120648442225920 1507328 15798092 f10f4c vmlinux_enabled
>
> Signed-off-by: Joonsoo Kim 

Acked-by: Michal Nazarewicz 

> ---
>  include/trace/events/page_ref.h | 128 
> 

I haven’t really looked at the above file though.

-- 
Best regards,_ _
.o. | Liege of Serenely Enlightened Majesty of o' \,=./ `o
..o | Computer Science,  ミハウ “mina86” ナザレヴイツ  (o o)
ooo +-ooO--(_)--Ooo--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2015-11-08 Thread Joonsoo Kim
CMA allocation should be guaranteed to succeed by definition, but,
unfortunately, it would be failed sometimes. It is hard to track down
the problem, because it is related to page reference manipulation and
we don't have any facility to analyze it.

This patch adds tracepoints to track down page reference manipulation.
With it, we can find exact reason of failure and can fix the problem.
Following is an example of tracepoint output.

<...>-9018  [004]92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 
count=1 mapcount=0 mapping=(nil) mt=4 val=1
<...>-9018  [004]92.678378: kernel_stack:
 => get_page_from_freelist (81176659)
 => __alloc_pages_nodemask (81176d22)
 => alloc_pages_vma (811bf675)
 => handle_mm_fault (8119e693)
 => __do_page_fault (810631ea)
 => trace_do_page_fault (81063543)
 => do_async_page_fault (8105c40a)
 => async_page_fault (817581d8)
[snip]
<...>-9018  [004]92.678379: page_ref_mod: pfn=0x17ac9 flags=0x40048 
count=2 mapcount=1 mapping=0x880015a78dc1 mt=4 val=1
[snip]
...
...
<...>-9131  [001]93.174468: test_pages_isolated:  start_pfn=0x17800 
end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
[snip]
<...>-9018  [004]93.174843: page_ref_mod_and_test: pfn=0x17ac9 
flags=0x40068 count=0 mapcount=0 mapping=0x880015a78dc1 mt=4 val=-1 ret=1
 => release_pages (8117c9e4)
 => free_pages_and_swap_cache (811b0697)
 => tlb_flush_mmu_free (81199616)
 => tlb_finish_mmu (8119a62c)
 => exit_mmap (811a53f7)
 => mmput (81073f47)
 => do_exit (810794e9)
 => do_group_exit (81079def)
 => SyS_exit_group (81079e74)
 => entry_SYSCALL_64_fastpath (817560b6)

This output shows that problem comes from exit path. In exit path,
to improve performance, pages are not freed immediately. They are gathered
and processed by batch. During this process, migration cannot be possible
and CMA allocation is failed. This problem is hard to find without this
page reference tracepoint facility.

Enabling this feature bloat kernel text 20 KB in my configuration.

   textdata bss dec hex filename
120412722223424 1507328 15772024 f0a978 vmlinux_disabled
120648442225920 1507328 15798092 f10f4c vmlinux_enabled

Signed-off-by: Joonsoo Kim 
---
 include/linux/page_ref.h|  67 +++--
 include/trace/events/page_ref.h | 128 
 mm/Kconfig.debug|   4 ++
 mm/Makefile |   1 +
 mm/debug_page_ref.c |  46 +++
 5 files changed, 241 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/events/page_ref.h
 create mode 100644 mm/debug_page_ref.c

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 534249c..de81073 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -2,6 +2,42 @@
 #include 
 #include 
 
+#ifdef CONFIG_DEBUG_PAGE_REF
+extern void __page_ref_set(struct page *page, int v);
+extern void __page_ref_mod(struct page *page, int v);
+extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
+extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
+extern void __page_ref_mod_unless(struct page *page, int v, int u);
+extern void __page_ref_freeze(struct page *page, int v, int ret);
+extern void __page_ref_unfreeze(struct page *page, int v);
+
+#else
+
+
+static inline void __page_ref_set(struct page *page, int v)
+{
+}
+static inline void __page_ref_mod(struct page *page, int v)
+{
+}
+static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
+{
+}
+static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
+{
+}
+static inline void __page_ref_mod_unless(struct page *page, int v, int u)
+{
+}
+static inline void __page_ref_freeze(struct page *page, int v, int ret)
+{
+}
+static inline void __page_ref_unfreeze(struct page *page, int v)
+{
+}
+
+#endif
+
 static inline int page_count(struct page *page)
 {
return atomic_read(&compound_head(page)->_count);
@@ -10,6 +46,7 @@ static inline int page_count(struct page *page)
 static inline void set_page_count(struct page *page, int v)
 {
atomic_set(&page->_count, v);
+   __page_ref_set(page, v);
 }
 
 /*
@@ -24,46 +61,65 @@ static inline void init_page_count(struct page *page)
 static inline void page_ref_add(struct page *page, int nr)
 {
atomic_add(nr, &page->_count);
+   __page_ref_mod(page, nr);
 }
 
 static inline void page_ref_sub(struct page *page, int nr)
 {
atomic_sub(nr, &page->_count);
+   __page_ref_mod(page, -nr);
 }
 
 static inline void page_ref_inc(struct page *page)
 {
atomic_inc(&page->_count);
+   __page_ref_mod(page, 1);
 }
 
 static inline void page_ref_dec(struct page *page)
 {
atomic_dec(&page->_count);
+   __page_ref_mod(page, -1);
 }
 
 static inline int page_r