Em Wed, Jun 19, 2019 at 09:44:28AM +0300, Adrian Hunter escreveu:
> Commit f08046cb3082 ("perf thread-stack: Represent jmps to the start of a
> different symbol") had the side-effect of introducing more stack entries
> before return from kernel space. When user space is also traced, those
> entries are popped before entry to user space, but when user space is not
> traced, they get stuck at the bottom of the stack, making the stack grow
> progressively larger. Fix by detecting a return-from-kernel branch type,
> and popping kernel addresses from the stack then.
> 
> Note, the problem and fix affect the exported Call Graph / Tree but not
> the callindent option used by "perf script --call-trace".
> 
> Example:
> 
>   perf-with-kcore record example -e intel_pt//k -- ls
>   perf-with-kcore script --itrace=bep -s 
> ~/libexec/perf-core/scripts/python/export-to-sqlite.py example.db branches 
> calls
>   ~/libexec/perf-core/scripts/python/exported-sql-viewer.py example.db

Trying with you above instructions I hit some snags, 'perf-with-kcore'
expects the first arg to be the directory where it'll find the perf.data
file, etc, right? I.e. to work I had to do:

[root@quaco tmp]# rm -rf bep
[root@quaco tmp]# rm -rf example.db 
[root@quaco tmp]# perf-with-kcore record bep -e intel_pt//k -- ls
Recording
Using /root/bin/perf
perf version 5.2.rc4.gd1d5628fa057
/root/bin/perf record -o bep/perf.data -e intel_pt//k -- ls
<SNIP>
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.123 MB bep/perf.data ]
Copying kcore
Done
[root@quaco tmp]# perf-with-kcore script bep --itrace=bep -s 
~acme/libexec/perf-core/scripts/python/export-to-sqlite.py example.db branches 
calls
Using /root/bin/perf
perf version 5.2.rc4.gd1d5628fa057
/root/bin/perf script -i bep/perf.data --kallsyms=bep/kcore_dir/kallsyms 
--itrace=bep -s /home/acme/libexec/perf-core/scripts/python/export-to-sqlite.py 
example.db branches calls
2019-06-24 15:29:54.910209 Creating database ...
2019-06-24 15:29:54.913351 Writing records...
2019-06-24 15:29:58.009226 Adding indexes
2019-06-24 15:29:58.024644 Done
[root@quaco tmp]#

I.e. add 'bep' after 'script' and before '--itrace-bep'.

I'm fixing this up in the comment, please check.

With that in place I can reproduce your results below.


Thanks,

- Arnaldo
 
>   Menu option: Reports -> Context-Sensitive Call Graph
> 
>   Before: (showing Call Path column only)
> 
>     Call Path
>     ▶ perf
>     ▼ ls
>       ▼ 12111:12111
>         ▶ setup_new_exec
>         ▶ __task_pid_nr_ns
>         ▶ perf_event_pid_type
>         ▶ perf_event_comm_output
>         ▶ perf_iterate_ctx
>         ▶ perf_iterate_sb
>         ▶ perf_event_comm
>         ▶ __set_task_comm
>         ▶ load_elf_binary
>         ▶ search_binary_handler
>         ▶ __do_execve_file.isra.41
>         ▶ __x64_sys_execve
>         ▶ do_syscall_64
>         ▼ entry_SYSCALL_64_after_hwframe
>           ▼ swapgs_restore_regs_and_return_to_usermode
>             ▼ native_iret
>               ▶ error_entry
>               ▶ do_page_fault
>               ▼ error_exit
>                 ▼ retint_user
>                   ▶ prepare_exit_to_usermode
>                   ▼ native_iret
>                     ▶ error_entry
>                     ▶ do_page_fault
>                     ▼ error_exit
>                       ▼ retint_user
>                         ▶ prepare_exit_to_usermode
>                         ▼ native_iret
>                           ▶ error_entry
>                           ▶ do_page_fault
>                           ▼ error_exit
>                             ▼ retint_user
>                               ▶ prepare_exit_to_usermode
>                               ▶ native_iret

T

 
>   After: (showing Call Path column only)
> 
>     Call Path
>     ▶ perf
>     ▼ ls
>       ▼ 12111:12111
>         ▶ setup_new_exec
>         ▶ __task_pid_nr_ns
>         ▶ perf_event_pid_type
>         ▶ perf_event_comm_output
>         ▶ perf_iterate_ctx
>         ▶ perf_iterate_sb
>         ▶ perf_event_comm
>         ▶ __set_task_comm
>         ▶ load_elf_binary
>         ▶ search_binary_handler
>         ▶ __do_execve_file.isra.41
>         ▶ __x64_sys_execve
>         ▶ do_syscall_64
>         ▶ entry_SYSCALL_64_after_hwframe
>         ▶ page_fault
>         ▼ entry_SYSCALL_64
>           ▼ do_syscall_64
>             ▶ __x64_sys_brk
>             ▶ __x64_sys_access
>             ▶ __x64_sys_openat
>             ▶ __x64_sys_newfstat
>             ▶ __x64_sys_mmap
>             ▶ __x64_sys_close
>             ▶ __x64_sys_read
>             ▶ __x64_sys_mprotect
>             ▶ __x64_sys_arch_prctl
>             ▶ __x64_sys_munmap
>             ▶ exit_to_usermode_loop
>             ▶ __x64_sys_set_tid_address
>             ▶ __x64_sys_set_robust_list
>             ▶ __x64_sys_rt_sigaction
>             ▶ __x64_sys_rt_sigprocmask
>             ▶ __x64_sys_prlimit64
>             ▶ __x64_sys_statfs
>             ▶ __x64_sys_ioctl
>             ▶ __x64_sys_getdents64
>             ▶ __x64_sys_write
>             ▶ __x64_sys_exit_group
> 
> Signed-off-by: Adrian Hunter <[email protected]>
> Fixes: f08046cb3082 ("perf thread-stack: Represent jmps to the start of a 
> different symbol")
> Cc: [email protected]
> ---
>  tools/perf/util/thread-stack.c | 30 +++++++++++++++++++++++++++++-
>  1 file changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
> index 8e390f78486f..f91c00dfe23b 100644
> --- a/tools/perf/util/thread-stack.c
> +++ b/tools/perf/util/thread-stack.c
> @@ -637,6 +637,23 @@ static int thread_stack__bottom(struct thread_stack *ts,
>                                    true, false);
>  }
>  
> +static int thread_stack__pop_ks(struct thread *thread, struct thread_stack 
> *ts,
> +                             struct perf_sample *sample, u64 ref)
> +{
> +     u64 tm = sample->time;
> +     int err;
> +
> +     /* Return to userspace, so pop all kernel addresses */
> +     while (thread_stack__in_kernel(ts)) {
> +             err = thread_stack__call_return(thread, ts, --ts->cnt,
> +                                             tm, ref, true);
> +             if (err)
> +                     return err;
> +     }
> +
> +     return 0;
> +}
> +
>  static int thread_stack__no_call_return(struct thread *thread,
>                                       struct thread_stack *ts,
>                                       struct perf_sample *sample,
> @@ -919,7 +936,18 @@ int thread_stack__process(struct thread *thread, struct 
> comm *comm,
>                       ts->rstate = X86_RETPOLINE_DETECTED;
>  
>       } else if (sample->flags & PERF_IP_FLAG_RETURN) {
> -             if (!sample->ip || !sample->addr)
> +             if (!sample->addr) {
> +                     u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
> +                                              PERF_IP_FLAG_INTERRUPT;
> +
> +                     if (!(sample->flags & return_from_kernel))
> +                             return 0;
> +
> +                     /* Pop kernel stack */
> +                     return thread_stack__pop_ks(thread, ts, sample, ref);
> +             }
> +
> +             if (!sample->ip)
>                       return 0;
>  
>               /* x86 retpoline 'return' doesn't match the stack */
> -- 
> 2.17.1

-- 

- Arnaldo

Reply via email to