Re: [PATCH v9 2/7] perf/x86/intel: Record branch type
On Mon, Jul 17, 2017 at 04:28:38PM +0800, Jin, Yao wrote: > I'm also waiting for Peter's review comments for this patch update. Aside from Jiri's comment (which I agree with) Acked-by: Peter Zijlstra (Intel) for the first two patches.
Re: [PATCH v9 2/7] perf/x86/intel: Record branch type
On 7/17/2017 4:11 PM, Jiri Olsa wrote: On Mon, Jul 17, 2017 at 07:06:38PM +0800, Jin Yao wrote: SNIP +#define X86_BR_TYPE_MAP_MAX16 + +static int +common_branch_type(int type) +{ + int i; + const int branch_map[X86_BR_TYPE_MAP_MAX] = { + PERF_BR_CALL, /* X86_BR_CALL */ + PERF_BR_RET,/* X86_BR_RET */ + PERF_BR_SYSCALL,/* X86_BR_SYSCALL */ + PERF_BR_SYSRET, /* X86_BR_SYSRET */ + PERF_BR_UNKNOWN,/* X86_BR_INT */ + PERF_BR_UNKNOWN,/* X86_BR_IRET */ + PERF_BR_COND, /* X86_BR_JCC */ + PERF_BR_UNCOND, /* X86_BR_JMP */ + PERF_BR_UNKNOWN,/* X86_BR_IRQ */ + PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ + PERF_BR_UNKNOWN,/* X86_BR_ABORT */ + PERF_BR_UNKNOWN,/* X86_BR_IN_TX */ + PERF_BR_UNKNOWN,/* X86_BR_NO_TX */ + PERF_BR_CALL, /* X86_BR_ZERO_CALL */ + PERF_BR_UNKNOWN,/* X86_BR_CALL_STACK */ + PERF_BR_IND,/* X86_BR_IND_JMP */ + }; should the branch_map array be static? having it on stack makes the compiler to create it every time we call the function jirka OK, agree to let branch_map array be static. I will add this in v10. I'm also waiting for Peter's review comments for this patch update. Thanks Jin Yao + + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */ + + if (type) { + i = __ffs(type); + if (i < X86_BR_TYPE_MAP_MAX) + return branch_map[i]; + } + + return PERF_BR_UNKNOWN; +} + /* * implement actual branch filter based on user demand. * Hardware may not exactly satisfy that request, thus @@ -942,7 +987,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) bool compress = false; /* if sampling all branches, then nothing to filter */ - if ((br_sel & X86_BR_ALL) == X86_BR_ALL) + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) return; for (i = 0; i < cpuc->lbr_stack.nr; i++) { @@ -963,6 +1009,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) cpuc->lbr_entries[i].from = 0; compress = true; } + + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) + cpuc->lbr_entries[i].type = common_branch_type(type); } if (!compress) -- 2.7.4
Re: [PATCH v9 2/7] perf/x86/intel: Record branch type
On Mon, Jul 17, 2017 at 07:06:38PM +0800, Jin Yao wrote: SNIP > +#define X86_BR_TYPE_MAP_MAX 16 > + > +static int > +common_branch_type(int type) > +{ > + int i; > + const int branch_map[X86_BR_TYPE_MAP_MAX] = { > + PERF_BR_CALL, /* X86_BR_CALL */ > + PERF_BR_RET,/* X86_BR_RET */ > + PERF_BR_SYSCALL,/* X86_BR_SYSCALL */ > + PERF_BR_SYSRET, /* X86_BR_SYSRET */ > + PERF_BR_UNKNOWN,/* X86_BR_INT */ > + PERF_BR_UNKNOWN,/* X86_BR_IRET */ > + PERF_BR_COND, /* X86_BR_JCC */ > + PERF_BR_UNCOND, /* X86_BR_JMP */ > + PERF_BR_UNKNOWN,/* X86_BR_IRQ */ > + PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ > + PERF_BR_UNKNOWN,/* X86_BR_ABORT */ > + PERF_BR_UNKNOWN,/* X86_BR_IN_TX */ > + PERF_BR_UNKNOWN,/* X86_BR_NO_TX */ > + PERF_BR_CALL, /* X86_BR_ZERO_CALL */ > + PERF_BR_UNKNOWN,/* X86_BR_CALL_STACK */ > + PERF_BR_IND,/* X86_BR_IND_JMP */ > + }; should the branch_map array be static? having it on stack makes the compiler to create it every time we call the function jirka > + > + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */ > + > + if (type) { > + i = __ffs(type); > + if (i < X86_BR_TYPE_MAP_MAX) > + return branch_map[i]; > + } > + > + return PERF_BR_UNKNOWN; > +} > + > /* > * implement actual branch filter based on user demand. > * Hardware may not exactly satisfy that request, thus > @@ -942,7 +987,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) > bool compress = false; > > /* if sampling all branches, then nothing to filter */ > - if ((br_sel & X86_BR_ALL) == X86_BR_ALL) > + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && > + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) > return; > > for (i = 0; i < cpuc->lbr_stack.nr; i++) { > @@ -963,6 +1009,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) > cpuc->lbr_entries[i].from = 0; > compress = true; > } > + > + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) > + cpuc->lbr_entries[i].type = common_branch_type(type); > } > > if (!compress) > -- > 2.7.4 >