On Mon, Jul 17, 2017 at 07:06:38PM +0800, Jin Yao wrote:

SNIP

> +#define X86_BR_TYPE_MAP_MAX  16
> +
> +static int
> +common_branch_type(int type)
> +{
> +     int i;
> +     const int branch_map[X86_BR_TYPE_MAP_MAX] = {
> +             PERF_BR_CALL,           /* X86_BR_CALL */
> +             PERF_BR_RET,            /* X86_BR_RET */
> +             PERF_BR_SYSCALL,        /* X86_BR_SYSCALL */
> +             PERF_BR_SYSRET,         /* X86_BR_SYSRET */
> +             PERF_BR_UNKNOWN,        /* X86_BR_INT */
> +             PERF_BR_UNKNOWN,        /* X86_BR_IRET */
> +             PERF_BR_COND,           /* X86_BR_JCC */
> +             PERF_BR_UNCOND,         /* X86_BR_JMP */
> +             PERF_BR_UNKNOWN,        /* X86_BR_IRQ */
> +             PERF_BR_IND_CALL,       /* X86_BR_IND_CALL */
> +             PERF_BR_UNKNOWN,        /* X86_BR_ABORT */
> +             PERF_BR_UNKNOWN,        /* X86_BR_IN_TX */
> +             PERF_BR_UNKNOWN,        /* X86_BR_NO_TX */
> +             PERF_BR_CALL,           /* X86_BR_ZERO_CALL */
> +             PERF_BR_UNKNOWN,        /* X86_BR_CALL_STACK */
> +             PERF_BR_IND,            /* X86_BR_IND_JMP */
> +     };

Should the branch_map array be static? Having it on the stack makes
the compiler re-create it every time we call the function.

jirka

> +
> +     type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
> +
> +     if (type) {
> +             i = __ffs(type);
> +             if (i < X86_BR_TYPE_MAP_MAX)
> +                     return branch_map[i];
> +     }
> +
> +     return PERF_BR_UNKNOWN;
> +}
> +
>  /*
>   * implement actual branch filter based on user demand.
>   * Hardware may not exactly satisfy that request, thus
> @@ -942,7 +987,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
>       bool compress = false;
>  
>       /* if sampling all branches, then nothing to filter */
> -     if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
> +     if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
> +         ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
>               return;
>  
>       for (i = 0; i < cpuc->lbr_stack.nr; i++) {
> @@ -963,6 +1009,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
>                       cpuc->lbr_entries[i].from = 0;
>                       compress = true;
>               }
> +
> +             if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
> +                     cpuc->lbr_entries[i].type = common_branch_type(type);
>       }
>  
>       if (!compress)
> -- 
> 2.7.4
> 

Reply via email to