Re: [PATCH v9 2/7] perf/x86/intel: Record branch type

2017-07-18 Thread Peter Zijlstra
On Mon, Jul 17, 2017 at 04:28:38PM +0800, Jin, Yao wrote:
> I'm also waiting for Peter's review comments for this patch update.

Aside from Jiri's comment (which I agree with)

Acked-by: Peter Zijlstra (Intel) 

for the first two patches.


Re: [PATCH v9 2/7] perf/x86/intel: Record branch type

2017-07-17 Thread Jin, Yao



On 7/17/2017 4:11 PM, Jiri Olsa wrote:

On Mon, Jul 17, 2017 at 07:06:38PM +0800, Jin Yao wrote:

SNIP


+#define X86_BR_TYPE_MAP_MAX16
+
+static int
+common_branch_type(int type)
+{
+   int i;
+   const int branch_map[X86_BR_TYPE_MAP_MAX] = {
+   PERF_BR_CALL,   /* X86_BR_CALL */
+   PERF_BR_RET,/* X86_BR_RET */
+   PERF_BR_SYSCALL,/* X86_BR_SYSCALL */
+   PERF_BR_SYSRET, /* X86_BR_SYSRET */
+   PERF_BR_UNKNOWN,/* X86_BR_INT */
+   PERF_BR_UNKNOWN,/* X86_BR_IRET */
+   PERF_BR_COND,   /* X86_BR_JCC */
+   PERF_BR_UNCOND, /* X86_BR_JMP */
+   PERF_BR_UNKNOWN,/* X86_BR_IRQ */
+   PERF_BR_IND_CALL,   /* X86_BR_IND_CALL */
+   PERF_BR_UNKNOWN,/* X86_BR_ABORT */
+   PERF_BR_UNKNOWN,/* X86_BR_IN_TX */
+   PERF_BR_UNKNOWN,/* X86_BR_NO_TX */
+   PERF_BR_CALL,   /* X86_BR_ZERO_CALL */
+   PERF_BR_UNKNOWN,/* X86_BR_CALL_STACK */
+   PERF_BR_IND,/* X86_BR_IND_JMP */
+   };

should the branch_map array be static? having it on stack makes
the compiler to create it every time we call the function

jirka


OK, agree to let branch_map array be static. I will add this in v10.

I'm also waiting for Peter's review comments for this patch update.

Thanks
Jin Yao




+
+   type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+
+   if (type) {
+   i = __ffs(type);
+   if (i < X86_BR_TYPE_MAP_MAX)
+   return branch_map[i];
+   }
+
+   return PERF_BR_UNKNOWN;
+}
+
  /*
   * implement actual branch filter based on user demand.
   * Hardware may not exactly satisfy that request, thus
@@ -942,7 +987,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
bool compress = false;
  
  	/* if sampling all branches, then nothing to filter */

-   if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+   if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+   ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
return;
  
  	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

@@ -963,6 +1009,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].from = 0;
compress = true;
}
+
+   if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+   cpuc->lbr_entries[i].type = common_branch_type(type);
}
  
  	if (!compress)

--
2.7.4





Re: [PATCH v9 2/7] perf/x86/intel: Record branch type

2017-07-17 Thread Jiri Olsa
On Mon, Jul 17, 2017 at 07:06:38PM +0800, Jin Yao wrote:

SNIP

> +#define X86_BR_TYPE_MAP_MAX  16
> +
> +static int
> +common_branch_type(int type)
> +{
> + int i;
> + const int branch_map[X86_BR_TYPE_MAP_MAX] = {
> + PERF_BR_CALL,   /* X86_BR_CALL */
> + PERF_BR_RET,/* X86_BR_RET */
> + PERF_BR_SYSCALL,/* X86_BR_SYSCALL */
> + PERF_BR_SYSRET, /* X86_BR_SYSRET */
> + PERF_BR_UNKNOWN,/* X86_BR_INT */
> + PERF_BR_UNKNOWN,/* X86_BR_IRET */
> + PERF_BR_COND,   /* X86_BR_JCC */
> + PERF_BR_UNCOND, /* X86_BR_JMP */
> + PERF_BR_UNKNOWN,/* X86_BR_IRQ */
> + PERF_BR_IND_CALL,   /* X86_BR_IND_CALL */
> + PERF_BR_UNKNOWN,/* X86_BR_ABORT */
> + PERF_BR_UNKNOWN,/* X86_BR_IN_TX */
> + PERF_BR_UNKNOWN,/* X86_BR_NO_TX */
> + PERF_BR_CALL,   /* X86_BR_ZERO_CALL */
> + PERF_BR_UNKNOWN,/* X86_BR_CALL_STACK */
> + PERF_BR_IND,/* X86_BR_IND_JMP */
> + };

should the branch_map array be static? having it on stack makes
the compiler to create it every time we call the function

jirka

> +
> + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
> +
> + if (type) {
> + i = __ffs(type);
> + if (i < X86_BR_TYPE_MAP_MAX)
> + return branch_map[i];
> + }
> +
> + return PERF_BR_UNKNOWN;
> +}
> +
>  /*
>   * implement actual branch filter based on user demand.
>   * Hardware may not exactly satisfy that request, thus
> @@ -942,7 +987,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
>   bool compress = false;
>  
>   /* if sampling all branches, then nothing to filter */
> - if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
> + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
> + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
>   return;
>  
>   for (i = 0; i < cpuc->lbr_stack.nr; i++) {
> @@ -963,6 +1009,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
>   cpuc->lbr_entries[i].from = 0;
>   compress = true;
>   }
> +
> + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
> + cpuc->lbr_entries[i].type = common_branch_type(type);
>   }
>  
>   if (!compress)
> -- 
> 2.7.4
>