Re: [PATCH V5 04/25] perf/x86/intel: Hybrid PMU support for perf capabilities

2021-04-08 Thread Liang, Kan




On 4/8/2021 9:40 AM, Peter Zijlstra wrote:

@@ -4330,7 +4347,7 @@ static int intel_pmu_check_period(struct perf_event 
*event, u64 value)
  
  static int intel_pmu_aux_output_match(struct perf_event *event)

  {
-   if (!x86_pmu.intel_cap.pebs_output_pt_available)
+   if (!intel_pmu_has_cap(event, PERF_CAP_PT_IDX))
return 0;
  
  	return is_intel_pt_event(event);

these sites can have !event->pmu ?



I don't think the event->pmu can be NULL, but it could be pt_pmu.pmu.
If so, it should be a problem.

I think I will still use the x86_pmu.intel_cap.pebs_output_pt_available 
here in V6. The worst case is that we lost the PEBS via PT support on 
the small core for now.


I guess Alexander may provide a separate patch later to enable the PEBS 
via PT support on the ADL small core.


Thanks,
Kan


Re: [PATCH V5 04/25] perf/x86/intel: Hybrid PMU support for perf capabilities

2021-04-08 Thread Liang, Kan




On 4/8/2021 1:00 PM, Peter Zijlstra wrote:

On Mon, Apr 05, 2021 at 08:10:46AM -0700, kan.li...@linux.intel.com wrote:

+#define is_hybrid()		(!!x86_pmu.num_hybrid_pmus)


Given this is sprinkled all over the place, can you make this a
static_key_false + static_branch_unlikely() such that the hybrid case is
out-of-line?



Sure, I will add a new static_key_false "perf_is_hybrid" to indicate a 
hybrid system as below (not test yet).


diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f8d1222..bd6412e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -54,6 +54,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {

 DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);

 /*
  * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2b553d9..7cef3cd 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6119,6 +6119,7 @@ __init int intel_pmu_init(void)
 GFP_KERNEL);
if (!x86_pmu.hybrid_pmu)
return -ENOMEM;
+   static_branch_enable(&perf_is_hybrid);
x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;

x86_pmu.late_ack = true;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index bfbecde..d6383d1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -663,8 +663,8 @@ static __always_inline struct x86_hybrid_pmu 
*hybrid_pmu(struct pmu *pmu)

return container_of(pmu, struct x86_hybrid_pmu, pmu);
 }

-/* The number of hybrid PMUs implies whether it's a hybrid system */
-#define is_hybrid()		(!!x86_pmu.num_hybrid_pmus)
+extern struct static_key_false perf_is_hybrid;
+#define is_hybrid()		static_branch_unlikely(&perf_is_hybrid)

 #define hybrid(_pmu, _field)   \
 ({ \

Thanks,
Kan


Re: [PATCH V5 04/25] perf/x86/intel: Hybrid PMU support for perf capabilities

2021-04-08 Thread Peter Zijlstra
On Mon, Apr 05, 2021 at 08:10:46AM -0700, kan.li...@linux.intel.com wrote:
> +#define is_hybrid()  (!!x86_pmu.num_hybrid_pmus)

Given this is sprinkled all over the place, can you make this a
static_key_false + static_branch_unlikely() such that the hybrid case is
out-of-line?


Re: [PATCH V5 04/25] perf/x86/intel: Hybrid PMU support for perf capabilities

2021-04-08 Thread Peter Zijlstra
On Mon, Apr 05, 2021 at 08:10:46AM -0700, kan.li...@linux.intel.com wrote:
> +static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
> +{
> + union perf_capabilities *intel_cap;
> +
> + intel_cap = is_hybrid() ? &hybrid_pmu(event->pmu)->intel_cap :
> +   &x86_pmu.intel_cap;

This isn't:

	intel_cap = &hybrid_pmu(event->pmu)->intel_cap;

because..

> +
> + return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
> +}


> @@ -3712,7 +3721,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
>* with a slots event as group leader. When the slots event
>* is used in a metrics group, it too cannot support sampling.
>*/
> - if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
> + if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && 
> is_topdown_event(event)) {
>   if (event->attr.config1 || event->attr.config2)
>   return -EINVAL;
>  

> @@ -4330,7 +4347,7 @@ static int intel_pmu_check_period(struct perf_event 
> *event, u64 value)
>  
>  static int intel_pmu_aux_output_match(struct perf_event *event)
>  {
> - if (!x86_pmu.intel_cap.pebs_output_pt_available)
> + if (!intel_pmu_has_cap(event, PERF_CAP_PT_IDX))
>   return 0;
>  
>   return is_intel_pt_event(event);

these sites can have !event->pmu ?


Re: [PATCH V5 04/25] perf/x86/intel: Hybrid PMU support for perf capabilities

2021-04-08 Thread Peter Zijlstra
On Thu, Apr 08, 2021 at 03:40:56PM +0200, Peter Zijlstra wrote:
> On Mon, Apr 05, 2021 at 08:10:46AM -0700, kan.li...@linux.intel.com wrote:
> > +static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
> > +{
> > +   union perf_capabilities *intel_cap;
> > +
> > +   intel_cap = is_hybrid() ? &hybrid_pmu(event->pmu)->intel_cap :
> > + &x86_pmu.intel_cap;
> 
> This isn't:
> 
>   intel_cap = &hybrid_pmu(event->pmu)->intel_cap;

Ah no, its because you want a pointer and GCC is being silly about that.

I have something for that, hold on ;-)


[PATCH V5 04/25] perf/x86/intel: Hybrid PMU support for perf capabilities

2021-04-05 Thread kan . liang
From: Kan Liang 

Some platforms, e.g. Alder Lake, have hybrid architecture. Although most
PMU capabilities are the same, there are still some unique PMU
capabilities for different hybrid PMUs. Perf should register a dedicated
pmu for each hybrid PMU.

Add a new struct x86_hybrid_pmu, which saves the dedicated pmu and
capabilities for each hybrid PMU.

The architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicates the
architecture features which are available on all hybrid PMUs. The
architecture features are stored in the global x86_pmu.intel_cap.

For Alder Lake, the model-specific features are perf metrics and
PEBS-via-PT. The corresponding bits of the global x86_pmu.intel_cap
should be 0 for these two features. Perf should not use the global
intel_cap to check the features on a hybrid system.
Add a dedicated intel_cap in the x86_hybrid_pmu to store the
model-specific capabilities. Use the dedicated intel_cap to replace
the global intel_cap for these two features. The dedicated intel_cap
will be set in the following "Add Alder Lake Hybrid support" patch.

Add is_hybrid() to distinguish a hybrid system. ADL may have an
alternative configuration. With that configuration, the
X86_FEATURE_HYBRID_CPU is not set. Perf cannot rely on the feature bit.
The number of hybrid PMUs implies whether it's a hybrid system. The
number will be assigned in the following "Add Alder Lake Hybrid support"
patch as well.

Suggested-by: Peter Zijlstra (Intel) 
Signed-off-by: Kan Liang 
---
 arch/x86/events/core.c   |  6 --
 arch/x86/events/intel/core.c | 27 ++-
 arch/x86/events/intel/ds.c   |  2 +-
 arch/x86/events/perf_event.h | 34 ++
 arch/x86/include/asm/msr-index.h |  3 +++
 5 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e564e96..d3d3c6b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1105,8 +1105,9 @@ static void del_nr_metric_event(struct cpu_hw_events 
*cpuc,
 static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
 int max_count, int n)
 {
+   union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap);
 
-   if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+   if (intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
return -EINVAL;
 
if (n >= max_count + cpuc->n_metric)
@@ -1582,6 +1583,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 static void x86_pmu_del(struct perf_event *event, int flags)
 {
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+   union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap);
int i;
 
/*
@@ -1621,7 +1623,7 @@ static void x86_pmu_del(struct perf_event *event, int 
flags)
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
-   if (x86_pmu.intel_cap.perf_metrics)
+   if (intel_cap.perf_metrics)
del_nr_metric_event(cpuc, event);
 
perf_event_update_userpage(event);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f116c63..494b9bc 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3646,6 +3646,15 @@ static inline bool is_mem_loads_aux_event(struct 
perf_event *event)
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == 
X86_CONFIG(.event=0x03, .umask=0x82);
 }
 
+static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
+{
+   union perf_capabilities *intel_cap;
+
+   intel_cap = is_hybrid() ? &hybrid_pmu(event->pmu)->intel_cap :
+ &x86_pmu.intel_cap;
+
+   return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
+}
 
 static int intel_pmu_hw_config(struct perf_event *event)
 {
@@ -3712,7 +3721,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
 * with a slots event as group leader. When the slots event
 * is used in a metrics group, it too cannot support sampling.
 */
-   if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
+   if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && 
is_topdown_event(event)) {
if (event->attr.config1 || event->attr.config2)
return -EINVAL;
 
@@ -4219,8 +4228,16 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.version > 1)
		flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
 
-   /* Disable perf metrics if any added CPU doesn't support it. */
-   if (x86_pmu.intel_cap.perf_metrics) {
+   /*
+* Disable perf metrics if any added CPU doesn't support it.
+*
+* Turn off the check for a hybrid architecture, because the
+* architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicates
+* the architecture features. The perf metrics is a model-specific
+* feature for now. The