Re: [PATCH 07/12] x86/rdt,cqm: Scheduling support update

2017-01-17 Thread Thomas Gleixner
On Fri, 6 Jan 2017, Vikas Shivappa wrote:
> Introduce a scheduling hook finish_arch_pre_lock_switch which is
> called just after the perf sched_in during context switch. This hook
> handles both the CAT and CQM sched_in scenarios.

Sure, we need yet another special hook. What's wrong with
finish_arch_post_lock_switch()?
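
For reference, the core already provides that hook as an overridable no-op;
the pattern in kernel/sched/sched.h is roughly:

  #ifndef finish_arch_post_lock_switch
  # define finish_arch_post_lock_switch()	do { } while (0)
  #endif

so an architecture only needs to supply its own definition, no new hook
point in the core code required.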

And again. This wants to be a separate patch to the core code with a proper
justification for that hook. Dammit. Changelogs are supposed to explain WHY,
not WHAT. How often do I have to explain that again?

> The IA32_PQR_ASSOC MSR is used by both CAT (cache allocation) and CQM, and
> this patch integrates the two MSR writes into one. The common sched_in code
> checks whether the per-CPU cache holds a different RMID or CLOSid than the
> task and does the MSR write only in that case.
> 
> During sched_in the task uses its own RMID if the task is monitored, or
> else the RMID of the task's cgroup.

And that's relevant for that patch because it explains the existing
behaviour of the RMID, right?

Darn, again you create an unreviewable hodgepodge of changes. The whole
split of the RMID handling into a perf part and the actual RMID update can
be done as a separate patch before switching over to the combined
RMID/CLOSid update mechanism.

> +DEFINE_STATIC_KEY_FALSE(cqm_enable_key);
> +
>  /*
>   * Groups of events that have the same target(s), one RMID per group.
>   */
> @@ -108,7 +103,7 @@ struct sample {
>   * Likewise, an rmid value of -1 is used to indicate "no rmid currently
>   * assigned" and is used as part of the rotation code.
>   */
> -static inline bool __rmid_valid(u32 rmid)
> +bool __rmid_valid(u32 rmid)

And once more this becomes global even though there is no user outside of cqm.c.

>  {
>   if (!rmid || rmid > cqm_max_rmid)
>   return false;
> @@ -161,7 +156,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid, int domain)
>   *
>   * We expect to be called with cache_mutex held.
>   */
> -static u32 __get_rmid(int domain)
> +u32 __get_rmid(int domain)

Ditto.

>  {
>   struct list_head *cqm_flist;
>   struct cqm_rmid_entry *entry;
> @@ -368,6 +363,23 @@ static void init_mbm_sample(u32 *rmid, u32 evt_type)
>   on_each_cpu_mask(_cpumask, __intel_mbm_event_init, , 1);
>  }
>  
> +#ifdef CONFIG_CGROUP_PERF
> +struct cgrp_cqm_info *cqminfo_from_tsk(struct task_struct *tsk)
> +{
> + struct cgrp_cqm_info *ccinfo = NULL;
> + struct perf_cgroup *pcgrp;
> +
> + pcgrp = perf_cgroup_from_task(tsk, NULL);
> +
> + if (!pcgrp)
> + return NULL;
> + else
> + ccinfo = cgrp_to_cqm_info(pcgrp);
> +
> + return ccinfo;

What the heck? Either you do:

struct cgrp_cqm_info *ccinfo = NULL;
struct perf_cgroup *pcgrp;

pcgrp = perf_cgroup_from_task(tsk, NULL);
if (pcgrp)
ccinfo = cgrp_to_cqm_info(pcgrp);

return ccinfo;

or

struct perf_cgroup *pcgrp;

pcgrp = perf_cgroup_from_task(tsk, NULL);
if (pcgrp)
return cgrp_to_cqm_info(pcgrp);
return NULL;

But the above combination does not make any sense at all. Hacking at it
until it compiles and works by chance is not a really good engineering
principle.

> +}
> +#endif
> +
>  static inline void cqm_enable_mon(struct cgrp_cqm_info *cqm_info, u32 *rmid)
>  {
>   if (rmid != NULL) {
> @@ -713,26 +725,27 @@ void alloc_needed_pkg_rmid(u32 *cqm_rmid)
>  static void intel_cqm_event_start(struct perf_event *event, int mode)
>  {
>   struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
> - u32 rmid;
>  
>   if (!(event->hw.cqm_state & PERF_HES_STOPPED))
>   return;
>  
>   event->hw.cqm_state &= ~PERF_HES_STOPPED;
>  
> - alloc_needed_pkg_rmid(event->hw.cqm_rmid);
> -
> - rmid = event->hw.cqm_rmid[pkg_id];
> - state->rmid = rmid;
> - wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
> + if (is_task_event(event)) {
> + alloc_needed_pkg_rmid(event->hw.cqm_rmid);
> + state->next_task_rmid = event->hw.cqm_rmid[pkg_id];

Huch? When is this going to be evaluated? Assume the task is running on a
CPU in NOHZ full mode in user space w/o ever going through schedule. How is
that supposed to activate the event ever? Not, AFAICT.

> + }
>  }
>  
>  static void intel_cqm_event_stop(struct perf_event *event, int mode)
>  {
> + struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
> +
>   if (event->hw.cqm_state & PERF_HES_STOPPED)
>   return;
>  
>   event->hw.cqm_state |= PERF_HES_STOPPED;
> + state->next_task_rmid = 0;

Ditto.

>  }
>  
>  static int intel_cqm_event_add(struct perf_event *event, int mode)
> @@ -1366,6 +1379,8 @@ static int __init intel_cqm_init(void)
>   if (mbm_enabled)
>   pr_info("Intel MBM enabled\n");
>  
> + static_branch_enable(&cqm_enable_key);
> +
> +++ b/arch/x86/include/asm/intel_pqr_common.h
> @@ -0,0 +1,38 @@
> +#ifndef _ASM_X86_INTEL_PQR_COMMON_H
> +#define _ASM_X86_INTEL_PQR_COMMON_H

Re: [PATCH 07/12] x86/rdt,cqm: Scheduling support update

2017-01-17 Thread Shivappa Vikas



On Tue, 17 Jan 2017, Thomas Gleixner wrote:


On Fri, 6 Jan 2017, Vikas Shivappa wrote:

Introduce a scheduling hook finish_arch_pre_lock_switch which is
called just after the perf sched_in during context switch. This hook
handles both the CAT and CQM sched_in scenarios.


Sure, we need yet another special hook. What's wrong with
finish_arch_post_lock_switch()?

And again. This wants to be a separate patch to the core code with a proper
justification for that hook. Dammit. Changelogs are supposed to explain WHY,
not WHAT. How often do I have to explain that again?


Will fix. Will split this into three: the sched hook patch (using
finish_arch_post_lock_switch), the perf sched_in patch, and the actual MSR
write.
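
Roughly, the hook side would then boil down to something like this (sketch
only; intel_pqr_sched_in and __intel_pqr_update are assumed names here, not
the code of this series):

  /* arch header, sketch: point the existing hook at the RDT/CQM code */
  #define finish_arch_post_lock_switch	intel_pqr_sched_in

  static inline void intel_pqr_sched_in(void)
  {
  	/* static key keeps the switch path free on machines without CQM/CAT */
  	if (static_branch_unlikely(&cqm_enable_key))
  		__intel_pqr_update();	/* the "actual write to msr" part */
  }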





The IA32_PQR_ASSOC MSR is used by both CAT (cache allocation) and CQM, and
this patch integrates the two MSR writes into one. The common sched_in code
checks whether the per-CPU cache holds a different RMID or CLOSid than the
task and does the MSR write only in that case.

During sched_in the task uses its own RMID if the task is monitored, or
else the RMID of the task's cgroup.


And that's relevant for that patch because it explains the existing
behaviour of the RMID, right?

Darn, again you create an unreviewable hodgepodge of changes. The whole
split of the RMID handling into a perf part and the actual RMID update can
be done as a separate patch before switching over to the combined
RMID/CLOSid update mechanism.


+DEFINE_STATIC_KEY_FALSE(cqm_enable_key);
+
 /*
  * Groups of events that have the same target(s), one RMID per group.
  */
@@ -108,7 +103,7 @@ struct sample {
  * Likewise, an rmid value of -1 is used to indicate "no rmid currently
  * assigned" and is used as part of the rotation code.
  */
-static inline bool __rmid_valid(u32 rmid)
+bool __rmid_valid(u32 rmid)


And once more this becomes global even though there is no user outside of cqm.c.


 {
if (!rmid || rmid > cqm_max_rmid)
return false;
@@ -161,7 +156,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid, int domain)
  *
  * We expect to be called with cache_mutex held.
  */
-static u32 __get_rmid(int domain)
+u32 __get_rmid(int domain)


Ditto.


Will fix the unnecessary globals. These should have been removed in this
version, as all of this should have gone away with the removal of continuous
monitoring.





 {
struct list_head *cqm_flist;
struct cqm_rmid_entry *entry;
@@ -368,6 +363,23 @@ static void init_mbm_sample(u32 *rmid, u32 evt_type)
on_each_cpu_mask(_cpumask, __intel_mbm_event_init, , 1);
 }

+#ifdef CONFIG_CGROUP_PERF
+struct cgrp_cqm_info *cqminfo_from_tsk(struct task_struct *tsk)
+{
+   struct cgrp_cqm_info *ccinfo = NULL;
+   struct perf_cgroup *pcgrp;
+
+   pcgrp = perf_cgroup_from_task(tsk, NULL);
+
+   if (!pcgrp)
+   return NULL;
+   else
+   ccinfo = cgrp_to_cqm_info(pcgrp);
+
+   return ccinfo;


What the heck? Either you do:

struct cgrp_cqm_info *ccinfo = NULL;
struct perf_cgroup *pcgrp;

pcgrp = perf_cgroup_from_task(tsk, NULL);
if (pcgrp)
ccinfo = cgrp_to_cqm_info(pcgrp);

return ccinfo;


Will fix.



or

struct perf_cgroup *pcgrp;

pcgrp = perf_cgroup_from_task(tsk, NULL);
if (pcgrp)
return cgrp_to_cqm_info(pcgrp);
return NULL;

But the above combination does not make any sense at all. Hacking at it
until it compiles and works by chance is not a really good engineering
principle.


+}
+#endif
+
 static inline void cqm_enable_mon(struct cgrp_cqm_info *cqm_info, u32 *rmid)
 {
if (rmid != NULL) {
@@ -713,26 +725,27 @@ void alloc_needed_pkg_rmid(u32 *cqm_rmid)
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-   u32 rmid;

if (!(event->hw.cqm_state & PERF_HES_STOPPED))
return;

event->hw.cqm_state &= ~PERF_HES_STOPPED;

-   alloc_needed_pkg_rmid(event->hw.cqm_rmid);
-
-   rmid = event->hw.cqm_rmid[pkg_id];
-   state->rmid = rmid;
-   wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+   if (is_task_event(event)) {
+   alloc_needed_pkg_rmid(event->hw.cqm_rmid);
+   state->next_task_rmid = event->hw.cqm_rmid[pkg_id];


Huch? When is this going to be evaluated? Assume the task is running on a
CPU in NOHZ full mode in user space w/o ever going through schedule. How is
that supposed to activate the event ever? Not, AFAICT.


+   }
 }

 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
+   struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
if (event->hw.cqm_state & PERF_HES_STOPPED)
return;

event->hw.cqm_state |= PERF_HES_STOPPED;
+   state->next_task_rmid = 0;


Ditto.


 }

 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -1366,6 +1379,8 @@ static int __init intel_cqm_init(void)
 

[PATCH 07/12] x86/rdt,cqm: Scheduling support update

2017-01-06 Thread Vikas Shivappa
Introduce a scheduling hook finish_arch_pre_lock_switch which is
called just after the perf sched_in during context switch. This hook
handles both the CAT and CQM sched_in scenarios.

The IA32_PQR_ASSOC MSR is used by both CAT (cache allocation) and CQM, and
this patch integrates the two MSR writes into one. The common sched_in code
checks whether the per-CPU cache holds a different RMID or CLOSid than the
task and does the MSR write only in that case.

During sched_in the task uses its own RMID if the task is monitored, or
else the RMID of the task's cgroup.
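
The combined update amounts to roughly the following sketch (cinfo->rmid[]
and current->closid are assumed names here, not necessarily the exact fields
used by this series):

	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
	struct cgrp_cqm_info *cinfo;
	u32 closid = current->closid;		/* picked by the CAT side */
	u32 rmid = 0;

	if (state->next_task_rmid)		/* the task itself is monitored */
		rmid = state->next_task_rmid;
	else if ((cinfo = cqminfo_from_tsk(current)))
		rmid = cinfo->rmid[pkg_id];	/* else the task's cgroup RMID */

	/* Write PQR_ASSOC only if either half actually changed */
	if (rmid != state->rmid || closid != state->closid) {
		state->rmid = rmid;
		state->closid = closid;
		wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid);
	}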

Patch is based on David Carrillo-Cisneros' patches in the cqm2 series.

Signed-off-by: Vikas Shivappa 
---
 arch/x86/events/intel/cqm.c  | 45 ++-
 arch/x86/include/asm/intel_pqr_common.h  | 38 +
 arch/x86/include/asm/intel_rdt.h | 39 -
 arch/x86/include/asm/intel_rdt_common.h  | 11 
 arch/x86/include/asm/processor.h |  4 ++
 arch/x86/kernel/cpu/Makefile |  1 +
 arch/x86/kernel/cpu/intel_rdt_common.c   | 98 
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |  4 +-
 arch/x86/kernel/process_32.c |  4 --
 arch/x86/kernel/process_64.c |  4 --
 kernel/sched/core.c  |  1 +
 kernel/sched/sched.h |  3 +
 12 files changed, 188 insertions(+), 64 deletions(-)
 create mode 100644 arch/x86/include/asm/intel_pqr_common.h
 create mode 100644 arch/x86/kernel/cpu/intel_rdt_common.c

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index c6479ae..597a184 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -28,13 +28,6 @@
 static bool cqm_enabled, mbm_enabled;
 unsigned int cqm_socket_max;
 
-/*
- * The cached intel_pqr_state is strictly per CPU and can never be
- * updated from a remote CPU. Both functions which modify the state
- * (intel_cqm_event_start and intel_cqm_event_stop) are called with
- * interrupts disabled, which is sufficient for the protection.
- */
-DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 static struct hrtimer *mbm_timers;
 /**
  * struct sample - mbm event's (local or total) data
@@ -74,6 +67,8 @@ struct sample {
 static DEFINE_MUTEX(cache_mutex);
 static DEFINE_RAW_SPINLOCK(cache_lock);
 
+DEFINE_STATIC_KEY_FALSE(cqm_enable_key);
+
 /*
  * Groups of events that have the same target(s), one RMID per group.
  */
@@ -108,7 +103,7 @@ struct sample {
  * Likewise, an rmid value of -1 is used to indicate "no rmid currently
  * assigned" and is used as part of the rotation code.
  */
-static inline bool __rmid_valid(u32 rmid)
+bool __rmid_valid(u32 rmid)
 {
if (!rmid || rmid > cqm_max_rmid)
return false;
@@ -161,7 +156,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid, int domain)
  *
  * We expect to be called with cache_mutex held.
  */
-static u32 __get_rmid(int domain)
+u32 __get_rmid(int domain)
 {
struct list_head *cqm_flist;
struct cqm_rmid_entry *entry;
@@ -368,6 +363,23 @@ static void init_mbm_sample(u32 *rmid, u32 evt_type)
on_each_cpu_mask(_cpumask, __intel_mbm_event_init, , 1);
 }
 
+#ifdef CONFIG_CGROUP_PERF
+struct cgrp_cqm_info *cqminfo_from_tsk(struct task_struct *tsk)
+{
+   struct cgrp_cqm_info *ccinfo = NULL;
+   struct perf_cgroup *pcgrp;
+
+   pcgrp = perf_cgroup_from_task(tsk, NULL);
+
+   if (!pcgrp)
+   return NULL;
+   else
+   ccinfo = cgrp_to_cqm_info(pcgrp);
+
+   return ccinfo;
+}
+#endif
+
 static inline void cqm_enable_mon(struct cgrp_cqm_info *cqm_info, u32 *rmid)
 {
if (rmid != NULL) {
@@ -713,26 +725,27 @@ void alloc_needed_pkg_rmid(u32 *cqm_rmid)
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-   u32 rmid;
 
if (!(event->hw.cqm_state & PERF_HES_STOPPED))
return;
 
event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-   alloc_needed_pkg_rmid(event->hw.cqm_rmid);
-
-   rmid = event->hw.cqm_rmid[pkg_id];
-   state->rmid = rmid;
-   wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+   if (is_task_event(event)) {
+   alloc_needed_pkg_rmid(event->hw.cqm_rmid);
+   state->next_task_rmid = event->hw.cqm_rmid[pkg_id];
+   }
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
+   struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
if (event->hw.cqm_state & PERF_HES_STOPPED)
return;
 
event->hw.cqm_state |= PERF_HES_STOPPED;
+   state->next_task_rmid = 0;
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -1366,6 +1379,8 @@ static int __init intel_cqm_init(void)
if (mbm_enabled)
pr_info("Intel MBM enabled\n");
 
+   static_branch_enable(&cqm_enable_key);
+
/*
 * Setup the hot cpu notifier once we are sure cqm
 * is enabled to avoid
