[PATCH 10/12] perf/core,x86/cqm: Add read for Cgroup events, per-pkg reads.

2017-01-06  Vikas Shivappa
For cqm cgroup events, the event can be read even if it was not active
on the cpu on which it is being read. This is because the RMIDs are per
package, so reading the llc_occupancy value on a cpu x really reads the
occupancy of the package to which cpu x belongs.
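
(For reference, a package-scoped read at the MSR level looks roughly
like the sketch below. It mirrors the documented IA32_QM_EVTSEL /
IA32_QM_CTR protocol, but the helper name is illustrative and not
necessarily the exact __rmid_read() in cqm.c:)

#define QOS_L3_OCCUP_EVENT_ID   0x01
#define RMID_VAL_ERROR          (1ULL << 63)
#define RMID_VAL_UNAVAIL        (1ULL << 62)

/*
 * Select the RMID and the llc_occupancy event, then read the counter.
 * The MSR pair is package scoped, so any cpu on the package reports
 * the same occupancy for a given RMID.
 */
static u64 rmid_read_llc_occupancy(u32 rmid)
{
        u64 val;

        wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
        rdmsrl(MSR_IA32_QM_CTR, val);

        if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
                return 0;

        return val;
}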

This patch adds a PERF_EV_CAP_INACTIVE_CPU_READ_PKG event capability to
indicate this behaviour of cqm, and also changes perf/core to still
call the read even when a cgroup event is inactive on the cpu. Task
events have event->cpu == -1, so this does not apply to them.
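
(The kernel/events/core.c hunk does not survive in this archived copy,
so the exact core change is not shown below. From the description
above, the gate it introduces would look roughly like this sketch; the
helper name is made up for illustration:)

/*
 * Sketch: a package-scoped cgroup event may be read on event->cpu
 * even when it is not currently active there. Task events have
 * event->cpu == -1 and never qualify.
 */
static bool perf_event_pkg_readable_inactive(struct perf_event *event)
{
        return event->cpu != -1 &&
               (event->event_caps & PERF_EV_CAP_INACTIVE_CPU_READ_PKG);
}

perf_event_read() would then proceed with the cross-cpu read when the
event is either active on that cpu or a check like this passes.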

Tests: before this patch, perf stat -C <cpu> would not return a count
to perf/core. After this patch, the count for the package is returned
to perf/core. We still don't see the count in the perf user-mode tool;
that is fixed in the next patches.

This patch is based on David Carrillo-Cisneros' patches in the cqm2
series.

Signed-off-by: Vikas Shivappa 
---
 arch/x86/events/intel/cqm.c | 31 ---
 arch/x86/include/asm/intel_rdt_common.h |  2 +-
 include/linux/perf_event.h  | 19 ---
 kernel/events/core.c| 16 
 4 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 92efe12..3f5860c 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -111,6 +111,13 @@ bool __rmid_valid(u32 rmid)
return true;
 }
 
+static inline bool __rmid_valid_raw(u32 rmid)
+{
+   if (rmid > cqm_max_rmid)
+   return false;
+   return true;
+}
+
 static u64 __rmid_read(u32 rmid)
 {
u64 val;
@@ -884,16 +891,16 @@ static u64 intel_cqm_event_count(struct perf_event *event)
return __perf_event_count(event);
 }
 
-void alloc_needed_pkg_rmid(u32 *cqm_rmid)
+u32 alloc_needed_pkg_rmid(u32 *cqm_rmid)
 {
unsigned long flags;
u32 rmid;
 
if (WARN_ON(!cqm_rmid))
-   return;
+   return -EINVAL;
 
if (cqm_rmid == cqm_rootcginfo.rmid || cqm_rmid[pkg_id])
-   return;
+   return 0;
 
	raw_spin_lock_irqsave(&cache_lock, flags);
 
@@ -902,6 +909,8 @@ void alloc_needed_pkg_rmid(u32 *cqm_rmid)
cqm_rmid[pkg_id] = rmid;
 
	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+   return rmid;
 }
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
@@ -913,10 +922,8 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 
event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-   if (is_task_event(event)) {
-   alloc_needed_pkg_rmid(event->hw.cqm_rmid);
+   if (is_task_event(event))
state->next_task_rmid = event->hw.cqm_rmid[pkg_id];
-   }
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
@@ -932,10 +939,19 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
 {
+   u32 rmid;
+
event->hw.cqm_state = PERF_HES_STOPPED;
 
-   if ((mode & PERF_EF_START))
+   /*
+    * If lazy RMID allocation fails, indicate the error to the user.
+    */
+   if ((mode & PERF_EF_START)) {
+   rmid = alloc_needed_pkg_rmid(event->hw.cqm_rmid);
+   if (!__rmid_valid_raw(rmid))
+   return -EINVAL;
intel_cqm_event_start(event, mode);
+   }
 
return 0;
 }
@@ -1048,6 +1064,7 @@ static int intel_cqm_event_init(struct perf_event *event)
 * cgroup hierarchies.
 */
event->event_caps |= PERF_EV_CAP_CGROUP_NO_RECURSION;
+   event->event_caps |= PERF_EV_CAP_INACTIVE_CPU_READ_PKG;
 
	mutex_lock(&cache_mutex);
 
diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h
index 544acaa..fcaaaeb 100644
--- a/arch/x86/include/asm/intel_rdt_common.h
+++ b/arch/x86/include/asm/intel_rdt_common.h
@@ -27,7 +27,7 @@ struct intel_pqr_state {
 
 u32 __get_rmid(int domain);
 bool __rmid_valid(u32 rmid);
-void alloc_needed_pkg_rmid(u32 *cqm_rmid);
+u32 alloc_needed_pkg_rmid(u32 *cqm_rmid);
 struct cgrp_cqm_info *cqminfo_from_tsk(struct task_struct *tsk);
 
 extern struct cgrp_cqm_info cqm_rootcginfo;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 410642a..adfddec 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -525,10 +525,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
  * PERF_EV_CAP_CGROUP_NO_RECURSION: A cgroup event that handles its own
  * cgroup scoping. It does not need to be enabled for all of its descendants
  * cgroups.
+ * PERF_EV_CAP_INACTIVE_CPU_READ_PKG: A cgroup event where we can read
+ * the package count on any cpu on the pkg even if inactive.
  */
-#define PERF_EV_CAP_SOFTWARE           BIT(0)
-#define PERF_EV_CAP_READ_ACTIVE_PKG    BIT(1)
-#define PERF_EV_CAP_CGROUP_NO_RECURSION        BIT(2)
+#define PERF_EV_CAP_SOFTWARE           BIT(0)
+#define PERF_EV_CAP_READ_ACTIVE_PKG    BIT(1)
+#define PERF_EV_CAP_CGROUP_NO_RECURSION        BIT(2)
+#define PERF_EV_CAP_INACTIVE_CPU_READ_PKG      BIT(3)
