Re: [PATCH v2 06/32] perf/x86/intel/cqm: add per-package RMIDs, data and locks

2016-05-18, Thomas Gleixner
On Wed, 11 May 2016, David Carrillo-Cisneros wrote:
> +/* Init cqm pkg_data for @cpu 's package. */
> +static int pkg_data_init_cpu(int cpu)
> +{
> + struct pkg_data *pkg_data;
> + struct cpuinfo_x86 *c = &cpu_data(cpu);
> + u16 pkg_id = topology_physical_package_id(cpu);
> +
> + if (cqm_pkgs_data[pkg_id])
> + return 0;
> +
> +
> + pkg_data = kmalloc_node(sizeof(struct pkg_data),
> + GFP_KERNEL, cpu_to_node(cpu));

kzalloc_node() please
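
i.e. (untested sketch):

	pkg_data = kzalloc_node(sizeof(struct pkg_data),
				GFP_KERNEL, cpu_to_node(cpu));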

> + if (!pkg_data)
> + return -ENOMEM;
> +
> + pkg_data->max_rmid = c->x86_cache_max_rmid;
> +
> + /* Does hardware has more rmids than this driver can handle? */
> + if (WARN_ON(pkg_data->max_rmid >= INVALID_RMID))
> + pkg_data->max_rmid = INVALID_RMID - 1;
> +
> + if (c->x86_cache_occ_scale != cqm_l3_scale) {
> + pr_err("Multiple LLC scale values, disabling\n");
> + kfree(pkg_data);
> + return -EINVAL;
> + }

Please move this check before the allocation. 
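
I.e. do the compatibility check first, so the error path has nothing to
free (sketch only):

	if (c->x86_cache_occ_scale != cqm_l3_scale) {
		pr_err("Multiple LLC scale values, disabling\n");
		return -EINVAL;
	}

	pkg_data = kzalloc_node(sizeof(struct pkg_data),
				GFP_KERNEL, cpu_to_node(cpu));
	if (!pkg_data)
		return -ENOMEM;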

> + pkg_data->prmids_by_rmid = kmalloc_node(
> + sizeof(struct prmid *) * (1 + pkg_data->max_rmid),
> + GFP_KERNEL, cpu_to_node(cpu));

kzalloc_node()

> +
> + if (!pkg_data) {
> + kfree(pkg_data);

Huch? You allocate pkg_data->prmids_by_rmid and then check pkg_data, which is
guaranteed to be non-NULL here.
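
What you presumably want here is something like (sketch, with the
kzalloc_node() change from above):

	pkg_data->prmids_by_rmid = kzalloc_node(
		sizeof(struct prmid *) * (pkg_data->max_rmid + 1),
		GFP_KERNEL, cpu_to_node(cpu));
	if (!pkg_data->prmids_by_rmid) {
		kfree(pkg_data);
		return -ENOMEM;
	}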

> + return -ENOMEM;
> + }
> +
> + INIT_LIST_HEAD(&pkg_data->free_prmids_pool);
> +
> + mutex_init(&pkg_data->pkg_data_mutex);
> + raw_spin_lock_init(&pkg_data->pkg_data_lock);
> +
> + /* XXX: Chose randomly*/

Please add coherent comments, e.g.:

   /*
    * We should select the rotation_cpu randomly in the package,
    * because <reason>. Use the current cpu for now.
    */

> + pkg_data->rotation_cpu = cpu;
> +
> + cqm_pkgs_data[pkg_id] = pkg_data;
> + return 0;
> +}
> +
> +static int intel_cqm_setup_pkg_prmid_pools(u16 pkg_id)
> +{
> + int r;
> + unsigned long flags;
> + struct prmid *prmid;
> + struct pkg_data *pkg_data = cqm_pkgs_data[pkg_id];

This variable ordering is really hard to read.

struct pkg_data *pkg_data = cqm_pkgs_data[pkg_id];
struct prmid *prmid;
unsigned long flags;
int r;

Parses faster, but that's my personal taste and the least of my worries with
this patch.

> +
> + if (!__valid_pkg_id(pkg_id))
> + return -EINVAL;
> +
> + for (r = 0; r <= pkg_data->max_rmid; r++) {
> +
> + prmid = kmalloc_node(sizeof(struct prmid), GFP_KERNEL,
> +  cpu_to_node(pkg_data->rotation_cpu));
> + if (!prmid)
> + goto fail;
> +
> + atomic64_set(&prmid->last_read_value, 0L);
> + atomic64_set(&prmid->last_read_time, 0L);
> + INIT_LIST_HEAD(&prmid->pool_entry);
> + prmid->rmid = r;
> +
> + /* Lock needed if called during CPU hotplug. */
> + raw_spin_lock_irqsave_nested(
> + &pkg_data->pkg_data_lock, flags, pkg_id);
> + pkg_data->prmids_by_rmid[r] = prmid;

So this adds the prmid to the array, but it does not link it into the free
pool list. So the fail path is completely bogus.
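
If all prmids are supposed to start out in the free pool, then something
along these lines (sketch only) would also make the fail path usable:

	raw_spin_lock_irqsave_nested(&pkg_data->pkg_data_lock, flags, pkg_id);
	pkg_data->prmids_by_rmid[r] = prmid;
	list_add_tail(&prmid->pool_entry, &pkg_data->free_prmids_pool);
	raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);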

> + /* RMID 0 is special and makes the root of rmid hierarchy. */

This comment does not make any sense here.

> + raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
> + }
> + return 0;
> +fail:
> + while (!list_empty(&pkg_data->free_prmids_pool)) {
> + prmid = list_first_entry(&pkg_data->free_prmids_pool,
> +  struct prmid, pool_entry);
> + list_del(&prmid->pool_entry);
> + kfree(pkg_data->prmids_by_rmid[prmid->rmid]);
> + kfree(prmid);
> + }
> + return -ENOMEM;

Please split out the fail path into a separate function so it can be reused
for error handling and module removal.
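
E.g. something like this (sketch only, function name made up), which both
the fail path and the module exit code could call:

	static void free_pkg_prmid_pool(struct pkg_data *pkg_data)
	{
		struct prmid *prmid, *tmp;

		list_for_each_entry_safe(prmid, tmp,
					 &pkg_data->free_prmids_pool,
					 pool_entry) {
			list_del(&prmid->pool_entry);
			pkg_data->prmids_by_rmid[prmid->rmid] = NULL;
			kfree(prmid);
		}
	}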

> +/*
> + * Determine if @a and @b measure the same set of tasks.
> + *
> + * If @a and @b measure the same set of tasks then we want to share a
> + * single RMID.
> + */
> +static bool __match_event(struct perf_event *a, struct perf_event *b)

Can you please split out the event support and just keep the infrastructure
setup in this patch for review purposes? We can fold stuff together once we
are done with this.

> +static inline void cqm_pick_event_reader(int cpu)
> +{
> + u16 pkg_id = topology_physical_package_id(cpu);
> + /* XXX: lock, check if rotation cpu is online, maybe */

So I assume 'XXX' means: FIXME or such. 

1) If you have no idea whether you need the lock then you better make your
   mind up before posting.

2) @cpu is online as this is called from CPU_STARTING resp. from the
   init code.

So please can you fix this stuff and/or remove crap comments like the above.

> + /*
> +  * Pick a reader if there isn't one already.
> +  */
> + if (cqm_pkgs_data[pkg_id]->rotation_cpu 

[PATCH v2 06/32] perf/x86/intel/cqm: add per-package RMIDs, data and locks

2016-05-11, David Carrillo-Cisneros
Introduce struct pkg_data, which contains all per-package CQM data for the
new CQM driver. The per-package data is:
  1) A pool of free prmids (one prmid per RMID per package). Each package
  may have a different number of prmids (different hw max_rmid_index).
  2) A lock and a mutex that protect the prmid pools, changes to the pmonr
  state, and the rotation logic.
The per-package separation of locks reduces the contention on each lock and
mutex compared with the previous version, which used a single system-wide
mutex and lock.

More per-package data will be added in future patches in this series.
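
Roughly, the per-package structure looks like this (abridged sketch based
on how the fields are used in cqm.c below; types are approximate and the
real definition lives in cqm.h):

	struct pkg_data {
		u32			max_rmid;	/* max RMID index in this package */
		struct prmid		**prmids_by_rmid; /* RMID -> prmid lookup */
		struct list_head	free_prmids_pool; /* unused prmids */
		struct mutex		pkg_data_mutex;	/* protects pools, pmonr state, rotation */
		raw_spinlock_t		pkg_data_lock;
		int			rotation_cpu;	/* cpu used to read RMIDs in this package */
	};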

Reviewed-by: Stephane Eranian 
Signed-off-by: David Carrillo-Cisneros 
---
 arch/x86/events/intel/cqm.c | 499 
 arch/x86/events/intel/cqm.h |  62 ++
 include/linux/perf_event.h  |   7 +
 3 files changed, 568 insertions(+)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 2daee37..54f219f 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -12,6 +12,8 @@
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
+static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+
 #define RMID_VAL_ERROR (1ULL << 63)
 #define RMID_VAL_UNAVAIL   (1ULL << 62)
 
@@ -69,3 +71,500 @@ static inline int __cqm_prmid_update(struct prmid *prmid,
 
return 1;
 }
+
+/*
+ * A cache group is a group of perf_events with the same target (thread,
+ * cgroup, CPU or system-wide). Each cache group receives one RMID.
+ * Cache groups are protected by cqm_mutex.
+ */
+static LIST_HEAD(cache_groups);
+static DEFINE_MUTEX(cqm_mutex);
+
+struct pkg_data **cqm_pkgs_data;
+
+static inline bool __valid_pkg_id(u16 pkg_id)
+{
+   return pkg_id < topology_max_packages();
+}
+
+/* Init cqm pkg_data for @cpu 's package. */
+static int pkg_data_init_cpu(int cpu)
+{
+   struct pkg_data *pkg_data;
+   struct cpuinfo_x86 *c = &cpu_data(cpu);
+   u16 pkg_id = topology_physical_package_id(cpu);
+
+   if (cqm_pkgs_data[pkg_id])
+   return 0;
+
+
+   pkg_data = kmalloc_node(sizeof(struct pkg_data),
+   GFP_KERNEL, cpu_to_node(cpu));
+   if (!pkg_data)
+   return -ENOMEM;
+
+   pkg_data->max_rmid = c->x86_cache_max_rmid;
+
+   /* Does hardware has more rmids than this driver can handle? */
+   if (WARN_ON(pkg_data->max_rmid >= INVALID_RMID))
+   pkg_data->max_rmid = INVALID_RMID - 1;
+
+   if (c->x86_cache_occ_scale != cqm_l3_scale) {
+   pr_err("Multiple LLC scale values, disabling\n");
+   kfree(pkg_data);
+   return -EINVAL;
+   }
+
+   pkg_data->prmids_by_rmid = kmalloc_node(
+   sizeof(struct prmid *) * (1 + pkg_data->max_rmid),
+   GFP_KERNEL, cpu_to_node(cpu));
+
+   if (!pkg_data) {
+   kfree(pkg_data);
+   return -ENOMEM;
+   }
+
+   INIT_LIST_HEAD(&pkg_data->free_prmids_pool);
+
+   mutex_init(&pkg_data->pkg_data_mutex);
+   raw_spin_lock_init(&pkg_data->pkg_data_lock);
+
+   /* XXX: Chose randomly*/
+   pkg_data->rotation_cpu = cpu;
+
+   cqm_pkgs_data[pkg_id] = pkg_data;
+   return 0;
+}
+
+static int intel_cqm_setup_pkg_prmid_pools(u16 pkg_id)
+{
+   int r;
+   unsigned long flags;
+   struct prmid *prmid;
+   struct pkg_data *pkg_data = cqm_pkgs_data[pkg_id];
+
+   if (!__valid_pkg_id(pkg_id))
+   return -EINVAL;
+
+   for (r = 0; r <= pkg_data->max_rmid; r++) {
+
+   prmid = kmalloc_node(sizeof(struct prmid), GFP_KERNEL,
+cpu_to_node(pkg_data->rotation_cpu));
+   if (!prmid)
+   goto fail;
+
+   atomic64_set(&prmid->last_read_value, 0L);
+   atomic64_set(&prmid->last_read_time, 0L);
+   INIT_LIST_HEAD(&prmid->pool_entry);
+   prmid->rmid = r;
+
+   /* Lock needed if called during CPU hotplug. */
+   raw_spin_lock_irqsave_nested(
+   &pkg_data->pkg_data_lock, flags, pkg_id);
+   pkg_data->prmids_by_rmid[r] = prmid;
+
+
+   /* RMID 0 is special and makes the root of rmid hierarchy. */
+   raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
+   }
+   return 0;
+fail:
+   while (!list_empty(&pkg_data->free_prmids_pool)) {
+   prmid = list_first_entry(&pkg_data->free_prmids_pool,
+    struct prmid, pool_entry);
+   list_del(&prmid->pool_entry);
+   kfree(pkg_data->prmids_by_rmid[prmid->rmid]);
+   kfree(prmid);
+   }
+   return -ENOMEM;
+}
+
+
+/*
+ * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
+ */
+static bool __match_event(struct perf_event *a, struct perf_event *b)
+{
+   /* Per-cpu and