[PATCH v8 8/9] perf/amd/iommu: Fix sysfs perf attribute groups

2017-01-15 Thread Suravee Suthikulpanit
From: Suravee Suthikulpanit 

Introduce a static amd_iommu_attr_groups array to simplify the
sysfs attribute initialization code.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 85 -
 1 file changed, 37 insertions(+), 48 deletions(-)
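
For reference, below is a minimal stand-alone sketch (plain userspace C, hypothetical names, not the kernel sysfs API) of the pattern this patch moves to: file-scope attribute groups collected in one static, NULL-terminated array that every PMU instance can point at, instead of rebuilding a per-instance copy.

#include <stdio.h>

/* Hypothetical stand-ins for struct attribute_group and friends. */
struct attr_group {
	const char *name;
	const char * const *attrs;
};

static const char * const format_attrs[] = { "csource", "devid", NULL };
static const char * const event_attrs[]  = { "mem_pass_untrans", NULL };

static const struct attr_group format_group = { "format", format_attrs };
static const struct attr_group events_group = { "events", event_attrs };

/* One static, NULL-terminated array shared by all instances. */
static const struct attr_group *attr_groups[] = {
	&format_group,
	&events_group,
	NULL,
};

int main(void)
{
	for (const struct attr_group **g = attr_groups; *g; g++)
		for (const char * const *a = (*g)->attrs; *a; a++)
			printf("%s/%s\n", (*g)->name, *a);
	return 0;
}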

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index cc7bea4..223c01d 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -43,14 +43,8 @@ struct perf_amd_iommu {
u8 max_counters;
u64 cntr_assign_mask;
raw_spinlock_t lock;
-   const struct attribute_group *attr_groups[4];
 };
 
-#define format_group   attr_groups[0]
-#define cpumask_group  attr_groups[1]
-#define events_group   attr_groups[2]
-#define null_group attr_groups[3]
-
 /*-
  * sysfs format attributes
  *-*/
@@ -81,6 +75,10 @@ struct perf_amd_iommu {
 /*-
  * sysfs events attributes
  *-*/
+static struct attribute_group amd_iommu_events_group = {
+   .name = "events",
+};
+
 struct amd_iommu_event_desc {
struct kobj_attribute attr;
const char *event;
@@ -388,76 +386,63 @@ static void perf_iommu_del(struct perf_event *event, int flags)
perf_event_update_userpage(event);
 }
 
-static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+static __init int _init_events_attrs(void)
 {
-   struct attribute **attrs;
-   struct attribute_group *attr_group;
int i = 0, j;
+   struct attribute **attrs;
 
while (amd_iommu_v2_event_descs[i].attr.attr.name)
i++;
 
-   attr_group = kzalloc(sizeof(struct attribute *)
-   * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
-   if (!attr_group)
+   attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL);
+   if (!attrs)
return -ENOMEM;
 
-   attrs = (struct attribute **)(attr_group + 1);
for (j = 0; j < i; j++)
attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
 
-   attr_group->name = "events";
-   attr_group->attrs = attrs;
-   perf_iommu->events_group = attr_group;
-
+   amd_iommu_events_group.attrs = attrs;
return 0;
 }
 
 static __init void amd_iommu_pc_exit(void)
 {
-   if (__perf_iommu.events_group != NULL) {
-   kfree(__perf_iommu.events_group);
-   __perf_iommu.events_group = NULL;
+   if (amd_iommu_events_group.attrs) {
+   kfree(amd_iommu_events_group.attrs);
+   amd_iommu_events_group.attrs = NULL;
}
 }
 
-static __init int _init_perf_amd_iommu(
-   struct perf_amd_iommu *perf_iommu, char *name)
+const struct attribute_group *amd_iommu_attr_groups[] = {
+   &amd_iommu_format_group,
+   &amd_iommu_cpumask_group,
+   &amd_iommu_events_group,
+   NULL,
+};
+
+static __init int
+_init_perf_amd_iommu(struct perf_amd_iommu *perf_iommu, char *name)
 {
int ret;
 
raw_spin_lock_init(&perf_iommu->lock);
 
-   perf_iommu->format_group = &amd_iommu_format_group;
-
/* Init cpumask attributes to only core 0 */
cpumask_set_cpu(0, &iommu_cpumask);
-   perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
-
-   ret = _init_events_attrs(perf_iommu);
-   if (ret) {
-   pr_err("Error initializing AMD IOMMU perf events.\n");
-   return ret;
-   }
 
perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0);
perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0);
if (!perf_iommu->max_banks || !perf_iommu->max_counters)
return -EINVAL;
 
-   perf_iommu->null_group = NULL;
-   perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
-
+   perf_iommu->pmu.attr_groups = amd_iommu_attr_groups;
ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
-   if (ret) {
+   if (ret)
pr_err("Error initializing AMD IOMMU perf counters.\n");
-   amd_iommu_pc_exit();
-   } else {
+   else
pr_info("perf: amd_iommu: Detected. (%d banks, %d 
counters/bank)\n",
amd_iommu_pc_get_max_banks(0),
amd_iommu_pc_get_max_counters(0));
-   }
-
return ret;
 }
 
@@ -471,24 +456,28 @@ static __init int _init_perf_amd_iommu(
.stop   = perf_iommu_stop,
.read   = perf_iommu_read,
},
-   .max_banks  = 0x00,
-   .max_counters   = 0x00,
-   .cntr_assign_mask   = 0ULL,
-   .format_group   = NULL,
-   .cpumask_group  = NULL,
-   .events_group   = NULL,
-   .null_group 

[PATCH v8 9/9] perf/amd/iommu: Enable support for multiple IOMMUs

2017-01-15 Thread Suravee Suthikulpanit
From: Suravee Suthikulpanit 

Add multi-IOMMU support for perf by exposing an AMD IOMMU PMU
for each IOMMU found in the system via:

  /sys/bus/event_source/devices/amd_iommu_x

where x is the IOMMU index. This allows users to specify
different events to be programmed onto the performance counters
of each IOMMU.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 114 ++--
 1 file changed, 67 insertions(+), 47 deletions(-)
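
For reference, a minimal stand-alone sketch (plain userspace C, hypothetical names) of the container_of pattern used here to recover the per-IOMMU perf_amd_iommu structure from the struct pmu embedded in it, which is what perf hands back via event->pmu:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct pmu { const char *name; };        /* stand-in for struct pmu        */

struct perf_amd_iommu {                  /* stand-in for the per-IOMMU     */
	unsigned int idx;                /* instance used in this patch    */
	struct pmu pmu;
};

int main(void)
{
	struct perf_amd_iommu inst = { .idx = 2, .pmu = { "amd_iommu_2" } };
	struct pmu *p = &inst.pmu;       /* what perf would pass around     */

	struct perf_amd_iommu *pi = container_of(p, struct perf_amd_iommu, pmu);
	printf("%s -> idx %u\n", pi->pmu.name, pi->idx);   /* prints idx 2   */
	return 0;
}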

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 223c01d..38eafbf 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -35,16 +35,21 @@
 #define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
 #define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
 
-static struct perf_amd_iommu __perf_iommu;
+#define PERF_AMD_IOMMU_NAME_SZ 16
 
 struct perf_amd_iommu {
+   struct list_head list;
struct pmu pmu;
+   unsigned int idx;
+   char name[PERF_AMD_IOMMU_NAME_SZ];
u8 max_banks;
u8 max_counters;
u64 cntr_assign_mask;
raw_spinlock_t lock;
 };
 
+static LIST_HEAD(perf_amd_iommu_list);
+
 /*-
  * sysfs format attributes
  *-*/
@@ -202,8 +207,7 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
 static int perf_iommu_event_init(struct perf_event *event)
 {
struct hw_perf_event *hwc = &event->hw;
-   struct perf_amd_iommu *perf_iommu;
-   u64 config, config1;
+   struct perf_amd_iommu *pi;
 
/* test the event attr type check for PMU enumeration */
if (event->attr.type != event->pmu->type)
@@ -225,27 +229,18 @@ static int perf_iommu_event_init(struct perf_event *event)
if (event->cpu < 0)
return -EINVAL;
 
-   perf_iommu = &__perf_iommu;
-
-   if (event->pmu != &perf_iommu->pmu)
-   return -ENOENT;
-
-   if (perf_iommu) {
-   config = event->attr.config;
-   config1 = event->attr.config1;
-   } else {
-   return -EINVAL;
-   }
-
/* update the hw_perf_event struct with the iommu config data */
-   hwc->config = config;
-   hwc->extra_reg.config = config1;
+   pi = container_of(event->pmu, struct perf_amd_iommu, pmu);
+   hwc->idx  = pi->idx;
+   hwc->config   = event->attr.config;
+   hwc->extra_reg.config = event->attr.config1;
 
return 0;
 }
 
 static void perf_iommu_enable_event(struct perf_event *ev)
 {
+   struct hw_perf_event *hwc = &ev->hw;
u8 csource = _GET_CSOURCE(ev);
u16 devid = _GET_DEVID(ev);
u8 bank = _GET_BANK(ev);
@@ -253,30 +248,34 @@ static void perf_iommu_enable_event(struct perf_event *ev)
u64 reg = 0ULL;
 
reg = csource;
-   amd_iommu_pc_set_reg(0, bank, cntr,
+   amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
 IOMMU_PC_COUNTER_SRC_REG, &reg);
 
reg = devid | (_GET_DEVID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
-   amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);
+   amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
+IOMMU_PC_DEVID_MATCH_REG, &reg);
 
reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
-   amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);
+   amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
+IOMMU_PC_PASID_MATCH_REG, &reg);
 
reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
-   amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
+   amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
+IOMMU_PC_DOMID_MATCH_REG, &reg);
 }
 
 static void perf_iommu_disable_event(struct perf_event *event)
 {
+   struct hw_perf_event *hwc = &event->hw;
u64 reg = 0ULL;
 
-   amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
+   amd_iommu_pc_set_reg(hwc->idx, _GET_BANK(event), _GET_CNTR(event),
 IOMMU_PC_COUNTER_SRC_REG, &reg);
 }
 
@@ -295,7 +294,7 @@ static void perf_iommu_start(struct perf_event *event, int flags)
return;
 
val = local64_read(&hwc->prev_count) & GENMASK_ULL(48, 0);
-   if (amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
+   if (amd_iommu_pc_set_reg(hwc->idx, _GET_BANK(event), _GET_CNTR(event),
   IOMMU_PC_COUNTER_REG, &val))
return;
 
@@ -309,7 +308,7 @@ static void perf_iommu_read(struct perf_event *event)
s64 delta;
struct hw_perf_event *hwc = &event->hw;
 
-   if 

[PATCH v8 6/9] perf/amd/iommu: Modify amd_iommu_pc_get_set_reg_val() API to allow specifying IOMMU index

2017-01-15 Thread Suravee Suthikulpanit
From: Suravee Suthikulpanit 

The current amd_iommu_pc_get_set_reg_val() cannot support multiple IOMMUs.
It is also confusing since it tries to support both set and get in
one function.

So break it down into amd_iommu_pc_[get|set]_reg(), and modify them
to allow callers to specify the IOMMU index. This prepares the driver
for supporting multiple IOMMUs in a subsequent patch.

Also remove unnecessary function declarations in amd_iommu_proto.h.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 34 ++
 arch/x86/events/amd/iommu.h |  7 --
 drivers/iommu/amd_iommu_init.c  | 53 ++---
 drivers/iommu/amd_iommu_proto.h |  5 
 4 files changed, 52 insertions(+), 47 deletions(-)
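
For reference, a small stand-alone sketch (plain userspace C with a hypothetical register array, not the real MMIO accessors) of the API split this patch performs: one entry point keyed by a bool is_write becomes two explicit accessors that also take the instance index:

#include <stdint.h>
#include <stdio.h>

#define NUM_INSTANCES 2
#define NUM_REGS 8

static uint64_t regs[NUM_INSTANCES][NUM_REGS];   /* fake register file */

/* Old style: one entry point, direction selected by a flag. */
static int pc_get_set_reg_val(unsigned int idx, uint8_t fxn, uint64_t *value, int is_write)
{
	if (idx >= NUM_INSTANCES || fxn >= NUM_REGS)
		return -1;
	if (is_write)
		regs[idx][fxn] = *value;
	else
		*value = regs[idx][fxn];
	return 0;
}

/* New style: separate, self-describing accessors. */
static int pc_set_reg(unsigned int idx, uint8_t fxn, uint64_t *value)
{
	return pc_get_set_reg_val(idx, fxn, value, 1);
}

static int pc_get_reg(unsigned int idx, uint8_t fxn, uint64_t *value)
{
	return pc_get_set_reg_val(idx, fxn, value, 0);
}

int main(void)
{
	uint64_t v = 0x1234;

	pc_set_reg(1, 3, &v);
	v = 0;
	pc_get_reg(1, 3, &v);
	printf("0x%llx\n", (unsigned long long)v);   /* prints 0x1234 */
	return 0;
}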

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index ec7e873..200d2e8 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -250,42 +250,36 @@ static void perf_iommu_enable_event(struct perf_event *ev)
 {
u8 csource = _GET_CSOURCE(ev);
u16 devid = _GET_DEVID(ev);
+   u8 bank = _GET_BANK(ev);
+   u8 cntr = _GET_CNTR(ev);
u64 reg = 0ULL;
 
reg = csource;
-   amd_iommu_pc_get_set_reg_val(devid,
-   _GET_BANK(ev), _GET_CNTR(ev) ,
-    IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+   amd_iommu_pc_set_reg(0, bank, cntr,
+    IOMMU_PC_COUNTER_SRC_REG, &reg);
 
reg = devid | (_GET_DEVID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
-   amd_iommu_pc_get_set_reg_val(devid,
-   _GET_BANK(ev), _GET_CNTR(ev) ,
-    IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+   amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);
 
reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
-   amd_iommu_pc_get_set_reg_val(devid,
-   _GET_BANK(ev), _GET_CNTR(ev) ,
-    IOMMU_PC_PASID_MATCH_REG, &reg, true);
+   amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);
 
reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
-   amd_iommu_pc_get_set_reg_val(devid,
-   _GET_BANK(ev), _GET_CNTR(ev) ,
-    IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+   amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
 }
 
 static void perf_iommu_disable_event(struct perf_event *event)
 {
u64 reg = 0ULL;
 
-   amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-   _GET_BANK(event), _GET_CNTR(event),
-   IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+   amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
+IOMMU_PC_COUNTER_SRC_REG, &reg);
 }
 
 static void perf_iommu_start(struct perf_event *event, int flags)
@@ -300,9 +294,8 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 
if (flags & PERF_EF_RELOAD) {
u64 prev_raw_count =  local64_read(&hwc->prev_count);
-   amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-   _GET_BANK(event), _GET_CNTR(event),
-   IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+   amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
+IOMMU_PC_COUNTER_REG, &prev_raw_count);
}
 
perf_iommu_enable_event(event);
@@ -316,9 +309,8 @@ static void perf_iommu_read(struct perf_event *event)
s64 delta;
struct hw_perf_event *hwc = &event->hw;
 
-   amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-   _GET_BANK(event), _GET_CNTR(event),
-   IOMMU_PC_COUNTER_REG, &count, false);
+   amd_iommu_pc_get_reg(0, _GET_BANK(event), _GET_CNTR(event),
+IOMMU_PC_COUNTER_REG, &count);
 
/* IOMMU pc counter register is only 48 bits */
count &= GENMASK_ULL(48, 0);
diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
index cf3dd05..cd70921 100644
--- a/arch/x86/events/amd/iommu.h
+++ b/arch/x86/events/amd/iommu.h
@@ -33,7 +33,10 @@
 
 extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
 
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
-   u8 fxn, u64 *value, bool is_write);
+extern int amd_iommu_pc_set_reg(unsigned int idx, u8 bank, u8 cntr,
+   u8 fxn, u64 *value);
+
+extern int amd_iommu_pc_get_reg(unsigned int idx, u8 bank, u8 cntr,
+   u8 fxn, u64 *value);
 
 #endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/drivers/iommu/amd_iommu_init.c 

[PATCH v8 5/9] perf/amd/iommu: Modify functions to query max banks and counters

2017-01-15 Thread Suravee Suthikulpanit
Currently, amd_iommu_pc_get_max_[banks|counters]() use an end-point
device ID to locate an IOMMU and check the reported max banks/counters.
The logic assumes that IOMMU_BASE_DEVID belongs to the first IOMMU,
and uses it to acquire a reference to the first IOMMU, which does not work
on certain systems. Instead, modify the functions to take an IOMMU index,
and use it to query the corresponding AMD IOMMU instance.

Note that we currently hard-code the IOMMU index to 0, since the current
AMD IOMMU perf implementation only supports a single IOMMU. A subsequent
patch will add multi-IOMMU support and use the proper IOMMU index.

This patch also removes an unnecessary function declaration in
amd_iommu_proto.h.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 17 +++--
 arch/x86/events/amd/iommu.h |  7 ++-
 drivers/iommu/amd_iommu_init.c  | 36 ++--
 drivers/iommu/amd_iommu_proto.h |  2 --
 4 files changed, 31 insertions(+), 31 deletions(-)
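
For reference, a minimal stand-alone sketch (plain userspace C, hypothetical types) of the index-based lookup introduced here as get_amd_iommu(): walk the list of detected instances and return the idx-th one, or NULL if idx is out of range:

#include <stddef.h>
#include <stdio.h>

struct fake_iommu {
	int id;
	struct fake_iommu *next;
};

/* Pretend these were discovered at init time. */
static struct fake_iommu iommu1 = { 1, NULL };
static struct fake_iommu iommu0 = { 0, &iommu1 };
static struct fake_iommu *iommu_list = &iommu0;

static struct fake_iommu *get_fake_iommu(unsigned int idx)
{
	unsigned int i = 0;

	for (struct fake_iommu *it = iommu_list; it; it = it->next)
		if (i++ == idx)
			return it;
	return NULL;                      /* idx beyond the detected IOMMUs */
}

int main(void)
{
	struct fake_iommu *io = get_fake_iommu(1);

	printf("idx 1 -> id %d\n", io ? io->id : -1);           /* id 1  */
	printf("idx 5 -> %s\n", get_fake_iommu(5) ? "found" : "NULL");
	return 0;
}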

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 3f1c18a..ec7e873 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -239,14 +239,6 @@ static int perf_iommu_event_init(struct perf_event *event)
return -EINVAL;
}
 
-   /* integrate with iommu base devid (0x0000), assume one iommu */
-   perf_iommu->max_banks =
-   amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
-   perf_iommu->max_counters =
-   amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
-   if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
-   return -EINVAL;
-
/* update the hw_perf_event struct with the iommu config data */
hwc->config = config;
hwc->extra_reg.config = config1;
@@ -453,6 +445,11 @@ static __init int _init_perf_amd_iommu(
return ret;
}
 
+   perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0);
+   perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0);
+   if (!perf_iommu->max_banks || !perf_iommu->max_counters)
+   return -EINVAL;
+
perf_iommu->null_group = NULL;
perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
 
@@ -462,8 +459,8 @@ static __init int _init_perf_amd_iommu(
amd_iommu_pc_exit();
} else {
pr_info("perf: amd_iommu: Detected. (%d banks, %d 
counters/bank)\n",
-   amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
-   amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+   amd_iommu_pc_get_max_banks(0),
+   amd_iommu_pc_get_max_counters(0));
}
 
return ret;
diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
index 5c5c932..cf3dd05 100644
--- a/arch/x86/events/amd/iommu.h
+++ b/arch/x86/events/amd/iommu.h
@@ -24,17 +24,14 @@
 #define PC_MAX_SPEC_BNKS   64
 #define PC_MAX_SPEC_CNTRS  16
 
-/* iommu pc reg masks*/
-#define IOMMU_BASE_DEVID   0x
-
 /* amd_iommu_init.c external support functions */
 extern int amd_iommu_get_num_iommus(void);
 
 extern bool amd_iommu_pc_supported(void);
 
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
 
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
 
 extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
u8 fxn, u64 *value, bool is_write);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 515d4c1..ed21307d 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2713,6 +2713,20 @@ bool amd_iommu_v2_supported(void)
 }
 EXPORT_SYMBOL(amd_iommu_v2_supported);
 
+static struct amd_iommu *get_amd_iommu(unsigned int idx)
+{
+   unsigned int i = 0;
+   struct amd_iommu *iommu, *ret = NULL;
+
+   for_each_iommu(iommu) {
+   if (i++ == idx) {
+   ret = iommu;
+   break;
+   }
+   }
+   return ret;
+}
+
/****************************************************************************
  *
  * IOMMU EFR Performance Counter support functionality. This code allows
@@ -2720,17 +2734,14 @@ bool amd_iommu_v2_supported(void)
  *
 ****************************************************************************/
 
-u8 amd_iommu_pc_get_max_banks(u16 devid)
+u8 amd_iommu_pc_get_max_banks(unsigned int idx)
 {
-   struct amd_iommu *iommu;
-   u8 ret = 0;
+   struct amd_iommu *iommu = get_amd_iommu(idx);
 
-   /* locate the iommu governing the devid */
-   iommu = amd_iommu_rlookup_table[devid];

[PATCH v8 3/9] perf/amd/iommu: Misc fix up perf_iommu_read

2017-01-15 Thread Suravee Suthikulpanit
* Fix overflow handling since u64 delta would lose the MSB sign bit.
* Remove unnecessary local64_cmpxchg().
* Fix coding style and make use of the GENMASK_ULL() macro.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)
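
For reference, a stand-alone sketch (plain userspace C; CNTR_SHIFT mirrors the driver's COUNTER_SHIFT, assumed to be 64 minus the 48 counter bits) of the shift trick the new delta computation relies on: shifting both samples into the top of a 64-bit word makes the subtraction wrap-safe, and the arithmetic shift back down yields a correctly signed delta even after the 48-bit counter rolls over:

#include <stdint.h>
#include <stdio.h>

#define CNTR_BITS  48
#define CNTR_SHIFT (64 - CNTR_BITS)   /* 16, analogous to COUNTER_SHIFT */

/* Signed delta between two 48-bit samples; sign handling comes from the
 * shift up/down, so a wrap from a large prev to a small count still
 * yields the small positive delta that actually elapsed. */
static int64_t delta48(uint64_t prev, uint64_t count)
{
	int64_t d = (int64_t)((count << CNTR_SHIFT) - (prev << CNTR_SHIFT));

	return d >> CNTR_SHIFT;
}

int main(void)
{
	uint64_t prev  = 0xFFFFFFFFFFF0ULL;   /* near the 48-bit limit  */
	uint64_t count = 0x000000000005ULL;   /* counter wrapped around */

	printf("delta = %lld\n", (long long)delta48(prev, count));  /* 21 */
	return 0;
}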

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 1aa25d8..3f1c18a 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -320,9 +320,8 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 
 static void perf_iommu_read(struct perf_event *event)
 {
-   u64 count = 0ULL;
-   u64 prev_raw_count = 0ULL;
-   u64 delta = 0ULL;
+   u64 count, prev;
+   s64 delta;
struct hw_perf_event *hwc = &event->hw;
 
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
@@ -330,18 +329,20 @@ static void perf_iommu_read(struct perf_event *event)
IOMMU_PC_COUNTER_REG, &count, false);
 
/* IOMMU pc counter register is only 48 bits */
-   count &= 0xFFFFFFFFFFFFULL;
+   count &= GENMASK_ULL(48, 0);
 
-   prev_raw_count =  local64_read(&hwc->prev_count);
-   if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-   count) != prev_raw_count)
-   return;
+   prev = local64_read(&hwc->prev_count);
 
-   /* Handling 48-bit counter overflowing */
-   delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+   /*
+* Since we do not enable counter overflow interrupts,
+* we do not have to worry about prev_count changing on us.
+*/
+   local64_set(&hwc->prev_count, count);
+
+   /* Handle 48-bit counter overflow */
+   delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
delta >>= COUNTER_SHIFT;
local64_add(delta, &event->count);
-
 }
 
 static void perf_iommu_stop(struct perf_event *event, int flags)
-- 
1.8.3.1



[PATCH v8 7/9] perf/amd/iommu: Check return value when set and get counter value

2017-01-15 Thread Suravee Suthikulpanit
From: Suravee Suthikulpanit 

In perf_iommu_start(), we need to check the return value from
amd_iommu_pc_set_reg(). In case of failure, we should not enable the PMU.

Also, in perf_iommu_read(), we need to check the return value from
amd_iommu_pc_get_reg() before using the value.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)
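
For reference, a tiny stand-alone sketch (plain userspace C, hypothetical accessor) of the pattern applied here: bail out when the register accessor reports failure instead of consuming a value that was never written:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical accessor: fails for an out-of-range counter index. */
static int pc_get_reg(unsigned int cntr, uint64_t *value)
{
	if (cntr >= 4)
		return -1;
	*value = 0xABCDULL + cntr;
	return 0;
}

static void read_counter(unsigned int cntr)
{
	uint64_t count;

	if (pc_get_reg(cntr, &count))   /* check before using 'count' */
		return;
	printf("cntr %u = 0x%llx\n", cntr, (unsigned long long)count);
}

int main(void)
{
	read_counter(1);   /* prints a value   */
	read_counter(9);   /* silently skipped */
	return 0;
}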

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 200d2e8..cc7bea4 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -284,6 +284,7 @@ static void perf_iommu_disable_event(struct perf_event *event)
 
 static void perf_iommu_start(struct perf_event *event, int flags)
 {
+   u64 val;
struct hw_perf_event *hwc = &event->hw;
 
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
@@ -292,15 +293,16 @@ static void perf_iommu_start(struct perf_event *event, int flags)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
 
-   if (flags & PERF_EF_RELOAD) {
-   u64 prev_raw_count =  local64_read(&hwc->prev_count);
-   amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
-IOMMU_PC_COUNTER_REG, &prev_raw_count);
-   }
+   if (!(flags & PERF_EF_RELOAD))
+   return;
+
+   val = local64_read(&hwc->prev_count) & GENMASK_ULL(48, 0);
+   if (amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
+  IOMMU_PC_COUNTER_REG, &val))
+   return;
 
perf_iommu_enable_event(event);
perf_event_update_userpage(event);
-
 }
 
 static void perf_iommu_read(struct perf_event *event)
@@ -309,8 +311,9 @@ static void perf_iommu_read(struct perf_event *event)
s64 delta;
struct hw_perf_event *hwc = &event->hw;
 
-   amd_iommu_pc_get_reg(0, _GET_BANK(event), _GET_CNTR(event),
-IOMMU_PC_COUNTER_REG, &count);
+   if (amd_iommu_pc_get_reg(0, _GET_BANK(event), _GET_CNTR(event),
+IOMMU_PC_COUNTER_REG, &count))
+   return;
 
/* IOMMU pc counter register is only 48 bits */
count &= GENMASK_ULL(48, 0);
-- 
1.8.3.1



[PATCH v8 4/9] iommu/amd: Introduce amd_iommu_get_num_iommus()

2017-01-15 Thread Suravee Suthikulpanit
Introduce amd_iommu_get_num_iommus(), which returns the value of
amd_iommus_present, and replace direct accesses to that variable,
which is now declared static.

This function will also be used by Perf AMD IOMMU driver.

Cc: Borislav Petkov 
Cc: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.h |  2 ++
 drivers/iommu/amd_iommu.c   |  6 +++---
 drivers/iommu/amd_iommu_init.c  | 11 +--
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h |  3 ---
 5 files changed, 15 insertions(+), 8 deletions(-)
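
For reference, a minimal stand-alone sketch (plain userspace C, hypothetical names) of the change: the counter becomes private to one file and other code reads it only through an accessor, so nothing outside can modify it:

#include <stdio.h>

/* Previously a global; now file-local, bumped only during detection. */
static int iommus_present;

static void detect_one_iommu(void)
{
	iommus_present++;
}

/* The only way other code sees the count. */
int get_num_iommus(void)
{
	return iommus_present;
}

int main(void)
{
	detect_one_iommu();
	detect_one_iommu();
	printf("%d IOMMUs\n", get_num_iommus());   /* prints 2 IOMMUs */
	return 0;
}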

diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
index 845d173..5c5c932 100644
--- a/arch/x86/events/amd/iommu.h
+++ b/arch/x86/events/amd/iommu.h
@@ -28,6 +28,8 @@
 #define IOMMU_BASE_DEVID   0x
 
 /* amd_iommu_init.c external support functions */
+extern int amd_iommu_get_num_iommus(void);
+
 extern bool amd_iommu_pc_supported(void);
 
 extern u8 amd_iommu_pc_get_max_banks(u16 devid);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 754595e..ae55485 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1228,7 +1228,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
 
build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
-   for (i = 0; i < amd_iommus_present; ++i) {
+   for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (!domain->dev_iommu[i])
continue;
 
@@ -1272,7 +1272,7 @@ static void domain_flush_complete(struct protection_domain *domain)
 {
int i;
 
-   for (i = 0; i < amd_iommus_present; ++i) {
+   for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (domain && !domain->dev_iommu[i])
continue;
 
@@ -3341,7 +3341,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid,
 * IOMMU TLB needs to be flushed before Device TLB to
 * prevent device TLB refill from IOMMU TLB
 */
-   for (i = 0; i < amd_iommus_present; ++i) {
+   for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (domain->dev_iommu[i] == 0)
continue;
 
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 157e934..515d4c1 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -164,7 +164,9 @@ struct ivmd_header {
 
 /* Array to assign indices to IOMMUs*/
 struct amd_iommu *amd_iommus[MAX_IOMMUS];
-int amd_iommus_present;
+
+/* Number of IOMMUs present in the system */
+static int amd_iommus_present;
 
 /* IOMMUs have a non-present cache? */
 bool amd_iommu_np_cache __read_mostly;
@@ -269,6 +271,11 @@ static inline unsigned long tbl_size(int entry_size)
return 1UL << shift;
 }
 
+int amd_iommu_get_num_iommus(void)
+{
+   return amd_iommus_present;
+}
+
 /* Access to l1 and l2 indexed register spaces */
 
 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
@@ -1333,7 +1340,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
-   iommu->index = amd_iommus_present++;
+   iommu->index = amd_iommus_present++;
 
if (unlikely(iommu->index >= MAX_IOMMUS)) {
WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 7eb60c1..e8f0710 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -21,6 +21,7 @@
 
 #include "amd_iommu_types.h"
 
+extern int amd_iommu_get_num_iommus(void);
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
 extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0d91785..09d7a11 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -606,9 +606,6 @@ struct devid_map {
  */
 extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
 
-/* Number of IOMMUs present in the system */
-extern int amd_iommus_present;
-
 /*
  * Declarations for the global list of all protection domains
  */
-- 
1.8.3.1



[PATCH v8 2/9] perf/amd/iommu: Clean up perf_iommu_enable_event

2017-01-15 Thread Suravee Suthikulpanit
From: Suravee Suthikulpanit 

* Clean up various bitwise operations in perf_iommu_enable_event
* Make use of the BIT(x) macro

This should not affect logic and functionality.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)
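
For reference, a stand-alone sketch (plain userspace C; BIT() spelled out here since linux/bitops.h is not available) of the two bit manipulations this patch touches: marking a bank/counter slot in the assignment mask via shift = bank + bank*3 + cntr, and packing the chosen bank and counter into a single 16-bit return value:

#include <stdint.h>
#include <stdio.h>

#define BIT(nr) (1ULL << (nr))

int main(void)
{
	uint64_t assign_mask = 0;
	unsigned int bank = 2, cntr = 1;

	/* Same slot numbering as the driver: shift = bank + (bank*3) + cntr. */
	unsigned int shift = bank + (bank * 3) + cntr;

	if (!(assign_mask & BIT(shift)))
		assign_mask |= BIT(shift);           /* claim bank 2 / cntr 1 */

	/* Pack bank in the high byte, counter in the low byte. */
	uint16_t retval = (uint16_t)((uint16_t)bank << 8) | (uint8_t)cntr;

	printf("shift=%u mask=0x%llx retval=0x%04x\n",
	       shift, (unsigned long long)assign_mask,
	       (unsigned int)retval);              /* 9, 0x200, 0x0201 */
	return 0;
}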

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 44638d0..1aa25d8 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -164,11 +164,11 @@ static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
for (bank = 0, shift = 0; bank < max_banks; bank++) {
for (cntr = 0; cntr < max_cntrs; cntr++) {
shift = bank + (bank*3) + cntr;
-   if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+   if (perf_iommu->cntr_assign_mask & BIT(shift)) {
continue;
} else {
-   perf_iommu->cntr_assign_mask |= (1ULL<<shift);
-   retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+   perf_iommu->cntr_assign_mask |= BIT(shift);
+   retval = ((u16)((u16)bank << 8) | (u8)(cntr));
goto out;
}
}
@@ -265,23 +265,23 @@ static void perf_iommu_enable_event(struct perf_event *ev)
_GET_BANK(ev), _GET_CNTR(ev) ,
 IOMMU_PC_COUNTER_SRC_REG, &reg, true);
 
-   reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+   reg = devid | (_GET_DEVID_MASK(ev) << 32);
if (reg)
-   reg |= (1UL << 31);
+   reg |= BIT(31);
amd_iommu_pc_get_set_reg_val(devid,
_GET_BANK(ev), _GET_CNTR(ev) ,
 IOMMU_PC_DEVID_MATCH_REG, &reg, true);
 
-   reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+   reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
if (reg)
-   reg |= (1UL << 31);
+   reg |= BIT(31);
amd_iommu_pc_get_set_reg_val(devid,
_GET_BANK(ev), _GET_CNTR(ev) ,
 IOMMU_PC_PASID_MATCH_REG, &reg, true);
 
-   reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+   reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
if (reg)
-   reg |= (1UL << 31);
+   reg |= BIT(31);
amd_iommu_pc_get_set_reg_val(devid,
_GET_BANK(ev), _GET_CNTR(ev) ,
 IOMMU_PC_DOMID_MATCH_REG, &reg, true);
-- 
1.8.3.1



[PATCH v8 0/9] perf/amd/iommu: Enable multi-IOMMU support

2017-01-15 Thread Suravee Suthikulpanit
From: Suravee Suthikulpanit 

This patch series modifies the existing IOMMU and Perf drivers to support
systems with multiple IOMMUs by allocating an amd_iommu PMU per IOMMU instance.
This allows users to specify performance events and filters separately for each
IOMMU.

This has been tested on a new Family 17h-based server with multiple IOMMUs.

Git branch containing this patch series is available here:

https://github.com/ssuthiku/linux.git  perf-iommu-v8

Changes from V7 (https://lkml.org/lkml/2017/1/9/917)
  * Re-order patches to clean up first before introducing new stuff.
  * Always use amd_iommu_get_num_iommus() to access amd_iommus_present
variable now.
  * Fix Perf IOMMU sysfs attributes initialization.
  * Miscellaneous clean up 

Changes from V6 (https://lkml.org/lkml/2016/12/23/134)
  * Renamed function parameters from devid to idx (per Joerg).
  * Removed unnecessary function declarations from amd_iommu_proto.h
(per Joerg).

Changes from V5 (https://lkml.org/lkml/2016/2/23/370)
  * Rebased onto v4.9.
  * Remove the patch which consolidates function declarations since
we have not yet agreed on the appropriate place for the new header file.

Thanks,
Suravee

Suravee Suthikulpanit (9):
  perf/amd/iommu: Declare pr_fmt and remove unnecessary pr_debug
  perf/amd/iommu: Clean up perf_iommu_enable_event
  perf/amd/iommu: Misc fix up perf_iommu_read
  iommu/amd: Introduce amd_iommu_get_num_iommus()
  perf/amd/iommu: Modify functions to query max banks and counters
  perf/amd/iommu: Modify amd_iommu_pc_get_set_reg_val() API to allow
specifying IOMMU index
  perf/amd/iommu: Check return value when set and get counter value
  perf/amd/iommu: Fix sysfs perf attribute groups
  perf/amd/iommu: Enable support for multiple IOMMUs

 arch/x86/events/amd/iommu.c | 280 
 arch/x86/events/amd/iommu.h |  16 ++-
 drivers/iommu/amd_iommu.c   |   6 +-
 drivers/iommu/amd_iommu_init.c  | 100 +-
 drivers/iommu/amd_iommu_proto.h |   8 +-
 drivers/iommu/amd_iommu_types.h |   3 -
 6 files changed, 217 insertions(+), 196 deletions(-)

-- 
1.8.3.1



[PATCH v8 1/9] perf/amd/iommu: Declare pr_fmt and remove unnecessary pr_debug

2017-01-15 Thread Suravee Suthikulpanit
Declare pr_fmt for perf/amd_iommu and remove unnecessary pr_debug.

Also check the return value when _init_events_attrs() fails.

Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/events/amd/iommu.c | 20 
 1 file changed, 8 insertions(+), 12 deletions(-)
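
For reference, a stand-alone sketch (plain userspace C, printf standing in for printk) of how a pr_fmt() definition ends up prefixing every message: the logging macro expands pr_fmt(fmt) at the call site, so defining it at the top of the file tags the whole file:

#include <stdio.h>

/* Must be defined before the logging macros are, as in the kernel. */
#define pr_fmt(fmt) "perf/amd_iommu: " fmt

#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	/* Prints: perf/amd_iommu: Detected. (2 banks, 4 counters/bank) */
	pr_info("Detected. (%d banks, %d counters/bank)\n", 2, 4);
	pr_err("Error initializing AMD IOMMU perf counters.\n");
	return 0;
}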

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index b28200d..44638d0 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -11,6 +11,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt)"perf/amd_iommu: " fmt
+
 #include <linux/perf_event.h>
 #include <linux/init.h>
 #include <linux/cpumask.h>
@@ -298,7 +300,6 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 {
struct hw_perf_event *hwc = &event->hw;
 
-   pr_debug("perf: amd_iommu:perf_iommu_start\n");
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
 
@@ -323,7 +324,6 @@ static void perf_iommu_read(struct perf_event *event)
u64 prev_raw_count = 0ULL;
u64 delta = 0ULL;
struct hw_perf_event *hwc = &event->hw;
-   pr_debug("perf: amd_iommu:perf_iommu_read\n");
 
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
_GET_BANK(event), _GET_CNTR(event),
@@ -349,8 +349,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
u64 config;
 
-   pr_debug("perf: amd_iommu:perf_iommu_stop\n");
-
if (hwc->state & PERF_HES_UPTODATE)
return;
 
@@ -372,7 +370,6 @@ static int perf_iommu_add(struct perf_event *event, int flags)
struct perf_amd_iommu *perf_iommu =
container_of(event->pmu, struct perf_amd_iommu, pmu);
 
-   pr_debug("perf: amd_iommu:perf_iommu_add\n");
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
/* request an iommu bank/counter */
@@ -393,7 +390,6 @@ static void perf_iommu_del(struct perf_event *event, int flags)
struct perf_amd_iommu *perf_iommu =
container_of(event->pmu, struct perf_amd_iommu, pmu);
 
-   pr_debug("perf: amd_iommu:perf_iommu_del\n");
perf_iommu_stop(event, PERF_EF_UPDATE);
 
/* clear the assigned iommu bank/counter */
@@ -444,24 +440,24 @@ static __init int _init_perf_amd_iommu(
 
raw_spin_lock_init(&perf_iommu->lock);
 
-   /* Init format attributes */
perf_iommu->format_group = &amd_iommu_format_group;
 
/* Init cpumask attributes to only core 0 */
cpumask_set_cpu(0, &iommu_cpumask);
perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
 
-   /* Init events attributes */
-   if (_init_events_attrs(perf_iommu) != 0)
-   pr_err("perf: amd_iommu: Only support raw events.\n");
+   ret = _init_events_attrs(perf_iommu);
+   if (ret) {
+   pr_err("Error initializing AMD IOMMU perf events.\n");
+   return ret;
+   }
 
-   /* Init null attributes */
perf_iommu->null_group = NULL;
perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
 
ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
if (ret) {
-   pr_err("perf: amd_iommu: Failed to initialized.\n");
+   pr_err("Error initializing AMD IOMMU perf counters.\n");
amd_iommu_pc_exit();
} else {
pr_info("perf: amd_iommu: Detected. (%d banks, %d 
counters/bank)\n",
-- 
1.8.3.1



[PATCH 1/1] iommu/arm-smmu: Fix for ThunderX erratum #27704

2017-01-15 Thread Tomasz Nowicki
The goal of the erratum #27704 workaround was to make sure that ASIDs and VMIDs
are unique across all SMMU instances on affected Cavium systems.

Currently, the workaround code partitions ASIDs and VMIDs by increasing
the global cavium_smmu_context_count, which in turn becomes the base ASID
and VMID value for the given SMMU instance upon context bank initialization.

For systems with multiple SMMU instances this approach implies the risk
of crossing the 8-bit ASID boundary, e.g. for a 1-socket CN88xx capable
of 4 SMMUv2 instances with 128 context banks each:
SMMU_0 (0-127 ASID RANGE)
SMMU_1 (128-255 ASID RANGE)
SMMU_2 (256-383 ASID RANGE) <--- crossing 8-bit ASID
SMMU_3 (384-511 ASID RANGE) <--- crossing 8-bit ASID

Since we currently use 8-bit ASIDs (SMMU_CBn_TCR2.AS = 0), we effectively
misconfigure the ASID[15:8] bits of the SMMU_CBn_TTBRm register for
SMMU_2/3. Moreover, we still assume non-zero ASID[15:8] bits upon context
invalidation. In the end, all devices except those behind SMMU_0/1 will
fail on guest power off/on, since we try to invalidate the TLB with a
16-bit ASID while the entry actually holds an 8-bit, zero-padded ASID.

This patch adds 16-bit ASID support for stage-1 AArch64 contexts so that
ASIDs are used consistently across all SMMU instances.

Signed-off-by: Tomasz Nowicki 
Reviewed-by: Robin Murphy 
Reviewed-by: Tirumalesh Chalamarla  
---
 drivers/iommu/arm-smmu.c | 3 +++
 1 file changed, 3 insertions(+)
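
For reference, a stand-alone sketch (plain userspace C, numbers taken from the CN88xx example above) of the arithmetic behind the problem: with the global context count used as each SMMU's ASID base, four 128-context-bank SMMUs necessarily hand out ASIDs above 255, which an 8-bit ASID configuration cannot represent:

#include <stdio.h>

#define NUM_SMMUS     4
#define CONTEXT_BANKS 128   /* per SMMU on CN88xx */

int main(void)
{
	unsigned int context_count = 0;   /* analogous to cavium_smmu_context_count */

	for (int smmu = 0; smmu < NUM_SMMUS; smmu++) {
		unsigned int base = context_count;

		context_count += CONTEXT_BANKS;
		printf("SMMU_%d: ASID %u-%u%s\n", smmu, base, context_count - 1,
		       context_count - 1 > 255 ? "  <-- exceeds 8-bit ASID" : "");
	}
	return 0;
}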

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index a60cded..476fab9 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -260,6 +260,7 @@ enum arm_smmu_s2cr_privcfg {
 
 #define TTBCR2_SEP_SHIFT   15
 #define TTBCR2_SEP_UPSTREAM(0x7 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_AS  (1 << 4)
 
 #define TTBRn_ASID_SHIFT   48
 
@@ -778,6 +779,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
reg2 |= TTBCR2_SEP_UPSTREAM;
+   if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+   reg2 |= TTBCR2_AS;
}
if (smmu->version > ARM_SMMU_V1)
writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
-- 
2.7.4



Re: [PATCH v4 5/5] ARM: dts: mt2701: add iommu/smi dtsi node for mt2701

2017-01-15 Thread Honghui Zhang
On Fri, 2017-01-13 at 15:54 +0100, Matthias Brugger wrote:
> 
> On 04/07/16 10:00, Matthias Brugger wrote:
> >
> >
> > On 04/07/16 03:32, Honghui Zhang wrote:
> >> On Sun, 2016-07-03 at 21:12 +0200, Matthias Brugger wrote:
> >>>
> >>> On 07/03/2016 08:24 AM, Matthias Brugger wrote:
> 
> 
>  On 06/08/2016 11:51 AM, honghui.zh...@mediatek.com wrote:
> > From: Honghui Zhang 
> >
> > Add the dtsi node of iommu and smi for mt2701.
> >
> > Signed-off-by: Honghui Zhang 
> > ---
> >   arch/arm/boot/dts/mt2701.dtsi | 51
> > +++
> >   1 file changed, 51 insertions(+)
> >
> 
>  Applied,
> >>>
> >>> Please resend the patch including the infracfg and mmsys node.
> >>>
> >>
> >> Hi, Matthias,
> >>
> >> Please hold this one.
> >> This one is based on CCF "arm: dts: mt2701: Add clock controller device
> >> nodes"[1] and power domain patch "Mediatek MT2701 SCPSYS power domain
> >> support v7"[2],
> >> But these two patchset are still being reviewed now.
> >>
> >> Do you think it's better that I send this one later after ccf and power
> >> domain patch got merged? I will send this patch later if it's OK with
> >> you.
> >>
> >
> > Sounds good.
> 
> Applied now to v4.10-next/dts32
> 
> Thanks.
> 

Thanks.

> >
> > Thanks a lot,
> > Matthias
> >
> >> Thanks.
> >> [1] https://patchwork.kernel.org/patch/9109081
> >> [2]
> >> http://lists.infradead.org/pipermail/linux-mediatek/2016-May/005429.html
> >>

