[PATCH v8 8/9] perf/amd/iommu: Fix sysfs perf attribute groups
From: Suravee SuthikulpanitIntroduce static amd_iommu_attr_groups to simplify the sysfs attributes initialization code. Cc: Peter Zijlstra Cc: Borislav Petkov Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 85 - 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index cc7bea4..223c01d 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -43,14 +43,8 @@ struct perf_amd_iommu { u8 max_counters; u64 cntr_assign_mask; raw_spinlock_t lock; - const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] -#define cpumask_group attr_groups[1] -#define events_group attr_groups[2] -#define null_group attr_groups[3] - /*- * sysfs format attributes *-*/ @@ -81,6 +75,10 @@ struct perf_amd_iommu { /*- * sysfs events attributes *-*/ +static struct attribute_group amd_iommu_events_group = { + .name = "events", +}; + struct amd_iommu_event_desc { struct kobj_attribute attr; const char *event; @@ -388,76 +386,63 @@ static void perf_iommu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); } -static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu) +static __init int _init_events_attrs(void) { - struct attribute **attrs; - struct attribute_group *attr_group; int i = 0, j; + struct attribute **attrs; while (amd_iommu_v2_event_descs[i].attr.attr.name) i++; - attr_group = kzalloc(sizeof(struct attribute *) - * (i + 1) + sizeof(*attr_group), GFP_KERNEL); - if (!attr_group) + attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL); + if (!attrs) return -ENOMEM; - attrs = (struct attribute **)(attr_group + 1); for (j = 0; j < i; j++) attrs[j] = _iommu_v2_event_descs[j].attr.attr; - attr_group->name = "events"; - attr_group->attrs = attrs; - perf_iommu->events_group = attr_group; - + amd_iommu_events_group.attrs = attrs; return 0; } static __init void amd_iommu_pc_exit(void) { - if 
(__perf_iommu.events_group != NULL) { - kfree(__perf_iommu.events_group); - __perf_iommu.events_group = NULL; + if (amd_iommu_events_group.attrs) { + kfree(amd_iommu_events_group.attrs); + amd_iommu_events_group.attrs = NULL; } } -static __init int _init_perf_amd_iommu( - struct perf_amd_iommu *perf_iommu, char *name) +const struct attribute_group *amd_iommu_attr_groups[] = { + _iommu_format_group, + _iommu_cpumask_group, + _iommu_events_group, + NULL, +}; + +static __init int +_init_perf_amd_iommu(struct perf_amd_iommu *perf_iommu, char *name) { int ret; raw_spin_lock_init(_iommu->lock); - perf_iommu->format_group = _iommu_format_group; - /* Init cpumask attributes to only core 0 */ cpumask_set_cpu(0, _cpumask); - perf_iommu->cpumask_group = _iommu_cpumask_group; - - ret = _init_events_attrs(perf_iommu); - if (ret) { - pr_err("Error initializing AMD IOMMU perf events.\n"); - return ret; - } perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0); perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0); if (!perf_iommu->max_banks || !perf_iommu->max_counters) return -EINVAL; - perf_iommu->null_group = NULL; - perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; - + perf_iommu->pmu.attr_groups = amd_iommu_attr_groups; ret = perf_pmu_register(_iommu->pmu, name, -1); - if (ret) { + if (ret) pr_err("Error initializing AMD IOMMU perf counters.\n"); - amd_iommu_pc_exit(); - } else { + else pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n", amd_iommu_pc_get_max_banks(0), amd_iommu_pc_get_max_counters(0)); - } - return ret; } @@ -471,24 +456,28 @@ static __init int _init_perf_amd_iommu( .stop = perf_iommu_stop, .read = perf_iommu_read, }, - .max_banks = 0x00, - .max_counters = 0x00, - .cntr_assign_mask = 0ULL, - .format_group = NULL, - .cpumask_group = NULL, - .events_group = NULL, - .null_group
[PATCH v8 9/9] perf/amd/iommu: Enable support for multiple IOMMUs
From: Suravee SuthikulpanitAdd multi-IOMMU support for perf by exposing an AMD IOMMU PMU for each IOMMU found in the system via: /bus/event_source/devices/amd_iommu_x where x is the IOMMU index. This allows users to specify different events to be programed onto performance counters of each IOMMU. Cc: Peter Zijlstra Cc: Borislav Petkov Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 114 ++-- 1 file changed, 67 insertions(+), 47 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 223c01d..38eafbf 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -35,16 +35,21 @@ #define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xULL) #define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xULL) -static struct perf_amd_iommu __perf_iommu; +#define PERF_AMD_IOMMU_NAME_SZ 16 struct perf_amd_iommu { + struct list_head list; struct pmu pmu; + unsigned int idx; + char name[PERF_AMD_IOMMU_NAME_SZ]; u8 max_banks; u8 max_counters; u64 cntr_assign_mask; raw_spinlock_t lock; }; +static LIST_HEAD(perf_amd_iommu_list); + /*- * sysfs format attributes *-*/ @@ -202,8 +207,7 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu, static int perf_iommu_event_init(struct perf_event *event) { struct hw_perf_event *hwc = >hw; - struct perf_amd_iommu *perf_iommu; - u64 config, config1; + struct perf_amd_iommu *pi; /* test the event attr type check for PMU enumeration */ if (event->attr.type != event->pmu->type) @@ -225,27 +229,18 @@ static int perf_iommu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - perf_iommu = &__perf_iommu; - - if (event->pmu != _iommu->pmu) - return -ENOENT; - - if (perf_iommu) { - config = event->attr.config; - config1 = event->attr.config1; - } else { - return -EINVAL; - } - /* update the hw_perf_event struct with the iommu config data */ - hwc->config = config; - hwc->extra_reg.config = config1; + pi = 
container_of(event->pmu, struct perf_amd_iommu, pmu); + hwc->idx = pi->idx; + hwc->config = event->attr.config; + hwc->extra_reg.config = event->attr.config1; return 0; } static void perf_iommu_enable_event(struct perf_event *ev) { + struct hw_perf_event *hwc = >hw; u8 csource = _GET_CSOURCE(ev); u16 devid = _GET_DEVID(ev); u8 bank = _GET_BANK(ev); @@ -253,30 +248,34 @@ static void perf_iommu_enable_event(struct perf_event *ev) u64 reg = 0ULL; reg = csource; - amd_iommu_pc_set_reg(0, bank, cntr, + amd_iommu_pc_set_reg(hwc->idx, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, ); reg = devid | (_GET_DEVID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, ); + amd_iommu_pc_set_reg(hwc->idx, bank, cntr, +IOMMU_PC_DEVID_MATCH_REG, ); reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_PASID_MATCH_REG, ); + amd_iommu_pc_set_reg(hwc->idx, bank, cntr, +IOMMU_PC_PASID_MATCH_REG, ); reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, ); + amd_iommu_pc_set_reg(hwc->idx, bank, cntr, +IOMMU_PC_DOMID_MATCH_REG, ); } static void perf_iommu_disable_event(struct perf_event *event) { + struct hw_perf_event *hwc = >hw; u64 reg = 0ULL; - amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event), + amd_iommu_pc_set_reg(hwc->idx, _GET_BANK(event), _GET_CNTR(event), IOMMU_PC_COUNTER_SRC_REG, ); } @@ -295,7 +294,7 @@ static void perf_iommu_start(struct perf_event *event, int flags) return; val = local64_read(>prev_count) & GENMASK_ULL(48, 0); - if (amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event), + if (amd_iommu_pc_set_reg(hwc->idx, _GET_BANK(event), _GET_CNTR(event), IOMMU_PC_COUNTER_REG, )) return; @@ -309,7 +308,7 @@ static void perf_iommu_read(struct perf_event *event) s64 delta; struct hw_perf_event *hwc = >hw; - if
[PATCH v8 6/9] perf/amd/iommu: Modify amd_iommu_pc_get_set_reg_val() API to allow specifying IOMMU index
From: Suravee SuthikulpanitThe current amd_iommu_pc_get_set_reg_val() cannot support multiple IOMMUs It is also confusing since it is trying to support set and get in one function. So break it down to amd_iommu_pc_[get|set]_reg(), and modifies them to allow callers to specify IOMMU index. This prepares the driver for supporting multi-IOMMU in subsequent patch. Also remove unnecessary function declarations in amd_iommu_proto.h. Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Joerg Roedel Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 34 ++ arch/x86/events/amd/iommu.h | 7 -- drivers/iommu/amd_iommu_init.c | 53 ++--- drivers/iommu/amd_iommu_proto.h | 5 4 files changed, 52 insertions(+), 47 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index ec7e873..200d2e8 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -250,42 +250,36 @@ static void perf_iommu_enable_event(struct perf_event *ev) { u8 csource = _GET_CSOURCE(ev); u16 devid = _GET_DEVID(ev); + u8 bank = _GET_BANK(ev); + u8 cntr = _GET_CNTR(ev); u64 reg = 0ULL; reg = csource; - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , -IOMMU_PC_COUNTER_SRC_REG, , true); + amd_iommu_pc_set_reg(0, bank, cntr, +IOMMU_PC_COUNTER_SRC_REG, ); reg = devid | (_GET_DEVID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , -IOMMU_PC_DEVID_MATCH_REG, , true); + amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, ); reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , -IOMMU_PC_PASID_MATCH_REG, , true); + amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_PASID_MATCH_REG, ); reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , -IOMMU_PC_DOMID_MATCH_REG, , true); + 
amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, ); } static void perf_iommu_disable_event(struct perf_event *event) { u64 reg = 0ULL; - amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), - _GET_BANK(event), _GET_CNTR(event), - IOMMU_PC_COUNTER_SRC_REG, , true); + amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event), +IOMMU_PC_COUNTER_SRC_REG, ); } static void perf_iommu_start(struct perf_event *event, int flags) @@ -300,9 +294,8 @@ static void perf_iommu_start(struct perf_event *event, int flags) if (flags & PERF_EF_RELOAD) { u64 prev_raw_count = local64_read(>prev_count); - amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), - _GET_BANK(event), _GET_CNTR(event), - IOMMU_PC_COUNTER_REG, _raw_count, true); + amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event), +IOMMU_PC_COUNTER_REG, _raw_count); } perf_iommu_enable_event(event); @@ -316,9 +309,8 @@ static void perf_iommu_read(struct perf_event *event) s64 delta; struct hw_perf_event *hwc = >hw; - amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), - _GET_BANK(event), _GET_CNTR(event), - IOMMU_PC_COUNTER_REG, , false); + amd_iommu_pc_get_reg(0, _GET_BANK(event), _GET_CNTR(event), +IOMMU_PC_COUNTER_REG, ); /* IOMMU pc counter register is only 48 bits */ count &= GENMASK_ULL(48, 0); diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h index cf3dd05..cd70921 100644 --- a/arch/x86/events/amd/iommu.h +++ b/arch/x86/events/amd/iommu.h @@ -33,7 +33,10 @@ extern u8 amd_iommu_pc_get_max_counters(unsigned int idx); -extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); +extern int amd_iommu_pc_set_reg(unsigned int idx, u8 bank, u8 cntr, + u8 fxn, u64 *value); + +extern int amd_iommu_pc_get_reg(unsigned int idx, u8 bank, u8 cntr, + u8 fxn, u64 *value); #endif /*_PERF_EVENT_AMD_IOMMU_H_*/ diff --git a/drivers/iommu/amd_iommu_init.c
[PATCH v8 5/9] perf/amd/iommu: Modify functions to query max banks and counters
Currently, amd_iommu_pc_get_max_[banks|counters]() use end-point device ID to locate an IOMMU and check the reported max banks/counters. The logic assumes that the IOMMU_BASE_DEVID belongs to the first IOMMU, and uses it to acquire a reference to the first IOMMU, which does not work on certain systems. Instead, we modify the function to take IOMMU index, and use it to query the corresponded AMD IOMMU instance. Note that we currently hard-code the IOMMU index to 0, since the current AMD IOMMU perf implementation only supports single IOMMU. Subsequent patch will add support for multi-IOMMU, and will use proper IOMMU index. This patch also removes unnecessary function declaration in amd_iommu_proto.h. Cc: Peter ZijlstraCc: Borislav Petkov Cc: Joerg Roedel Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 17 +++-- arch/x86/events/amd/iommu.h | 7 ++- drivers/iommu/amd_iommu_init.c | 36 ++-- drivers/iommu/amd_iommu_proto.h | 2 -- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 3f1c18a..ec7e873 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -239,14 +239,6 @@ static int perf_iommu_event_init(struct perf_event *event) return -EINVAL; } - /* integrate with iommu base devid (), assume one iommu */ - perf_iommu->max_banks = - amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID); - perf_iommu->max_counters = - amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID); - if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0)) - return -EINVAL; - /* update the hw_perf_event struct with the iommu config data */ hwc->config = config; hwc->extra_reg.config = config1; @@ -453,6 +445,11 @@ static __init int _init_perf_amd_iommu( return ret; } + perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0); + perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0); + if (!perf_iommu->max_banks || !perf_iommu->max_counters) + return -EINVAL; + perf_iommu->null_group 
= NULL; perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; @@ -462,8 +459,8 @@ static __init int _init_perf_amd_iommu( amd_iommu_pc_exit(); } else { pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n", - amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID), - amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID)); + amd_iommu_pc_get_max_banks(0), + amd_iommu_pc_get_max_counters(0)); } return ret; diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h index 5c5c932..cf3dd05 100644 --- a/arch/x86/events/amd/iommu.h +++ b/arch/x86/events/amd/iommu.h @@ -24,17 +24,14 @@ #define PC_MAX_SPEC_BNKS 64 #define PC_MAX_SPEC_CNTRS 16 -/* iommu pc reg masks*/ -#define IOMMU_BASE_DEVID 0x - /* amd_iommu_init.c external support functions */ extern int amd_iommu_get_num_iommus(void); extern bool amd_iommu_pc_supported(void); -extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_banks(unsigned int idx); -extern u8 amd_iommu_pc_get_max_counters(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(unsigned int idx); extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 515d4c1..ed21307d 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2713,6 +2713,20 @@ bool amd_iommu_v2_supported(void) } EXPORT_SYMBOL(amd_iommu_v2_supported); +static struct amd_iommu *get_amd_iommu(unsigned int idx) +{ + unsigned int i = 0; + struct amd_iommu *iommu, *ret = NULL; + + for_each_iommu(iommu) { + if (i++ == idx) { + ret = iommu; + break; + } + } + return ret; +} + / * * IOMMU EFR Performance Counter support functionality. 
This code allows @@ -2720,17 +2734,14 @@ bool amd_iommu_v2_supported(void) * / -u8 amd_iommu_pc_get_max_banks(u16 devid) +u8 amd_iommu_pc_get_max_banks(unsigned int idx) { - struct amd_iommu *iommu; - u8 ret = 0; + struct amd_iommu *iommu = get_amd_iommu(idx); - /* locate the iommu governing the devid */ - iommu = amd_iommu_rlookup_table[devid];
[PATCH v8 3/9] perf/amd/iommu: Misc fix up perf_iommu_read
* Fix overflow handling since u64 delta would lose the MSB sign bit. * Remove unnecessary local64_cmpxchg(). * Coding style and make use of GENMASK_ULL macro. Cc: Peter ZijlstraCc: Borislav Petkov Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 23 --- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 1aa25d8..3f1c18a 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -320,9 +320,8 @@ static void perf_iommu_start(struct perf_event *event, int flags) static void perf_iommu_read(struct perf_event *event) { - u64 count = 0ULL; - u64 prev_raw_count = 0ULL; - u64 delta = 0ULL; + u64 count, prev; + s64 delta; struct hw_perf_event *hwc = >hw; amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), @@ -330,18 +329,20 @@ static void perf_iommu_read(struct perf_event *event) IOMMU_PC_COUNTER_REG, , false); /* IOMMU pc counter register is only 48 bits */ - count &= 0xULL; + count &= GENMASK_ULL(48, 0); - prev_raw_count = local64_read(>prev_count); - if (local64_cmpxchg(>prev_count, prev_raw_count, - count) != prev_raw_count) - return; + prev = local64_read(>prev_count); - /* Handling 48-bit counter overflowing */ - delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT); + /* +* Since we do not enable counter overflow interrupts, +* we do not have to worry about prev_count changing on us. +*/ + local64_set(>prev_count, count); + + /* Handle 48-bit counter overflow */ + delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); delta >>= COUNTER_SHIFT; local64_add(delta, >count); - } static void perf_iommu_stop(struct perf_event *event, int flags) -- 1.8.3.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 7/9] perf/amd/iommu: Check return value when set and get counter value
From: Suravee SuthikulpanitIn, perf_iommu_start(), we need to check the return value from amd_iommu_set_reg(). In case of failure, we should not enable the PMU. Also, in perf_iommu_read(), we need to check the return value from amd_iommu_get_reg() before using the value. Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Joerg Roedel Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 19 +++ 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 200d2e8..cc7bea4 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -284,6 +284,7 @@ static void perf_iommu_disable_event(struct perf_event *event) static void perf_iommu_start(struct perf_event *event, int flags) { + u64 val; struct hw_perf_event *hwc = >hw; if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) @@ -292,15 +293,16 @@ static void perf_iommu_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; - if (flags & PERF_EF_RELOAD) { - u64 prev_raw_count = local64_read(>prev_count); - amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event), -IOMMU_PC_COUNTER_REG, _raw_count); - } + if (!(flags & PERF_EF_RELOAD)) + return; + + val = local64_read(>prev_count) & GENMASK_ULL(48, 0); + if (amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event), + IOMMU_PC_COUNTER_REG, )) + return; perf_iommu_enable_event(event); perf_event_update_userpage(event); - } static void perf_iommu_read(struct perf_event *event) @@ -309,8 +311,9 @@ static void perf_iommu_read(struct perf_event *event) s64 delta; struct hw_perf_event *hwc = >hw; - amd_iommu_pc_get_reg(0, _GET_BANK(event), _GET_CNTR(event), -IOMMU_PC_COUNTER_REG, ); + if (amd_iommu_pc_get_reg(0, _GET_BANK(event), _GET_CNTR(event), +IOMMU_PC_COUNTER_REG, )) + return; /* IOMMU pc counter register is only 48 bits */ count &= GENMASK_ULL(48, 0); -- 1.8.3.1 ___ iommu mailing list iommu@lists.linux-foundation.org 
https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 4/9] iommu/amd: Introduce amd_iommu_get_num_iommus()
Introduce amd_iommu_get_num_iommus(), which returns the value of amd_iommus_present, then replaces the direct access to the variable which is now declared as static. This function will also be used by Perf AMD IOMMU driver. Cc: Borislav PetkovCc: Joerg Roedel Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.h | 2 ++ drivers/iommu/amd_iommu.c | 6 +++--- drivers/iommu/amd_iommu_init.c | 11 +-- drivers/iommu/amd_iommu_proto.h | 1 + drivers/iommu/amd_iommu_types.h | 3 --- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h index 845d173..5c5c932 100644 --- a/arch/x86/events/amd/iommu.h +++ b/arch/x86/events/amd/iommu.h @@ -28,6 +28,8 @@ #define IOMMU_BASE_DEVID 0x /* amd_iommu_init.c external support functions */ +extern int amd_iommu_get_num_iommus(void); + extern bool amd_iommu_pc_supported(void); extern u8 amd_iommu_pc_get_max_banks(u16 devid); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 754595e..ae55485 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1228,7 +1228,7 @@ static void __domain_flush_pages(struct protection_domain *domain, build_inv_iommu_pages(, address, size, domain->id, pde); - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (!domain->dev_iommu[i]) continue; @@ -1272,7 +1272,7 @@ static void domain_flush_complete(struct protection_domain *domain) { int i; - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (domain && !domain->dev_iommu[i]) continue; @@ -3341,7 +3341,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid, * IOMMU TLB needs to be flushed before Device TLB to * prevent device TLB refill from IOMMU TLB */ - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (domain->dev_iommu[i] == 0) continue; diff --git 
a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 157e934..515d4c1 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -164,7 +164,9 @@ struct ivmd_header { /* Array to assign indices to IOMMUs*/ struct amd_iommu *amd_iommus[MAX_IOMMUS]; -int amd_iommus_present; + +/* Number of IOMMUs present in the system */ +static int amd_iommus_present; /* IOMMUs have a non-present cache? */ bool amd_iommu_np_cache __read_mostly; @@ -269,6 +271,11 @@ static inline unsigned long tbl_size(int entry_size) return 1UL << shift; } +int amd_iommu_get_num_iommus(void) +{ + return amd_iommus_present; +} + /* Access to l1 and l2 indexed register spaces */ static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) @@ -1333,7 +1340,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) /* Add IOMMU to internal data structures */ list_add_tail(>list, _iommu_list); - iommu->index = amd_iommus_present++; + iommu->index = amd_iommus_present++; if (unlikely(iommu->index >= MAX_IOMMUS)) { WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 7eb60c1..e8f0710 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -21,6 +21,7 @@ #include "amd_iommu_types.h" +extern int amd_iommu_get_num_iommus(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 0d91785..09d7a11 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -606,9 +606,6 @@ struct devid_map { */ extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; -/* Number of IOMMUs present in the system */ -extern int amd_iommus_present; - /* * Declarations for the global list of all protection domains */ -- 
1.8.3.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 2/9] perf/amd/iommu: Clean up perf_iommu_enable_event
From: Suravee Suthikulpanit* Clean up various bitwise operations in perf_iommu_enable_event * Make use macros BIT(x) This should not affect logic and functionality. Cc: Peter Zijlstra Cc: Borislav Petkov Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 44638d0..1aa25d8 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -164,11 +164,11 @@ static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu) for (bank = 0, shift = 0; bank < max_banks; bank++) { for (cntr = 0; cntr < max_cntrs; cntr++) { shift = bank + (bank*3) + cntr; - if (perf_iommu->cntr_assign_mask & (1ULL< cntr_assign_mask & BIT(shift)) { continue; } else { - perf_iommu->cntr_assign_mask |= (1ULL< cntr_assign_mask |= BIT(shift); + retval = ((u16)((u16)bank << 8) | (u8)(cntr)); goto out; } } @@ -265,23 +265,23 @@ static void perf_iommu_enable_event(struct perf_event *ev) _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_COUNTER_SRC_REG, , true); - reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32); + reg = devid | (_GET_DEVID_MASK(ev) << 32); if (reg) - reg |= (1UL << 31); + reg |= BIT(31); amd_iommu_pc_get_set_reg_val(devid, _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_DEVID_MATCH_REG, , true); - reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); + reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); if (reg) - reg |= (1UL << 31); + reg |= BIT(31); amd_iommu_pc_get_set_reg_val(devid, _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_PASID_MATCH_REG, , true); - reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); + reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); if (reg) - reg |= (1UL << 31); + reg |= BIT(31); amd_iommu_pc_get_set_reg_val(devid, _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_DOMID_MATCH_REG, , true); -- 1.8.3.1 ___ iommu mailing list iommu@lists.linux-foundation.org 
https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 0/9] perf/amd/iommu: Enable multi-IOMMU support
From: Suravee SuthikulpanitThis patch series modifies the existing IOMMU and Perf drivers to support systems with multiple IOMMUs by allocating an amd_iommu PMU per IOMMU instance. This allows users to specify performance events and filters separately for each IOMMU. This has been tested on the new family17h-based server w/ multiple IOMMUs. Git branch containing this patch series is available here: https://github.com/ssuthiku/linux.git perf-iommu-v8 Changes from V7 (https://lkml.org/lkml/2017/1/9/917) * Re-order patches to clean up first before introducing new stuff. * Always use amd_iommu_get_num_iommus() to access amd_iommus_present variable now. * Fix Perf IOMMU sysfs attributes initialization. * Miscellaneous clean up Changes from V6 (https://lkml.org/lkml/2016/12/23/134) * Renamed function parameters from devid to idx (per Joerg). * Removed unnecessary function declarations from amd_iommu_proto.h (per Joerg). Changes from V5 (https://lkml.org/lkml/2016/2/23/370) * Rebased onto v4.9. * Remove the patch which consolidates function delclarations since we have not yet agreed on the appropriate place for the new header file. 
Thanks, Suravee Suravee Suthikulpanit (9): perf/amd/iommu: Declare pr_fmt and remove unnecessary pr_debug perf/amd/iommu: Clean up perf_iommu_enable_event perf/amd/iommu: Misc fix up perf_iommu_read iommu/amd: Introduce amd_iommu_get_num_iommus() perf/amd/iommu: Modify functions to query max banks and counters perf/amd/iommu: Modify amd_iommu_pc_get_set_reg_val() API to allow specifying IOMMU index perf/amd/iommu: Check return value when set and get counter value perf/amd/iommu: Fix sysfs perf attribute groups perf/amd/iommu: Enable support for multiple IOMMUs arch/x86/events/amd/iommu.c | 280 arch/x86/events/amd/iommu.h | 16 ++- drivers/iommu/amd_iommu.c | 6 +- drivers/iommu/amd_iommu_init.c | 100 +- drivers/iommu/amd_iommu_proto.h | 8 +- drivers/iommu/amd_iommu_types.h | 3 - 6 files changed, 217 insertions(+), 196 deletions(-) -- 1.8.3.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 1/9] perf/amd/iommu: Declare pr_fmt and remove unnecessary pr_debug
Declare pr_fmt for perf/amd_iommu and remove unnecessary pr_debug. Also check return value when _init_events_attrs fails. Cc: Peter ZijlstraCc: Borislav Petkov Signed-off-by: Suravee Suthikulpanit --- arch/x86/events/amd/iommu.c | 20 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index b28200d..44638d0 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt)"perf/amd_iommu: " fmt + #include #include #include @@ -298,7 +300,6 @@ static void perf_iommu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = >hw; - pr_debug("perf: amd_iommu:perf_iommu_start\n"); if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -323,7 +324,6 @@ static void perf_iommu_read(struct perf_event *event) u64 prev_raw_count = 0ULL; u64 delta = 0ULL; struct hw_perf_event *hwc = >hw; - pr_debug("perf: amd_iommu:perf_iommu_read\n"); amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), _GET_BANK(event), _GET_CNTR(event), @@ -349,8 +349,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = >hw; u64 config; - pr_debug("perf: amd_iommu:perf_iommu_stop\n"); - if (hwc->state & PERF_HES_UPTODATE) return; @@ -372,7 +370,6 @@ static int perf_iommu_add(struct perf_event *event, int flags) struct perf_amd_iommu *perf_iommu = container_of(event->pmu, struct perf_amd_iommu, pmu); - pr_debug("perf: amd_iommu:perf_iommu_add\n"); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; /* request an iommu bank/counter */ @@ -393,7 +390,6 @@ static void perf_iommu_del(struct perf_event *event, int flags) struct perf_amd_iommu *perf_iommu = container_of(event->pmu, struct perf_amd_iommu, pmu); - pr_debug("perf: amd_iommu:perf_iommu_del\n"); perf_iommu_stop(event, PERF_EF_UPDATE); /* clear the assigned iommu bank/counter */ @@ -444,24 +440,24 @@ static __init int 
_init_perf_amd_iommu( raw_spin_lock_init(_iommu->lock); - /* Init format attributes */ perf_iommu->format_group = _iommu_format_group; /* Init cpumask attributes to only core 0 */ cpumask_set_cpu(0, _cpumask); perf_iommu->cpumask_group = _iommu_cpumask_group; - /* Init events attributes */ - if (_init_events_attrs(perf_iommu) != 0) - pr_err("perf: amd_iommu: Only support raw events.\n"); + ret = _init_events_attrs(perf_iommu); + if (ret) { + pr_err("Error initializing AMD IOMMU perf events.\n"); + return ret; + } - /* Init null attributes */ perf_iommu->null_group = NULL; perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; ret = perf_pmu_register(_iommu->pmu, name, -1); if (ret) { - pr_err("perf: amd_iommu: Failed to initialized.\n"); + pr_err("Error initializing AMD IOMMU perf counters.\n"); amd_iommu_pc_exit(); } else { pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n", -- 1.8.3.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/1] iommu/arm-smmu: Fix for ThunderX erratum #27704
The goal of the erratum #27704 workaround was to make sure that ASIDs and VMIDs are unique across all SMMU instances on affected Cavium systems. Currently, the workaround code partitions ASIDs and VMIDs by increasing global cavium_smmu_context_count which in turn becomes the base ASID and VMID value for the given SMMU instance upon the context bank initialization. For systems with multiple SMMU instances this approach implies the risk of crossing 8-bit ASID, like for 1-socket CN88xx capable of 4 SMMUv2, 128 context banks each: SMMU_0 (0-127 ASID RANGE) SMMU_1 (128-255 ASID RANGE) SMMU_2 (256-383 ASID RANGE) <--- crossing 8-bit ASID SMMU_3 (384-511 ASID RANGE) <--- crossing 8-bit ASID Since now we use 8-bit ASID (SMMU_CBn_TCR2.AS = 0) we effectively misconfigure ASID[15:8] bits of SMMU_CBn_TTBRm register for SMMU_2/3. Moreover, we still assume non-zero ASID[15:8] bits upon context invalidation. In the end, except SMMU_0/1 devices all other devices under other SMMUs will fail on guest power off/on, since we try to invalidate the TLB with a 16-bit ASID while we actually have an 8-bit, zero-padded 16-bit entry. This patch adds 16-bit ASID support for stage-1 AArch64 contexts so that we use ASIDs consistently for all SMMU instances. 
Signed-off-by: Tomasz NowickiReviewed-by: Robin Murphy Reviewed-by: Tirumalesh Chalamarla --- drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a60cded..476fab9 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -260,6 +260,7 @@ enum arm_smmu_s2cr_privcfg { #define TTBCR2_SEP_SHIFT 15 #define TTBCR2_SEP_UPSTREAM(0x7 << TTBCR2_SEP_SHIFT) +#define TTBCR2_AS (1 << 4) #define TTBRn_ASID_SHIFT 48 @@ -778,6 +779,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; reg2 |= TTBCR2_SEP_UPSTREAM; + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) + reg2 |= TTBCR2_AS; } if (smmu->version > ARM_SMMU_V1) writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 5/5] ARM: dts: mt2701: add iommu/smi dtsi node for mt2701
On Fri, 2017-01-13 at 15:54 +0100, Matthias Brugger wrote: > > On 04/07/16 10:00, Matthias Brugger wrote: > > > > > > On 04/07/16 03:32, Honghui Zhang wrote: > >> On Sun, 2016-07-03 at 21:12 +0200, Matthias Brugger wrote: > >>> > >>> On 07/03/2016 08:24 AM, Matthias Brugger wrote: > > > On 06/08/2016 11:51 AM, honghui.zh...@mediatek.com wrote: > > From: Honghui Zhang> > > > Add the dtsi node of iommu and smi for mt2701. > > > > Signed-off-by: Honghui Zhang > > --- > > arch/arm/boot/dts/mt2701.dtsi | 51 > > +++ > > 1 file changed, 51 insertions(+) > > > > Applied, > >>> > >>> Please resend the patch including the infracfg and mmsys node. > >>> > >> > >> Hi, Matthias, > >> > >> Please hold this one. > >> This one is based on CCF "arm: dts: mt2701: Add clock controller device > >> nodes"[1] and power domain patch "Mediatek MT2701 SCPSYS power domain > >> support v7"[2], > >> But these two patchset are still being reviewed now. > >> > >> Do you think it's better that I send this one later after ccf and power > >> domain patch got merged? I will send this patch later if it's OK with > >> you. > >> > > > > Sounds good. > > Applied now to v4.10-next/dts32 > > Thanks. > Thanks. > > > > Thanks a lot, > > Matthias > > > >> Thanks. > >> [1] https://patchwork.kernel.org/patch/9109081 > >> [2] > >> http://lists.infradead.org/pipermail/linux-mediatek/2016-May/005429.html > >> ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu