On Mon, 24 Jul 2017 11:29:21 +0100 Suzuki K Poulose <[email protected]> wrote:
> Add support for the Cluster PMU part of the ARM DynamIQ Shared Unit (DSU). > The DSU integrates one or more cores with an L3 memory system, control > logic, and external interfaces to form a multicore cluster. The PMU > allows counting the various events related to L3, SCU etc, along with > providing a cycle counter. > > The PMU can be accessed via system registers, which are common > to the cores in the same cluster. The PMU registers follow the > semantics of the ARMv8 PMU, mostly, with the exception that > the counters record the cluster wide events. > > This driver is mostly based on the ARMv8 and CCI PMU drivers. > > Cc: Mark Rutland <[email protected]> > Cc: Will Deacon <[email protected]> > Signed-off-by: Suzuki K Poulose <[email protected]> A few quick comments. Jonathan > --- > arch/arm64/include/asm/arm_dsu_pmu.h | 124 +++++ > drivers/perf/Kconfig | 9 + > drivers/perf/Makefile | 1 + > drivers/perf/arm_dsu_pmu.c | 877 > +++++++++++++++++++++++++++++++++++ > 4 files changed, 1011 insertions(+) > create mode 100644 arch/arm64/include/asm/arm_dsu_pmu.h > create mode 100644 drivers/perf/arm_dsu_pmu.c > > diff --git a/arch/arm64/include/asm/arm_dsu_pmu.h > b/arch/arm64/include/asm/arm_dsu_pmu.h > new file mode 100644 > index 0000000..e7276fd > --- /dev/null > +++ b/arch/arm64/include/asm/arm_dsu_pmu.h > @@ -0,0 +1,124 @@ <snip> > +static inline void __dsu_pmu_counter_interrupt_disable(int counter) > +{ > + write_sysreg_s(BIT(counter), CLUSTERPMINTENCLR_EL1); > + isb(); > +} > + > + > +static inline u32 __dsu_pmu_read_pmceid(int n) > +{ > + switch (n) { > + case 0: > + return read_sysreg_s(CLUSTERPMCEID0_EL1); > + case 1: > + return read_sysreg_s(CLUSTERPMCEID1_EL1); > + default: > + BUILD_BUG(); > + return 0; > + } > +} What is the benefit of having this lot in a header? Is it to support future additional drivers? If not, why not just push them down into the c code. 
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig > index e5197ff..ee3d7d1 100644 > --- a/drivers/perf/Kconfig > +++ b/drivers/perf/Kconfig > @@ -17,6 +17,15 @@ config ARM_PMU_ACPI > depends on ARM_PMU && ACPI > def_bool y > > +config ARM_DSU_PMU > + tristate "ARM DynamIQ Shared Unit (DSU) PMU" > + depends on ARM64 && PERF_EVENTS > + help > + Provides support for performance monitor unit in ARM DynamIQ Shared > + Unit (DSU). The DSU integrates one or more cores with an L3 memory > + system, control logic. The PMU allows counting various events related > + to DSU. > + > config QCOM_L2_PMU > bool "Qualcomm Technologies L2-cache PMU" > depends on ARCH_QCOM && ARM64 && ACPI > diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile > index 6420bd4..0adb4f6 100644 > --- a/drivers/perf/Makefile > +++ b/drivers/perf/Makefile > @@ -1,5 +1,6 @@ > obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o > obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o > +obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o > obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o > obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o > obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o > diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c > new file mode 100644 > index 0000000..60b2c03 > --- /dev/null > +++ b/drivers/perf/arm_dsu_pmu.c <snip> > + > +/* > + * Make sure the group of events can be scheduled at once > + * on the PMU. 
> + */ > +static int dsu_pmu_validate_group(struct perf_event *event) > +{ > + struct perf_event *sibling, *leader = event->group_leader; > + struct dsu_hw_events fake_hw; > + > + if (event->group_leader == event) > + return 0; > + > + memset(fake_hw.used_mask, 0, sizeof(fake_hw.used_mask)); > + if (!dsu_pmu_validate_event(event->pmu, &fake_hw, leader)) > + return -EINVAL; > + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { > + if (!dsu_pmu_validate_event(event->pmu, &fake_hw, sibling)) > + return -EINVAL; > + } > + if (dsu_pmu_validate_event(event->pmu, &fake_hw, event)) Perhaps a comment to say why in this case validate_event has the opposite meaning to the other cases above? (not !dsu_pmu_validate_event()) > + return -EINVAL; > + return 0; > +} > + > +static int dsu_pmu_event_init(struct perf_event *event) > +{ > + struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu); > + > + if (event->attr.type != event->pmu->type) > + return -ENOENT; > + > + if (!dsu_pmu_event_supported(dsu_pmu, event->attr.config)) > + return -EOPNOTSUPP; > + > + /* We cannot support task bound events */ > + if (event->cpu < 0) { > + dev_dbg(dsu_pmu->pmu.dev, "Can't support per-task counters\n"); > + return -EINVAL; > + } > + > + /* We don't support sampling */ > + if (is_sampling_event(event)) { > + dev_dbg(dsu_pmu->pmu.dev, "Can't support sampling events\n"); > + return -EOPNOTSUPP; > + } > + > + if (has_branch_stack(event) || > + event->attr.exclude_user || > + event->attr.exclude_kernel || > + event->attr.exclude_hv || > + event->attr.exclude_idle || > + event->attr.exclude_host || > + event->attr.exclude_guest) { > + dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n"); > + return -EINVAL; > + } > + > + if (dsu_pmu_validate_group(event)) > + return -EINVAL; > + > + event->cpu = cpumask_first(&dsu_pmu->active_cpu); > + if (event->cpu >= nr_cpu_ids) > + return -EINVAL; > + > + event->hw.config_base = event->attr.config; > + return 0; > +} > + > +static struct dsu_pmu 
*dsu_pmu_alloc(struct platform_device *pdev) > +{ > + struct dsu_pmu *dsu_pmu; > + > + dsu_pmu = devm_kzalloc(&pdev->dev, sizeof(*dsu_pmu), GFP_KERNEL); > + if (!dsu_pmu) > + return ERR_PTR(-ENOMEM); A blank line here would make it a little more readable > + raw_spin_lock_init(&dsu_pmu->pmu_lock); And one here. > + return dsu_pmu; > +} > + > +/** > + * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster. > + */ > +static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask) > +{ > + int i = 0, n, cpu; > + struct device_node *cpu_node; > + > + n = of_count_phandle_with_args(dev, "cpus", NULL); > + if (n <= 0) > + goto out; > + for (; i < n; i++) { > + cpu_node = of_parse_phandle(dev, "cpus", i); > + if (!cpu_node) > + break; > + cpu = of_device_node_get_cpu(cpu_node); > + of_node_put(cpu_node); > + if (cpu >= nr_cpu_ids) > + break; It rather seems like this is an error we would not want to skip over. > + cpumask_set_cpu(cpu, mask); > + } > +out: > + return i > 0; Cleaner to actually return appropriate errors from within this function and pass them all the way up. > +} > + > +/* > + * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster. 
> + */ > +static void dsu_pmu_probe_pmu(void *data) > +{ > + struct dsu_pmu *dsu_pmu = data; > + u64 cpmcr; > + u32 cpmceid[2]; > + > + if (WARN_ON(!cpumask_test_cpu(smp_processor_id(), > + &dsu_pmu->supported_cpus))) > + return; > + cpmcr = __dsu_pmu_read_pmcr(); > + dsu_pmu->num_counters = ((cpmcr >> CLUSTERPMCR_N_SHIFT) & > + CLUSTERPMCR_N_MASK); > + if (!dsu_pmu->num_counters) > + return; > + cpmceid[0] = __dsu_pmu_read_pmceid(0); > + cpmceid[1] = __dsu_pmu_read_pmceid(1); > + bitmap_from_u32array(dsu_pmu->cpmceid_bitmap, > + DSU_PMU_MAX_COMMON_EVENTS, > + cpmceid, > + ARRAY_SIZE(cpmceid)); > +} > + > +static void dsu_pmu_cleanup_dev(struct dsu_pmu *dsu_pmu) > +{ > + cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node); > + irq_set_affinity_hint(dsu_pmu->irq, NULL); > +} > + > +static int dsu_pmu_probe(struct platform_device *pdev) > +{ > + int irq, rc, cpu; > + struct dsu_pmu *dsu_pmu; > + char *name; > + > + static atomic_t pmu_idx = ATOMIC_INIT(-1); > + > + One blank line only. > + dsu_pmu = dsu_pmu_alloc(pdev); > + if (!dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->supported_cpus)) { > + dev_warn(&pdev->dev, "Failed to parse the CPUs\n"); > + return -EINVAL; > + } > + > + rc = smp_call_function_any(&dsu_pmu->supported_cpus, > + dsu_pmu_probe_pmu, > + dsu_pmu, 1); > + if (rc) > + return rc; > + if (!dsu_pmu->num_counters) > + return -ENODEV; > + irq = platform_get_irq(pdev, 0); > + if (irq < 0) { > + dev_warn(&pdev->dev, "Failed to find IRQ\n"); > + return -EINVAL; > + } > + > + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d", > + PMUNAME, atomic_inc_return(&pmu_idx)); > + rc = devm_request_irq(&pdev->dev, irq, dsu_pmu_handle_irq, > + 0, name, dsu_pmu); > + if (rc) { > + dev_warn(&pdev->dev, "Failed to request IRQ %d\n", irq); > + return rc; > + } > + > + /* > + * Find one CPU in the DSU to handle the IRQs. > + * It is highly unlikely that we would fail > + * to find one, given the probing has succeeded. 
> + */ > + cpu = dsu_pmu_get_online_cpu(dsu_pmu); > + if (cpu >= nr_cpu_ids) > + return -ENODEV; > + cpumask_set_cpu(cpu, &dsu_pmu->active_cpu); > + rc = irq_set_affinity_hint(irq, &dsu_pmu->active_cpu); > + if (rc) { > + dev_warn(&pdev->dev, "Failed to force IRQ affinity for %d\n", > + irq); > + return rc; > + } It is a little unusual that you have the above two elements inline here, but have a function to unwind them. Just makes it a little harder to read and leads to missing things like... > + > + platform_set_drvdata(pdev, dsu_pmu); > + rc = cpuhp_state_add_instance(dsu_pmu_cpuhp_state, > + &dsu_pmu->cpuhp_node); > + if (rc) I believe irq_set_affinity_hint(dsu_pmu->irq, NULL) would make sense here. > + return rc; > + > + dsu_pmu->irq = irq; > + dsu_pmu->pmu = (struct pmu) { > + .task_ctx_nr = perf_invalid_context, > + > + .pmu_enable = dsu_pmu_enable, > + .pmu_disable = dsu_pmu_disable, > + .event_init = dsu_pmu_event_init, > + .add = dsu_pmu_add, > + .del = dsu_pmu_del, > + .start = dsu_pmu_start, > + .stop = dsu_pmu_stop, > + .read = dsu_pmu_read, > + > + .attr_groups = dsu_pmu_attr_groups, > + }; > + > + rc = perf_pmu_register(&dsu_pmu->pmu, name, -1); > + > + if (!rc) > + dev_info(&pdev->dev, "Registered %s with %d event counters", > + name, dsu_pmu->num_counters); > + else > + dsu_pmu_cleanup_dev(dsu_pmu); It is cleaner to have the error handled as the 'exceptional' element. Slightly more code, but easier to read. i.e. if (rc) { dsu_pmu_cleanup_dev(dsu_pmu); return rc; } dev_info(...) > + return rc; > +} > + > +static int dsu_pmu_device_remove(struct platform_device *pdev) The difference in naming style between this and probe is a little confusing. Why not dsu_pmu_remove? > +{ > + struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev); > + > + dsu_pmu_cleanup_dev(dsu_pmu); > + perf_pmu_unregister(&dsu_pmu->pmu); The remove order should be the reverse of probe. It just makes it more 'obviously' right and saves reviewer time. 
If there is a reason not to do this, there should be a comment saying why. > + return 0; > +} > + > +static const struct of_device_id dsu_pmu_of_match[] = { > + { .compatible = "arm,dsu-pmu", }, > + {}, > +}; > + > +static struct platform_driver dsu_pmu_driver = { > + .driver = { > + .name = DRVNAME, > + .of_match_table = of_match_ptr(dsu_pmu_of_match), > + }, > + .probe = dsu_pmu_probe, > + .remove = dsu_pmu_device_remove, > +}; > + > +static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) > +{ > + int dst; > + struct dsu_pmu *dsu_pmu = container_of(node, > + struct dsu_pmu, cpuhp_node); > + > + if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu)) > + return 0; > + > + dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu); > + if (dst < nr_cpu_ids) { > + cpumask_set_cpu(dst, &dsu_pmu->active_cpu); > + perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst); > + irq_set_affinity_hint(dsu_pmu->irq, &dsu_pmu->active_cpu); > + } > + > + return 0; > +} > + > +static int __init dsu_pmu_init(void) > +{ > + int ret; > + > + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, > + DRVNAME, > + NULL, > + dsu_pmu_cpu_teardown); > + if (ret < 0) > + return ret; > + dsu_pmu_cpuhp_state = ret; I'm just curious - what prevents this initialization being done in probe rather than init? > + return platform_driver_register(&dsu_pmu_driver); > +} > + > +static void __exit dsu_pmu_exit(void) > +{ > + platform_driver_unregister(&dsu_pmu_driver); > + cpuhp_remove_multi_state(dsu_pmu_cpuhp_state); > +} > + > +module_init(dsu_pmu_init); > +module_exit(dsu_pmu_exit); > + > +MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit"); > +MODULE_AUTHOR("Suzuki K Poulose <[email protected]>"); > +MODULE_LICENSE("GPL v2");

