Re: [RFC PATCH 27/43] KVM: PPC: Book3S HV P9: Move host OS save/restore functions to built-in
> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin wrote: > > Move the P9 guest/host register switching functions to the built-in > P9 entry code, and export it for nested to use as well. > > This allows more flexibility in scheduling these supervisor privileged > SPR accesses with the HV privileged and PR SPR accesses in the low level > entry code. > > Signed-off-by: Nicholas Piggin > --- > arch/powerpc/kvm/book3s_hv.c | 351 +- > arch/powerpc/kvm/book3s_hv.h | 39 +++ > arch/powerpc/kvm/book3s_hv_p9_entry.c | 332 > 3 files changed, 372 insertions(+), 350 deletions(-) > create mode 100644 arch/powerpc/kvm/book3s_hv.h > > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 35749b0b663f..a7660af22161 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -79,6 +79,7 @@ > #include > > #include "book3s.h" > +#include "book3s_hv.h" > > #define CREATE_TRACE_POINTS > #include "trace_hv.h" > @@ -3675,356 +3676,6 @@ static noinline void kvmppc_run_core(struct > kvmppc_vcore *vc) > trace_kvmppc_run_core(vc, 1); > } > > -/* > - * Privileged (non-hypervisor) host registers to save. > - */ > -struct p9_host_os_sprs { > - unsigned long dscr; > - unsigned long tidr; > - unsigned long iamr; > - unsigned long amr; > - unsigned long fscr; > - > - unsigned int pmc1; > - unsigned int pmc2; > - unsigned int pmc3; > - unsigned int pmc4; > - unsigned int pmc5; > - unsigned int pmc6; > - unsigned long mmcr0; > - unsigned long mmcr1; > - unsigned long mmcr2; > - unsigned long mmcr3; > - unsigned long mmcra; > - unsigned long siar; > - unsigned long sier1; > - unsigned long sier2; > - unsigned long sier3; > - unsigned long sdar; > -}; > - > -static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) > -{ > - if (!(mmcr0 & MMCR0_FC)) > - goto do_freeze; > - if (mmcra & MMCRA_SAMPLE_ENABLE) > - goto do_freeze; > - if (cpu_has_feature(CPU_FTR_ARCH_31)) { > - if (!(mmcr0 & MMCR0_PMCCEXT)) > - goto do_freeze; > - if (!(mmcra & MMCRA_BHRB_DISABLE)) > - goto do_freeze; > - } > - return; > - > -do_freeze: > - mmcr0 = MMCR0_FC; > - mmcra = 0; > - if (cpu_has_feature(CPU_FTR_ARCH_31)) { > - mmcr0 |= MMCR0_PMCCEXT; > - mmcra = MMCRA_BHRB_DISABLE; > - } > - > - mtspr(SPRN_MMCR0, mmcr0); > - mtspr(SPRN_MMCRA, mmcra); > - isync(); > -} > - > -static void switch_pmu_to_guest(struct kvm_vcpu *vcpu, > - struct p9_host_os_sprs *host_os_sprs) > -{ > - struct lppaca *lp; > - int load_pmu = 1; > - > - lp = vcpu->arch.vpa.pinned_addr; > - if (lp) > - load_pmu = lp->pmcregs_in_use; > - > - if (load_pmu) > - vcpu->arch.hfscr |= HFSCR_PM; > - > - /* Save host */ > - if (ppc_get_pmu_inuse()) { > - /* > - * It might be better to put PMU handling (at least for the > - * host) in the perf subsystem because it knows more about what > - * is being used. > - */ > - > - /* POWER9, POWER10 do not implement HPMC or SPMC */ > - > - host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); > - host_os_sprs->mmcra = mfspr(SPRN_MMCRA); > - > - freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); > - > - host_os_sprs->pmc1 = mfspr(SPRN_PMC1); > - host_os_sprs->pmc2 = mfspr(SPRN_PMC2); > - host_os_sprs->pmc3 = mfspr(SPRN_PMC3); > - host_os_sprs->pmc4 = mfspr(SPRN_PMC4); > - host_os_sprs->pmc5 = mfspr(SPRN_PMC5); > - host_os_sprs->pmc6 = mfspr(SPRN_PMC6); > - host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); > - host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); > - host_os_sprs->sdar = mfspr(SPRN_SDAR); > - host_os_sprs->siar = mfspr(SPRN_SIAR); > - host_os_sprs->sier1 = mfspr(SPRN_SIER); > - > - if (cpu_has_feature(CPU_FTR_ARCH_31)) { > - host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); > - host_os_sprs->sier2 = mfspr(SPRN_SIER2); > - host_os_sprs->sier3 = mfspr(SPRN_SIER3); > - } > - } > - > -#ifdef CONFIG_PPC_PSERIES > - if (kvmhv_on_pseries()) { > - if (vcpu->arch.vpa.pinned_addr) { > - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; > - get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; > - } else { > - get_lppaca()->pmcregs_in_use = 1; > - } > - } > -#endif > - > - /* Load guest */ > - if (vcpu->arch.hfscr & HFSCR_PM) { > - mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); > - mtspr(SPRN_PMC2,
[RFC PATCH v0 1/1] powerpc/percpu: Use 2MB atom_size in percpu allocator on radix
The atom_size used by percpu allocator on powerpc is currently determined by mmu_linear_psize which is initialized to 4K and mmu_linear_psize is modified only by hash. Till now for radix the atom_size was defaulting to PAGE_SIZE(64K). Go for 2MB atom_size on radix if support for 2MB pages exist. 2MB atom_size on radix will allow using PMD mappings in the vmalloc area if and when support for higher sized vmalloc mappings is enabled for the pecpu allocator. However right now this change will result in more number of units to be allocated within one allocation due to increased upa(units per allocation). Signed-off-by: Bharata B Rao --- arch/powerpc/kernel/setup_64.c | 34 +- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1ff258f6c76c..45ce2d6e8112 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -871,6 +871,30 @@ static void __init pcpu_populate_pte(unsigned long addr) __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); } +static size_t pcpu_atom_size(void) +{ + size_t atom_size = PAGE_SIZE; + + /* +* Radix: Use PAGE_SIZE by default or 2M if available. +*/ + if (radix_enabled()) { + if (mmu_psize_defs[MMU_PAGE_2M].shift) + atom_size = 1 << mmu_psize_defs[MMU_PAGE_2M].shift; + goto out; + } + + /* +* Hash: Linear mapping is one of 4K, 1M and 16M. For 4K, no need +* to group units. For larger mappings, use 1M atom which +* should be large enough to contain a number of units. +*/ + if (mmu_linear_psize != MMU_PAGE_4K) + atom_size = 1 << 20; + +out: + return atom_size; +} void __init setup_per_cpu_areas(void) { @@ -880,15 +904,7 @@ void __init setup_per_cpu_areas(void) unsigned int cpu; int rc = -EINVAL; - /* -* Linear mapping is one of 4K, 1M and 16M. For 4K, no need -* to group units. For larger mappings, use 1M atom which -* should be large enough to contain a number of units. -*/ - if (mmu_linear_psize == MMU_PAGE_4K) - atom_size = PAGE_SIZE; - else - atom_size = 1 << 20; + atom_size = pcpu_atom_size(); if (pcpu_chosen_fc != PCPU_FC_PAGE) { rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, -- 2.31.1
Re: [PATCH v2 0/4] bus: Make remove callback return void
On Tue, Jul 6, 2021 at 11:50 AM Uwe Kleine-König wrote: > > drivers/staging/fieldbus/anybuss/host.c | 4 +--- Awesome ! Acked-by: Sven Van Asbroeck
Re: [PATCH kernel] KVM: PPC: Book3S HV: Make unique debugfs nodename
On 08/07/2021 03:48, Fabiano Rosas wrote: Alexey Kardashevskiy writes: Currently it is vm-$currentpid which works as long as there is just one VM per the userspace (99.99% cases) but produces a bunch of "debugfs: Directory 'vm16679' with parent 'kvm' already present!" when syzkaller (syscall fuzzer) is running so only one VM is present in the debugfs for a given process. This changes the debugfs node to include the LPID which alone should be system wide unique. This leaves the existing pid for the convenience of matching the VM's debugfs with the running userspace process (QEMU). Signed-off-by: Alexey Kardashevskiy Reviewed-by: Fabiano Rosas thanks. Strangely it also fixes a bunch of BUG: unable to handle kernel NULL pointer dereference in corrupted BUG: unable to handle kernel paging request in corrupted I was having 3 of these for every hour of running syzkaller and not anymore with this patch. --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1d1fcc290fca..0223ddc0eed0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5227,7 +5227,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* * Create a debugfs directory for the VM */ - snprintf(buf, sizeof(buf), "vm%d", current->pid); + snprintf(buf, sizeof(buf), "vm%d-lp%ld", current->pid, lpid); kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir); kvmppc_mmu_debugfs_init(kvm); if (radix_enabled()) -- Alexey
Re: [PATCH v2 4/4] bus: Make remove callback return void
On Tue, Jul 06, 2021 at 05:48:03PM +0200, Uwe Kleine-König wrote: > The driver core ignores the return value of this callback because there > is only little it can do when a device disappears. > > This is the final bit of a long lasting cleanup quest where several > buses were converted to also return void from their remove callback. > Additionally some resource leaks were fixed that were caused by drivers > returning an error code in the expectation that the driver won't go > away. > > With struct bus_type::remove returning void it's prevented that newly > implemented buses return an ignored error code and so don't anticipate > wrong expectations for driver authors. [...] > drivers/siox/siox-core.c | 4 +--- (For drivers/siox) Acked-by: Thorsten Scherer Best regards Thorsten -- Thorsten Scherer | Eckelmann AG | www.eckelmann.de |
Re: [PATCH v2 4/4] bus: Make remove callback return void
On 7/6/21 5:48 PM, Uwe Kleine-König wrote: The driver core ignores the return value of this callback because there is only little it can do when a device disappears. This is the final bit of a long lasting cleanup quest where several buses were converted to also return void from their remove callback. Additionally some resource leaks were fixed that were caused by drivers returning an error code in the expectation that the driver won't go away. With struct bus_type::remove returning void it's prevented that newly implemented buses return an ignored error code and so don't anticipate wrong expectations for driver authors. Acked-by: Russell King (Oracle) (For ARM, Amba and related parts) Acked-by: Mark Brown Acked-by: Chen-Yu Tsai (for drivers/bus/sunxi-rsb.c) Acked-by: Pali Rohár Acked-by: Mauro Carvalho Chehab (for drivers/media) Acked-by: Hans de Goede (For drivers/platform) Acked-by: Alexandre Belloni Acked-By: Vinod Koul Acked-by: Juergen Gross (For Xen) Acked-by: Lee Jones (For drivers/mfd) Acked-by: Johannes Thumshirn (For drivers/mcb) Acked-by: Johan Hovold Acked-by: Srinivas Kandagatla (For drivers/slimbus) Acked-by: Kirti Wankhede (For drivers/vfio) Acked-by: Maximilian Luz Acked-by: Heikki Krogerus (For ulpi and typec) Acked-by: Samuel Iglesias Gonsálvez (For ipack) Reviewed-by: Tom Rix (For fpga) Acked-by: Geoff Levand (For ps3) Signed-off-by: Uwe Kleine-König --- [...] drivers/hid/hid-core.c| 4 +--- drivers/hid/intel-ish-hid/ishtp/bus.c | 4 +--- [...] diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 7db332139f7d..dbed2524fd47 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2302,7 +2302,7 @@ static int hid_device_probe(struct device *dev) return ret; } -static int hid_device_remove(struct device *dev) +static void hid_device_remove(struct device *dev) { struct hid_device *hdev = to_hid_device(dev); struct hid_driver *hdrv; @@ -2322,8 +2322,6 @@ static int hid_device_remove(struct device *dev) if (!hdev->io_started) up(>driver_input_lock); - - return 0; } static ssize_t modalias_show(struct device *dev, struct device_attribute *a, diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index f0802b047ed8..8a51bd9cd093 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -255,7 +255,7 @@ static int ishtp_cl_bus_match(struct device *dev, struct device_driver *drv) * * Return: Return value from driver remove() call. */ -static int ishtp_cl_device_remove(struct device *dev) +static void ishtp_cl_device_remove(struct device *dev) { struct ishtp_cl_device *device = to_ishtp_cl_device(dev); struct ishtp_cl_driver *driver = to_ishtp_cl_driver(dev->driver); @@ -267,8 +267,6 @@ static int ishtp_cl_device_remove(struct device *dev) if (driver->remove) driver->remove(device); - - return 0; } /** For the HID part: Acked-by: Benjamin Tissoires Cheers, Benjamin
Re: [PATCH] powerpc: preempt: Don't touch the idle task's preempt_count during hotplug
On Wed, Jul 07, 2021 at 07:38:31PM +0100, Valentin Schneider wrote: > Powerpc currently resets a CPU's idle task preempt_count to 0 before said > task starts executing the secondary startup routine (and becomes an idle > task proper). > > This conflicts with commit > > f1a0a376ca0c ("sched/core: Initialize the idle task with preemption > disabled") > > which initializes all of the idle tasks' preempt_count to PREEMPT_DISABLED > during smp_init(). Note that this was superfluous before said commit, as > back then the hotplug machinery would invoke init_idle() via > idle_thread_get(), which would have already reset the CPU's idle task's > preempt_count to PREEMPT_ENABLED. > > Get rid of this preempt_count write. > > Cc: Guenter Roeck > Fixes: f1a0a376ca0c ("sched/core: Initialize the idle task with preemption > disabled") > Reported-by: Bharata B Rao > Signed-off-by: Valentin Schneider Tested-by: Guenter Roeck > --- > arch/powerpc/platforms/cell/smp.c| 3 --- > arch/powerpc/platforms/pseries/smp.c | 5 + > 2 files changed, 1 insertion(+), 7 deletions(-) > > diff --git a/arch/powerpc/platforms/cell/smp.c > b/arch/powerpc/platforms/cell/smp.c > index c855a0aeb49c..d7ab868aab54 100644 > --- a/arch/powerpc/platforms/cell/smp.c > +++ b/arch/powerpc/platforms/cell/smp.c > @@ -78,9 +78,6 @@ static inline int smp_startup_cpu(unsigned int lcpu) > > pcpu = get_hard_smp_processor_id(lcpu); > > - /* Fixup atomic count: it exited inside IRQ handler. */ > - task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; > - > /* >* If the RTAS start-cpu token does not exist then presume the >* cpu is already spinning. > diff --git a/arch/powerpc/platforms/pseries/smp.c > b/arch/powerpc/platforms/pseries/smp.c > index 096629f54576..7ebf3382816a 100644 > --- a/arch/powerpc/platforms/pseries/smp.c > +++ b/arch/powerpc/platforms/pseries/smp.c > @@ -105,10 +105,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) > return 1; > } > > - /* Fixup atomic count: it exited inside IRQ handler. */ > - task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; > - > - /* > + /* >* If the RTAS start-cpu token does not exist then presume the >* cpu is already spinning. >*/ > -- > 2.25.1 >
[PATCH] powerpc: preempt: Don't touch the idle task's preempt_count during hotplug
Powerpc currently resets a CPU's idle task preempt_count to 0 before said task starts executing the secondary startup routine (and becomes an idle task proper). This conflicts with commit f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled") which initializes all of the idle tasks' preempt_count to PREEMPT_DISABLED during smp_init(). Note that this was superfluous before said commit, as back then the hotplug machinery would invoke init_idle() via idle_thread_get(), which would have already reset the CPU's idle task's preempt_count to PREEMPT_ENABLED. Get rid of this preempt_count write. Cc: Guenter Roeck Fixes: f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled") Reported-by: Bharata B Rao Signed-off-by: Valentin Schneider --- arch/powerpc/platforms/cell/smp.c| 3 --- arch/powerpc/platforms/pseries/smp.c | 5 + 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index c855a0aeb49c..d7ab868aab54 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -78,9 +78,6 @@ static inline int smp_startup_cpu(unsigned int lcpu) pcpu = get_hard_smp_processor_id(lcpu); - /* Fixup atomic count: it exited inside IRQ handler. */ - task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; - /* * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 096629f54576..7ebf3382816a 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -105,10 +105,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) return 1; } - /* Fixup atomic count: it exited inside IRQ handler. */ - task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; - - /* + /* * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. */ -- 2.25.1
Re: [PATCH kernel] KVM: PPC: Book3S HV: Make unique debugfs nodename
Alexey Kardashevskiy writes: > Currently it is vm-$currentpid which works as long as there is just one > VM per the userspace (99.99% cases) but produces a bunch > of "debugfs: Directory 'vm16679' with parent 'kvm' already present!" > when syzkaller (syscall fuzzer) is running so only one VM is present in > the debugfs for a given process. > > This changes the debugfs node to include the LPID which alone should be > system wide unique. This leaves the existing pid for the convenience of > matching the VM's debugfs with the running userspace process (QEMU). > > Signed-off-by: Alexey Kardashevskiy Reviewed-by: Fabiano Rosas > --- > arch/powerpc/kvm/book3s_hv.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 1d1fcc290fca..0223ddc0eed0 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -5227,7 +5227,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) > /* >* Create a debugfs directory for the VM >*/ > - snprintf(buf, sizeof(buf), "vm%d", current->pid); > + snprintf(buf, sizeof(buf), "vm%d-lp%ld", current->pid, lpid); > kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir); > kvmppc_mmu_debugfs_init(kvm); > if (radix_enabled())
Re: [PATCH] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs
On 29/06/2021 15:15, Cédric Le Goater wrote: > On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at > runtime. Today, the IPI is not created for such nodes, and hot-plugged > CPUs use a bogus IPI, which leads to soft lockups. > > We could create the node IPI on demand but it is a bit complex because > this code would be called under bringup_up() and some IRQ locking is > being done. The simplest solution is to create the IPIs for all nodes > at startup. > > Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node") > Cc: sta...@vger.kernel.org # v5.13 > Reported-by: Geetika Moolchandani > Cc: Srikar Dronamraju > Signed-off-by: Cédric Le Goater > --- > > This patch breaks old versions of irqbalance (<= v1.4). Possible nodes > are collected from /sys/devices/system/node/ but CPU-less nodes are > not listed there. When interrupts are scanned, the link representing > the node structure is NULL and segfault occurs. > > Version 1.7 seems immune. > > --- > arch/powerpc/sysdev/xive/common.c | 4 > 1 file changed, 4 deletions(-) > > diff --git a/arch/powerpc/sysdev/xive/common.c > b/arch/powerpc/sysdev/xive/common.c > index f3b16ed48b05..5d2c58dba57e 100644 > --- a/arch/powerpc/sysdev/xive/common.c > +++ b/arch/powerpc/sysdev/xive/common.c > @@ -1143,10 +1143,6 @@ static int __init xive_request_ipi(void) > struct xive_ipi_desc *xid = _ipis[node]; > struct xive_ipi_alloc_info info = { node }; > > - /* Skip nodes without CPUs */ > - if (cpumask_empty(cpumask_of_node(node))) > - continue; > - > /* >* Map one IPI interrupt per node for all cpus of that node. >* Since the HW interrupt number doesn't have any meaning, Tested-by: Laurent Vivier
Re: [PATCH v2] xen/hvc: replace BUG_ON() with negative return value
On 07.07.21 11:57, Jan Beulich wrote: On 07.07.2021 11:10, Juergen Gross wrote: Xen frontends shouldn't BUG() in case of illegal data received from their backends. So replace the BUG_ON()s when reading illegal data from the ring page with negative return values. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -86,7 +86,11 @@ static int __write_console(struct xencons_info *xencons, cons = intf->out_cons; prod = intf->out_prod; mb(); /* update queue values before going on */ Largely unrelated note: While in general the barriers here may want switching to virt_*mb(), this particular one looks to be too heavy anyway: a read barrier is all that's needed here afaict, just like there's only a write barrier between ring contents and producer writing in __write_console(). I agree. And btw, since I've got puzzled by the linuxppc-dev@ in the recipients list, I did look up relevant entries in ./MAINTAINERS. Shouldn't the file be part of "XEN HYPERVISOR INTERFACE"? I wouldn't mind. Greg, Jiri, what do you think? Juergen OpenPGP_0xB0DE9DD628BF132F.asc Description: OpenPGP public key OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH v2] perf vendor events power10: Adds 24x7 nest metric events for power10 platform
Em Mon, Jun 28, 2021 at 12:19:35PM +0530, Kajol Jain escreveu: > Patch adds 24x7 nest metric events for POWER10. Thanks, applied. - Arnaldo > Tested-by: Nageswara R Sastry > Signed-off-by: Kajol Jain > --- > .../arch/powerpc/power10/nest_metrics.json| 424 ++ > 1 file changed, 424 insertions(+) > create mode 100644 > tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json > > --- > Changelog: > v1 -> v2 > - Removed "BriefDescription" field as its value was same as "MetricName" > field as suggested by Paul A. Clarke > - Added Tested-by tag. > --- > diff --git a/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json > b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json > new file mode 100644 > index ..8ba3e81c9808 > --- /dev/null > +++ b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json > @@ -0,0 +1,424 @@ > +[ > +{ > + "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P01", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P23", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "LOCAL_NODE_PUMP_RETRY_RATIO_P01", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_LNS_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PB_LNS_PUMP01\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "LOCAL_NODE_PUMP_RETRY_RATIO_P23", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_LNS_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PB_LNS_PUMP23\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "GROUP_PUMP_RETRY_RATIO_P01", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PB_GROUP_PUMP01\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "GROUP_PUMP_RETRY_RATIO_P23", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PB_GROUP_PUMP23\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_GROUP_PUMPS_P01", > + "MetricExpr": "(hv_24x7@PM_PB_GROUP_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_GROUP_PUMPS_P23", > + "MetricExpr": "(hv_24x7@PM_PB_GROUP_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_GROUP_PUMPS_RETRIES_P01", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_GROUP_PUMPS_RETRIES_P23", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P01", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PB_RNS_PUMP01\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P23", > + "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PB_RNS_PUMP23\\,chip\\=?@) * 100", > + "ScaleUnit": "1%", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_VECTOR_GROUP_PUMPS_P01", > + "MetricExpr": "(hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_VECTOR_GROUP_PUMPS_P23", > + "MetricExpr": "(hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_LOCAL_NODE_PUMPS_P01", > + "MetricExpr": "(hv_24x7@PM_PB_LNS_PUMP01\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_LOCAL_NODE_PUMPS_P23", > + "MetricExpr": "(hv_24x7@PM_PB_LNS_PUMP23\\,chip\\=?@ / > hv_24x7@PM_PAU_CYC\\,chip\\=?@)", > + "ScaleUnit": "4", > + "AggregationMode": "PerChip" > +}, > +{ > + "MetricName": "TOTAL_VECTOR_GROUP_PUMPS_RETRIES_P01", > + "MetricExpr":
Re: [PATCH] perf script python: Fix buffer size to report iregs in perf script
Em Wed, Jul 07, 2021 at 11:16:20AM +0530, kajoljain escreveu: > On 7/7/21 12:45 AM, Arnaldo Carvalho de Melo wrote: > > Em Tue, Jul 06, 2021 at 05:26:12PM +0530, kajoljain escreveu: > >> On 6/29/21 12:39 PM, kajoljain wrote: > >>> On 6/28/21 8:19 PM, Paul A. Clarke wrote: > On Mon, Jun 28, 2021 at 11:53:41AM +0530, Kajol Jain wrote: > > @@ -713,7 +711,16 @@ static void set_regs_in_dict(PyObject *dict, > > struct evsel *evsel) > > { > > struct perf_event_attr *attr = >core.attr; > > - char bf[512]; > > + > > + /* > > +* Here value 28 is a constant size which can be used to print > > +* one register value and its corresponds to: > > +* 16 chars is to specify 64 bit register in hexadecimal. > > +* 2 chars is for appending "0x" to the hexadecimal value and > > +* 10 chars is for register name. > > +*/ > > + int size = __sw_hweight64(attr->sample_regs_intr) * 28; > > + char bf[size]; > I propose using a template rather than a magic number here. Something > like: > const char reg_name_tmpl[] = "10 chars "; > const char reg_value_tmpl[] = "0x0123456789abcdef"; > const int size = __sw_hweight64(attr->sample_regs_intr) + > sizeof reg_name_tmpl + sizeof reg_value_tmpl; > >>>Thanks for reviewing the patch. Yes these are > >>> some standardization we can do by creating macros for different > >>> fields. > >>> The basic idea is, we want to provide significant buffer size > >>> based on number of registers present in sample_regs_intr to accommodate > >>> all data. > >>Is the approach used in this patch looks fine to you? > > Yeah, and the comment you provide right above it explains it, so I think > > that is enough, ok? > Thanks for reviewing it. As you said added comment already explains > why we are taking size constant as 28, should we skip adding macros part? > Can you pull this patch. Sure. - Arnaldo
Re: [PATCH v2] xen/hvc: replace BUG_ON() with negative return value
On 07.07.2021 11:10, Juergen Gross wrote: > Xen frontends shouldn't BUG() in case of illegal data received from > their backends. So replace the BUG_ON()s when reading illegal data from > the ring page with negative return values. > > Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich > --- a/drivers/tty/hvc/hvc_xen.c > +++ b/drivers/tty/hvc/hvc_xen.c > @@ -86,7 +86,11 @@ static int __write_console(struct xencons_info *xencons, > cons = intf->out_cons; > prod = intf->out_prod; > mb(); /* update queue values before going on */ Largely unrelated note: While in general the barriers here may want switching to virt_*mb(), this particular one looks to be too heavy anyway: a read barrier is all that's needed here afaict, just like there's only a write barrier between ring contents and producer writing in __write_console(). And btw, since I've got puzzled by the linuxppc-dev@ in the recipients list, I did look up relevant entries in ./MAINTAINERS. Shouldn't the file be part of "XEN HYPERVISOR INTERFACE"? Jan
Re: [RFC PATCH] powerpc: flexible register range save/restore macros
Excerpts from Christophe Leroy's message of July 5, 2021 3:52 pm: > > > Le 03/07/2021 à 11:14, Nicholas Piggin a écrit : >> Introduce macros that operate on a (start, end) range of registers, >> which reduces lines of code and need to do mental arithmetic while >> reading the code. > > Looks like a nice patch. > > Maybe you could split the patch in two parts, one part for GPRs and one patch > for the FP/VR regs. Sure I can do that. Thanks, Nick
Re: [PATCH v2] xen/hvc: replace BUG_ON() with negative return value
On 07. 07. 21, 12:40, Juergen Gross wrote: And btw, since I've got puzzled by the linuxppc-dev@ in the recipients list, I did look up relevant entries in ./MAINTAINERS. Shouldn't the file be part of "XEN HYPERVISOR INTERFACE"? I wouldn't mind. Greg, Jiri, what do you think? /me concurs. thanks, -- js suse labs
Re: [FSL P50xx] IRQ issues
Nice, thanks for reporting and testing. I submitted a qemu patch to hopefully avoid this happening again in future. Thanks, Nick Excerpts from Christian Zigotzky's message of July 7, 2021 1:22 am: > Hi Nick, > > Your patch works (see patch below)! Many thanks for your help! We tested > it on an A-EON AmigaOne X5000/20 and in a virtual e5500 QEMU machine today. > > Screenshots: > > - > http://www.skateman.nl/wp-content/uploads/2021/07/Screenshot-at-2021-07-06-113237.png > - https://i.ibb.co/h813RRp/Kernel-5-14-alpha3-Power-PC.png > > Thanks, > Christian > > On 06 July 2021 at 06:07 am, Christian Zigotzky wrote: >> Hi Nick, >> >> Thanks a lot for your patch! We will test it as soon as possible. >> You're right that this issue doesn't exist in a virtual e5500 QEMU >> machine. >> >> Have a nice day, >> Christian >> >> On 06 July 2021 at 01:36 am, Nicholas Piggin wrote: >>> Excerpts from Christian Zigotzky's message of July 6, 2021 4:49 am: Hi All, Our FSL P50xx machines don't boot anymore because of IRQ issues. [1] Please check the IRQ changes in the latest PowerPC updates 5.14-1. [2] Thanks, Christian [1] https://forum.hyperion-entertainment.com/download/file.php?id=2592=view [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=019b3fd94ba73d3ac615f0537440b81f129821f6 >>> This looks like mtmsrd in the 64e code. I think this should fix it. >>> >>> QEMU does not seem to trap on this, maybe something to improve. >>> >>> Thanks, >>> Nick >>> -- >>> >>> diff --git a/arch/powerpc/kernel/interrupt_64.S >>> b/arch/powerpc/kernel/interrupt_64.S >>> index 4063e8a3f704..d4212d2ff0b5 100644 >>> --- a/arch/powerpc/kernel/interrupt_64.S >>> +++ b/arch/powerpc/kernel/interrupt_64.S >>> @@ -311,9 +311,13 @@ END_BTB_FLUSH_SECTION >>> * trace_hardirqs_off(). >>> */ >>> li r11,IRQS_ALL_DISABLED >>> - li r12,-1 /* Set MSR_EE and MSR_RI */ >>> stb r11,PACAIRQSOFTMASK(r13) >>> +#ifdef CONFIG_PPC_BOOK3S >>> + li r12,-1 /* Set MSR_EE and MSR_RI */ >>> mtmsrd r12,1 >>> +#else >>> + wrteei 1 >>> +#endif >>> /* Calling convention has r9 = orig r0, r10 = regs */ >>> mr r9,r0 >> > >
[PATCH v2] xen/hvc: replace BUG_ON() with negative return value
Xen frontends shouldn't BUG() in case of illegal data received from their backends. So replace the BUG_ON()s when reading illegal data from the ring page with negative return values. Signed-off-by: Juergen Gross --- V2: - drop BUG_ON() (Christophe Leroy, Greg Kroah-Hartmann) - replace WARN_ONCE() by pr_err_once() (Greg Kroah-Hartmann) - break out from original series --- drivers/tty/hvc/hvc_xen.c | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 92c9a476defc..8f143c09a169 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -86,7 +86,11 @@ static int __write_console(struct xencons_info *xencons, cons = intf->out_cons; prod = intf->out_prod; mb(); /* update queue values before going on */ - BUG_ON((prod - cons) > sizeof(intf->out)); + + if ((prod - cons) > sizeof(intf->out)) { + pr_err_once("xencons: Illegal ring page indices"); + return -EINVAL; + } while ((sent < len) && ((prod - cons) < sizeof(intf->out))) intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; @@ -114,7 +118,10 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len) */ while (len) { int sent = __write_console(cons, data, len); - + + if (sent < 0) + return sent; + data += sent; len -= sent; @@ -138,7 +145,11 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) cons = intf->in_cons; prod = intf->in_prod; mb(); /* get pointers before reading ring */ - BUG_ON((prod - cons) > sizeof(intf->in)); + + if ((prod - cons) > sizeof(intf->in)) { + pr_err_once("xencons: Illegal ring page indices"); + return -EINVAL; + } while (cons != prod && recv < len) buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; -- 2.26.2
[PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
From: Athira Rajeev Power10 performance monitoring unit (PMU) driver uses performance monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6) for counting instructions and cycles. Event used for cycles is PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these events in wait state is controlled by the CC56RUN bit setting in Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not set, PMC5/6 will not count when CTRL[RUN] is zero. Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and PMC6 count instructions and cycles regardless of the run bit. With this change, these events are also now renamed to PM_CYC and PM_INST_CMPL rather than PM_RUN_CYC and PM_RUN_INST_CMPL. Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support") Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan --- Notes on testing done for this change: Tested this patch change with a kernel module that turns off and turns on the runlatch. kernel module also reads the counter values for PMC5 and PMC6 during the period when runlatch is off. - Started PMU counters via "perf stat" and loaded the test module. - Checked the counter values captured from module during the runlatch off period. - Verified that counters were frozen without the patch and with the patch, observed counters were incrementing. arch/powerpc/perf/power10-events-list.h | 8 +++--- arch/powerpc/perf/power10-pmu.c | 44 +++-- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index 93be719..564f1409 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -9,10 +9,10 @@ /* * Power10 event codes. */ -EVENT(PM_RUN_CYC, 0x600f4); +EVENT(PM_CYC, 0x600f4); EVENT(PM_DISP_STALL_CYC, 0x100f8); EVENT(PM_EXEC_STALL, 0x30008); -EVENT(PM_RUN_INST_CMPL,0x500fa); +EVENT(PM_INST_CMPL,0x500fa); EVENT(PM_BR_CMPL, 0x4d05e); EVENT(PM_BR_MPRED_CMPL, 0x400f6); EVENT(PM_BR_FIN, 0x2f04a); @@ -50,8 +50,8 @@ /* ITLB Reloaded */ EVENT(PM_ITLB_MISS,0x400fc); -EVENT(PM_RUN_CYC_ALT, 0x0001e); -EVENT(PM_RUN_INST_CMPL_ALT,0x2); +EVENT(PM_CYC_ALT, 0x0001e); +EVENT(PM_INST_CMPL_ALT,0x2); /* * Memory Access Events diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index f9d64c6..9dd75f3 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -91,8 +91,8 @@ /* Table of alternatives, sorted by column 0 */ static const unsigned int power10_event_alternatives[][MAX_ALT] = { - { PM_RUN_CYC_ALT, PM_RUN_CYC }, - { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, + { PM_CYC_ALT, PM_CYC }, + { PM_INST_CMPL_ALT, PM_INST_CMPL }, }; static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) @@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev) return 0; } -GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); -GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); GENERIC_EVENT_ATTR(branch-instructions,PM_BR_CMPL); GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); @@ -148,8 +148,8 @@ static int power10_check_attr_config(struct perf_event *ev) CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); static struct attribute *power10_events_attr_dd1[] = { - GENERIC_EVENT_PTR(PM_RUN_CYC), - GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_INST_CMPL), GENERIC_EVENT_PTR(PM_BR_CMPL), GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), GENERIC_EVENT_PTR(PM_LD_REF_L1), @@ -173,8 +173,8 @@ static int power10_check_attr_config(struct perf_event *ev) }; static struct attribute *power10_events_attr[] = { - GENERIC_EVENT_PTR(PM_RUN_CYC), - GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_INST_CMPL), GENERIC_EVENT_PTR(PM_BR_FIN), GENERIC_EVENT_PTR(PM_MPRED_BR_FIN), GENERIC_EVENT_PTR(PM_LD_REF_L1), @@ -271,8 +271,8 @@ static int power10_check_attr_config(struct perf_event *ev) }; static int power10_generic_events_dd1[] = { - [PERF_COUNT_HW_CPU_CYCLES] =