Re: [PATCH 1/2] PPC: powernv: remove redundant cpuidle_idle_call()
Hi Nicolas, You will have to include the below patch with yours. You could squash the two I guess, I have added the changelog just for clarity. And you also might want to change the subject to cpuidle/powernv. It gives a better picture. Thanks Regards Preeti U Murthy cpuidle/powernv: Add ppc64_runlatch_off/on() to idle routines Following moving of cpuidle_idle_call() to the generic idle loop, we need to add the runlatch functions to the idle routines on powernv which was earlier taken care of by the arch specific idle routine. Signed-off-by: Preeti U Murthy pre...@linux.vnet.ibm.com --- drivers/cpuidle/cpuidle-powernv.c |5 + 1 file changed, 5 insertions(+) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 78fd174..f48607c 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -14,6 +14,7 @@ #include asm/machdep.h #include asm/firmware.h +#include asm/runlatch.h struct cpuidle_driver powernv_idle_driver = { .name = powernv_idle, @@ -30,12 +31,14 @@ static int snooze_loop(struct cpuidle_device *dev, local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); + ppc64_runlatch_off(); while (!need_resched()) { HMT_low(); HMT_very_low(); } HMT_medium(); + ppc64_runlatch_on(); clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); return index; @@ -45,7 +48,9 @@ static int nap_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { + ppc64_runlatch_off(); power7_idle(); + ppc64_runlatch_on(); return index; } On 02/06/2014 07:46 PM, Nicolas Pitre wrote: The core idle loop now takes care of it. 
Signed-off-by: Nicolas Pitre n...@linaro.org --- arch/powerpc/platforms/powernv/setup.c | 13 + 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 21166f65c9..a932feb290 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -26,7 +26,6 @@ #include linux/of_fdt.h #include linux/interrupt.h #include linux/bug.h -#include linux/cpuidle.h #include asm/machdep.h #include asm/firmware.h @@ -217,16 +216,6 @@ static int __init pnv_probe(void) return 1; } -void powernv_idle(void) -{ - /* Hook to cpuidle framework if available, else - * call on default platform idle code - */ - if (cpuidle_idle_call()) { - power7_idle(); - } -} - define_machine(powernv) { .name = PowerNV, .probe = pnv_probe, @@ -236,7 +225,7 @@ define_machine(powernv) { .show_cpuinfo = pnv_show_cpuinfo, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, - .power_save = powernv_idle, + .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/2] PPC: powernv: remove redundant cpuidle_idle_call()
On 02/14/2014 04:47 PM, Preeti U Murthy wrote: Hi Nicolas, You will have to include the below patch with yours. You could squash the two I guess, I have added the changelog just for clarity. And you also might want to change the subject to cpuidle/powernv. It gives a better picture. Thanks Regards Preeti U Murthy cpuidle/powernv: Add ppc64_runlatch_off/on() to idle routines Following moving of cpuidle_idle_call() to the generic idle loop, we need to add the runlatch functions to the idle routines on powernv which was earlier taken care of by the arch specific idle routine. Signed-off-by: Preeti U Murthy pre...@linux.vnet.ibm.com Reviewed-by: Deepthi Dharwar deep...@linux.vnet.ibm.com --- drivers/cpuidle/cpuidle-powernv.c |5 + 1 file changed, 5 insertions(+) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 78fd174..f48607c 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -14,6 +14,7 @@ #include asm/machdep.h #include asm/firmware.h +#include asm/runlatch.h struct cpuidle_driver powernv_idle_driver = { .name = powernv_idle, @@ -30,12 +31,14 @@ static int snooze_loop(struct cpuidle_device *dev, local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); + ppc64_runlatch_off(); while (!need_resched()) { HMT_low(); HMT_very_low(); } HMT_medium(); + ppc64_runlatch_on(); clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); return index; @@ -45,7 +48,9 @@ static int nap_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { + ppc64_runlatch_off(); power7_idle(); + ppc64_runlatch_on(); return index; } On 02/06/2014 07:46 PM, Nicolas Pitre wrote: The core idle loop now takes care of it. 
Signed-off-by: Nicolas Pitre n...@linaro.org --- arch/powerpc/platforms/powernv/setup.c | 13 + 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 21166f65c9..a932feb290 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -26,7 +26,6 @@ #include linux/of_fdt.h #include linux/interrupt.h #include linux/bug.h -#include linux/cpuidle.h #include asm/machdep.h #include asm/firmware.h @@ -217,16 +216,6 @@ static int __init pnv_probe(void) return 1; } -void powernv_idle(void) -{ -/* Hook to cpuidle framework if available, else - * call on default platform idle code - */ -if (cpuidle_idle_call()) { -power7_idle(); -} -} - define_machine(powernv) { .name = PowerNV, .probe = pnv_probe, @@ -236,7 +225,7 @@ define_machine(powernv) { .show_cpuinfo = pnv_show_cpuinfo, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, -.power_save = powernv_idle, +.power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/6] PCI, acpiphp: Use list_for_each_entry() for bus traversal
On Friday, February 14, 2014 10:19:41 AM Yijing Wang wrote: On 2014/2/14 7:54, Rafael J. Wysocki wrote: On Thursday, February 13, 2014 09:13:58 PM Yijing Wang wrote: Replace list_for_each() + pci_bus_b() with the simpler list_for_each_entry(). Signed-off-by: Yijing Wang wangyij...@huawei.com Looks reasonable to me. Does it conflict with anything currently in linux-next (the linux-next branch of linux-pm.git in particular)? Hi Rafael, I applied this to your linux-next branch successfully . No conflicts found. Good. :-) Please feel free to add my ACK to it. Rafael --- drivers/pci/hotplug/acpiphp_glue.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index cd929ae..aee6a0a 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -450,7 +450,7 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) */ static unsigned char acpiphp_max_busnr(struct pci_bus *bus) { - struct list_head *tmp; + struct pci_bus *tmp; unsigned char max, n; /* @@ -463,8 +463,8 @@ static unsigned char acpiphp_max_busnr(struct pci_bus *bus) */ max = bus-busn_res.start; - list_for_each(tmp, bus-children) { - n = pci_bus_max_busnr(pci_bus_b(tmp)); + list_for_each_entry(tmp, bus-children, node) { + n = pci_bus_max_busnr(tmp); if (n max) max = n; } -- I speak only for myself. Rafael J. Wysocki, Intel Open Source Technology Center. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] of: search the best compatible match first in __of_match_node()
On Thu, Feb 13, 2014 at 11:22 PM, Kevin Hao haoke...@gmail.com wrote: Currently, of_match_node compares each given match against all node's compatible strings with of_device_is_compatible. To achieve multiple compatible strings per node with ordering from specific to generic, this requires given matches to be ordered from specific to generic. For most of the drivers this is not true and also an alphabetical ordering is more sane there. Therefore, this patch introduces a function to match each of the node's compatible strings against all given compatible matches without type and name first, before checking the next compatible string. This implies that node's compatibles are ordered from specific to generic while given matches can be in any order. If we fail to find such a match entry, then fall-back to the old method in order to keep compatibility. Cc: Sebastian Hesselbarth sebastian.hesselba...@gmail.com Signed-off-by: Kevin Hao haoke...@gmail.com Looks good to me. I'll put this in next for a few days. I'd really like to see some acks and tested-by's before sending to Linus. We could be a bit more strict here and fallback to the old matching if the match table has any entries with name or type. I don't think that should be necessary though. 
Rob --- drivers/of/base.c | 43 ++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index ba195fbce4c6..10b51106c854 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -730,13 +730,49 @@ out: } EXPORT_SYMBOL(of_find_node_with_property); +static const struct of_device_id * +of_match_compatible(const struct of_device_id *matches, + const struct device_node *node) +{ + const char *cp; + int cplen, l; + const struct of_device_id *m; + + cp = __of_get_property(node, compatible, cplen); + while (cp (cplen 0)) { + m = matches; + while (m-name[0] || m-type[0] || m-compatible[0]) { + /* Only match for the entries without type and name */ + if (m-name[0] || m-type[0] || + of_compat_cmp(m-compatible, cp, +strlen(m-compatible))) + m++; + else + return m; + } + + /* Get node's next compatible string */ + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return NULL; +} + static const struct of_device_id *__of_match_node(const struct of_device_id *matches, const struct device_node *node) { + const struct of_device_id *m; + if (!matches) return NULL; + m = of_match_compatible(matches, node); + if (m) + return m; + while (matches-name[0] || matches-type[0] || matches-compatible[0]) { int match = 1; if (matches-name[0]) @@ -760,7 +796,12 @@ const struct of_device_id *__of_match_node(const struct of_device_id *matches, * @matches: array of of device match structures to search in * @node: the of device structure to match against * - * Low level utility function used by device matching. + * Low level utility function used by device matching. We have two ways + * of matching: + * - Try to find the best compatible match by comparing each compatible + * string of device node with all the given matches respectively. + * - If the above method failed, then try to match the compatible by using + * __of_device_is_compatible() besides the match in type and name. 
*/ const struct of_device_id *of_match_node(const struct of_device_id *matches, const struct device_node *node) -- 1.8.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] of: search the best compatible match first in __of_match_node()
On Feb 14, 2014, at 9:53 AM, Rob Herring robherri...@gmail.com wrote: On Thu, Feb 13, 2014 at 11:22 PM, Kevin Hao haoke...@gmail.com wrote: Currently, of_match_node compares each given match against all node's compatible strings with of_device_is_compatible. To achieve multiple compatible strings per node with ordering from specific to generic, this requires given matches to be ordered from specific to generic. For most of the drivers this is not true and also an alphabetical ordering is more sane there. Therefore, this patch introduces a function to match each of the node's compatible strings against all given compatible matches without type and name first, before checking the next compatible string. This implies that node's compatibles are ordered from specific to generic while given matches can be in any order. If we fail to find such a match entry, then fall-back to the old method in order to keep compatibility. Cc: Sebastian Hesselbarth sebastian.hesselba...@gmail.com Signed-off-by: Kevin Hao haoke...@gmail.com Looks good to me. I'll put this in next for a few days. I'd really like to see some acks and tested-by's before sending to Linus. We could be a bit more strict here and fallback to the old matching if the match table has any entries with name or type. I don't think that should be necessary though. Rob Can you push the revert to Linus sooner, since currently a ton of boards wouldn’t be working on the PPC side, so at least -rc3 has the possibility of working for them. 
- k --- drivers/of/base.c | 43 ++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index ba195fbce4c6..10b51106c854 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -730,13 +730,49 @@ out: } EXPORT_SYMBOL(of_find_node_with_property); +static const struct of_device_id * +of_match_compatible(const struct of_device_id *matches, + const struct device_node *node) +{ + const char *cp; + int cplen, l; + const struct of_device_id *m; + + cp = __of_get_property(node, compatible, cplen); + while (cp (cplen 0)) { + m = matches; + while (m-name[0] || m-type[0] || m-compatible[0]) { + /* Only match for the entries without type and name */ + if (m-name[0] || m-type[0] || + of_compat_cmp(m-compatible, cp, +strlen(m-compatible))) + m++; + else + return m; + } + + /* Get node's next compatible string */ + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return NULL; +} + static const struct of_device_id *__of_match_node(const struct of_device_id *matches, const struct device_node *node) { + const struct of_device_id *m; + if (!matches) return NULL; + m = of_match_compatible(matches, node); + if (m) + return m; + while (matches-name[0] || matches-type[0] || matches-compatible[0]) { int match = 1; if (matches-name[0]) @@ -760,7 +796,12 @@ const struct of_device_id *__of_match_node(const struct of_device_id *matches, * @matches: array of of device match structures to search in * @node: the of device structure to match against * - * Low level utility function used by device matching. + * Low level utility function used by device matching. We have two ways + * of matching: + * - Try to find the best compatible match by comparing each compatible + * string of device node with all the given matches respectively. + * - If the above method failed, then try to match the compatible by using + * __of_device_is_compatible() besides the match in type and name. 
*/ const struct of_device_id *of_match_node(const struct of_device_id *matches, const struct device_node *node) -- 1.8.5.3 -- Employee of Qualcomm Innovation Center, Inc. Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] of: search the best compatible match first in __of_match_node()
On Feb 14, 2014, at 9:53 AM, Rob Herring robherri...@gmail.com wrote: On Thu, Feb 13, 2014 at 11:22 PM, Kevin Hao haoke...@gmail.com wrote: Currently, of_match_node compares each given match against all node's compatible strings with of_device_is_compatible. To achieve multiple compatible strings per node with ordering from specific to generic, this requires given matches to be ordered from specific to generic. For most of the drivers this is not true and also an alphabetical ordering is more sane there. Therefore, this patch introduces a function to match each of the node's compatible strings against all given compatible matches without type and name first, before checking the next compatible string. This implies that node's compatibles are ordered from specific to generic while given matches can be in any order. If we fail to find such a match entry, then fall-back to the old method in order to keep compatibility. Cc: Sebastian Hesselbarth sebastian.hesselba...@gmail.com Signed-off-by: Kevin Hao haoke...@gmail.com Looks good to me. I'll put this in next for a few days. I'd really like to see some acks and tested-by's before sending to Linus. We could be a bit more strict here and fallback to the old matching if the match table has any entries with name or type. I don't think that should be necessary though. Rob Can you push the revert to Linus sooner, since currently a ton of boards wouldn’t be working on the PPC side, so at least -rc3 has the possibility of working for them. - k ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/6] PCI,acpiphp: Use list_for_each_entry() for bus traversal
On Thu, Feb 13, 2014 at 09:13:58PM +0800, Yijing Wang wrote: Replace list_for_each() + pci_bus_b() with the simpler list_for_each_entry(). Signed-off-by: Yijing Wang wangyij...@huawei.com I applied all six of these (please include a 0/6 cover letter in the future; that's a nice place to note that I applied things) to pci/list-for-each-entry for v3.15, thanks! --- drivers/pci/hotplug/acpiphp_glue.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index cd929ae..aee6a0a 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -450,7 +450,7 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) */ static unsigned char acpiphp_max_busnr(struct pci_bus *bus) { - struct list_head *tmp; + struct pci_bus *tmp; unsigned char max, n; /* @@ -463,8 +463,8 @@ static unsigned char acpiphp_max_busnr(struct pci_bus *bus) */ max = bus-busn_res.start; - list_for_each(tmp, bus-children) { - n = pci_bus_max_busnr(pci_bus_b(tmp)); + list_for_each_entry(tmp, bus-children, node) { + n = pci_bus_max_busnr(tmp); if (n max) max = n; } -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V2] powerpc: thp: Fix crash on mremap
On Thu, 2014-02-13 at 08:03 +1100, Benjamin Herrenschmidt wrote: It looks very different because the function that needs to be fixed changed a lot upstream in 3.13. .../... Hi Greg ! You didn't say if that explanation was to your liking :-) If it is, do you want Aneesh to re-submit the patch with such an explanation in the changelog ? Cheers, Ben. In practice it's *not* very different in behaviour. It's just that on powerpc we need to unconditionally call withdraw and deposit when moving PTEs or it will crash, due to how we keep the transparent huge page in sync with the hash table. With the 3.13 code, due to lock breaking introduced by Kirill in 3.13-rc's, there's already a generic case for doing that (if we dropped the lock). So we just changed the condition to essentially force the condition to true to always do it under control of an arch helper. The pre-3.13 code didn't do the withdraw and deposit at all in that function however, so in that case, the patch (this 3.12 one) basically just adds the calls to withdraw and deposit under control of an ifdef which is only enabled for powerpc64. So you are taking 0 risk with other architecture and as the powerpc maintainer I'm happy with the patch. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 00/11] powerpc: Add support for Power Hypervisor supplied performance counters
These patches add basic pmus for 2 powerpc hypervisor interfaces to obtain performance counters: gpci (get performance counter info) and 24x7. The counters supplied by these interfaces are continually counting and never need to be (and cannot be) disabled or enabled. They additionally do not generate any interrupts. This makes them in some regards similar to software counters, and as a result their implementation shares some common code (which an initial patch exposes) with the sw counters. There is ongoing work to support transactions for each of these pmus. These 2 PMUs end up providing access to some cpu, core, and chip level counters not exposed via other interfaces, and additionally allow monitoring the performance of other lpars (guests) on the same host system. Because it provides access to core and chip level counters, this pair of PMUs could be thought of as powerpc's counterpart to x86's uncore events. As an example, processor_bus_utilization_abc and processor_bus_utilization_wxyz (in hv_gpci.h) allow retrieval of total cycles and idle cycles for various inter-chip buses. GPCI is an interface that already exists on some power6 and power7 machines (depending on the fw version), but is rather inflexible and code intensive to add additional counters to. The 24x7 interfaces currently are designed to co-exist with the gpci interface while replacing most of gpci's functionality on newer systems. Right now, the 24x7 code I've submitted uses the gpci calls to check if it has permission to access certain classes of counters. 
Example perf usage: perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8,secondary_index=0,starting_index=0x,request=0x10/' -r 0 -C 0 -x ' ' sleep 0.1 perf stat -e 'hv_24x7/domain=2,offset=8,starting_index=0,lpar=0x/' -r 0 -C 0 -x ' ' sleep 0.1 -- Changes since v1: - add a few attributes to hv_gpci and hv_24x7 that expose some info about the interfaces - so the attributes show up in the right place, fix bin_attr creation in sysfs groups. - move hv_gpci.h and hv_24x7.h interface headers into arch/powerpc/perf - fix bit ordering in hv_gpci.h - split out hv_perf_caps_get() and use it to probe for the interface before registering - ensure proper alignment of hypervisor args - add a few missing counter requests to hv_gpci.h - s/CIR_xxx/CIR_XXX/ in hv_gpci.h - s/modules_init/device_initcall/ - Don't set event->cpu, use the user provided one - remove the union of gpci events, just give the user 1024 bytes to play with - clarify some comments (the list of fw versions is now labeled) - provide an event_24x7_request() that wraps single_24x7_request() - probably some other small fixes I'm forgetting. 
Cody P Schafer (11): perf: add PMU_RANGE_ATTR() helper for use by sw-like pmus perf core: export swevent hrtimer helpers sysfs: create bin_attributes under the requested group powerpc: add hvcalls for 24x7 and gpci (get performance counter info) powerpc: add hv_gpci interface header powerpc: add 24x7 interface header powerpc: add a shared interface to get gpci version and capabilities powerpc/perf: add support for the hv gpci (get performance counter info) interface powerpc/perf: add support for the hv 24x7 interface powerpc/perf: add kconfig option for hypervisor provided counters powerpc/perf/hv_{gpci,24x7}: add documentation of device attributes .../testing/sysfs-bus-event_source-devices-hv_24x7 | 22 + .../testing/sysfs-bus-event_source-devices-hv_gpci | 43 ++ arch/powerpc/include/asm/hvcall.h | 5 + arch/powerpc/perf/Makefile | 2 + arch/powerpc/perf/hv-24x7.c| 491 +++ arch/powerpc/perf/hv-24x7.h| 239 ++ arch/powerpc/perf/hv-common.c | 39 ++ arch/powerpc/perf/hv-common.h | 17 + arch/powerpc/perf/hv-gpci.c| 290 arch/powerpc/perf/hv-gpci.h| 521 + arch/powerpc/platforms/Kconfig.cputype | 6 + fs/sysfs/group.c | 7 +- include/linux/perf_event.h | 22 +- kernel/events/core.c | 8 +- 14 files changed, 1705 insertions(+), 7 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci create mode 100644 arch/powerpc/perf/hv-24x7.c create mode 100644 arch/powerpc/perf/hv-24x7.h create mode 100644 arch/powerpc/perf/hv-common.c create mode 100644 arch/powerpc/perf/hv-common.h create mode 100644 arch/powerpc/perf/hv-gpci.c create mode 100644 arch/powerpc/perf/hv-gpci.h -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 02/11] perf core: export swevent hrtimer helpers
Export the swevent hrtimer helpers currently only used in events/core.c to allow the addition of architecture specific sw-like pmus. Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- include/linux/perf_event.h | 5 - kernel/events/core.c | 8 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2702e91..24378a9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -559,7 +559,10 @@ extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); - +extern void perf_swevent_init_hrtimer(struct perf_event *event); +extern void perf_swevent_start_hrtimer(struct perf_event *event); +extern void perf_swevent_cancel_hrtimer(struct perf_event *event); +extern int perf_swevent_event_idx(struct perf_event *event); struct perf_sample_data { u64 type; diff --git a/kernel/events/core.c b/kernel/events/core.c index 56003c6..feb0347 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5816,7 +5816,7 @@ static int perf_swevent_init(struct perf_event *event) return 0; } -static int perf_swevent_event_idx(struct perf_event *event) +int perf_swevent_event_idx(struct perf_event *event) { return 0; } @@ -6045,7 +6045,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) return ret; } -static void perf_swevent_start_hrtimer(struct perf_event *event) +void perf_swevent_start_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = event-hw; s64 period; @@ -6067,7 +6067,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) HRTIMER_MODE_REL_PINNED, 0); } -static void perf_swevent_cancel_hrtimer(struct perf_event *event) +void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = event-hw; @@ -6079,7 +6079,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) } } -static void 
perf_swevent_init_hrtimer(struct perf_event *event) +void perf_swevent_init_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = event-hw; -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 01/11] perf: add PMU_RANGE_ATTR() helper for use by sw-like pmus
Add PMU_RANGE_ATTR() and PMU_RANGE_RESV() (for reserved areas) which generate functions to extract the relevant bits from event->attr.config{,1,2} for use by sw-like pmus where the 'config{,1,2}' values don't map directly to hardware registers. Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- include/linux/perf_event.h | 17 + 1 file changed, 17 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e56b07f..2702e91 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -871,4 +871,21 @@ _name##_show(struct device *dev, \ \ static struct device_attribute format_attr_##_name = __ATTR_RO(_name) +#define PMU_RANGE_ATTR(name, attr_var, bit_start, bit_end) \ +PMU_FORMAT_ATTR(name, #attr_var : #bit_start - #bit_end); \ +PMU_RANGE_RESV(name, attr_var, bit_start, bit_end) + +#define PMU_RANGE_RESV(name, attr_var, bit_start, bit_end) \ +static u64 event_get_##name##_max(void) \ +{ \ + int bits = (bit_end) - (bit_start) + 1; \ + return ((0x1ULL (bits - 1ULL)) - 1ULL) | \ + (0xFULL (bits - 4ULL)); \ +} \ +static u64 event_get_##name(struct perf_event *event) \ +{ \ + return (event-attr.attr_var (bit_start)) \ + event_get_##name##_max(); \ +} + #endif /* _LINUX_PERF_EVENT_H */ -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 03/11] sysfs: create bin_attributes under the requested group
bin_attributes created/updated in create_files() (such as those listed via (struct device).attribute_groups) were not placed under the specified group, and instead appeared in the base kobj directory. Fix this by making bin_attributes use creating code similar to normal attributes. A quick grep shows that no one is using bin_attrs in a named attribute group yet, so we can do this without breaking anything in userspace. Note that I do not add is_visible() support to bin_attributes, though that could be done as well. Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- fs/sysfs/group.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 6b57938..aa04068 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -70,8 +70,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, if (grp-bin_attrs) { for (bin_attr = grp-bin_attrs; *bin_attr; bin_attr++) { if (update) - sysfs_remove_bin_file(kobj, *bin_attr); - error = sysfs_create_bin_file(kobj, *bin_attr); + kernfs_remove_by_name(parent, + (*bin_attr)-attr.name); + error = sysfs_add_file_mode_ns(parent, + (*bin_attr)-attr, true, + (*bin_attr)-attr.mode, NULL); if (error) break; } -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 04/11] powerpc: add hvcalls for 24x7 and gpci (get performance counter info)
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/include/asm/hvcall.h | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index d8b600b..652f7e4 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -274,6 +274,11 @@ /* Platform specific hcalls, used by KVM */ #define H_RTAS 0xf000 +/* Platform specific hcalls, provided by PHYP */ +#define H_GET_24X7_CATALOG_PAGE 0xF078 +#define H_GET_24X7_DATA0xF07C +#define H_GET_PERF_COUNTER_INFO 0xF080 + #ifndef __ASSEMBLY__ /** -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 05/11] powerpc: add hv_gpci interface header
H_GetPerformanceCounterInfo (referred to as hv_gpci or just gpci from here on) is an interface to retrieve specific performance counters and other data from the hypervisor. All outputs have a fixed format (and are represented as structs in this patch). Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-gpci.h | 521 1 file changed, 521 insertions(+) create mode 100644 arch/powerpc/perf/hv-gpci.h diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h new file mode 100644 index 000..d602809 --- /dev/null +++ b/arch/powerpc/perf/hv-gpci.h @@ -0,0 +1,521 @@ +#ifndef LINUX_POWERPC_PERF_HV_GPCI_H_ +#define LINUX_POWERPC_PERF_HV_GPCI_H_ + +#include linux/types.h + +/* From the document H_GetPerformanceCounterInfo Interface v1.07 */ + +/* H_GET_PERF_COUNTER_INFO argument */ +struct hv_get_perf_counter_info_params { + __be32 counter_request; /* I */ + __be32 starting_index; /* IO */ + __be16 secondary_index; /* IO */ + __be16 returned_values; /* O */ + __be32 detail_rc; /* O, only needed when called via *_norets() */ + + /* +* O, size each of counter_value element in bytes, only set for version +* = 0x3 +*/ + __be16 cv_element_size; + + /* I, 0 (zero) for versions 0x3 */ + __u8 counter_info_version_in; + + /* O, 0 (zero) if version 0x3. Must be set to 0 when making hcall */ + __u8 counter_info_version_out; + __u8 reserved[0xC]; + __u8 counter_value[]; +} __packed; + +/* + * counter info version = fw version/reference (spec version) + * + * 8 = power8 (1.07) + * [7 is skipped by spec 1.07] + * 6 = TLBIE (1.07) + * 5 = v7r7m0.phyp (1.05) + * [4 skipped] + * 3 = v7r6m0.phyp (?) + * [1,2 skipped] + * 0 = v7r{2,3,4}m0.phyp (?) + */ +#define COUNTER_INFO_VERSION_CURRENT 0x8 + +/* + * These determine the counter_value[] layout and the meaning of starting_index + * and secondary_index. + * + * Unless otherwise noted, @secondary_index is unused and ignored. 
+ */ +enum counter_info_requests { + + /* GENERAL */ + + /* @starting_index: starting physical processor index or -1 for +* current physical processor. Data is only collected +* for the processors' primary thread. +*/ + CIR_DISPATCH_TIMEBASE_BY_PROCESSOR = 0x10, + + /* @starting_index: starting partition id or -1 for the current logical +* partition (virtual machine). +*/ + CIR_ENTITLED_CAPPED_UNCAPPED_DONATED_IDLE_TIMEBASE_BY_PARTITION = 0x20, + + /* @starting_index: starting partition id or -1 for the current logical +* partition (virtual machine). +*/ + CIR_RUN_INSTRUCTIONS_RUN_CYCLES_BY_PARTITION = 0X30, + + /* @starting_index: must be -1 (to refer to the current partition) +*/ + CIR_SYSTEM_PERFORMANCE_CAPABILITIES = 0X40, + + + /* Data from this should only be considered valid if +* counter_info_version = 0x3 +* @starting_index: starting hardware chip id or -1 for the current hw +* chip id +*/ + CIR_PROCESSOR_BUS_UTILIZATION_ABC_LINKS = 0X50, + + /* Data from this should only be considered valid if +* counter_info_version = 0x3 +* @starting_index: starting hardware chip id or -1 for the current hw +* chip id +*/ + CIR_PROCESSOR_BUS_UTILIZATION_WXYZ_LINKS = 0X60, + + /* +* EXPANDED - the following are only avaliable if the CV_CM_EXPANDED +* bit is set from system_performace_capabilities. Enforcement is left +* to the hypervisor. 
+*/ + + /* Available if counter_info_version = 0x3 +* @starting_index: starting hardware chip id or -1 for the current hw +* chip id +*/ + CIR_PROCESSOR_BUS_UTILIZATION_GX_LINKS = 0X70, + + /* Available if counter_info_version = 0x3 +* @starting_index: starting hardware chip id or -1 for the current hw +* chip id +*/ + CIR_PROCESSOR_BUS_UTILIZATION_MC_LINKS = 0X80, + + /* Available if counter_info_version = 0x3 +* @starting_index: starting physical processor or -1 for the current +* physical processor +*/ + CIR_PROCESSOR_CONFIG = 0X90, + + /* Available if counter_info_version = 0x3 +* @starting_index: starting physical processor or -1 for the current +* physical processor +*/ + CIR_CURRENT_PROCESSOR_FREQUENCY = 0X91, + + /* Available if counter_info_version = 0x3 and = 0x7 +* @starting_index: starting physical processor or -1 for the current +* physical processor +*/ + CIR_PROCESSOR_CORE_UTILIZATION = 0X94, + + /* Available if
[PATCH v2 06/11] powerpc: add 24x7 interface header
24x7 (also called hv_24x7 or H_24X7) is an interface to obtain performance counters from the hypervisor. These counters do not have a fixed format/position and are instead documented in a 24x7 Catalog, which is provided by the hypervisor (that interface is also documented in this header). This method of obtaining performance counters from the hypervisor is intended to partially replace the gpci interface. Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.h | 239 1 file changed, 239 insertions(+) create mode 100644 arch/powerpc/perf/hv-24x7.h diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h new file mode 100644 index 000..bf079da --- /dev/null +++ b/arch/powerpc/perf/hv-24x7.h @@ -0,0 +1,239 @@ +#ifndef LINUX_POWERPC_PERF_HV_24X7_H_ +#define LINUX_POWERPC_PERF_HV_24X7_H_ + +#include linux/types.h + +struct hv_24x7_request { + /* PHYSICAL domains require enabling via phyp/hmc. */ +#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01 +#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE 0x03 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP 0x04 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE 0x05 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06 + __u8 performance_domain; + __u8 reserved[0x1]; + + /* bytes to read starting at @data_offset. must be a multiple of 8 */ + __be16 data_size; + + /* +* byte offset within the perf domain to read from. must be 8 byte +* aligned +*/ + __be32 data_offset; + + /* +* only valid for VIRTUAL_PROCESSOR domains, ignored for others. +* -1 means current partition only +* Enabling via phyp/hmc required for non--1 values. 0 forbidden +* unless requestor is 0. 
+*/ + __be16 starting_lpar_ix; + + /* +* Ignored when @starting_lpar_ix == -1 +* Ignored when @performance_domain is not VIRTUAL_PROCESSOR_* +* -1 means infinite or all +*/ + __be16 max_num_lpars; + + /* chip, core, or virtual processor based on @performance_domain */ + __be16 starting_ix; + __be16 max_ix; +} __packed; + +struct hv_24x7_request_buffer { + /* 0 - ? */ + /* 1 - ? */ +#define HV_24X7_IF_VERSION_CURRENT 0x01 + __u8 interface_version; + __u8 num_requests; + __u8 reserved[0xE]; + struct hv_24x7_request requests[]; +} __packed; + +struct hv_24x7_result_element { + __be16 lpar_ix; + + /* +* represents the core, chip, or virtual processor based on the +* request's @performance_domain +*/ + __be16 domain_ix; + + /* -1 if @performance_domain does not refer to a virtual processor */ + __be32 lpar_cfg_instance_id; + + /* size = @result_element_data_size of cointaining result. */ + __u8 element_data[]; +} __packed; + +struct hv_24x7_result { + __u8 result_ix; + + /* +* 0 = not all result elements fit into the buffer, additional requests +* required +* 1 = all result elements were returned +*/ + __u8 results_complete; + __be16 num_elements_returned; + + /* This is a copy of @data_size from the coresponding hv_24x7_request */ + __be16 result_element_data_size; + __u8 reserved[0x2]; + + /* WARNING: only valid for first result element due to variable sizes +* of result elements */ + /* struct hv_24x7_result_element[@num_elements_returned] */ + struct hv_24x7_result_element elements[]; +} __packed; + +struct hv_24x7_data_result_buffer { + /* See versioning for request buffer */ + __u8 interface_version; + + __u8 num_results; + __u8 reserved[0x1]; + __u8 failing_request_ix; + __be32 detailed_rc; + __be64 cec_cfg_instance_id; + __be64 catalog_version_num; + __u8 reserved2[0x8]; + /* WARNING: only valid for the first result due to variable sizes of +* results */ + struct hv_24x7_result results[]; /* [@num_results] */ +} __packed; + +/* From document 24x7 Event and Group 
Catalog Formats Proposal v0.14 */ +struct hv_24x7_catalog_page_0 { +#define HV_24X7_CATALOG_MAGIC 0x32347837 /* 24x7 in ASCII */ + __be32 magic; + __be32 length; /* In 4096 byte pages */ + __u8 reserved1[4]; + __be32 version; + __u8 build_time_stamp[16]; /* MMDDHHMMSS\0\0 */ + __u8 reserved2[32]; + __be16 schema_data_offs; /* in 4096 byte pages */ + __be16 schema_data_len; /* in 4096 byte pages */ + __be16 schema_entry_count; + __u8 reserved3[2]; + __be16 group_data_offs; /* in 4096 byte pages */ + __be16 group_data_len; /* in 4096 byte pages */ + __be16 group_entry_count; + __u8 reserved4[2]; + __be16
[PATCH v2 07/11] powerpc: add a shared interface to get gpci version and capabilities
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-common.c | 39 +++ arch/powerpc/perf/hv-common.h | 17 + 2 files changed, 56 insertions(+) create mode 100644 arch/powerpc/perf/hv-common.c create mode 100644 arch/powerpc/perf/hv-common.h diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c new file mode 100644 index 000..47e02b3 --- /dev/null +++ b/arch/powerpc/perf/hv-common.c @@ -0,0 +1,39 @@ +#include asm/io.h +#include asm/hvcall.h + +#include hv-gpci.h +#include hv-common.h + +unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) +{ + unsigned long r; + struct p { + struct hv_get_perf_counter_info_params params; + struct cv_system_performance_capabilities caps; + } __packed __aligned(sizeof(uint64_t)); + + struct p arg = { + .params = { + .counter_request = cpu_to_be32( + CIR_SYSTEM_PERFORMANCE_CAPABILITIES), + .starting_index = cpu_to_be32(-1), + .counter_info_version_in = 0, + } + }; + + r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), sizeof(arg)); + + if (r) + return r; + + pr_devel(capability_mask: 0x%x\n, arg.caps.capability_mask); + + caps-version = arg.params.counter_info_version_out; + caps-collect_privileged = !!arg.caps.perf_collect_privileged; + caps-ga = !!(arg.caps.capability_mask CV_CM_GA); + caps-expanded = !!(arg.caps.capability_mask CV_CM_EXPANDED); + caps-lab = !!(arg.caps.capability_mask CV_CM_LAB); + + return r; +} diff --git a/arch/powerpc/perf/hv-common.h b/arch/powerpc/perf/hv-common.h new file mode 100644 index 000..7e615bd --- /dev/null +++ b/arch/powerpc/perf/hv-common.h @@ -0,0 +1,17 @@ +#ifndef LINUX_POWERPC_PERF_HV_COMMON_H_ +#define LINUX_POWERPC_PERF_HV_COMMON_H_ + +#include linux/types.h + +struct hv_perf_caps { + u16 version; + u16 collect_privileged:1, + ga:1, + expanded:1, + lab:1, + unused:12; +}; + +unsigned long hv_perf_caps_get(struct hv_perf_caps *caps); + +#endif -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org 
https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 08/11] powerpc/perf: add support for the hv gpci (get performance counter info) interface
This provides a basic link between perf and hv_gpci. Notably, it does not yet support transactions and does not list any events (they can still be manually composed). Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-gpci.c | 290 1 file changed, 290 insertions(+) create mode 100644 arch/powerpc/perf/hv-gpci.c diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c new file mode 100644 index 000..1f5d96d --- /dev/null +++ b/arch/powerpc/perf/hv-gpci.c @@ -0,0 +1,290 @@ +/* + * Hypervisor supplied gpci (get performance counter info) performance + * counter support + * + * Author: Cody P Schafer c...@linux.vnet.ibm.com + * Copyright 2014 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#define pr_fmt(fmt) hv-gpci: fmt + +#include linux/init.h +#include linux/perf_event.h +#include asm/firmware.h +#include asm/hvcall.h +#include asm/io.h + +#include hv-gpci.h +#include hv-common.h + +PMU_RANGE_ATTR(request, config, 0, 31); /* u32 */ +PMU_RANGE_ATTR(starting_index, config, 32, 63); /* u32 */ +PMU_RANGE_ATTR(secondary_index, config1, 0, 15); /* u16 */ +PMU_RANGE_ATTR(counter_info_version, config1, 16, 23); /* u8 */ +PMU_RANGE_ATTR(length, config1, 24, 31); /* u8, bytes of data (1-8) */ +PMU_RANGE_ATTR(offset, config1, 32, 63); /* u32, byte offset */ + +static struct attribute *format_attrs[] = { + format_attr_request.attr, + format_attr_starting_index.attr, + format_attr_secondary_index.attr, + format_attr_counter_info_version.attr, + + format_attr_offset.attr, + format_attr_length.attr, + NULL, +}; + +static struct attribute_group format_group = { + .name = format, + .attrs = format_attrs, +}; + +#define HV_CAPS_ATTR(_name, _format) \ +static ssize_t _name##_show(struct device *dev,\ + struct 
device_attribute *attr, \ + char *page) \ +{ \ + struct hv_perf_caps caps; \ + unsigned long hret = hv_perf_caps_get(caps); \ + if (hret) \ + return -EIO;\ + \ + return sprintf(page, _format, caps._name); \ +} \ +static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name) + +static ssize_t kernel_version_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, 0x%x\n, COUNTER_INFO_VERSION_CURRENT); +} + +DEVICE_ATTR_RO(kernel_version); +HV_CAPS_ATTR(version, 0x%x\n); +HV_CAPS_ATTR(ga, %d\n); +HV_CAPS_ATTR(expanded, %d\n); +HV_CAPS_ATTR(lab, %d\n); +HV_CAPS_ATTR(collect_privileged, %d\n); + +static struct attribute *interface_attrs[] = { + dev_attr_kernel_version.attr, + hv_caps_attr_version.attr, + hv_caps_attr_ga.attr, + hv_caps_attr_expanded.attr, + hv_caps_attr_lab.attr, + hv_caps_attr_collect_privileged.attr, + NULL, +}; + +static struct attribute_group interface_group = { + .name = interface, + .attrs = interface_attrs, +}; + +static const struct attribute_group *attr_groups[] = { + format_group, + interface_group, + NULL, +}; + +#define GPCI_MAX_DATA_BYTES \ + (1024 - sizeof(struct hv_get_perf_counter_info_params)) + +static unsigned long single_gpci_request(u32 req, u32 starting_index, + u16 secondary_index, u8 version_in, u32 offset, u8 length, + u64 *value) +{ + unsigned long ret; + size_t i; + u64 count; + + struct { + struct hv_get_perf_counter_info_params params; + uint8_t bytes[GPCI_MAX_DATA_BYTES]; + } __packed __aligned(sizeof(uint64_t)) arg = { + .params = { + .counter_request = cpu_to_be32(req), + .starting_index = cpu_to_be32(starting_index), + .secondary_index = cpu_to_be16(secondary_index), + .counter_info_version_in = version_in, + } + }; + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), sizeof(arg)); + if (ret) { + pr_devel(hcall failed: 0x%lx\n, ret); + return ret; + } + + /* +* we verify offset and length are within the zeroed buffer
[PATCH v2 09/11] powerpc/perf: add support for the hv 24x7 interface
This provides a basic interface between hv_24x7 and perf. Similar to the one provided for gpci, it lacks transaction support and does not list any events. Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.c | 491 1 file changed, 491 insertions(+) create mode 100644 arch/powerpc/perf/hv-24x7.c diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c new file mode 100644 index 000..13de140 --- /dev/null +++ b/arch/powerpc/perf/hv-24x7.c @@ -0,0 +1,491 @@ +/* + * Hypervisor supplied 24x7 performance counter support + * + * Author: Cody P Schafer c...@linux.vnet.ibm.com + * Copyright 2014 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) hv-24x7: fmt + +#include linux/perf_event.h +#include linux/module.h +#include linux/slab.h +#include asm/firmware.h +#include asm/hvcall.h +#include asm/io.h + +#include hv-24x7.h +#include hv-common.h + +/* + * TODO: Merging events: + * - Think of the hcall as an interface to a 4d array of counters: + * - x = domains + * - y = indexes in the domain (core, chip, vcpu, node, etc) + * - z = offset into the counter space + * - w = lpars (guest vms, logical partitions) + * - A single request is: x,y,y_last,z,z_last,w,w_last + * - this means we can retrieve a rectangle of counters in y,z for a single x. 
+ * + * - Things to consider (ignoring w): + * - input cost_per_request = 16 + * - output cost_per_result(ys,zs) = 8 + 8 * ys + ys * zs + * - limited number of requests per hcall (must fit into 4K bytes) + * - 4k = 16 [buffer header] - 16 [request size] * request_count + * - 255 requests per hcall + * - sometimes it will be more efficient to read extra data and discard + */ + +PMU_RANGE_ATTR(domain, config, 0, 3); /* u3 0-6, one of HV_24X7_PERF_DOMAIN */ +PMU_RANGE_ATTR(starting_index, config, 16, 31); /* u16 */ +PMU_RANGE_ATTR(offset, config, 32, 63); /* u32, see data_offset */ +PMU_RANGE_ATTR(lpar, config1, 0, 15); /* u16 */ + +PMU_RANGE_RESV(reserved1, config, 4, 15); +PMU_RANGE_RESV(reserved2, config1, 16, 63); +PMU_RANGE_RESV(reserved3, config2, 0, 63); + +static struct attribute *format_attrs[] = { + format_attr_domain.attr, + format_attr_offset.attr, + format_attr_starting_index.attr, + format_attr_lpar.attr, + NULL, +}; + +static struct attribute_group format_group = { + .name = format, + .attrs = format_attrs, +}; + +/* + * read_offset_data - copy data from one buffer to another while treating the + *source buffer as a small view on the total avaliable + *source data. + * + * @dest: buffer to copy into + * @dest_len: length of @dest in bytes + * @requested_offset: the offset within the source data we want. Must be 0 + * @src: buffer to copy data from + * @src_len: length of @src in bytes + * @source_offset: the offset in the sorce data that (src,src_len) refers to. + * Must be 0 + * + * returns the number of bytes copied. + * + * '.' areas in d are written to. 
+ * + * u + * x wv z + * d |.| + * s |--| + * + * u + * x w z v + * d |--| + * s |--| + * + * x wu,z,v + * d || + * s |--| + * + * x,wu,v,z + * d |--| + * s |--| + * + * xu + * wvz + * d || + * s |--| + * + * x z w v + * d|--| + * s |--| + * + * x = source_offset + * w = requested_offset + * z = source_offset + src_len + * v = requested_offset + dest_len + * + * w_offset_in_s = w - x = requested_offset - source_offset + * z_offset_in_s = z - x = src_len + * v_offset_in_s = v - x = request_offset + dest_len - src_len + * u_offset_in_s = min(z_offset_in_s, v_offset_in_s) + * + * copy_len = u_offset_in_s - w_offset_in_s = min(z_offset_in_s, v_offset_in_s) + * - w_offset_in_s + */ +static ssize_t read_offset_data(void *dest, size_t dest_len, + loff_t requested_offset, void *src, + size_t src_len, loff_t source_offset) +{ + size_t w_offset_in_s = requested_offset - source_offset; + size_t z_offset_in_s = src_len; + size_t v_offset_in_s = requested_offset + dest_len - src_len; + size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s); + size_t copy_len = u_offset_in_s - w_offset_in_s; + + if (requested_offset 0 || source_offset 0) + return
[PATCH v2 11/11] powerpc/perf/hv_{gpci, 24x7}: add documentation of device attributes
gpci and 24x7 expose some device specific attributes. Add some documentation for them. Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- .../testing/sysfs-bus-event_source-devices-hv_24x7 | 22 +++ .../testing/sysfs-bus-event_source-devices-hv_gpci | 43 ++ 2 files changed, 65 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 new file mode 100644 index 000..13474d3 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 @@ -0,0 +1,22 @@ +What: /sys/bus/event_source/devices/hv_24x7/interface/catalog +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + Provides access to the binary 24x7 catalog provided by the + hypervisor on POWER7 and 8 systems. This catalog lists events + available from the powerpc hv_24x7 pmu. Its format is + documented in arch/powerpc/perf/hv-24x7.h. + +What: /sys/bus/event_source/devices/hv_24x7/interface/catalog_length +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + A number equal to the length in bytes of the catalog. This is + also extractable from the provided binary catalog sysfs entry. + +What: /sys/bus/event_source/devices/hv_24x7/interface/catalog_version +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + Exposes the version field of the 24x7 catalog. This is also + extractable from the provided binary catalog sysfs entry. 
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci new file mode 100644 index 000..3fa58c2 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -0,0 +1,43 @@ +What: /sys/bus/event_source/devices/hv_gpci/interface/collect_privileged +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + '0' if the hypervisor is configured to forbid access to event + counters being accumulated by other guests and to physical + domain event counters. + '1' if that access is allowed. + +What: /sys/bus/event_source/devices/hv_gpci/interface/ga +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + 0 or 1. Indicates whether we have access to GA events (listed + in arch/powerpc/perf/hv-gpci.h). + +What: /sys/bus/event_source/devices/hv_gpci/interface/expanded +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + 0 or 1. Indicates whether we have access to EXPANDED events (listed + in arch/powerpc/perf/hv-gpci.h). + +What: /sys/bus/event_source/devices/hv_gpci/interface/lab +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + 0 or 1. Indicates whether we have access to LAB events (listed + in arch/powerpc/perf/hv-gpci.h). + +What: /sys/bus/event_source/devices/hv_gpci/interface/version +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + A number indicating the version of the gpci interface that the + hypervisor reports supporting. + +What: /sys/bus/event_source/devices/hv_gpci/interface/kernel_version +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + A number indicating the latest version of the gpci interface + that the kernel is aware of. -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 10/11] powerpc/perf: add kconfig option for hypervisor provided counters
Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/Makefile | 2 ++ arch/powerpc/platforms/Kconfig.cputype | 6 ++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 60d71ee..f9c083a 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -11,5 +11,7 @@ obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o +obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o + obj-$(CONFIG_PPC64)+= $(obj64-y) obj-$(CONFIG_PPC32)+= $(obj32-y) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 434fda3..dcc67cd 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -364,6 +364,12 @@ config PPC_PERF_CTRS help This enables the powerpc-specific perf_event back-end. +config HV_PERF_CTRS + def_bool y + depends on PERF_EVENTS PPC_HAVE_PMU_SUPPORT + help + Enable access to perf counters provided by the hypervisor + config SMP depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x bool Symmetric multi-processing support -- 1.8.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 10/11] powerpc/perf: add kconfig option for hypervisor provided counters
On Fri, 2014-02-14 at 14:02 -0800, Cody P Schafer wrote: Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/Makefile | 2 ++ arch/powerpc/platforms/Kconfig.cputype | 6 ++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 60d71ee..f9c083a 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -11,5 +11,7 @@ obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o +obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o + obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 434fda3..dcc67cd 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -364,6 +364,12 @@ config PPC_PERF_CTRS help This enables the powerpc-specific perf_event back-end. +config HV_PERF_CTRS + def_bool y + depends on PERF_EVENTS PPC_HAVE_PMU_SUPPORT + help + Enable access to perf counters provided by the hypervisor Please don't add default-y stuff that is platform-specific, and definitely point out that platform dependency in the config description -- I have to look elsewhere in the patchset to determine that this is for Power Hypervisor. PPC_HAVE_PMU_SUPPORT is enabled by all 6xx builds, even for hardware like e300 that doesn't have PMU at all (it has the FSL embedded perfmon instead), much less this hv interface. And yes, PPC_PERF_CTRS has the same problem and should be fixed. :-) -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Anyone using SysRQ key sequences on console serial port ?
Hi, I tried using the SysRq hotkey sequence on a serial console - 3.11.0-5-powerpc-e500mc system, by issuing a break and the system immediately wedges after displaying SysRQ : HELP : using both Putty and Teraterm terminal emulators. I know the system is dead because my ssh sessions stopped too. http://en.wikipedia.org/wiki/Magic_SysRq_key . -- *Regards,* * John.* *--* *o* Energy-efficiency is #1 reason data centers look to expand. -- Digital Realty Trust *o* Green Data Centers spending to increase 300% worldwide by 2016. -- Pike Research *o* Data Centers have become as vital to the functioning of society as power stations. -- The Economist ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 10/11] powerpc/perf: add kconfig option for hypervisor provided counters
On Fri, Feb 14, 2014 at 04:32:13PM -0600, Scott Wood wrote: On Fri, 2014-02-14 at 14:02 -0800, Cody P Schafer wrote: Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/Makefile | 2 ++ arch/powerpc/platforms/Kconfig.cputype | 6 ++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 60d71ee..f9c083a 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -11,5 +11,7 @@ obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o +obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o + obj-$(CONFIG_PPC64)+= $(obj64-y) obj-$(CONFIG_PPC32)+= $(obj32-y) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 434fda3..dcc67cd 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -364,6 +364,12 @@ config PPC_PERF_CTRS help This enables the powerpc-specific perf_event back-end. +config HV_PERF_CTRS + def_bool y + depends on PERF_EVENTS PPC_HAVE_PMU_SUPPORT + help + Enable access to perf counters provided by the hypervisor Please don't add default-y stuff that is platform-specific, and definitely point out that platform dependency in the config description -- I have to look elsewhere in the patchset to determine that this is for Power Hypervisor. PPC_HAVE_PMU_SUPPORT is enabled by all 6xx builds, even for hardware like e300 that doesn't have PMU at all (it has the FSL embedded perfmon instead), much less this hv interface. And yes, PPC_PERF_CTRS has the same problem and should be fixed. :-) Yep, I just based this one on what PPC_PERF_CTRS was doing. How about the following: +config HV_PERF_CTRS + bool Perf Hypervisor supplied counters + default y + depends on PERF_EVENTS PPC_HAVE_PMU_SUPPORT PPC_PSERIES + help + Enable access to hypervisor supplied counters in perf. 
Currently, + this enables code that uses the hcall GetPerfCounterInfo and 24x7 + interfaces to retrieve counters. GPCI exists on Power 6 and later + systems. 24x7 is available on Power 8 systems. + + If unsure, select Y. And relocated to arch/powerpc/platforms/Kconfig (as this isn't really strictly cputype related). ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/6] PCI, acpiphp: Use list_for_each_entry() for bus traversal
Does it conflict with anything currently in linux-next (the linux-next branch of linux-pm.git in particular)? Hi Rafael, I applied this to your linux-next branch successfully. No conflicts found. Good. :-) Please feel free to add my ACK to it. Thanks very much! ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/6] PCI, acpiphp: Use list_for_each_entry() for bus traversal
On 2014/2/15 2:23, Bjorn Helgaas wrote: On Thu, Feb 13, 2014 at 09:13:58PM +0800, Yijing Wang wrote: Replace list_for_each() + pci_bus_b() with the simpler list_for_each_entry(). Signed-off-by: Yijing Wang wangyij...@huawei.com I applied all six of these (please include a 0/6 cover letter in the future; that's a nice place to note that I applied things) to pci/list-for-each-entry for v3.15, thanks! Thanks, I will add a cover letter next time, sorry. --- drivers/pci/hotplug/acpiphp_glue.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index cd929ae..aee6a0a 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -450,7 +450,7 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) */ static unsigned char acpiphp_max_busnr(struct pci_bus *bus) { -struct list_head *tmp; +struct pci_bus *tmp; unsigned char max, n; /* @@ -463,8 +463,8 @@ static unsigned char acpiphp_max_busnr(struct pci_bus *bus) */ max = bus-busn_res.start; -list_for_each(tmp, bus-children) { -n = pci_bus_max_busnr(pci_bus_b(tmp)); +list_for_each_entry(tmp, bus-children, node) { +n = pci_bus_max_busnr(tmp); if (n max) max = n; } -- 1.7.1 -- Thanks! Yijing ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] of: search the best compatible match first in __of_match_node()
Rob Herring robherri...@gmail.com wrote on 02/15/2014 02:53:40 AM: From: Rob Herring robherri...@gmail.com To: Kevin Hao haoke...@gmail.com Cc: devicet...@vger.kernel.org devicet...@vger.kernel.org, linuxppc-dev linuxppc-dev@lists.ozlabs.org, Sebastian Hesselbarth sebastian.hesselba...@gmail.com, Stephen N Chivers schiv...@csc.com.au, Grant Likely grant.lik...@linaro.org, Rob Herring robh...@kernel.org, Kumar Gala ga...@codeaurora.org Date: 02/15/2014 02:53 AM Subject: Re: [PATCH 2/2] of: search the best compatible match first in __of_match_node() On Thu, Feb 13, 2014 at 11:22 PM, Kevin Hao haoke...@gmail.com wrote: Currently, of_match_node compares each given match against all node's compatible strings with of_device_is_compatible. To achieve multiple compatible strings per node with ordering from specific to generic, this requires given matches to be ordered from specific to generic. For most of the drivers this is not true and also an alphabetical ordering is more sane there. Therefore, this patch introduces a function to match each of the node's compatible strings against all given compatible matches without type and name first, before checking the next compatible string. This implies that node's compatibles are ordered from specific to generic while given matches can be in any order. If we fail to find such a match entry, then fall-back to the old method in order to keep compatibility. Cc: Sebastian Hesselbarth sebastian.hesselba...@gmail.com Signed-off-by: Kevin Hao haoke...@gmail.com Looks good to me. I'll put this in next for a few days. I'd really like to see some acks and tested-by's before sending to Linus. Tested-by: Stephen Chivers schiv...@csc.com I have tested the patch for the four PowerPC platforms available to me. They are: MPC8349_MITXGP - Works. MVME5100- Works. MVME4100 - Works. SAM440EP - Works. The MPC8349_MITXGP platform is present in Linux-3.13 and previous releases. The MVME5100 is a revived platform that is in Linux-3.14-rc2. 
The MVME4100 is a work in progress and is the 85xx platform that the original failure report was for. The SAM440EP is present in Linux-3.13 and previous releases. The MPC8349_MITXGP is one of the 49 DTS files with the serial compatible: compatible = fsl,ns16550, ns16550; For the SAM440EP, the patch improves things from Linux-3.13. In that release the same sort of problem as reported in: Linux-3.14-rc2: Order of serial node compatibles in DTS files. occurs with slightly different symptoms: of_serial ef600300.serial: Port found of_serial ef600300.serial: Port found of_serial ef600300.serial: Unknown serial port found, ignored of_serial ef600400.serial: Port found of_serial ef600400.serial: Port found of_serial ef600400.serial: Unknown serial port found, ignored of_serial ef600500.serial: Port found of_serial ef600500.serial: Port found of_serial ef600500.serial: Unknown serial port found, ignored of_serial ef600600.serial: Port found of_serial ef600600.serial: Port found of_serial ef600600.serial: Unknown serial port found, ignored The SAM440EP has a IBM/AMCC 440EP PowerPC CPU and so simply has ns16550 as its serial compatible. We could be a bit more strict here and fallback to the old matching if the match table has any entries with name or type. I don't think that should be necessary though. Rob Stephen Chivers, CSC Australia Pty. Ltd. 
--- drivers/of/base.c | 43 ++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index ba195fbce4c6..10b51106c854 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -730,13 +730,49 @@ out: } EXPORT_SYMBOL(of_find_node_with_property); +static const struct of_device_id * +of_match_compatible(const struct of_device_id *matches, + const struct device_node *node) +{ + const char *cp; + int cplen, l; + const struct of_device_id *m; + + cp = __of_get_property(node, compatible, cplen); + while (cp (cplen 0)) { + m = matches; + while (m-name[0] || m-type[0] || m-compatible[0]) { + /* Only match for the entries without type and name */ + if (m-name[0] || m-type[0] || + of_compat_cmp(m-compatible, cp, +strlen(m-compatible))) + m++; + else + return m; + } + + /* Get node's next compatible string */ + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return NULL; +} + static const struct of_device_id *__of_match_node(const struct of_device_id *matches, const