Re: [PATCH 5/8] powerpc: add 24x7 interface header
On Thu, 2014-16-01 at 23:53:51 UTC, Cody P Schafer wrote: > 24x7 (also called hv_24x7 or H_24X7) is an interface to obtain > performance counters from the hypervisor. These counters do not have a > fixed format/position and are instead documented in a "24x7 Catalog", > which is provided by the hypervisor (that interface is also documented > in this header). > > This method of obtaining performance counters from the hypervisor is > intended to partially replace the gpci interface. Same comments as for the previous patch. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 6/8] powerpc/perf: add support for the hv gpci (get performance counter info) interface
On Thu, 2014-16-01 at 23:53:52 UTC, Cody P Schafer wrote: > This provides a basic link between perf and hv_gpci. Notably, it does > not yet support transactions and does not list any events (they can > still be manually composed). What are the plans for listing? The manual compose is nice but pretty hairy to use in practice I would think. > diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c > new file mode 100644 > index 000..31d9d59 > --- /dev/null > +++ b/arch/powerpc/perf/hv-gpci.c > @@ -0,0 +1,235 @@ > +/* > + * Hypervisor supplied "gpci" ("get performance counter info") performance > + * counter support > + * > + * Author: Cody P Schafer > + * Copyright 2014 IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > +#define pr_fmt(fmt) "hv-gpci: " fmt > + > +#include > +#include > +#include > +#include > +#include > +#include Needed? > +/* See arch/powerpc/include/asm/hv_gpci.h for details on the hcall interface > */ > + > +PMU_RANGE_ATTR(request, config, 0, 31); /* u32 */ > +PMU_RANGE_ATTR(starting_index, config, 32, 63); /* u32 */ > +PMU_RANGE_ATTR(secondary_index, config1, 0, 15); /* u16 */ > +PMU_RANGE_ATTR(counter_info_version, config1, 16, 23); /* u8 */ > +PMU_RANGE_ATTR(length, config1, 24, 31); /* u8, bytes of data (1-8) */ > +PMU_RANGE_ATTR(offset, config1, 32, 63); /* u32, byte offset */ > + > +static struct attribute *format_attr[] = { > + &format_attr_request.attr, > + &format_attr_starting_index.attr, > + &format_attr_secondary_index.attr, > + &format_attr_counter_info_version.attr, > + Lonley blank line. 
> + &format_attr_offset.attr, > + &format_attr_length.attr, > + NULL, > +}; > + > +static struct attribute_group format_group = { > + .name = "format", > + .attrs = format_attr, > +}; > + > +static const struct attribute_group *attr_groups[] = { > + &format_group, > + NULL, > +}; > + > +static unsigned long single_gpci_request(u32 req, u32 starting_index, > + u16 secondary_index, u8 version_in, u32 offset, u8 length, > + u64 *value) Passing the event and extracting the values in here would be neater IMHO. > +{ > + unsigned long ret; > + size_t i; > + u64 count; > + > + struct { > + struct hv_get_perf_counter_info_params params; > + union { > + union h_gpci_cvs data; > + uint8_t bytes[sizeof(union h_gpci_cvs)]; > + }; > + } arg = { > + .params = { > + .counter_request = cpu_to_be32(req), > + .starting_index = cpu_to_be32(starting_index), > + .secondary_index = cpu_to_be16(secondary_index), > + .counter_info_version_in = version_in, > + } > + }; > + > + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, > + virt_to_phys(&arg), sizeof(arg)); > + if (ret) { > + pr_devel("hcall failed: 0x%lx\n", ret); > + return ret; > + } > + > + /* > + * we verify offset and length are within the zeroed buffer at event > + * init. 
> + */ > + count = 0; > + for (i = offset; i < offset + length; i++) > + count |= arg.bytes[i] << (i - offset); > + > + *value = count; > + return ret; > +} > + > +static u64 h_gpci_get_value(struct perf_event *event) > +{ > + u64 count; > + unsigned long ret = single_gpci_request(event_get_request(event), > + event_get_starting_index(event), > + event_get_secondary_index(event), > + event_get_counter_info_version(event), > + event_get_offset(event), > + event_get_length(event), > + &count); > + if (ret) > + return 0; > + return count; > +} > + > +static void h_gpci_event_update(struct perf_event *event) > +{ > + s64 prev; > + u64 now = h_gpci_get_value(event); > + prev = local64_xchg(&event->hw.prev_count, now); > + local64_add(now - prev, &event->count); > +} > + > +static void h_gpci_event_start(struct perf_event *event, int flags) > +{ > + local64_set(&event->hw.prev_count, h_gpci_get_value(event)); > + perf_swevent_start_hrtimer(event); > +} > + > +static void h_gpci_event_stop(struct perf_event *event, int flags) > +{ > + perf_swevent_cancel_hrtimer(event); > + h_gpci_event_update(event); > +} > + > +static int h_gpci_event_add(struct perf_event *event, int flags) > +{ > + if (flags & PERF_EF_START) > + h_gpci_event_start(event, flags); > + > +
Re: [PATCH 4/8] powerpc: add hv_gpci interface header
On Thu, 2014-16-01 at 23:53:50 UTC, Cody P Schafer wrote: > "H_GetPerformanceCounterInfo" (refered to as hv_gpci or just gpci from > here on) is an interface to retrieve specific performance counters and > other data from the hypervisor. All outputs have a fixed format (and > are represented as structs in this patch). So how much of this are we actually using? A lot of these seem to be only used in the union at the bottom of this file, and not touched elsewhere - or am I missing something subtle? Some of it doesn't seem to be used at all? > diff --git a/arch/powerpc/include/asm/hv_gpci.h > b/arch/powerpc/include/asm/hv_gpci.h Any reason this can't just live in arch/powerpc/perf ? > +++ b/arch/powerpc/include/asm/hv_gpci.h > @@ -0,0 +1,490 @@ > +#ifndef LINUX_POWERPC_UAPI_HV_GPCI_H_ > +#define LINUX_POWERPC_UAPI_HV_GPCI_H_ > + > +#include > + > +/* From the document "H_GetPerformanceCounterInfo Interface" v1.06, paritialy > + * updated with v1.07 */ Is that public? > + > +/* H_GET_PERF_COUNTER_INFO argument */ > +struct hv_get_perf_counter_info_params { > + __be32 counter_request; /* I */ > + __be32 starting_index; /* IO */ > + __be16 secondary_index; /* IO */ > + __be16 returned_values; /* O */ > + __be32 detail_rc; /* O, "only for 32bit clients" */ > + > + /* > + * O, size each of counter_value element in bytes, only set for version > + * >= 0x3 > + */ > + __be16 cv_element_size; > + > + /* I, funny if version < 0x3 */ Funny how? Or better still, do we only support operating on some minimum sane version of the API? > + __u8 counter_info_version_in; > + > + /* O, funny if version < 0x3 */ > + __u8 counter_info_version_out; > + __u8 reserved[0xC]; > + __u8 counter_value[]; > +} __packed; > + > +/* 8 => power8 (1.07) > + * 6 => TLBIE (1.07) > + * 5 => (1.05) > + * 4 => ? > + * 3 => ? > + * 2 => v7r7m0.phyp (?) > + * 1 => v7r6m0.phyp (?) > + * 0 => v7r{2,3,4}m0.phyp (?) > + */ I think this is a mapping of version numbers to firmware releases, it should say so. 
> +#define COUNTER_INFO_VERSION_CURRENT 0x8 > + > +/* these determine the counter_value[] layout and the meaning of > starting_index > + * and secondary_index */ Needs: leading capital, full stop, block comment. > +enum counter_info_requests { > + > + /* GENERAL */ > + > + /* @starting_index: "starting" physical processor index or -1 for Why '"starting"' ? > + * current phyical processor. Data is only collected > + * for the processors' "primary" thread. > + * @secondary_index: unused This seems to be true in all cases at least for this enum, can we drop it? > + */ > + CIR_dispatch_timebase_by_processor = 0x10, Any reason for the weird capitialisation? You've obviously learnt the noCamelCase rule, but this is still a bit odd :) > + > + /* @starting_index: starting partition id or -1 for the current logical > + * partition (virtual machine). > + * @secondary_index: unused > + */ > + CIR_entitled_capped_uncapped_donated_idle_timebase_by_partition = 0x20, > + > + /* @starting_index: starting partition id or -1 for the current logical > + * partition (virtual machine). > + * @secondary_index: unused > + */ > + CIR_run_instructions_run_cycles_by_partition = 0x30, > + > + /* @starting_index: must be -1 (to refer to the current partition) > + * @secondary_index: unused > + */ > + CIR_system_performance_capabilities = 0x40, > + > + > + /* Data from this should only be considered valid if > + * counter_info_version >= 0x3 > + * @starting_index: starting hardware chip id or -1 for the current hw > + * chip id > + * @secondary_index: unused > + */ > + CIR_processor_bus_utilization_abc_links = 0x50, > + > + /* Data from this should only be considered valid if > + * counter_info_version >= 0x3 > + * @starting_index: starting hardware chip id or -1 for the current hw > + * chip id > + * @secondary_index: unused > + */ > + CIR_processor_bus_utilization_wxyz_links = 0x60, > + > + > + /* EXPANDED */ ?? These are only available if you have the DLC ? 
> + /* Avaliable if counter_info_version >= 0x3 > + * @starting_index: starting hardware chip id or -1 for the current hw > + * chip id > + * @secondary_index: unused > + */ > + CIR_processor_bus_utilization_gx_links = 0x70, > + > + /* Avaliable if counter_info_version >= 0x3 > + * @starting_index: starting hardware chip id or -1 for the current hw > + * chip id > + * @secondary_index: unused > + */ > + CIR_processor_bus_utilization_mc_links = 0x80, > + > + /* Avaliable if counter_info_version >= 0x3 > + * @starting_index: starting physical processor or -1 for the current
Re: [PATCH 3/8] powerpc: add hvcalls for 24x7 and gpci (get performance counter info)
On Thu, 2014-16-01 at 23:53:49 UTC, Cody P Schafer wrote: > Signed-off-by: Cody P Schafer > --- > arch/powerpc/include/asm/hvcall.h | 6 +- > 1 file changed, 5 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/hvcall.h > b/arch/powerpc/include/asm/hvcall.h > index d8b600b..48d6efa 100644 > --- a/arch/powerpc/include/asm/hvcall.h > +++ b/arch/powerpc/include/asm/hvcall.h > @@ -269,11 +269,15 @@ > #define H_COP0x304 > #define H_GET_MPP_X 0x314 > #define H_SET_MODE 0x31C > -#define MAX_HCALL_OPCODE H_SET_MODE > +#define H_GET_24X7_CATALOG_PAGE 0xF078 > +#define H_GET_24X7_DATA 0xF07C > +#define H_GET_PERF_COUNTER_INFO 0xF080 Ugh, why the hell did they put them up there. > +#define MAX_HCALL_OPCODE H_GET_PERF_COUNTER_INFO We have an array which is sized based on this, which is unpleasant. I think you're better off putting these below in the platform specific section, and leaving MAX_HCALL_OPCODE alone. The only downside is you can't use the hcall tracing to see them. > /* Platform specific hcalls, used by KVM */ > #define H_RTAS 0xf000 cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/8] perf core: export swevent hrtimer helpers
Peter, Ingo, can we get your ACK on this please? cheers On Thu, 2014-16-01 at 23:53:48 UTC, Cody P Schafer wrote: > Export the swevent hrtimer helpers currently only used in events/core.c > to allow the addition of architecture specific sw-like pmus. > Signed-off-by: Cody P Schafer > --- > include/linux/perf_event.h | 5 - > kernel/events/core.c | 8 > 2 files changed, 8 insertions(+), 5 deletions(-) > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index 8646e33..c5bc71a 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -558,7 +558,10 @@ extern void perf_pmu_migrate_context(struct pmu *pmu, > int src_cpu, int dst_cpu); > extern u64 perf_event_read_value(struct perf_event *event, >u64 *enabled, u64 *running); > - > +extern void perf_swevent_init_hrtimer(struct perf_event *event); > +extern void perf_swevent_start_hrtimer(struct perf_event *event); > +extern void perf_swevent_cancel_hrtimer(struct perf_event *event); > +extern int perf_swevent_event_idx(struct perf_event *event); > > struct perf_sample_data { > u64 type; > diff --git a/kernel/events/core.c b/kernel/events/core.c > index f574401..d881d1e 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -5801,7 +5801,7 @@ static int perf_swevent_init(struct perf_event *event) > return 0; > } > > -static int perf_swevent_event_idx(struct perf_event *event) > +int perf_swevent_event_idx(struct perf_event *event) > { > return 0; > } > @@ -6030,7 +6030,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct > hrtimer *hrtimer) > return ret; > } > > -static void perf_swevent_start_hrtimer(struct perf_event *event) > +void perf_swevent_start_hrtimer(struct perf_event *event) > { > struct hw_perf_event *hwc = &event->hw; > s64 period; > @@ -6052,7 +6052,7 @@ static void perf_swevent_start_hrtimer(struct > perf_event *event) > HRTIMER_MODE_REL_PINNED, 0); > } > > -static void perf_swevent_cancel_hrtimer(struct perf_event *event) > +void 
perf_swevent_cancel_hrtimer(struct perf_event *event) > { > struct hw_perf_event *hwc = &event->hw; > > @@ -6064,7 +6064,7 @@ static void perf_swevent_cancel_hrtimer(struct > perf_event *event) > } > } > > -static void perf_swevent_init_hrtimer(struct perf_event *event) > +void perf_swevent_init_hrtimer(struct perf_event *event) > { > struct hw_perf_event *hwc = &event->hw; > > -- > 1.8.5.2 > > ___ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev > > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/8] perf: add PMU_RANGE_ATTR() helper for use by sw-like pmus
On Thu, 2014-16-01 at 23:53:47 UTC, Cody P Schafer wrote: > Add PMU_RANGE_ATTR() and PMU_RANGE_RESV() (for reserved areas) which > generate functions to extract the relevant bits from > event->attr.config{,1,2} for use by sw-like pmus where the > 'config{,1,2}' values don't map directly to hardware registers. This is neat. The split of the macros is a bit weird, i.e. PMU_RANGE_RESV() doesn't really do what its name suggests. I think you want one macro which creates the accessors, with a name that reflects that - yeah I can't think of a good one right now, but "event" should probably be in there because that's what it operates on. Having a macro for the reserved regions is good, but you MUST actually check that the reserved regions are zero. Otherwise you are permitting your caller to pass junk in there and you then can't unreserve them in a future version of the API. So I think a macro that gives you a special reserved region routine would be good, so you can write something like: if (event_check_reserved1() || event_check_reserved2()) return -EINVAL; cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2] powerpc: Add cpu family documentation
This patch adds some documentation on the different cpu families supported by arch/powerpc. Signed-off-by: Michael Ellerman --- v2: Reworked formatting to avoid wrapping. Fixed up Freescale details. Documentation/powerpc/cpu_families.txt | 227 + 1 file changed, 227 insertions(+) create mode 100644 Documentation/powerpc/cpu_families.txt diff --git a/Documentation/powerpc/cpu_families.txt b/Documentation/powerpc/cpu_families.txt new file mode 100644 index 000..fa4f159 --- /dev/null +++ b/Documentation/powerpc/cpu_families.txt @@ -0,0 +1,227 @@ +CPU Families + + +This document tries to summarise some of the different cpu families that exist +and are supported by arch/powerpc. + + +Book3S (aka sPAPR) +-- + + - Hash MMU + - Mix of 32 & 64 bit + + +--+ ++ + | Old POWER | ---> | RS64 (threads) | + +--+ ++ + | + | + v + +--+ ++ +---+ + | 601 | ---> | 603 | -> | 740 | + +--+ ++ +---+ + | | + | | + v v + +--+ ++ +---+ + | 604 | |750 (G3)| -> | 750CX | + +--+ ++ +---+ + | | | + | | | + v v v + +--+ ++ +---+ + | 620 (64 bit) | | 7400 || 750CL | + +--+ ++ +---+ + | | | + | | | + v v v + +--+ ++ +---+ + | POWER3/630 | | 7410 || 750FX | + +--+ ++ +---+ + | | + | | + v v + +--+ ++ + | POWER3+| | 7450 | + +--+ ++ + | | + | | + v v + +--+ ++ + |POWER4| | 7455 | + +--+ ++ + | | + | | + v v + +--+ +---+ ++ + | POWER4+| ---> | 970 | | 7447 | + +--+ +---+ ++ + | | | + | | | + v v v + +--+ +---++---+ ++ + |POWER5| --> | Cell || 970FX | | 7448 | + +--+ +---++---+ ++ + | | + | | + v v + +--+ +---+ + | POWER5+| | 970MP | + +--+ +---+ + | + | + v + +--+ + | POWER5++ | + +--+ + | + | + v + +--+ + |POWER6| + +--+ + | + | + v + +--+ + |POWER7| + +--+ + | + | + v + +--+ + | POWER7+| + +--+ + | + | + v + +--
Re: [PATCH] powerpc: Add cpu family documentation
On Fri, 2014-01-31 at 07:32 -0600, Kumar Gala wrote: > On Jan 29, 2014, at 8:38 PM, Michael Ellerman wrote: > > +Freescale BookE > > +--- > > + > > + - Software loaded TLB. > > + - e6500 adds HW loaded indirect TLB entries. > > + - Mix of 32 & 64 bit > > + > > + e200 --- e500 --- e500v2 --- e500mc --- e5500 --- e6500 > > + (Book3E) (HW TLB) > > + (64bit) > > + > > e200 is its own core family that doesn't have any relation to e500 line other > than being book-e > > might want to add multithreaded to e6500. Thanks Kumar. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Add cpu family documentation
On Thu, 2014-01-30 at 14:32 +1100, Stephen Rothwell wrote: > Hi Michael, > > Nice. > > On Thu, 30 Jan 2014 13:38:00 +1100 Michael Ellerman > wrote: > > > > +++ b/Documentation/powerpc/cpu_families.txt > > @@ -0,0 +1,76 @@ > > +CPU Families > > + > > + > > +This doco tries to summarise some of the different cpu families that exist > > and > > document > > > + || > > + |* [620] --- POWER3/630 --- POWER3+ --- POWER4 --- > > POWER4+ --- POWER5 --- POWER5+ --- POWER5++ --- POWER6 --- POWER7 --- > > POWER7+ --- POWER8 > > Its a pity that this wraps ... Yeah it is. I was too lazy to fix it. New version coming. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 3/3] powerpc/pseries: Report in kernel device tree update to drmgr
Traditionally it has been drmgr's responsibility to update the device tree through the /proc/ppc64/ofdt interface after a suspend/resume operation. This patchset however has modified suspend/resume ops to perform that update entirely in the kernel during the resume. Therefore, a mechanism is required for drmgr to determine who is responsible for the update. This patch adds a show function to the "hibernate" attribute that returns 1 if the kernel updates the device tree after the resume and 0 if drmgr is responsible. Signed-off-by: Tyrel Datwyler --- arch/powerpc/platforms/pseries/suspend.c | 25 - 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 1d9c580..b87b978 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -192,7 +192,30 @@ out: return rc; } -static DEVICE_ATTR(hibernate, S_IWUSR, NULL, store_hibernate); +#define USER_DT_UPDATE 0 +#define KERN_DT_UPDATE 1 + +/** + * show_hibernate - Report device tree update responsibilty + * @dev: subsys root device + * @attr: device attribute struct + * @buf: buffer + * + * Report whether a device tree update is performed by the kernel after a + * resume, or if drmgr must coordinate the update from user space. + * + * Return value: + * 0 if drmgr is to initiate update, and 1 otherwise + **/ +static ssize_t show_hibernate(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", KERN_DT_UPDATE); +} + +static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO, + show_hibernate, store_hibernate); static struct bus_type suspend_subsys = { .name = "power", -- 1.7.12.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 2/3] powerpc/pseries: Update dynamic cache nodes for suspend/resume operation
From: Haren Myneni pHyp can change cache nodes for suspend/resume operation. The current code updates the device tree after all non boot CPUs are enabled. Hence, we do not modify the cache list based on the latest cache nodes. Also we do not remove cache entries for the primary CPU. This patch removes the cache list for the boot CPU, updates the device tree before enabling nonboot CPUs and adds cache list for the boot cpu. Signed-off-by: Haren Myneni Signed-off-by: Tyrel Datwyler --- arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/platforms/pseries/suspend.c | 19 +++ 2 files changed, 20 insertions(+) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9bd52c6..a0e1add 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -283,6 +283,7 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES extern int pseries_devicetree_update(s32 scope); +extern void post_mobility_fixup(void); #endif #ifdef CONFIG_PPC_RTAS_DAEMON diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 16a2552..1d9c580 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -26,6 +26,7 @@ #include #include #include +#include "../../kernel/cacheinfo.h" static u64 stream_id; static struct device suspend_dev; @@ -79,6 +80,23 @@ static int pseries_suspend_cpu(void) } /** + * pseries_suspend_enable_irqs + * + * Post suspend configuration updates + * + **/ +static void pseries_suspend_enable_irqs(void) +{ + /* +* Update configuration which can be modified based on device tree +* changes during resume. 
+*/ + cacheinfo_cpu_offline(smp_processor_id()); + post_mobility_fixup(); + cacheinfo_cpu_online(smp_processor_id()); +} + +/** * pseries_suspend_enter - Final phase of hibernation * * Return value: @@ -235,6 +253,7 @@ static int __init pseries_suspend_init(void) return rc; ppc_md.suspend_disable_cpu = pseries_suspend_cpu; + ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; } -- 1.7.12.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 1/3] powerpc/pseries: Device tree should only be updated once after suspend/migrate
From: Haren Myneni The current code makes rtas calls for update-nodes, activate-firmware and then update-nodes again. The FW provides the same data for both update-nodes calls. As a result, a "proc entry exists" error is reported for the second update while adding device nodes. This patch makes a single rtas call for update-nodes after activating the FW. It also adds rtas_busy_delay() handling for the activate-firmware rtas call. Signed-off-by: Haren Myneni Signed-off-by: Tyrel Datwyler --- arch/powerpc/platforms/pseries/mobility.c | 26 ++ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index cde4e0a..bde7eba 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -290,13 +290,6 @@ void post_mobility_fixup(void) int rc; int activate_fw_token; - rc = pseries_devicetree_update(MIGRATION_SCOPE); - if (rc) { - printk(KERN_ERR "Initial post-mobility device tree update " - "failed: %d\n", rc); - return; - } - activate_fw_token = rtas_token("ibm,activate-firmware"); if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { printk(KERN_ERR "Could not make post-mobility " @@ -304,16 +297,17 @@ void post_mobility_fixup(void) return; } - rc = rtas_call(activate_fw_token, 0, 1, NULL); - if (!rc) { - rc = pseries_devicetree_update(MIGRATION_SCOPE); - if (rc) - printk(KERN_ERR "Secondary post-mobility device tree " - "update failed: %d\n", rc); - } else { + do { + rc = rtas_call(activate_fw_token, 0, 1, NULL); + } while (rtas_busy_delay(rc)); + + if (rc) printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); - return; - } + + rc = pseries_devicetree_update(MIGRATION_SCOPE); + if (rc) + printk(KERN_ERR "Post-mobility device tree update " + "failed: %d\n", rc); return; } -- 1.7.12.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 0/3] powerpc/pseries: fix issues in suspend/resume code
This patchset fixes a couple of issues encountered in the suspend/resume code base. First, when using the in-kernel device tree update code, update-nodes is unnecessarily called more than once. Second, the cpu cache lists are not updated after a suspend/resume, which under certain conditions may cause a panic. Finally, since the cache list fix utilizes the in-kernel device tree update code, a means for telling drmgr not to perform a device tree update from userspace is required. Changes from v2: - Moved dynamic configuration update code into pseries specific routine per Nathan's suggestion. Changes from v1: - Fixed several commit message typos - Fixed authorship of first two patches Haren Myneni (2): powerpc/pseries: Device tree should only be updated once after suspend/migrate powerpc/pseries: Update dynamic cache nodes for suspend/resume operation Tyrel Datwyler (1): powerpc/pseries: Report in kernel device tree update to drmgr arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/platforms/pseries/mobility.c | 26 +++--- arch/powerpc/platforms/pseries/suspend.c | 44 ++- 3 files changed, 54 insertions(+), 17 deletions(-) -- 1.7.12.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: PCIe Access - achieve bursts without DMA
Hi Michael, I'm currently trying to benchmark access speeds to our PCIe-connected IP-cores located inside our FPGA. On x86-based systems I was able to achieve bursts for both read and write access. On PPC32, using an e500v2, I had no success at all so far. Whenever I want to benchmark PCI/PCIe performance I do the following tests: 1. Peripheral board DMA (board-to-board) Use two of your FPGA boards in a chassis and DMA between them. In a PCI system, you can put the cards on the same bus segment and then between a bridge and see how that affects things. In your case, the PCIe traffic will all be via the root-complex/switch, so you should get the same performance regardless of which PCIe slot you use. This is likely the "best you can do" as far as bursts go. 2. Peripheral board DMA to host memory. In this case I typically insmod a simple driver on the host that gives me a page of memory, and then DMA into and out of that memory, using the DMA controller on the peripheral. 3. Host (root complex) DMA. If your host has a DMA controller, then program it per (2). As far as "verification" of your custom peripheral board FPGA IP is concerned, if I was a customer, and you had data for (1) and (2), I'd be pretty happy (and couldn't care less about (2), since it's so system dependent). Since it's an FPGA-based IP, I'd also expect to see a PCIe simulation with Bus Functional Models showing what the optimal performance of your IP was, and then how it nicely matches with the measurements in (1). If you do not have a PCIe logic analyzer, both Xilinx and Altera have Chipscope/SignalTap logic analyzers that can be used for tracing traffic at the TLP layer inside the FPGA. Just some thoughts ... Cheers, Dave ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: PCIe Access - achieve bursts without DMA
On Thu, 2014-01-30 at 12:20 +, Moese, Michael wrote: > Hello PPC-developers, > I'm currently trying to benchmark access speeds to our PCIe-connected IP-cores > located inside our FPGA. On x86-based systems I was able to achieve bursts for > both read and write access. On PPC32, using an e500v2, I had no success at > all > so far. > I tried using ioremap_wc(), like I did on x86, for writing, and it only > results in my > writes just being single requests, one after another. Hrm, ioremap_wc will give you a mapping without the G (guard) bit. Whether that results in some store gathering or not on IOs depends on a specific HW implementation, you'll have to check with the FSP folks on that one, there could also be a chicken switch (HID bit or similar) needed to enable that (there was on some earlier ppc32 chips). Another thing you can try is to use FP register load/stores. > For reads, I noticed I could not ioremap_cache() on PPC, so I used simple > ioremap() > here. > I used several ways to read from the device, from simple > readl(),memcpy_from_io(), > memcpy() to cacheable_memcpy() - with no improvements. Even when just > issuing > a batch of prefetch()-calls for all the memory to read did not result in read > bursts. > > I only get really poor results, writing is possible with around 40 MiByte/s, > whereas I > can read at about only 3 MiByte/s. > After hours of studying the reference manual from freescale, looking into > other code > and searching the web, I'm close to resignation. > > Maybe someone of you has some more directions for me, I'd appreciate every > hint > that leads me to my problem's solution - maybe I just missed something or > lack > knowledge about this architecture in general. > > Thanks for your reading. > > > Michael > ___ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 01/10] KVM: PPC: BOOK3S: PR: Fix PURR and SPURR emulation
On Fri, Jan 31, 2014 at 11:47:44AM +0100, Alexander Graf wrote: > > On 31.01.2014, at 11:38, Aneesh Kumar K.V > wrote: > > > Alexander Graf writes: > > > >> On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote: > >>> We definitely don't need to emulate mtspr, because both the registers > >>> are hypervisor resource. > >> > >> This patch description doesn't cover what the patch actually does. It > >> changes the implementation from "always tell the guest it uses 100%" to > >> "give the guest an accurate amount of cpu time spent inside guest > >> context". > > > > Will fix that > > > >> > >> Also, I think we either go with full hyp semantics which means we also > >> emulate the offset or we go with no hyp awareness in the guest at all > >> which means we also don't emulate SPURR which is a hyp privileged > >> register. > > > > Can you clarify this ? > > In the 2.06 ISA SPURR is hypervisor privileged. That changed for 2.07 where > it became supervisor privileged. So I suppose your patch is ok. When > reviewing those patches I only had 2.06 around because power.org was broken. It's always been supervisor privilege for reading and hypervisor privilege for writing, ever since it was introduced in 2.05, and that hasn't changed. So I think what Aneesh is doing is correct. Regards, Paul. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 0/8] Add support for PowerPC Hypervisor supplied performance counters
On 01/22/2014 04:11 PM, Cody P Schafer wrote: On 01/21/2014 05:32 PM, Michael Ellerman wrote: On Thu, 2014-01-16 at 15:53 -0800, Cody P Schafer wrote: These patches add basic pmus for 2 powerpc hypervisor interfaces to obtain performance counters: gpci ("get performance counter info") and 24x7. Any comments on/things that need fixing for this patch set to be merged? ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] Fix coding style errors
On Mon, Jan 27, 2014 at 09:07:34PM -0600, Brandon Stewart wrote: > I corrected several coding errors. > > Signed-off-by: Brandon Stewart > --- > drivers/macintosh/adb.c | 7 --- > 1 file changed, 4 insertions(+), 3 deletions(-) > > diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c > index 53611de..dd3f49a 100644 > --- a/drivers/macintosh/adb.c > +++ b/drivers/macintosh/adb.c > @@ -623,7 +623,7 @@ do_adb_query(struct adb_request *req) > { > int ret = -EINVAL; > > - switch(req->data[1]) { > + switch (req->data[1]) { > case ADB_QUERY_GETDEVINFO: > if (req->nbytes < 3) > break; > @@ -792,8 +792,9 @@ static ssize_t adb_write(struct file *file, const char > __user *buf, > } > /* Special case for ADB_BUSRESET request, all others are sent to > the controller */ > - else if ((req->data[0] == ADB_PACKET) && (count > 1) > - && (req->data[1] == ADB_BUSRESET)) { > + else if (req->data[0] == ADB_PACKET && > + req->data[1] == ADB_BUSRESET && > + count > 1) { Is this re-ordering safe? Isn't 'count > 1' notionally indicating whether req->data[1] exists to be tested in the first place? On the other hand there's a check at the top of the routine that returns if count < 2, so maybe the check here should be removed altogether (along with one a few lines above)? ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/eeh: drop taken reference to driver on eeh_rmv_device
On Fri, Jan 31, 2014 at 08:46:11AM +0800, Gavin Shan wrote: > On Thu, Jan 30, 2014 at 11:00:48AM -0200, Thadeu Lima de Souza Cascardo wrote: > >Commit f5c57710dd62dd06f176934a8b4b8accbf00f9f8 ("powerpc/eeh: Use > >partial hotplug for EEH unaware drivers") introduces eeh_rmv_device, > >which may grab a reference to a driver, but not release it. > > > >That prevents a driver from being removed after it has gone through EEH > >recovery. > > > >This patch drops the reference in either exit path if it was taken. > > > >Signed-off-by: Thadeu Lima de Souza Cascardo > >--- > > arch/powerpc/kernel/eeh_driver.c |5 - > > 1 files changed, 4 insertions(+), 1 deletions(-) > > > >diff --git a/arch/powerpc/kernel/eeh_driver.c > >b/arch/powerpc/kernel/eeh_driver.c > >index 7bb30dc..afe7337 100644 > >--- a/arch/powerpc/kernel/eeh_driver.c > >+++ b/arch/powerpc/kernel/eeh_driver.c > >@@ -364,7 +364,7 @@ static void *eeh_rmv_device(void *data, void *userdata) > > return NULL; > > driver = eeh_pcid_get(dev); > > if (driver && driver->err_handler) > >-return NULL; > >+goto out; > > > > /* Remove it from PCI subsystem */ > > pr_debug("EEH: Removing %s without EEH sensitive driver\n", > >@@ -377,6 +377,9 @@ static void *eeh_rmv_device(void *data, void *userdata) > > For normal case (driver without EEH support), we probably release the > reference > to the driver before pci_stop_and_remove_bus_device(). You are right, we need to call it before we call pci_stop_and_remove_bus_device, otherwise dev->driver will be NULL, and eeh_pcid_put will not do module_put. On the other hand, we could change the call to eeh_pcid_put to accept struct pci_driver instead. > > > pci_stop_and_remove_bus_device(dev); > > pci_unlock_rescan_remove(); > > > >+out: > >+if (driver) > >+eeh_pcid_put(dev); > > return NULL; > > We needn't "if (driver)" here as eeh_pcid_put() already had the check. > What if try_module_get returned false on eeh_pcid_get? How about something like the patch below? 
> > } > > > > Thanks, > Gavin --- diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7bb30dc..3a397fa 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -352,6 +352,7 @@ static void *eeh_rmv_device(void *data, void *userdata) struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); int *removed = (int *)userdata; + bool has_err_handler; /* * Actually, we should remove the PCI bridges as well. @@ -362,8 +363,12 @@ static void *eeh_rmv_device(void *data, void *userdata) */ if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) return NULL; + driver = eeh_pcid_get(dev); - if (driver && driver->err_handler) + has_err_handler = driver && driver->err_handler; + if (driver) + eeh_pcid_put(dev); + if (has_err_handler) return NULL; /* Remove it from PCI subsystem */ --- ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Add cpu family documentation
On Jan 29, 2014, at 8:38 PM, Michael Ellerman wrote: > This patch adds some documentation on the different cpu families > supported by arch/powerpc. > > Signed-off-by: Michael Ellerman > --- > Documentation/powerpc/cpu_families.txt | 76 ++ > 1 file changed, 76 insertions(+) > create mode 100644 Documentation/powerpc/cpu_families.txt > > diff --git a/Documentation/powerpc/cpu_families.txt > b/Documentation/powerpc/cpu_families.txt > new file mode 100644 > index 000..df72657 > --- /dev/null > +++ b/Documentation/powerpc/cpu_families.txt > @@ -0,0 +1,76 @@ > +CPU Families > + > + > +This doco tries to summarise some of the different cpu families that exist > and > +are supported by arch/powerpc. > + > +Book3S (aka sPAPR) > +-- > + > + - Hash MMU > + - Mix of 32 & 64 bit > + > + Old > + POWER --- 601 --- 603 > + || | > + || *- 740 > + || | > + || *- 750 (G3) --- 750CX --- 750CL --- 750FX > + || | > + || | > + | 604 *--- 7400 --- 7410 --- 7450 --- 7455 --- 7447 > --- 7448 > + || > + || > + |* [620] --- POWER3/630 --- POWER3+ --- POWER4 --- > POWER4+ --- POWER5 --- POWER5+ --- POWER5++ --- POWER6 --- POWER7 --- POWER7+ > --- POWER8 > + | (64bit) > |. > + | > |. > + | > |*--- Cell > + | > | > + | > *--- 970 --- 970FX --- 970MP > + | > + *--- RS64 (threads) > + > + > + PA6T (64bit) ... > + > + > +IBM BookE > +- > + > + - Software loaded TLB. > + - All 32 bit > + > + 401 --- 403 --- 405 --- 440 --- 450 --- 460 --- 476 > + | > + *--- BG/P > + > + > +Motorola/Freescale 8xx > +-- > + > + - Software loaded with hardware assist. > + - All 32 bit > + > + 8xx --- 850 > + > + > +Freescale BookE > +--- > + > + - Software loaded TLB. > + - e6500 adds HW loaded indirect TLB entries. > + - Mix of 32 & 64 bit > + > + e200 --- e500 --- e500v2 --- e500mc --- e5500 --- e6500 > + (Book3E) (HW TLB) > + (64bit) > + e200 is its own core family that doesn’t have any relation to e500 line other than being book-e might want to add multithreaded to e6500. 
> +IBM A2 core > +--- > + > + - Book3E, software loaded TLB + HW loaded indirect TLB entries. > + - 64 bit > + > + A2 core --- BG/Q > + | > + *--- WSP > -- > 1.8.3.2 > > ___ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: PCIe Access - achieve bursts without DMA
On Thu, Jan 30, 2014 at 12:20:21PM +0000, Moese, Michael wrote: > Hello PPC-developers, > I'm currently trying to benchmark access speeds to our PCIe-connected IP-cores > located inside our FPGA. On x86-based systems I was able to achieve bursts for > both read and write access. On PPC32, using an e500v2, I had no success at > all > so far. > I tried using ioremap_wc(), like I did on x86, for writing, and it only > results in my > writes just being single requests, one after another. I believe that on PPC, write-combine is directly mapped to nocache. I can't remember if there is a writethrough option for ioremap (but adding it would probably be relatively easy). > For reads, I noticed I could not ioremap_cache() on PPC, so I used simple > ioremap() > here. You might be able to use ioremap_cache and direct cache control instructions (dcbf/dcbi) to achieve your goals. This becomes similar to handling machines with no hardware cache coherency. You have to know the hardware cache line size to make this work. This said, it might be better to mark the memory as guarded and non-coherent (WIMG=), I don't know what ioremap_cache does for the MG bits and don't have the time to look it up right now. > I used several ways to read from the device, from simple > readl(),memcpy_from_io(), > memcpy() to cacheable_memcpy() - with no improvements. Even when just > issuing > a batch of prefetch()-calls for all the memory to read did not result in read > bursts. If the device data you want to read is supposed to be cacheable (which means basically that the data does not change unexpectedly under you, i.e., is not as volatile as a typical device I/O register), you don't want to use readl() which adds some synchronization to the read. Prefetch only works on writeback memory, maybe writethrough, expecting it to work on cache-inhibited memory is contradictory. Regards, Gabriel ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 0/2] Fixes for PCI-E link speed
On Fri, 2014-01-31 at 10:20 -0200, Kleber Sacilotto de Souza wrote: > On 01/17/2014 11:56 AM, Kleber Sacilotto de Souza wrote: > > These two patches fix problems on the PCI-E link speed detection. > > The first one fixes a regression and adds some improvements on the > > code, and the second one adds definitions for Gen3 speeds. > > > > Kleber Sacilotto de Souza (2): > >powerpc/pseries: fix regression on PCI link speed > >powerpc/pseries: add Gen3 definitions for PCIE link speed > > > > arch/powerpc/platforms/pseries/pci.c | 22 +++--- > > 1 files changed, 15 insertions(+), 7 deletions(-) > > > > Hi, > > Any feedback on this patch series? Patches on this list are tracked in patchwork so are generally not "lost". Plus I was on vacation last week. So there's no need for such pings unless much more time has elapsed. I'll probably put it in after -rc1. Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 0/2] Fixes for PCI-E link speed
On 01/17/2014 11:56 AM, Kleber Sacilotto de Souza wrote: These two patches fix problems on the PCI-E link speed detection. The first one fixes a regression and adds some improvements on the code, and the second one adds definitions for Gen3 speeds. Kleber Sacilotto de Souza (2): powerpc/pseries: fix regression on PCI link speed powerpc/pseries: add Gen3 definitions for PCIE link speed arch/powerpc/platforms/pseries/pci.c | 22 +++--- 1 files changed, 15 insertions(+), 7 deletions(-) Hi, Any feedback on this patch series? Thanks, -- Kleber Sacilotto de Souza IBM Linux Technology Center ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 08/10] KVM: PPC: BOOK3S: PR: Add support for facility unavailable interrupt
On 31.01.2014, at 12:40, Aneesh Kumar K.V wrote: > Alexander Graf writes: > >> On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote: >>> At this point we allow all the supported facilities except EBB. So >>> forward the interrupt to guest as illegal instruction. >>> >>> Signed-off-by: Aneesh Kumar K.V >>> --- >>> arch/powerpc/include/asm/kvm_asm.h | 4 +++- >>> arch/powerpc/kvm/book3s.c | 4 >>> arch/powerpc/kvm/book3s_emulate.c | 18 ++ >>> arch/powerpc/kvm/book3s_pr.c | 17 + >>> 4 files changed, 42 insertions(+), 1 deletion(-) >>> >>> diff --git a/arch/powerpc/include/asm/kvm_asm.h >>> b/arch/powerpc/include/asm/kvm_asm.h >>> index 1bd92fd43cfb..799244face51 100644 >>> --- a/arch/powerpc/include/asm/kvm_asm.h >>> +++ b/arch/powerpc/include/asm/kvm_asm.h >>> @@ -99,6 +99,7 @@ >>> #define BOOK3S_INTERRUPT_PERFMON 0xf00 >>> #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 >>> #define BOOK3S_INTERRUPT_VSX 0xf40 >>> +#define BOOK3S_INTERRUPT_FAC_UNAVAIL0xf60 >>> >>> #define BOOK3S_IRQPRIO_SYSTEM_RESET0 >>> #define BOOK3S_IRQPRIO_DATA_SEGMENT1 >>> @@ -117,7 +118,8 @@ >>> #define BOOK3S_IRQPRIO_DECREMENTER 14 >>> #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15 >>> #define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16 >>> -#define BOOK3S_IRQPRIO_MAX 17 >>> +#define BOOK3S_IRQPRIO_FAC_UNAVAIL 17 >>> +#define BOOK3S_IRQPRIO_MAX 18 >>> >>> #define BOOK3S_HFLAG_DCBZ320x1 >>> #define BOOK3S_HFLAG_SLB 0x2 >>> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c >>> index 8912608b7e1b..a9aea28c2677 100644 >>> --- a/arch/powerpc/kvm/book3s.c >>> +++ b/arch/powerpc/kvm/book3s.c >>> @@ -143,6 +143,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) >>> case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG;break; >>> case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break; >>> case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break; >>> + case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL; break; >>> default:prio = BOOK3S_IRQPRIO_MAX; break; >>> } >>> >>> @@ -273,6 +274,9 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu >>> 
*vcpu, unsigned int priority) >>> case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR: >>> vec = BOOK3S_INTERRUPT_PERFMON; >>> break; >>> + case BOOK3S_IRQPRIO_FAC_UNAVAIL: >>> + vec = BOOK3S_INTERRUPT_FAC_UNAVAIL; >>> + break; >>> default: >>> deliver = 0; >>> printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); >>> diff --git a/arch/powerpc/kvm/book3s_emulate.c >>> b/arch/powerpc/kvm/book3s_emulate.c >>> index 60d0b6b745e7..bf6b11021250 100644 >>> --- a/arch/powerpc/kvm/book3s_emulate.c >>> +++ b/arch/powerpc/kvm/book3s_emulate.c >>> @@ -481,6 +481,15 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu >>> *vcpu, int sprn, ulong spr_val) >>> vcpu->arch.shadow_fscr = vcpu->arch.fscr & host_fscr; >>> break; >>> } >>> + case SPRN_EBBHR: >>> + vcpu->arch.ebbhr = spr_val; >>> + break; >>> + case SPRN_EBBRR: >>> + vcpu->arch.ebbrr = spr_val; >>> + break; >>> + case SPRN_BESCR: >>> + vcpu->arch.bescr = spr_val; >>> + break; >>> unprivileged: >>> default: >>> printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); >>> @@ -607,6 +616,15 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu >>> *vcpu, int sprn, ulong *spr_val >>> case SPRN_FSCR: >>> *spr_val = vcpu->arch.fscr; >>> break; >>> + case SPRN_EBBHR: >>> + *spr_val = vcpu->arch.ebbhr; >>> + break; >>> + case SPRN_EBBRR: >>> + *spr_val = vcpu->arch.ebbrr; >>> + break; >>> + case SPRN_BESCR: >>> + *spr_val = vcpu->arch.bescr; >>> + break; >>> default: >>> unprivileged: >>> printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); >>> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c >>> index 51d469f8c9fd..828056ec208f 100644 >>> --- a/arch/powerpc/kvm/book3s_pr.c >>> +++ b/arch/powerpc/kvm/book3s_pr.c >>> @@ -900,6 +900,23 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct >>> kvm_vcpu *vcpu, >>> case BOOK3S_INTERRUPT_PERFMON: >>> r = RESUME_GUEST; >>> break; >>> + case BOOK3S_INTERRUPT_FAC_UNAVAIL: >>> + { >>> + /* >>> +* Check for the facility that need to be emulated >>> +*/ >>> + ulong 
fscr_ic = vcpu->arch.shadow_fscr >> 56; >>> + if (fscr_ic != FSCR_EBB_LG) { >>> + /* >>> +* We only disable EBB facility. >>> +* So only emulate that. >> >> I don't understand the comment. We emulate nothing at all her
Re: [RFC PATCH 08/10] KVM: PPC: BOOK3S: PR: Add support for facility unavailable interrupt
Alexander Graf writes: > On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote: >> At this point we allow all the supported facilities except EBB. So >> forward the interrupt to guest as illegal instruction. >> >> Signed-off-by: Aneesh Kumar K.V >> --- >> arch/powerpc/include/asm/kvm_asm.h | 4 +++- >> arch/powerpc/kvm/book3s.c | 4 >> arch/powerpc/kvm/book3s_emulate.c | 18 ++ >> arch/powerpc/kvm/book3s_pr.c | 17 + >> 4 files changed, 42 insertions(+), 1 deletion(-) >> >> diff --git a/arch/powerpc/include/asm/kvm_asm.h >> b/arch/powerpc/include/asm/kvm_asm.h >> index 1bd92fd43cfb..799244face51 100644 >> --- a/arch/powerpc/include/asm/kvm_asm.h >> +++ b/arch/powerpc/include/asm/kvm_asm.h >> @@ -99,6 +99,7 @@ >> #define BOOK3S_INTERRUPT_PERFMON 0xf00 >> #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 >> #define BOOK3S_INTERRUPT_VSX 0xf40 >> +#define BOOK3S_INTERRUPT_FAC_UNAVAIL0xf60 >> >> #define BOOK3S_IRQPRIO_SYSTEM_RESET0 >> #define BOOK3S_IRQPRIO_DATA_SEGMENT1 >> @@ -117,7 +118,8 @@ >> #define BOOK3S_IRQPRIO_DECREMENTER 14 >> #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15 >> #define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16 >> -#define BOOK3S_IRQPRIO_MAX 17 >> +#define BOOK3S_IRQPRIO_FAC_UNAVAIL 17 >> +#define BOOK3S_IRQPRIO_MAX 18 >> >> #define BOOK3S_HFLAG_DCBZ320x1 >> #define BOOK3S_HFLAG_SLB 0x2 >> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c >> index 8912608b7e1b..a9aea28c2677 100644 >> --- a/arch/powerpc/kvm/book3s.c >> +++ b/arch/powerpc/kvm/book3s.c >> @@ -143,6 +143,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) >> case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG;break; >> case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break; >> case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break; >> +case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL; break; >> default:prio = BOOK3S_IRQPRIO_MAX; break; >> } >> >> @@ -273,6 +274,9 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, >> unsigned int priority) >> case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR: >> vec = 
BOOK3S_INTERRUPT_PERFMON; >> break; >> +case BOOK3S_IRQPRIO_FAC_UNAVAIL: >> +vec = BOOK3S_INTERRUPT_FAC_UNAVAIL; >> +break; >> default: >> deliver = 0; >> printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); >> diff --git a/arch/powerpc/kvm/book3s_emulate.c >> b/arch/powerpc/kvm/book3s_emulate.c >> index 60d0b6b745e7..bf6b11021250 100644 >> --- a/arch/powerpc/kvm/book3s_emulate.c >> +++ b/arch/powerpc/kvm/book3s_emulate.c >> @@ -481,6 +481,15 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, >> int sprn, ulong spr_val) >> vcpu->arch.shadow_fscr = vcpu->arch.fscr & host_fscr; >> break; >> } >> +case SPRN_EBBHR: >> +vcpu->arch.ebbhr = spr_val; >> +break; >> +case SPRN_EBBRR: >> +vcpu->arch.ebbrr = spr_val; >> +break; >> +case SPRN_BESCR: >> +vcpu->arch.bescr = spr_val; >> +break; >> unprivileged: >> default: >> printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); >> @@ -607,6 +616,15 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, >> int sprn, ulong *spr_val >> case SPRN_FSCR: >> *spr_val = vcpu->arch.fscr; >> break; >> +case SPRN_EBBHR: >> +*spr_val = vcpu->arch.ebbhr; >> +break; >> +case SPRN_EBBRR: >> +*spr_val = vcpu->arch.ebbrr; >> +break; >> +case SPRN_BESCR: >> +*spr_val = vcpu->arch.bescr; >> +break; >> default: >> unprivileged: >> printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); >> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c >> index 51d469f8c9fd..828056ec208f 100644 >> --- a/arch/powerpc/kvm/book3s_pr.c >> +++ b/arch/powerpc/kvm/book3s_pr.c >> @@ -900,6 +900,23 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct >> kvm_vcpu *vcpu, >> case BOOK3S_INTERRUPT_PERFMON: >> r = RESUME_GUEST; >> break; >> +case BOOK3S_INTERRUPT_FAC_UNAVAIL: >> +{ >> +/* >> + * Check for the facility that need to be emulated >> + */ >> +ulong fscr_ic = vcpu->arch.shadow_fscr >> 56; >> +if (fscr_ic != FSCR_EBB_LG) { >> +/* >> + * We only disable EBB facility. >> + * So only emulate that. 
> > I don't understand the comment. We emulate nothing at all here. We either > - hit an EBB unavailable in which case we send the guest an illegal > instr
Re: [RFC PATCH 07/10] KVM: PPC: BOOK3S: PR: Emulate facility status and control register
Paul Mackerras writes: > On Tue, Jan 28, 2014 at 10:14:12PM +0530, Aneesh Kumar K.V wrote: >> We allow priv-mode update of this. The guest value is saved in fscr, >> and the value actually used is saved in shadow_fscr. shadow_fscr >> only contains values that are allowed by the host. On >> facility unavailable interrupt, if the facility is allowed by fscr >> but disabled in shadow_fscr we need to emulate the support. Currently >> all but EBB is disabled. We still don't support performance monitoring >> in PR guest. > > ... > >> +/* >> + * Save the current fscr in shadow fscr >> + */ >> +mfspr r3,SPRN_FSCR >> +PPC_STL r3, VCPU_SHADOW_FSCR(r7) > > I don't think you need to do this. What could possibly have changed > FSCR since we loaded it on the way into the guest? The reason for facility unavailable interrupt is encoded in FSCR right ? -aneesh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 03/10] KVM: PPC: BOOK3S: PR: Emulate instruction counter
On 31.01.2014, at 12:25, Aneesh Kumar K.V wrote: > Alexander Graf writes: > >> On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote: >>> Writing to IC is not allowed in the privileged mode. >> >> This is not a patch description. >> >>> >>> Signed-off-by: Aneesh Kumar K.V >>> --- >>> arch/powerpc/include/asm/kvm_host.h | 1 + >>> arch/powerpc/kvm/book3s_emulate.c | 3 +++ >>> arch/powerpc/kvm/book3s_pr.c| 2 ++ >>> 3 files changed, 6 insertions(+) >>> >>> diff --git a/arch/powerpc/include/asm/kvm_host.h >>> b/arch/powerpc/include/asm/kvm_host.h >>> index 9ebdd12e50a9..e0b13aca98e6 100644 >>> --- a/arch/powerpc/include/asm/kvm_host.h >>> +++ b/arch/powerpc/include/asm/kvm_host.h >>> @@ -509,6 +509,7 @@ struct kvm_vcpu_arch { >>> /* Time base value when we entered the guest */ >>> u64 entry_tb; >>> u64 entry_vtb; >>> + u64 entry_ic; >>> u32 tcr; >>> ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ >>> u32 ivor[64]; >>> diff --git a/arch/powerpc/kvm/book3s_emulate.c >>> b/arch/powerpc/kvm/book3s_emulate.c >>> index 4b58d8a90cb5..abe6f3057e5b 100644 >>> --- a/arch/powerpc/kvm/book3s_emulate.c >>> +++ b/arch/powerpc/kvm/book3s_emulate.c >>> @@ -531,6 +531,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, >>> int sprn, ulong *spr_val >>> case SPRN_VTB: >>> *spr_val = vcpu->arch.vtb; >>> break; >>> + case SPRN_IC: >>> + *spr_val = vcpu->arch.ic; >>> + break; >>> case SPRN_GQR0: >>> case SPRN_GQR1: >>> case SPRN_GQR2: >>> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c >>> index b5598e9cdd09..51d469f8c9fd 100644 >>> --- a/arch/powerpc/kvm/book3s_pr.c >>> +++ b/arch/powerpc/kvm/book3s_pr.c >>> @@ -121,6 +121,7 @@ void kvmppc_copy_to_svcpu(struct >>> kvmppc_book3s_shadow_vcpu *svcpu, >>> */ >>> vcpu->arch.entry_tb = get_tb(); >>> vcpu->arch.entry_vtb = get_vtb(); >>> + vcpu->arch.entry_ic = mfspr(SPRN_IC); >> >> Is this implemented on all systems? 
>> >>> >>> } >>> >>> @@ -174,6 +175,7 @@ out: >>> vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; >>> vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; >>> vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; >>> + vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; >> >> This is getting quite convoluted. How about we act slightly more fuzzy >> and put all of this into vcpu_load/put? >> > > I am not sure whether vcpu_load/put is too early/late to save these > context ? It'd mean we treat instruction emulation as part of guest overhead and time, but we'd make the entry/exit path faster. Unlike with HV KVM, guest entry/exit is pretty hot due to the massive amounts of instruction emulation we need to do. Alex ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 03/10] KVM: PPC: BOOK3S: PR: Emulate instruction counter
Alexander Graf writes: > On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote: >> Writing to IC is not allowed in the privileged mode. > > This is not a patch description. > >> >> Signed-off-by: Aneesh Kumar K.V >> --- >> arch/powerpc/include/asm/kvm_host.h | 1 + >> arch/powerpc/kvm/book3s_emulate.c | 3 +++ >> arch/powerpc/kvm/book3s_pr.c| 2 ++ >> 3 files changed, 6 insertions(+) >> >> diff --git a/arch/powerpc/include/asm/kvm_host.h >> b/arch/powerpc/include/asm/kvm_host.h >> index 9ebdd12e50a9..e0b13aca98e6 100644 >> --- a/arch/powerpc/include/asm/kvm_host.h >> +++ b/arch/powerpc/include/asm/kvm_host.h >> @@ -509,6 +509,7 @@ struct kvm_vcpu_arch { >> /* Time base value when we entered the guest */ >> u64 entry_tb; >> u64 entry_vtb; >> +u64 entry_ic; >> u32 tcr; >> ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ >> u32 ivor[64]; >> diff --git a/arch/powerpc/kvm/book3s_emulate.c >> b/arch/powerpc/kvm/book3s_emulate.c >> index 4b58d8a90cb5..abe6f3057e5b 100644 >> --- a/arch/powerpc/kvm/book3s_emulate.c >> +++ b/arch/powerpc/kvm/book3s_emulate.c >> @@ -531,6 +531,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, >> int sprn, ulong *spr_val >> case SPRN_VTB: >> *spr_val = vcpu->arch.vtb; >> break; >> +case SPRN_IC: >> +*spr_val = vcpu->arch.ic; >> +break; >> case SPRN_GQR0: >> case SPRN_GQR1: >> case SPRN_GQR2: >> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c >> index b5598e9cdd09..51d469f8c9fd 100644 >> --- a/arch/powerpc/kvm/book3s_pr.c >> +++ b/arch/powerpc/kvm/book3s_pr.c >> @@ -121,6 +121,7 @@ void kvmppc_copy_to_svcpu(struct >> kvmppc_book3s_shadow_vcpu *svcpu, >> */ >> vcpu->arch.entry_tb = get_tb(); >> vcpu->arch.entry_vtb = get_vtb(); >> +vcpu->arch.entry_ic = mfspr(SPRN_IC); > > Is this implemented on all systems? 
> >> >> } >> >> @@ -174,6 +175,7 @@ out: >> vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; >> vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; >> vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; >> +vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; > > This is getting quite convoluted. How about we act slightly more fuzzy > and put all of this into vcpu_load/put? > I am not sure whether vcpu_load/put is too early/late to save this context? -aneesh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 02/10] KVM: PPC: BOOK3S: PR: Emulate virtual timebase register
Paul Mackerras writes: > On Tue, Jan 28, 2014 at 10:14:07PM +0530, Aneesh Kumar K.V wrote: >> virtual time base register is a per vm register and need to saved >> and restored on vm exit and entry. Writing to VTB is not allowed >> in the privileged mode. > ... > >> +#ifdef CONFIG_PPC_BOOK3S_64 >> +#define mfvtb() ({unsigned long rval; >> \ >> +asm volatile("mfspr %0, %1" : \ >> + "=r" (rval) : "i" (SPRN_VTB)); rval;}) > > The mfspr will be a no-op on anything before POWER8, meaning the > result will be whatever value was in the destination GPR before the > mfspr. I suppose that may not matter if the result is only ever used > when we're running on a POWER8 host, but I would feel more comfortable > if we had explicit feature tests to make sure of that, rather than > possibly doing computations with unpredictable values. > > With your patch, a guest on a POWER7 or a PPC970 could do a read from > VTB and get garbage -- first, there is nothing to stop userspace from > requesting POWER8 emulation on an older machine, and secondly, even if > the virtual machine is a PPC970 (say) you don't implement > unimplemented SPR semantics for VTB (no-op if PR=0, illegal > instruction interrupt if PR=1). Ok that means we need to do something like ? struct cpu_spec *s = find_cpuspec(vcpu->arch.pvr); if (s->cpu_features & CPU_FTR_ARCH_207S) { } > > On the whole I think it is reasonable to reject an attempt to set the > virtual PVR to a POWER8 PVR value if we are not running on a POWER8 > host, because emulating all the new POWER8 features in software > (particularly transactional memory) would not be feasible. Alex may > disagree. :) That would make it much simpler. -aneesh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 01/10] KVM: PPC: BOOK3S: PR: Fix PURR and SPURR emulation
On 31.01.2014, at 11:38, Aneesh Kumar K.V wrote: > Alexander Graf writes: > >> On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote: >>> We definitely don't need to emulate mtspr, because both the registers >>> are hypervisor resource. >> >> This patch description doesn't cover what the patch actually does. It >> changes the implementation from "always tell the guest it uses 100%" to >> "give the guest an accurate amount of cpu time spent inside guest >> context". > > Will fix that > >> >> Also, I think we either go with full hyp semantics which means we also >> emulate the offset or we go with no hyp awareness in the guest at all >> which means we also don't emulate SPURR which is a hyp privileged >> register. > > Can you clarify this ? In the 2.06 ISA SPURR is hypervisor privileged. That changed for 2.07 where it became supervisor privileged. So I suppose your patch is ok. When reviewing those patches I only had 2.06 around because power.org was broken. Alex ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 01/10] KVM: PPC: BOOK3S: PR: Fix PURR and SPURR emulation
Alexander Graf writes: > On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote: >> We definitely don't need to emulate mtspr, because both the registers >> are hypervisor resource. > > This patch description doesn't cover what the patch actually does. It > changes the implementation from "always tell the guest it uses 100%" to > "give the guest an accurate amount of cpu time spent inside guest > context". Will fix that > > Also, I think we either go with full hyp semantics which means we also > emulate the offset or we go with no hyp awareness in the guest at all > which means we also don't emulate SPURR which is a hyp privileged > register. Can you clarify this ? > > Otherwise I like the patch :). > -aneesh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 2/2] powerpc/mm: Fix compile error of pgtable-ppc64.h
From: Li Zhong It seems that a forward declaration does not work well with a typedef, so use struct spinlock directly to avoid the following build errors: In file included from include/linux/spinlock.h:81, from include/linux/seqlock.h:35, from include/linux/time.h:5, from include/uapi/linux/timex.h:56, from include/linux/timex.h:56, from include/linux/sched.h:17, from arch/powerpc/kernel/asm-offsets.c:17: include/linux/spinlock_types.h:76: error: redefinition of typedef 'spinlock_t' /root/linux-next/arch/powerpc/include/asm/pgtable-ppc64.h:563: note: previous declaration of 'spinlock_t' was here upstream sha1:fd120dc2e205d2318a8b47d6d8098b789e3af67d for 3.13 stable series Signed-off-by: Li Zhong Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pgtable-ppc64.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index d27960c89a71..bc141c950b1e 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -560,9 +560,9 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define pmd_move_must_withdraw pmd_move_must_withdraw -typedef struct spinlock spinlock_t; -static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, -spinlock_t *old_pmd_ptl) +struct spinlock; +static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, +struct spinlock *old_pmd_ptl) { /* * Archs like ppc64 use pgtable to store per pmd -- 1.8.3.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 1/2] powerpc/thp: Fix crash on mremap
From: "Aneesh Kumar K.V" This patch fix the below crash NIP [c004cee4] .__hash_page_thp+0x2a4/0x440 LR [c00439ac] .hash_page+0x18c/0x5e0 ... Call Trace: [c00736103c40] [1b00] 0x1b00(unreliable) [437908.479693] [c00736103d50] [c00439ac] .hash_page+0x18c/0x5e0 [437908.479699] [c00736103e30] [c000924c] .do_hash_page+0x4c/0x58 On ppc64 we use the pgtable for storing the hpte slot information and store address to the pgtable at a constant offset (PTRS_PER_PMD) from pmd. On mremap, when we switch the pmd, we need to withdraw and deposit the pgtable again, so that we find the pgtable at PTRS_PER_PMD offset from new pmd. We also want to move the withdraw and deposit before the set_pmd so that, when page fault find the pmd as trans huge we can be sure that pgtable can be located at the offset. upstream SHA1: b3084f4db3aeb991c507ca774337c7e7893ed04f for 3.13 stable series Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pgtable-ppc64.h | 14 ++ include/asm-generic/pgtable.h| 12 mm/huge_memory.c | 14 +- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 4a191c472867..d27960c89a71 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -558,5 +558,19 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_INVALIDATE extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); + +#define pmd_move_must_withdraw pmd_move_must_withdraw +typedef struct spinlock spinlock_t; +static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, +spinlock_t *old_pmd_ptl) +{ + /* +* Archs like ppc64 use pgtable to store per pmd +* specific information. 
So when we switch the pmd, +* we should also withdraw and deposit the pgtable +*/ + return true; +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index db0923458940..8e4f41d9af4d 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -558,6 +558,18 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) } #endif +#ifndef pmd_move_must_withdraw +static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, +spinlock_t *old_pmd_ptl) +{ + /* +* With split pmd lock we also need to move preallocated +* PTE page table if new_pmd is on different PMD page table. +*/ + return new_pmd_ptl != old_pmd_ptl; +} +#endif + /* * This function is meant to be used by sites walking pagetables with * the mmap_sem hold in read mode to protect against MADV_DONTNEED and diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 95d1acb0f3d2..5d80c53b87cb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1502,19 +1502,15 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); - set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); - if (new_ptl != old_ptl) { - pgtable_t pgtable; - /* -* Move preallocated PTE page table if new_pmd is on -* different PMD page table. -*/ + if (pmd_move_must_withdraw(new_ptl, old_ptl)) { + pgtable_t pgtable; pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); pgtable_trans_huge_deposit(mm, new_pmd, pgtable); - - spin_unlock(new_ptl); } + set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); } out: -- 1.8.3.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/2][v8] driver/memory: Move Freescale IFC driver to a common driver
The Freescale IFC controller has been used for mpc8xxx. It will be used for ARM-based SoCs as well. This patch moves the driver to drivers/memory and fixes the header file includes. It also removes module_platform_driver() and instead calls platform_driver_register() from subsys_initcall() to make sure this module has been loaded before MTD partition parsing starts. Signed-off-by: Prabhakar Kushwaha Acked-by: Arnd Bergmann --- Based upon git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git Branch next Changes for v2: - Move fsl_ifc in driver/memory Changes for v3: - move device tree bindings to memory Changes for v4: Rebased to git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git Changes for v5: - Moved powerpc/Kconfig option to driver/memory Changes for v6: - Update Kconfig details Changes for v7: - Update Kconfig Changes for v8: - Update Kconfig help .../{powerpc => memory-controllers}/fsl/ifc.txt|0 arch/powerpc/Kconfig |4 arch/powerpc/sysdev/Makefile |1 - drivers/memory/Kconfig |8 drivers/memory/Makefile|1 + {arch/powerpc/sysdev => drivers/memory}/fsl_ifc.c |8 ++-- drivers/mtd/nand/fsl_ifc_nand.c|2 +- .../include/asm => include/linux}/fsl_ifc.h|0 8 files changed, 16 insertions(+), 8 deletions(-) rename Documentation/devicetree/bindings/{powerpc => memory-controllers}/fsl/ifc.txt (100%) rename {arch/powerpc/sysdev => drivers/memory}/fsl_ifc.c (98%) rename {arch/powerpc/include/asm => include/linux}/fsl_ifc.h (100%) diff --git a/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt b/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt similarity index 100% rename from Documentation/devicetree/bindings/powerpc/fsl/ifc.txt rename to Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a5e5d2e..00edd29 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -734,10 +734,6 @@ config FSL_LBC controller.
Also contains some common code used by drivers for specific local bus peripherals. -config FSL_IFC - bool -depends on FSL_SOC - config FSL_GTM bool depends on PPC_83xx || QUICC_ENGINE || CPM2 diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index f67ac90..afbcc37 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_FSL_SOC) += fsl_soc.o fsl_mpic_err.o obj-$(CONFIG_FSL_PCI) += fsl_pci.o $(fsl-msi-obj-y) obj-$(CONFIG_FSL_PMC) += fsl_pmc.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o -obj-$(CONFIG_FSL_IFC) += fsl_ifc.o obj-$(CONFIG_FSL_GTM) += fsl_gtm.o obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o obj-$(CONFIG_SIMPLE_GPIO) += simple_gpio.o diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig index 29a11db..57721ed 100644 --- a/drivers/memory/Kconfig +++ b/drivers/memory/Kconfig @@ -50,4 +50,12 @@ config TEGRA30_MC analysis, especially for IOMMU/SMMU(System Memory Management Unit) module. +config FSL_IFC + bool "Freescale Integrated Flash Controller" + depends on FSL_SOC + help + This driver is for the Integrated Flash Controller(IFC) module + available in Freescale SoCs. This controller allows to handle + devices such as NOR, NAND, FPGA and ASIC etc. 
+ endif diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile index 969d923..f2bf25c 100644 --- a/drivers/memory/Makefile +++ b/drivers/memory/Makefile @@ -6,6 +6,7 @@ ifeq ($(CONFIG_DDR),y) obj-$(CONFIG_OF) += of_memory.o endif obj-$(CONFIG_TI_EMIF) += emif.o +obj-$(CONFIG_FSL_IFC) += fsl_ifc.o obj-$(CONFIG_MVEBU_DEVBUS) += mvebu-devbus.o obj-$(CONFIG_TEGRA20_MC) += tegra20-mc.o obj-$(CONFIG_TEGRA30_MC) += tegra30-mc.o diff --git a/arch/powerpc/sysdev/fsl_ifc.c b/drivers/memory/fsl_ifc.c similarity index 98% rename from arch/powerpc/sysdev/fsl_ifc.c rename to drivers/memory/fsl_ifc.c index fbc885b..3d5d792 100644 --- a/arch/powerpc/sysdev/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -29,8 +29,8 @@ #include #include #include +#include #include -#include struct fsl_ifc_ctrl *fsl_ifc_ctrl_dev; EXPORT_SYMBOL(fsl_ifc_ctrl_dev); @@ -298,7 +298,11 @@ static struct platform_driver fsl_ifc_ctrl_driver = { .remove = fsl_ifc_ctrl_remove, }; -module_platform_driver(fsl_ifc_ctrl_driver); +static int __init fsl_ifc_init(void) +{ + return platform_driver_register(&fsl_ifc_ctrl_driver); +} +subsys_initcall(fsl_ifc_init); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Freescale Semiconductor"); diff --git a/drivers/mtd/nan
[PATCH 2/2][v8] powerpc/config: Enable memory driver
As the Freescale IFC controller driver has been moved to drivers/memory, enable the memory driver in the powerpc configs. Signed-off-by: Prabhakar Kushwaha --- Based upon git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git Branch next Changes for v2: Sending as it is Changes for v3: Sending as it is Changes for v4: Rebased to git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git changes for v5: - Rebased to branch next of git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git Changes for v6: Sending as it is Changes for v7: Sending as it is Changes for v8: Sending as it is arch/powerpc/configs/corenet32_smp_defconfig |1 + arch/powerpc/configs/corenet64_smp_defconfig |1 + arch/powerpc/configs/mpc85xx_defconfig |1 + arch/powerpc/configs/mpc85xx_smp_defconfig |1 + 4 files changed, 4 insertions(+) diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index bbd794d..087d437 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -142,6 +142,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=y CONFIG_STAGING=y +CONFIG_MEMORY=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_EXT2_FS=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 63508dd..25b03f8 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -129,6 +129,7 @@ CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index 83d3550..cba638c 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -216,6 +216,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1307=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is
not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 4b68629..e315b8a 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -217,6 +217,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1307=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev