[PATCH] powerpc: remove the ppc44x ocm.c file
The on chip memory allocator is entirely unused in the kernel tree. Signed-off-by: Christoph Hellwig --- arch/powerpc/configs/ppc40x_defconfig | 1 - arch/powerpc/include/asm/ppc4xx_ocm.h | 31 -- arch/powerpc/platforms/44x/Kconfig| 8 - arch/powerpc/platforms/4xx/Makefile | 1 - arch/powerpc/platforms/4xx/ocm.c | 390 -- 5 files changed, 431 deletions(-) delete mode 100644 arch/powerpc/include/asm/ppc4xx_ocm.h delete mode 100644 arch/powerpc/platforms/4xx/ocm.c diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 8f136b52198b..a5f683aed328 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -84,4 +84,3 @@ CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y -CONFIG_PPC4xx_OCM=y diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h deleted file mode 100644 index fc4db6dcde84.. --- a/arch/powerpc/include/asm/ppc4xx_ocm.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * PowerPC 4xx OCM memory allocation support - * - * (C) Copyright 2009, Applied Micro Circuits Corporation - * Victor Gallardo (vgalla...@amcc.com) - * - * See file CREDITS for list of people who contributed to this - * project. 
- */ - -#ifndef __ASM_POWERPC_PPC4XX_OCM_H__ -#define __ASM_POWERPC_PPC4XX_OCM_H__ - -#define PPC4XX_OCM_NON_CACHED 0 -#define PPC4XX_OCM_CACHED 1 - -#if defined(CONFIG_PPC4xx_OCM) - -void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align, - int flags, const char *owner); -void ppc4xx_ocm_free(const void *virt); - -#else - -#define ppc4xx_ocm_alloc(phys, size, align, flags, owner) NULL -#define ppc4xx_ocm_free(addr) ((void)0) - -#endif /* CONFIG_PPC4xx_OCM */ - -#endif /* __ASM_POWERPC_PPC4XX_OCM_H__ */ diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index b369ed4e3675..25ebe634a661 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -272,14 +272,6 @@ config PPC4xx_GPIO help Enable gpiolib support for ppc440 based boards -config PPC4xx_OCM - bool "PPC4xx On Chip Memory (OCM) support" - depends on 4xx - select PPC_LIB_RHEAP - help - Enable OCM support for PowerPC 4xx platforms with on chip memory, - OCM provides the fast place for memory access to improve performance. - # 44x specific CPU modules, selected based on the board above. config 440EP bool diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile index f5ae27ca131b..d009d2e0b9e8 100644 --- a/arch/powerpc/platforms/4xx/Makefile +++ b/arch/powerpc/platforms/4xx/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += uic.o machine_check.o -obj-$(CONFIG_PPC4xx_OCM) += ocm.o obj-$(CONFIG_4xx_SOC) += soc.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c deleted file mode 100644 index ba3257406ced.. 
--- a/arch/powerpc/platforms/4xx/ocm.c +++ /dev/null @@ -1,390 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * PowerPC 4xx OCM memory allocation support - * - * (C) Copyright 2009, Applied Micro Circuits Corporation - * Victor Gallardo (vgalla...@amcc.com) - * - * See file CREDITS for list of people who contributed to this - * project. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define OCM_DISABLED 0 -#define OCM_ENABLED1 - -struct ocm_block { - struct list_headlist; - void __iomem*addr; - int size; - const char *owner; -}; - -/* non-cached or cached region */ -struct ocm_region { - phys_addr_t phys; - void __iomem*virt; - - int memtotal; - int memfree; - - rh_info_t *rh; - struct list_headlist; -}; - -struct ocm_info { - int index; - int status; - int ready; - - phys_addr_t phys; - - int alignment; - int memtotal; - int cache_size; - - struct ocm_region nc; /* non-cached region */ - struct ocm_region c; /* cached region */ -}; - -static struct ocm_info *ocm_nodes; -static int ocm_count; - -static struct ocm_info *ocm_get_node(unsigned int index) -{ - if (index
Re: [PATCH v4 11/25] powernv/fadump: register kernel metadata address with opal
On 13/08/19 4:11 PM, Mahesh J Salgaonkar wrote: > On 2019-07-16 17:03:15 Tue, Hari Bathini wrote: >> OPAL allows registering address with it in the first kernel and >> retrieving it after MPIPL. Setup kernel metadata and register its >> address with OPAL to use it for processing the crash dump. >> >> Signed-off-by: Hari Bathini >> --- >> arch/powerpc/kernel/fadump-common.h |4 + >> arch/powerpc/kernel/fadump.c | 65 ++- >> arch/powerpc/platforms/powernv/opal-fadump.c | 73 >> ++ >> arch/powerpc/platforms/powernv/opal-fadump.h | 37 + >> arch/powerpc/platforms/pseries/rtas-fadump.c | 32 +-- >> 5 files changed, 177 insertions(+), 34 deletions(-) >> create mode 100644 arch/powerpc/platforms/powernv/opal-fadump.h >> > [...] >> @@ -346,30 +349,42 @@ int __init fadump_reserve_mem(void) >> * use memblock_find_in_range() here since it doesn't allocate >> * from bottom to top. >> */ >> -for (base = fw_dump.boot_memory_size; >> - base <= (memory_boundary - size); >> - base += size) { >> +while (base <= (memory_boundary - size)) { >> if (memblock_is_region_memory(base, size) && >> !memblock_is_region_reserved(base, size)) >> break; >> + >> +base += size; >> } >> -if ((base > (memory_boundary - size)) || >> -memblock_reserve(base, size)) { >> + >> +if (base > (memory_boundary - size)) { >> +pr_err("Failed to find memory chunk for reservation\n"); >> +goto error_out; >> +} >> +fw_dump.reserve_dump_area_start = base; >> + >> +/* >> + * Calculate the kernel metadata address and register it with >> + * f/w if the platform supports. >> + */ >> +if (fw_dump.ops->setup_kernel_metadata(_dump) < 0) >> +goto error_out; > > I see setup_kernel_metadata() registers the metadata address with opal without > having any minimum data initialized in it. Secondaly, why can't this wait > until> registration ? I think we should defer this until fadump registration. If setting up metadata address fails (it should ideally not fail, but..), everything else is useless. 
So, we might as well try that early and fall back to KDump in case of an error.. > What if kernel crashes before metadata area is initialized ? registered_regions would be '0'. So, it is treated as fadump is not registered case. Let me initialize metadata explicitly before registering the address with f/w to avoid any assumption... > >> + >> +if (memblock_reserve(base, size)) { >> pr_err("Failed to reserve memory\n"); >> -return 0; >> +goto error_out; >> } > [...] >> - >> static struct fadump_ops rtas_fadump_ops = { >> -.init_fadump_mem_struct = rtas_fadump_init_mem_struct, >> -.register_fadump= rtas_fadump_register_fadump, >> -.unregister_fadump = rtas_fadump_unregister_fadump, >> -.invalidate_fadump = rtas_fadump_invalidate_fadump, >> -.process_fadump = rtas_fadump_process_fadump, >> -.fadump_region_show = rtas_fadump_region_show, >> -.fadump_trigger = rtas_fadump_trigger, >> +.init_fadump_mem_struct = rtas_fadump_init_mem_struct, >> +.get_kernel_metadata_size = rtas_fadump_get_kernel_metadata_size, >> +.setup_kernel_metadata = rtas_fadump_setup_kernel_metadata, >> +.register_fadump= rtas_fadump_register_fadump, >> +.unregister_fadump = rtas_fadump_unregister_fadump, >> +.invalidate_fadump = rtas_fadump_invalidate_fadump, >> +.process_fadump = rtas_fadump_process_fadump, >> +.fadump_region_show = rtas_fadump_region_show, >> +.fadump_trigger = rtas_fadump_trigger, > > Can you make the tab space changes in your previous patch where these > were initially introduced ? So that this patch can only show new members > that are added. done. Thanks Hari
[PATCH v1 3/4] arm64: dts: ls1028a: fix little-big endian issue for dcfg
The dcfg block uses little endian, so with this change the SoC register values will be read correctly Signed-off-by: Yinbo Zhu --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b0d4f8916ede..5538e8e354b2 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -162,7 +162,7 @@ dcfg: syscon@1e0 { compatible = "fsl,ls1028a-dcfg", "syscon"; reg = <0x0 0x1e0 0x0 0x1>; - big-endian; + little-endian; }; scfg: syscon@1fc { -- 2.17.1
[PATCH 1/3] powerpc/mce: Add MCE notification chain
This is needed to report bad blocks for persistent memory. Signed-off-by: Santosh Sivaraj --- arch/powerpc/include/asm/mce.h | 3 +++ arch/powerpc/kernel/mce.c | 15 +++ 2 files changed, 18 insertions(+) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index e1931c8c2743..b1c6363f924c 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -212,6 +212,9 @@ extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr); +int mce_register_notifier(struct notifier_block *nb); +int mce_unregister_notifier(struct notifier_block *nb); + #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index ec4b3e1087be..a78210ca6cd9 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -47,6 +47,20 @@ static struct irq_work mce_ue_event_irq_work = { DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); + +int mce_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_register_notifier); + +int mce_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_unregister_notifier); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -263,6 +277,7 @@ static void machine_process_ue_event(struct work_struct *work) while (__this_cpu_read(mce_ue_count) > 0) { index = __this_cpu_read(mce_ue_count) - 1; evt = this_cpu_ptr(_ue_event_queue[index]); + blocking_notifier_call_chain(_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* * This should probably queued elsewhere, but -- 2.21.0
[PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
Subscribe to the MCE notification and add the physical address which generated a memory error to nvdimm bad range. Signed-off-by: Santosh Sivaraj --- arch/powerpc/platforms/pseries/papr_scm.c | 65 +++ 1 file changed, 65 insertions(+) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index a5ac371a3f06..4d25c98a9835 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include @@ -39,8 +41,12 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + struct list_head list; }; +LIST_HEAD(papr_nd_regions); +DEFINE_MUTEX(papr_ndr_lock); + static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dev_info(dev, "Region registered with target node %d and online node %d", target_nid, online_nid); + mutex_lock(_ndr_lock); + list_add_tail(>list, _nd_regions); + mutex_unlock(_ndr_lock); + return 0; err: nvdimm_bus_unregister(p->bus); @@ -371,6 +381,60 @@ err: nvdimm_bus_unregister(p->bus); return -ENXIO; } +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, +void *data) +{ + struct machine_check_event *evt = data; + struct papr_scm_priv *p; + u64 phys_addr; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(_nd_regions)) + return NOTIFY_DONE; + + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + mutex_lock(_ndr_lock); + list_for_each_entry(p, _nd_regions, list) { + struct resource res = p->res; + u64 aligned_addr; + + if (res.start > phys_addr) + continue; + + if (res.end < phys_addr) + continue; + + aligned_addr = 
ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", +aligned_addr, aligned_addr + L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(p->bus, + aligned_addr, L1_CACHE_BYTES)) + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + nvdimm_region_notify(p->region, +NVDIMM_REVALIDATE_POISON); + + break; + } + mutex_unlock(_ndr_lock); + + return NOTIFY_OK; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int papr_scm_probe(struct platform_device *pdev) { struct device_node *dn = pdev->dev.of_node; @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev) goto err2; platform_set_drvdata(pdev, p); + mce_register_notifier(_ue_nb); return 0; -- 2.21.0
Re: [PATCH v2 1/3] KVM: PPC: Book3S HV: Fix race in re-enabling XIVE escalation interrupts
On Wed, Aug 14, 2019 at 02:46:38PM +1000, Jordan Niethe wrote: > On Tue, 2019-08-13 at 20:03 +1000, Paul Mackerras wrote: [snip] > > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > index 337e644..2e7e788 100644 > > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > @@ -2831,29 +2831,39 @@ kvm_cede_prodded: > > kvm_cede_exit: > > ld r9, HSTATE_KVM_VCPU(r13) > > #ifdef CONFIG_KVM_XICS > > - /* Abort if we still have a pending escalation */ > > + /* are we using XIVE with single escalation? */ > > + ld r10, VCPU_XIVE_ESC_VADDR(r9) > > + cmpdi r10, 0 > > + beq 3f > > + li r6, XIVE_ESB_SET_PQ_00 > Would it make sense to put the above instruction down into the 4: label > instead? If we do not branch to 4, r6 is overwriten anyway. Right. > I think that would save a load when we do not branch to 4. Also it Well, li is a load immediate rather than a load ("load" would normally imply a load from memory). Load-immediate instructions are essentially free since they can easily be executed in parallel with other instructions and execute in a single cycle. > would mean that you could use r5 everywhere instead of changing it to > r6? Yes. If I have to respin the patch for other reasons then I will rearrange things as you suggest. I don't think it's worth respinning just for this change -- it won't reduce the total number of instructions, and I strongly doubt there would be any measurable performance difference. > > + /* > > +* If we still have a pending escalation, abort the cede, > > +* and we must set PQ to 10 rather than 00 so that we don't > > +* potentially end up with two entries for the escalation > > +* interrupt in the XIVE interrupt queue. In that case > > +* we also don't want to set xive_esc_on to 1 here in > > +* case we race with xive_esc_irq(). 
> > +*/ > > lbz r5, VCPU_XIVE_ESC_ON(r9) > > cmpwi r5, 0 > > - beq 1f > > + beq 4f > > li r0, 0 > > stb r0, VCPU_CEDED(r9) > > -1: /* Enable XIVE escalation */ > > - li r5, XIVE_ESB_SET_PQ_00 > > + li r6, XIVE_ESB_SET_PQ_10 > > + b 5f > > +4: li r0, 1 > > + stb r0, VCPU_XIVE_ESC_ON(r9) > > + /* make sure store to xive_esc_on is seen before xive_esc_irq > > runs */ > > + sync > > +5: /* Enable XIVE escalation */ > > mfmsr r0 > > andi. r0, r0, MSR_DR /* in real mode? */ > > beq 1f > > - ld r10, VCPU_XIVE_ESC_VADDR(r9) > > - cmpdi r10, 0 > > - beq 3f > > - ldx r0, r10, r5 > > + ldx r0, r10, r6 > > b 2f > > 1: ld r10, VCPU_XIVE_ESC_RADDR(r9) > > - cmpdi r10, 0 > > - beq 3f > > - ldcix r0, r10, r5 > > + ldcix r0, r10, r6 > > 2: sync > > - li r0, 1 > > - stb r0, VCPU_XIVE_ESC_ON(r9) > > #endif /* CONFIG_KVM_XICS */ > > 3: b guest_exit_cont > > Paul.
Re: [PATCH v1 10/10] powerpc/mm: refactor ioremap_range() and use ioremap_page_range()
Le 14/08/2019 à 07:49, Christoph Hellwig a écrit : Somehow this series is missing a cover letter. While you are touching all this "fun" can you also look into killing __ioremap? It seems to be a weird non-standard version of ioremap_prot (probably predating ioremap_prot) that is missing a few lines of code setting attributes that might not even be applicable for the two drivers calling it. ocm_init_node() [arch/powerpc/platforms/4xx/ocm.c] calls __ioremap() with _PAGE_EXEC set while ioremap_prot() clears _PAGE_EXEC Christophe
Re: [PATCH v4 05/25] pseries/fadump: introduce callbacks for platform specific operations
On 12/08/19 3:12 PM, Mahesh J Salgaonkar wrote: > On 2019-07-16 17:02:30 Tue, Hari Bathini wrote: >> Introduce callback functions for platform specific operations like >> register, unregister, invalidate & such. Also, define place-holders >> for the same on pSeries platform. >> >> Signed-off-by: Hari Bathini >> --- >> arch/powerpc/kernel/fadump-common.h | 33 ++ >> arch/powerpc/kernel/fadump.c | 47 + >> arch/powerpc/platforms/pseries/Makefile |1 >> arch/powerpc/platforms/pseries/rtas-fadump.c | 134 >> ++ >> 4 files changed, 171 insertions(+), 44 deletions(-) >> create mode 100644 arch/powerpc/platforms/pseries/rtas-fadump.c >> >> diff --git a/arch/powerpc/kernel/fadump-common.h >> b/arch/powerpc/kernel/fadump-common.h >> index 09d6161..020d582 100644 >> --- a/arch/powerpc/kernel/fadump-common.h >> +++ b/arch/powerpc/kernel/fadump-common.h >> @@ -50,6 +50,12 @@ >> #define FADUMP_UNREGISTER 2 >> #define FADUMP_INVALIDATE 3 >> >> +/* Firmware-Assited Dump platforms */ >> +enum fadump_platform_type { >> +FADUMP_PLATFORM_UNKNOWN = 0, >> +FADUMP_PLATFORM_PSERIES, >> +}; > > Do we really need these ? Aren't we hiding all platform specific things > under fadump_ops functions ? I see that these values are used only for > assignements and not making any decision in code flow. Am I missing > anything here ? True. This isn't really useful. will drop it.. Thanks Hari
[PATCH v1 2/4] soc: fsl: guts: Add definition for LS1028A
Adding compatible string "ls1028a-dcfg" to initialize guts driver for ls1028 and SoC die attribute definition for LS1028A Signed-off-by: Yinbo Zhu --- drivers/soc/fsl/guts.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c index 1ef8068c8dd3..34810f9bb2ee 100644 --- a/drivers/soc/fsl/guts.c +++ b/drivers/soc/fsl/guts.c @@ -102,6 +102,11 @@ static const struct fsl_soc_die_attr fsl_soc_die[] = { .svr = 0x8736, .mask = 0xff3f, }, + /* Die: LS1028A, SoC: LS1028A */ + { .die = "LS1028A", + .svr = 0x870b, + .mask = 0xff3f, + }, { }, }; @@ -224,6 +229,7 @@ static const struct of_device_id fsl_guts_of_match[] = { { .compatible = "fsl,ls1012a-dcfg", }, { .compatible = "fsl,ls1046a-dcfg", }, { .compatible = "fsl,lx2160a-dcfg", }, + { .compatible = "fsl,ls1028a-dcfg", }, {} }; MODULE_DEVICE_TABLE(of, fsl_guts_of_match); -- 2.17.1
Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe
On 8/12/19 2:52 PM, Santosh Sivaraj wrote: > If we take a UE on one of the instructions with a fixup entry, set nip > to continue execution at the fixup entry. Stop processing the event > further or print it. > > Co-developed-by: Reza Arbab > Signed-off-by: Reza Arbab > Cc: Mahesh Salgaonkar > Signed-off-by: Santosh Sivaraj Looks good to me. Reviewed-by: Mahesh Salgaonkar Thanks, -Mahesh. > --- > arch/powerpc/include/asm/mce.h | 4 +++- > arch/powerpc/kernel/mce.c | 16 > arch/powerpc/kernel/mce_power.c | 15 +-- > 3 files changed, 32 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h > index f3a6036b6bc0..e1931c8c2743 100644 > --- a/arch/powerpc/include/asm/mce.h > +++ b/arch/powerpc/include/asm/mce.h > @@ -122,7 +122,8 @@ struct machine_check_event { > enum MCE_UeErrorType ue_error_type:8; > u8 effective_address_provided; > u8 physical_address_provided; > - u8 reserved_1[5]; > + u8 ignore_event; > + u8 reserved_1[4]; > u64 effective_address; > u64 physical_address; > u8 reserved_2[8]; > @@ -193,6 +194,7 @@ struct mce_error_info { > enum MCE_Initiator initiator:8; > enum MCE_ErrorClass error_class:8; > boolsync_error; > + boolignore_event; > }; > > #define MAX_MC_EVT 100 > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index a3b122a685a5..ec4b3e1087be 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, > if (phys_addr != ULONG_MAX) { > mce->u.ue_error.physical_address_provided = true; > mce->u.ue_error.physical_address = phys_addr; > + mce->u.ue_error.ignore_event = mce_err->ignore_event; > machine_check_ue_event(mce); > } > } > @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct > *work) > /* >* This should probably queued elsewhere, but >* oh! 
well > + * > + * Don't report this machine check because the caller has a > + * asked us to ignore the event, it has a fixup handler which > + * will do the appropriate error handling and reporting. >*/ > if (evt->error_type == MCE_ERROR_TYPE_UE) { > + if (evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_ue_count); > + continue; > + } > + > if (evt->u.ue_error.physical_address_provided) { > unsigned long pfn; > > @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct > irq_work *work) > while (__this_cpu_read(mce_queue_count) > 0) { > index = __this_cpu_read(mce_queue_count) - 1; > evt = this_cpu_ptr(_event_queue[index]); > + > + if (evt->error_type == MCE_ERROR_TYPE_UE && > + evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_queue_count); > + continue; > + } > machine_check_print_event_info(evt, false, false); > __this_cpu_dec(mce_queue_count); > } > diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c > index e74816f045f8..1dd87f6f5186 100644 > --- a/arch/powerpc/kernel/mce_power.c > +++ b/arch/powerpc/kernel/mce_power.c > @@ -11,6 +11,7 @@ > > #include > #include > +#include > #include > #include > #include > @@ -18,6 +19,7 @@ > #include > #include > #include > +#include > > /* > * Convert an address related to an mm to a physical address. > @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, > return 0; > } > > -static long mce_handle_ue_error(struct pt_regs *regs) > +static long mce_handle_ue_error(struct pt_regs *regs, > + struct mce_error_info *mce_err) > { > long handled = 0; > + const struct exception_table_entry *entry; > + > + entry = search_kernel_exception_table(regs->nip); > + if (entry) { > + mce_err->ignore_event = true; > + regs->nip = extable_fixup(entry); > + return 1; > + } > > /* >* On specific SCOM read via MMIO we may get a machine check > @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs, > _addr); > > if (!handled &&
[PATCH 0/3] Add bad pmem bad blocks to bad range
This series, which should be based on top of the still un-merged "powerpc: implement machine check safe memcpy" series, adds support to add the bad blocks which generated an MCE to the NVDIMM bad blocks. The next access of the same memory will be blocked by the NVDIMM layer itself. Santosh Sivaraj (3): powerpc/mce: Add MCE notification chain of_pmem: Add memory ranges which took a mce to bad range papr/scm: Add bad memory ranges to nvdimm bad ranges arch/powerpc/include/asm/mce.h| 3 + arch/powerpc/kernel/mce.c | 15 +++ arch/powerpc/platforms/pseries/papr_scm.c | 65 drivers/nvdimm/of_pmem.c | 122 ++ 4 files changed, 186 insertions(+), 19 deletions(-) -- 2.21.0
Re: [PATCH v1 05/10] powerpc/mm: Do early ioremaps from top to bottom on PPC64 too.
On Wed, Aug 14, 2019 at 08:10:59AM +0200, Christophe Leroy wrote: > > Note that while a few other architectures have a magic hack like powerpc > > to make ioremap work before vmalloc, the normal practice would be > > to explicitly use early_ioremap. I guess your change is fine for now, > > but it might make sense convert powerpc to the explicit early_ioremap > > scheme as well. > > > > I've been looking into early_ioremap(), but IIUC early_ioremap() is for > ephemeral mappings only, it expects all early mappings to be gone at the end > of init. Yes. > PPC installs definitive early mappings (for instance for PCI). How does that > have to be handled ? Good question, and no good answer. I've just been looking at a generic ioremap for simple architectures, and been finding all kinds of crap and inconsistencies, and this is one of the things I noticed.
Re: [PATCH v1 10/10] powerpc/mm: refactor ioremap_range() and use ioremap_page_range()
On Wed, Aug 14, 2019 at 08:23:54AM +0200, Christophe Leroy wrote: > Le 14/08/2019 à 07:49, Christoph Hellwig a écrit : > > Somehow this series is missing a cover letter. > > > > While you are touching all this "fun" can you also look into killing > > __ioremap? It seems to be a weird non-standard version of ioremap_prot > > (probably predating ioremap_prot) that is missing a few lines of code > > setting attributes that might not even be applicable for the two drivers > > calling it. > > > > ocm_init_node() [arch/powerpc/platforms/4xx/ocm.c] calls __ioremap() with > _PAGE_EXEC set while ioremap_prot() clears _PAGE_EXEC Indeed. But I don't see anything marking this intentional. Then again the driver is entirely unused, so we might as well kill it off now.
Re: [PATCH v4 1/2] powerpc/time: Only set CONFIG_ARCH_HAS_SCALED_CPUTIME on PPC64
Hi Nick, Le 07/06/2018 à 03:43, Nicholas Piggin a écrit : On Wed, 6 Jun 2018 14:21:08 + (UTC) Christophe Leroy wrote: scaled cputime is only meaningful when the processor has SPURR and/or PURR, which means only on PPC64. [...] I wonder if we could make this depend on PPC_PSERIES or even PPC_SPLPAR as well? (That would be for a later patch) Can we go further on this ? Do we know exactly which configurations support scaled cputime, in extenso have SPRN_SPURR and/or SPRN_PURR ? Ref https://github.com/linuxppc/issues/issues/171 Christophe
Re: [PATCH v4 06/25] pseries/fadump: define register/un-register callback functions
On 12/08/19 9:31 PM, Mahesh J Salgaonkar wrote: > On 2019-07-16 17:02:38 Tue, Hari Bathini wrote: >> Make RTAS calls to register and un-register for FADump. Also, update >> how fadump_region contents are diplayed to provide more information. >> >> Signed-off-by: Hari Bathini >> --- >> arch/powerpc/kernel/fadump-common.h |2 >> arch/powerpc/kernel/fadump.c | 164 >> ++ >> arch/powerpc/platforms/pseries/rtas-fadump.c | 163 >> +- >> 3 files changed, 176 insertions(+), 153 deletions(-) >> > [...] >> static int rtas_fadump_register_fadump(struct fw_dump *fadump_conf) >> { >> -return -EIO; >> +int rc, err = -EIO; >> +unsigned int wait_time; >> + >> +/* TODO: Add upper time limit for the delay */ >> +do { >> +rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, >> +NULL, FADUMP_REGISTER, , >> +sizeof(struct rtas_fadump_mem_struct)); >> + >> +wait_time = rtas_busy_delay_time(rc); >> +if (wait_time) >> +mdelay(wait_time); >> + >> +} while (wait_time); >> + >> +switch (rc) { >> +case 0: >> +pr_info("Registration is successful!\n"); >> +fadump_conf->dump_registered = 1; >> +err = 0; >> +break; >> +case -1: >> +pr_err("Failed to register. Hardware Error(%d).\n", rc); >> +break; >> +case -3: >> +if (!is_fadump_boot_mem_contiguous(fadump_conf)) >> +pr_err("Can't hot-remove boot memory area.\n"); >> +else if (!is_fadump_reserved_mem_contiguous(fadump_conf)) >> +pr_err("Can't hot-remove reserved memory area.\n"); > > Any reason why we changed the error messages here ? it gives an impression as > if fadump reservation tried to hot remove memory and failed. Yeah, the message is indeed a bit confusing. Will stick with old message.. Thanks Hari
Re: [PATCH v1 05/10] powerpc/mm: Do early ioremaps from top to bottom on PPC64 too.
Le 14/08/2019 à 07:55, Christoph Hellwig a écrit : On Tue, Aug 13, 2019 at 08:11:38PM +, Christophe Leroy wrote: Until vmalloc system is up and running, ioremap basically allocates addresses at the border of the IOREMAP area. Note that while a few other architectures have a magic hack like powerpc to make ioremap work before vmalloc, the normal practice would be to explicitly use early_ioremap. I guess your change is fine for now, but it might make sense convert powerpc to the explicit early_ioremap scheme as well. I've been looking into early_ioremap(), but IIUC early_ioremap() is for ephemeral mappings only, it expects all early mappings to be gone at the end of init. PPC installs definitive early mappings (for instance for PCI). How does that have to be handled ? Christophe
Re: [PATCH] powerpc: remove the ppc44x ocm.c file
Le 14/08/2019 à 08:32, Christoph Hellwig a écrit : The on chip memory allocator is entirely unused in the kernel tree. Signed-off-by: Christoph Hellwig Since this driver was added in Linux 3.9, functions ppc4xx_ocm_alloc() and ppc4xx_ocm_free() have never been used in any driver, and are not even exported to modules. Acked-by: Christophe Leroy --- arch/powerpc/configs/ppc40x_defconfig | 1 - arch/powerpc/include/asm/ppc4xx_ocm.h | 31 -- arch/powerpc/platforms/44x/Kconfig| 8 - arch/powerpc/platforms/4xx/Makefile | 1 - arch/powerpc/platforms/4xx/ocm.c | 390 -- 5 files changed, 431 deletions(-) delete mode 100644 arch/powerpc/include/asm/ppc4xx_ocm.h delete mode 100644 arch/powerpc/platforms/4xx/ocm.c diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 8f136b52198b..a5f683aed328 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -84,4 +84,3 @@ CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y -CONFIG_PPC4xx_OCM=y diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h deleted file mode 100644 index fc4db6dcde84.. --- a/arch/powerpc/include/asm/ppc4xx_ocm.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * PowerPC 4xx OCM memory allocation support - * - * (C) Copyright 2009, Applied Micro Circuits Corporation - * Victor Gallardo (vgalla...@amcc.com) - * - * See file CREDITS for list of people who contributed to this - * project. 
- */ - -#ifndef __ASM_POWERPC_PPC4XX_OCM_H__ -#define __ASM_POWERPC_PPC4XX_OCM_H__ - -#define PPC4XX_OCM_NON_CACHED 0 -#define PPC4XX_OCM_CACHED 1 - -#if defined(CONFIG_PPC4xx_OCM) - -void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align, - int flags, const char *owner); -void ppc4xx_ocm_free(const void *virt); - -#else - -#define ppc4xx_ocm_alloc(phys, size, align, flags, owner) NULL -#define ppc4xx_ocm_free(addr) ((void)0) - -#endif /* CONFIG_PPC4xx_OCM */ - -#endif /* __ASM_POWERPC_PPC4XX_OCM_H__ */ diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index b369ed4e3675..25ebe634a661 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -272,14 +272,6 @@ config PPC4xx_GPIO help Enable gpiolib support for ppc440 based boards -config PPC4xx_OCM - bool "PPC4xx On Chip Memory (OCM) support" - depends on 4xx - select PPC_LIB_RHEAP - help - Enable OCM support for PowerPC 4xx platforms with on chip memory, - OCM provides the fast place for memory access to improve performance. - # 44x specific CPU modules, selected based on the board above. config 440EP bool diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile index f5ae27ca131b..d009d2e0b9e8 100644 --- a/arch/powerpc/platforms/4xx/Makefile +++ b/arch/powerpc/platforms/4xx/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += uic.o machine_check.o -obj-$(CONFIG_PPC4xx_OCM) += ocm.o obj-$(CONFIG_4xx_SOC) += soc.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c deleted file mode 100644 index ba3257406ced.. 
--- a/arch/powerpc/platforms/4xx/ocm.c +++ /dev/null @@ -1,390 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * PowerPC 4xx OCM memory allocation support - * - * (C) Copyright 2009, Applied Micro Circuits Corporation - * Victor Gallardo (vgalla...@amcc.com) - * - * See file CREDITS for list of people who contributed to this - * project. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define OCM_DISABLED 0 -#define OCM_ENABLED1 - -struct ocm_block { - struct list_headlist; - void __iomem*addr; - int size; - const char *owner; -}; - -/* non-cached or cached region */ -struct ocm_region { - phys_addr_t phys; - void __iomem*virt; - - int memtotal; - int memfree; - - rh_info_t *rh; - struct list_headlist; -}; - -struct ocm_info { - int index; - int status; - int ready; - - phys_addr_t phys; - - int alignment; - int memtotal; - int
Re: [PATCH v4 12/25] powernv/fadump: define register/un-register callback functions
On 13/08/19 8:04 PM, Mahesh J Salgaonkar wrote: > On 2019-07-16 17:03:23 Tue, Hari Bathini wrote: >> Make OPAL calls to register and un-register with firmware for MPIPL. >> >> Signed-off-by: Hari Bathini >> --- >> arch/powerpc/platforms/powernv/opal-fadump.c | 71 >> +- >> 1 file changed, 69 insertions(+), 2 deletions(-) >> > [...] >> @@ -88,12 +104,63 @@ static int opal_fadump_setup_kernel_metadata(struct >> fw_dump *fadump_conf) >> >> static int opal_fadump_register_fadump(struct fw_dump *fadump_conf) >> { >> -return -EIO; >> +int i, err = -EIO; >> +s64 rc; >> + >> +for (i = 0; i < opal_fdm->region_cnt; i++) { >> +rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, >> + opal_fdm->rgn[i].src, >> + opal_fdm->rgn[i].dest, >> + opal_fdm->rgn[i].size); >> +if (rc != OPAL_SUCCESS) > > You may want to remove ranges which has been added so far on error and reset > opal_fdm->registered_regions. Thanks for catching this, Mahesh. Will update.. > >> +break; >> + >> +opal_fdm->registered_regions++; >> +} >> + >> +switch (rc) { >> +case OPAL_SUCCESS: >> +pr_info("Registration is successful!\n"); >> +fadump_conf->dump_registered = 1; >> +err = 0; >> +break; >> +case OPAL_UNSUPPORTED: >> +pr_err("Support not available.\n"); >> +fadump_conf->fadump_supported = 0; >> +fadump_conf->fadump_enabled = 0; >> +break; >> +case OPAL_INTERNAL_ERROR: >> +pr_err("Failed to register. Hardware Error(%lld).\n", rc); >> +break; >> +case OPAL_PARAMETER: >> +pr_err("Failed to register. Parameter Error(%lld).\n", rc); >> +break; >> +case OPAL_PERMISSION: > > You may want to remove this check. With latest opal mpipl patches > opal_mpipl_update() no more returns OPAL_PERMISSION. > > Even if opal does, we can not say fadump already registered just by > looking at return status of single entry addition. Sure. Thanks Hari
[PATCH v1 4/4] mmc: sdhci-of-esdhc: add erratum A011334 support in ls1028a 1.0 SoC
This patch is to add erratum A011334 support in ls1028a 1.0 SoC Signed-off-by: Yinbo Zhu --- drivers/mmc/host/sdhci-of-esdhc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index b16f7d440f78..eb2b290447fc 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -1006,6 +1006,7 @@ static struct soc_device_attribute soc_incorrect_hostver[] = { static struct soc_device_attribute soc_fixup_sdhc_clkdivs[] = { { .family = "QorIQ LX2160A", .revision = "1.0", }, { .family = "QorIQ LX2160A", .revision = "2.0", }, + { .family = "QorIQ LS1028A", .revision = "1.0", }, { }, }; -- 2.17.1
Re: [PATCH] powerpc/32s: fix boot failure with DEBUG_PAGEALLOC without KASAN.
On Wed, Aug 14, 2019 at 05:28:35AM +, Christophe Leroy wrote: > When KASAN is selected, the definitive hash table has to be > set up later, but there is already an early temporary one. > > When KASAN is not selected, there is no early hash table, > so the setup of the definitive hash table cannot be delayed. > > Reported-by: Jonathan Neuschafer > Fixes: 72f208c6a8f7 ("powerpc/32s: move hash code patching out of > MMU_init_hw()") > Signed-off-by: Christophe Leroy > --- Thanks. This does fix the DEBUG_PAGEALLOC-without-KASAN case. Tested-by: Jonathan Neuschafer signature.asc Description: PGP signature
[PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range
Subscribe to the MCE notification and add the physical address which generated a memory error to nvdimm bad range. Signed-off-by: Santosh Sivaraj --- drivers/nvdimm/of_pmem.c | 122 +-- 1 file changed, 103 insertions(+), 19 deletions(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index a0c8dcfa0bf9..828dbfe44ca6 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -8,6 +8,9 @@ #include #include #include +#include +#include +#include static const struct attribute_group *region_attr_groups[] = { _region_attribute_group, @@ -25,11 +28,77 @@ struct of_pmem_private { struct nvdimm_bus *bus; }; +struct of_pmem_region { + struct of_pmem_private *priv; + struct nd_region_desc *region_desc; + struct nd_region *region; + struct list_head list; +}; + +LIST_HEAD(pmem_regions); +DEFINE_MUTEX(pmem_region_lock); + +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, +void *data) +{ + struct machine_check_event *evt = data; + struct of_pmem_region *pmem_region; + u64 phys_addr; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(_regions)) + return NOTIFY_DONE; + + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + mutex_lock(_region_lock); + list_for_each_entry(pmem_region, _regions, list) { + struct resource *res = pmem_region->region_desc->res; + u64 aligned_addr; + + if (res->start > phys_addr) + continue; + + if (res->end < phys_addr) + continue; + + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", +aligned_addr, aligned_addr + L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(pmem_region->priv->bus, +aligned_addr, L1_CACHE_BYTES)) + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n", + aligned_addr, aligned_addr + 
L1_CACHE_BYTES); + + nvdimm_region_notify(pmem_region->region, +NVDIMM_REVALIDATE_POISON); + + break; + } + mutex_unlock(_region_lock); + + return NOTIFY_OK; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int of_pmem_region_probe(struct platform_device *pdev) { struct of_pmem_private *priv; struct device_node *np; struct nvdimm_bus *bus; + struct of_pmem_region *pmem_region; + struct nd_region_desc *ndr_desc; bool is_volatile; int i; @@ -58,34 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev) is_volatile ? "volatile" : "non-volatile", np); for (i = 0; i < pdev->num_resources; i++) { - struct nd_region_desc ndr_desc; struct nd_region *region; - /* -* NB: libnvdimm copies the data from ndr_desc into it's own -* structures so passing a stack pointer is fine. -*/ - memset(_desc, 0, sizeof(ndr_desc)); - ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = dev_to_node(>dev); - ndr_desc.target_node = ndr_desc.numa_node; - ndr_desc.res = >resource[i]; - ndr_desc.of_node = np; - set_bit(ND_REGION_PAGEMAP, _desc.flags); + ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL); + if (!ndr_desc) { + nvdimm_bus_unregister(priv->bus); + kfree(priv); + return -ENOMEM; + } + + ndr_desc->attr_groups = region_attr_groups; + ndr_desc->numa_node = dev_to_node(>dev); + ndr_desc->target_node = ndr_desc->numa_node; + ndr_desc->res = >resource[i]; + ndr_desc->of_node = np; + set_bit(ND_REGION_PAGEMAP, _desc->flags); if (is_volatile) - region = nvdimm_volatile_region_create(bus, _desc); + region = nvdimm_volatile_region_create(bus, ndr_desc); else - region = nvdimm_pmem_region_create(bus, _desc); + region = nvdimm_pmem_region_create(bus, ndr_desc); if (!region) - dev_warn(>dev, "Unable
Re: [PATCH v4 13/25] powernv/fadump: support copying multiple kernel memory regions
On 13/08/19 8:33 PM, Mahesh J Salgaonkar wrote: > On 2019-07-16 17:03:30 Tue, Hari Bathini wrote: >> Firmware uses 32-bit field for region size while copying/backing-up >> memory during MPIPL. So, the maximum copy size for a region would >> be a page less than 4GB (aligned to pagesize) but FADump capture >> kernel usually needs more memory than that to be preserved to avoid >> running into out of memory errors. >> >> So, request firmware to copy multiple kernel memory regions instead >> of just one (which worked fine for pseries as 64-bit field was used >> for size there). With support to copy multiple kernel memory regions, >> also handle holes in the memory area to be preserved. Support as many >> as 128 kernel memory regions. This allows having an adequate FADump >> capture kernel size for different scenarios. > > Can you split this patch into 2 ? One for handling holes in boot memory > and other for handling 4Gb region size ? So that it will be easy to > review changes. Sure. Let me split and have the patch that handles holes in boot memory as the last patch in the series.
[PATCH v1 1/4] arm64: dts: ls1028a-rdb: enable emmc hs400 mode
This patch is to enable emmc hs400 mode for ls1028ardb Signed-off-by: Yinbo Zhu --- arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts index 8a725409e881..f1e46cc4cea1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts @@ -92,8 +92,10 @@ }; { - status = "okay"; mmc-hs200-1_8v; + mmc-hs400-1_8v; + bus-width = <8>; + status = "okay"; }; { -- 2.17.1
[PATCH 4/5] powerpc/ptdump: get out of note_prot_wx() when CONFIG_PPC_DEBUG_WX is not selected.
When CONFIG_PPC_DEBUG_WX is not selected, note_prot_wx() is useless. Get out of it early and unconditionally in that case, so that GCC can kick all the code out. Signed-off-by: Christophe Leroy --- arch/powerpc/mm/ptdump/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 9a2186c133e6..ab6a572202b4 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -177,7 +177,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr) static void note_prot_wx(struct pg_state *st, unsigned long addr) { - if (!st->check_wx) + if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) return; if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) -- 2.13.3
Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro
Le 14/08/2019 à 04:08, Paul Mackerras a écrit : On Tue, Aug 13, 2019 at 09:59:35AM +, Christophe Leroy wrote: [snip] +.macro __LOAD_REG_IMMEDIATE r, x + .if \x & ~0x != 0 + __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32 + rldicr \r, \r, 32, 31 + .if (\x) & 0x != 0 + oris \r, \r, (\x)@__AS_ATHIGH + .endif + .if (\x) & 0x != 0 + oris \r, \r, (\x)@l + .endif + .else + __LOAD_REG_IMMEDIATE_32 \r, \x + .endif +.endm Doesn't this force all negative constants, even small ones, to use the long sequence? For example, __LOAD_REG_IMMEDIATE r3, -1 will generate (as far as I can see): li r3, -1 rldicr r3, r3, 32, 31 orisr3, r3, 0x ori r3, r3, 0x which seems suboptimal. Ah yes, thanks. And it is also buggy when \x is over 0x8000 because lis is a signed ops I'll send v2 Christophe
[PATCH v2 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE()
LOAD_MSR_KERNEL() and LOAD_REG_IMMEDIATE() are doing the same thing in the same way. Drop LOAD_MSR_KERNEL() Signed-off-by: Christophe Leroy --- v2: no change arch/powerpc/kernel/entry_32.S | 18 +- arch/powerpc/kernel/head_32.h | 21 - 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 54fab22c9a43..972b05504a0a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -230,7 +230,7 @@ transfer_to_handler_cont: */ lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l - LOAD_MSR_KERNEL(r0, MSR_KERNEL) + LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r0 SYNC @@ -304,7 +304,7 @@ stack_ovf: addir1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD lis r9,StackOverflow@ha addir9,r9,StackOverflow@l - LOAD_MSR_KERNEL(r10,MSR_KERNEL) + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif @@ -324,7 +324,7 @@ trace_syscall_entry_irq_off: bl trace_hardirqs_on /* Now enable for real */ - LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) + LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) mtmsr r10 REST_GPR(0, r1) @@ -394,7 +394,7 @@ ret_from_syscall: #endif mr r6,r3 /* disable interrupts so current_thread_info()->flags can't change */ - LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ /* Note: We don't bother telling lockdep about it */ SYNC MTMSRD(r10) @@ -824,7 +824,7 @@ ret_from_except: * can't change between when we test it and when we return * from the interrupt. */ /* Note: We don't bother telling lockdep about it */ - LOAD_MSR_KERNEL(r10,MSR_KERNEL) + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) SYNC/* Some chip revs have problems here... */ MTMSRD(r10) /* disable interrupts */ @@ -991,7 +991,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) * can restart the exception exit path at the label * exc_exit_restart below. 
-- paulus */ - LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI) + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) SYNC MTMSRD(r10) /* clear the RI bit */ .globl exc_exit_restart @@ -1066,7 +1066,7 @@ exc_exit_restart_end: REST_NVGPRS(r1);\ lwz r3,_MSR(r1);\ andi. r3,r3,MSR_PR; \ - LOAD_MSR_KERNEL(r10,MSR_KERNEL);\ + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \ bne user_exc_return;\ lwz r0,GPR0(r1);\ lwz r2,GPR2(r1);\ @@ -1236,7 +1236,7 @@ recheck: * neither. Those disable/enable cycles used to peek at * TI_FLAGS aren't advertised. */ - LOAD_MSR_KERNEL(r10,MSR_KERNEL) + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) SYNC MTMSRD(r10) /* disable interrupts */ lwz r9,TI_FLAGS(r2) @@ -1329,7 +1329,7 @@ _GLOBAL(enter_rtas) lwz r4,RTASBASE(r4) mfmsr r9 stw r9,8(r1) - LOAD_MSR_KERNEL(r0,MSR_KERNEL) + LOAD_REG_IMMEDIATE(r0,MSR_KERNEL) SYNC/* disable interrupts so SRR0/1 */ MTMSRD(r0) /* don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 4a692553651f..8abc7783dbe5 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -5,19 +5,6 @@ #include /* for STACK_FRAME_REGS_MARKER */ /* - * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE. - */ -.macro __LOAD_MSR_KERNEL r, x -.if \x >= 0x8000 - lis \r, (\x)@h - ori \r, \r, (\x)@l -.else - li \r, (\x) -.endif -.endm -#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x - -/* * Exception entry code. This code runs with address translation * turned off, i.e. using physical addresses. * We assume sprg3 has the physical address of the current @@ -92,7 +79,7 @@ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #else - LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ + LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ MTMSRD(r10) /* (except
[PATCH v2] powerpc/32s: fix boot failure with DEBUG_PAGEALLOC without KASAN.
When KASAN is selected, the definitive hash table has to be set up later, but there is already an early temporary one. When KASAN is not selected, there is no early hash table, so the setup of the definitive hash table cannot be delayed. Reported-by: Jonathan Neuschafer Fixes: 72f208c6a8f7 ("powerpc/32s: move hash code patching out of MMU_init_hw()") Tested-by: Jonathan Neuschafer Signed-off-by: Christophe Leroy --- v2: Added a comment in MMU_init_hw() arch/powerpc/kernel/head_32.S | 2 ++ arch/powerpc/mm/book3s32/mmu.c | 9 + 2 files changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index f255e22184b4..c8b4f7ed318c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -897,9 +897,11 @@ start_here: bl machine_init bl __save_cpu_setup bl MMU_init +#ifdef CONFIG_KASAN BEGIN_MMU_FTR_SECTION bl MMU_init_hw_patch END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* * Go back to running unmapped so we can load up new values diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index e249fbf6b9c3..8d68f03bf5a4 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -358,6 +358,15 @@ void __init MMU_init_hw(void) hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; if (lg_n_hpteg > 16) hash_mb2 = 16 - LG_HPTEG_SIZE; + + /* +* When KASAN is selected, there is already an early temporary hash +* table and the switch to the final hash table is done later. +*/ + if (IS_ENABLED(CONFIG_KASAN)) + return; + + MMU_init_hw_patch(); } void __init MMU_init_hw_patch(void) -- 2.13.3
Re: [PATCH v5 2/7] powerpc/kernel: Add ucall_norets() ultravisor call handler
Claudio Carvalho writes: > diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S > new file mode 100644 > index ..de9133e45d21 > --- /dev/null > +++ b/arch/powerpc/kernel/ucall.S > @@ -0,0 +1,20 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Generic code to perform an ultravisor call. > + * > + * Copyright 2019, IBM Corporation. > + * > + */ > +#include > +#include > + > +_GLOBAL(ucall_norets) > +EXPORT_SYMBOL_GPL(ucall_norets) > + mfcrr0 > + stw r0,8(r1) > + > + sc 2 /* Invoke the ultravisor */ > + > + lwz r0,8(r1) > + mtcrf 0xff,r0 > + blr /* Return r3 = status */ Paulus points out that we shouldn't need to save CR here. Our caller will have already saved it if it needed to, and we don't use CR in this function so we don't need to save it. That's assuming the Ultravisor follows the hcall ABI in which CR2-4 are non-volatile (PAPR § 14.5.3). I know plpar_hcall_norets() does save CR, but it shouldn't need to, that seems to be historical. aka. no one knows why it does it but it always has. cheers
Re: [PATCH v4 14/25] powernv/fadump: process the crashdump by exporting it as /proc/vmcore
On 14/08/19 3:48 PM, Mahesh J Salgaonkar wrote: > On 2019-07-16 17:03:38 Tue, Hari Bathini wrote: >> Add support in the kernel to process the crash'ed kernel's memory >> preserved during MPIPL and export it as /proc/vmcore file for the >> userland scripts to filter and analyze it later. >> >> Signed-off-by: Hari Bathini >> --- >> arch/powerpc/platforms/powernv/opal-fadump.c | 190 >> ++ >> 1 file changed, 187 insertions(+), 3 deletions(-) >> > [...] >> +ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, ); >> +if ((ret != OPAL_SUCCESS) || !addr) { >> +pr_err("Failed to get Kernel metadata (%lld)\n", ret); >> +return 1; >> +} >> + >> +addr = be64_to_cpu(addr); >> +pr_debug("Kernel metadata addr: %llx\n", addr); >> + >> +opal_fdm_active = __va(addr); >> +r_opal_fdm_active = (void *)addr; >> +if (r_opal_fdm_active->version != OPAL_FADUMP_VERSION) { >> +pr_err("FADump active but version (%u) unsupported!\n", >> + r_opal_fdm_active->version); >> +return 1; >> +} >> + >> +/* Kernel regions not registered with f/w for MPIPL */ >> +if (r_opal_fdm_active->registered_regions == 0) { >> +opal_fdm_active = NULL; > > What about partial dump capture scenario ? What if opal crashes while > kernel was in middle of registering ranges ? We may have partial dump > captured which won't be useful. > e,g. If we have total of 4 ranges to be registered and opal crashes > after successful registration of only 2 ranges with 2 pending, we will get a > partial dump which needs to be ignored. > > I think check shuold be comparing registered_regions against total number of > regions. What do you think ? Yes, Mahesh. Taking care of that in 22/25 Thanks Hari
Re: [PATCH v5 4/7] powerpc/mm: Use UV_WRITE_PATE ucall to register a PATE
Hi Claudio, Claudio Carvalho writes: > From: Michael Anderson > > In ultravisor enabled systems, the ultravisor creates and maintains the > partition table in secure memory where the hypervisor cannot access, and ^ which? > therefore, the hypervisor have to do the UV_WRITE_PATE ucall whenever it ^ ^ hasa > wants to set a partition table entry (PATE). > > This patch adds the UV_WRITE_PATE ucall and uses it to set a PATE if > ultravisor is enabled. Additionally, this also also keeps a copy of the > partition table because the nestMMU does not have access to secure > memory. Such copy has entries for nonsecure and hypervisor partition. I'm having trouble parsing the last sentence there. Or at least it doesn't seem to match the code, or I don't understand either the code or the comment. More below. > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > b/arch/powerpc/mm/book3s64/pgtable.c > index 85bc81abd286..033731f5dbaa 100644 > --- a/arch/powerpc/mm/book3s64/pgtable.c > +++ b/arch/powerpc/mm/book3s64/pgtable.c > @@ -213,34 +223,50 @@ void __init mmu_partition_table_init(void) > powernv_set_nmmu_ptcr(ptcr); > } > > -void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, > -unsigned long dw1) > +/* > + * Global flush of TLBs and partition table caches for this lpid. The type of > + * flush (hash or radix) depends on what the previous use of this partition > ID > + * was, not the new use. > + */ > +static void flush_partition(unsigned int lpid, unsigned long old_patb0) A nicer API would be for the 2nd param to be a "bool radix", and have the caller worry about the fact that it comes from (patb0 & PATB_HR). > { > - unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); > - > - partition_tb[lpid].patb0 = cpu_to_be64(dw0); > - partition_tb[lpid].patb1 = cpu_to_be64(dw1); > - > - /* > - * Global flush of TLBs and partition table caches for this lpid. 
> - * The type of flush (hash or radix) depends on what the previous > - * use of this partition ID was, not the new use. > - */ > asm volatile("ptesync" : : : "memory"); > - if (old & PATB_HR) { > - asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : > + if (old_patb0 & PATB_HR) { > + asm volatile(PPC_TLBIE_5(%0, %1, 2, 0, 1) : : >"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); > - asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : > + asm volatile(PPC_TLBIE_5(%0, %1, 2, 1, 1) : : That looks like an unrelated whitespace change. >"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); > trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); > } else { > - asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : > + asm volatile(PPC_TLBIE_5(%0, %1, 2, 0, 0) : : Ditto. >"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); > trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); > } > /* do we need fixup here ?*/ > asm volatile("eieio; tlbsync; ptesync" : : : "memory"); > } > + > +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, > + unsigned long dw1) > +{ > + unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); > + > + partition_tb[lpid].patb0 = cpu_to_be64(dw0); > + partition_tb[lpid].patb1 = cpu_to_be64(dw1); ie. here we always update the copy of the partition table, regardless of whether we're running under an ultravisor or not. So the copy is a complete copy isn't it? > + /* > + * In ultravisor enabled systems, the ultravisor maintains the partition > + * table in secure memory where we don't have access, therefore, we have > + * to do a ucall to set an entry. > + */ > + if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) { > + uv_register_pate(lpid, dw0, dw1); > + pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = > 0x%lx\n", > + dw0, dw1); > + } else { > + flush_partition(lpid, old); > + } What is different is whether we flush or not. And don't we still need to do the flush for the nestMMU? 
I assume we're saying the ultravisor will broadcast a flush for us, which will also handle the nestMMU case? cheers
Re: [PATCH v5 5/7] powerpc/mm: Write to PTCR only if ultravisor disabled
Claudio Carvalho writes: > In ultravisor enabled systems, PTCR becomes ultravisor privileged only > for writing and an attempt to write to it will cause a Hypervisor > Emulation Assitance interrupt. > > This patch adds the try_set_ptcr(val) macro as an accessor to > mtspr(SPRN_PTCR, val), which will be executed only if ultravisor > disabled. > > Signed-off-by: Claudio Carvalho > --- > arch/powerpc/include/asm/reg.h | 13 + > arch/powerpc/mm/book3s64/hash_utils.c| 4 ++-- > arch/powerpc/mm/book3s64/pgtable.c | 2 +- > arch/powerpc/mm/book3s64/radix_pgtable.c | 6 +++--- > 4 files changed, 19 insertions(+), 6 deletions(-) > > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h > index 10caa145f98b..14139b1ebdb8 100644 > --- a/arch/powerpc/include/asm/reg.h > +++ b/arch/powerpc/include/asm/reg.h > @@ -15,6 +15,7 @@ > #include > #include > #include > +#include reg.h is already too big and unwieldy. Can you put this in ultravisor.h and include that in the appropriate places. > @@ -1452,6 +1453,18 @@ static inline void update_power8_hid0(unsigned long > hid0) >*/ > asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); > } > + > +/* > + * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for > + * writing and an attempt to write to it will cause a Hypervisor Emulation > + * Assistance interrupt. > + */ > +#define try_set_ptcr(val)\ > + do {\ > + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) \ > + mtspr(SPRN_PTCR, val); \ > + } while (0) This should be a static inline please, not a macro. Sorry, I don't like the name, we're not trying to set it, we know when to set it and when not to. It is awkward to come up with a good name because we don't have a term for "hypervisor that's not running under an ultravisor". Maybe set_ptcr_when_no_uv() Which is kinda messy, someone feel free to come up with something better. 
I also see some more accesses to the PTCR in arch/powerpc/platforms/powernv/idle.c which you haven't patched? cheers
[PATCH 3/5] powerpc/ptdump: drop dummy KERN_VIRT_START on PPC32
PPC32 doesn't have KERN_VIRT_START. Make PAGE_OFFSET the default starting address for the dump, and drop the dummy definition of KERN_VIRT_START. Only use KERN_VIRT_START for non radix PPC64. Signed-off-by: Christophe Leroy --- arch/powerpc/mm/ptdump/ptdump.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 74ff2bff4ea0..9a2186c133e6 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -26,10 +26,6 @@ #include "ptdump.h" -#ifdef CONFIG_PPC32 -#define KERN_VIRT_START0 -#endif - /* * To visualise what is happening, * @@ -362,12 +358,13 @@ static int ptdump_show(struct seq_file *m, void *v) struct pg_state st = { .seq = m, .marker = address_markers, + .start_address = PAGE_OFFSET, }; - if (radix_enabled()) - st.start_address = PAGE_OFFSET; - else +#ifdef CONFIG_PPC64 + if (!radix_enabled()) st.start_address = KERN_VIRT_START; +#endif /* Traverse kernel page tables */ walk_pagetables(); @@ -405,12 +402,13 @@ void ptdump_check_wx(void) .seq = NULL, .marker = address_markers, .check_wx = true, + .start_address = PAGE_OFFSET, }; - if (radix_enabled()) - st.start_address = PAGE_OFFSET; - else +#ifdef CONFIG_PPC64 + if (!radix_enabled()) st.start_address = KERN_VIRT_START; +#endif walk_pagetables(); -- 2.13.3
[PATCH 1/5] powerpc/ptdump: fix addresses display on PPC32
Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a shift in the displayed addresses. Lets revert that change to resync walk_pagetables()'s addr val and pgd_t pointer for PPC32. Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/mm/ptdump/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 6a88a9f585d4..3ad64fc11419 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -27,7 +27,7 @@ #include "ptdump.h" #ifdef CONFIG_PPC32 -#define KERN_VIRT_STARTPAGE_OFFSET +#define KERN_VIRT_START0 #endif /* -- 2.13.3
Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe
Hi Balbir, Balbir Singh writes: > On 12/8/19 7:22 pm, Santosh Sivaraj wrote: >> If we take a UE on one of the instructions with a fixup entry, set nip >> to continue execution at the fixup entry. Stop processing the event >> further or print it. >> >> Co-developed-by: Reza Arbab >> Signed-off-by: Reza Arbab >> Cc: Mahesh Salgaonkar >> Signed-off-by: Santosh Sivaraj >> --- > > Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it > should still have my author tag and signed-off-by Originally when I received the series for posting, I had Reza's authorship and signed-off-by, since the patch changed significantly I added co-developed-by as Reza. I will update in the next spin. https://lore.kernel.org/linuxppc-dev/20190702051932.511-1-sant...@fossix.org/ Santosh > > Balbir Singh > >> arch/powerpc/include/asm/mce.h | 4 +++- >> arch/powerpc/kernel/mce.c | 16 >> arch/powerpc/kernel/mce_power.c | 15 +-- >> 3 files changed, 32 insertions(+), 3 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h >> index f3a6036b6bc0..e1931c8c2743 100644 >> --- a/arch/powerpc/include/asm/mce.h >> +++ b/arch/powerpc/include/asm/mce.h >> @@ -122,7 +122,8 @@ struct machine_check_event { >> enum MCE_UeErrorType ue_error_type:8; >> u8 effective_address_provided; >> u8 physical_address_provided; >> -u8 reserved_1[5]; >> +u8 ignore_event; >> +u8 reserved_1[4]; >> u64 effective_address; >> u64 physical_address; >> u8 reserved_2[8]; >> @@ -193,6 +194,7 @@ struct mce_error_info { >> enum MCE_Initiator initiator:8; >> enum MCE_ErrorClass error_class:8; >> boolsync_error; >> +boolignore_event; >> }; >> >> #define MAX_MC_EVT 100 >> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c >> index a3b122a685a5..ec4b3e1087be 100644 >> --- a/arch/powerpc/kernel/mce.c >> +++ b/arch/powerpc/kernel/mce.c >> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, >> if (phys_addr != ULONG_MAX) { >> 
mce->u.ue_error.physical_address_provided = true; >> mce->u.ue_error.physical_address = phys_addr; >> +mce->u.ue_error.ignore_event = mce_err->ignore_event; >> machine_check_ue_event(mce); >> } >> } >> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct >> *work) >> /* >> * This should probably queued elsewhere, but >> * oh! well >> + * >> + * Don't report this machine check because the caller has a >> + * asked us to ignore the event, it has a fixup handler which >> + * will do the appropriate error handling and reporting. >> */ >> if (evt->error_type == MCE_ERROR_TYPE_UE) { >> +if (evt->u.ue_error.ignore_event) { >> +__this_cpu_dec(mce_ue_count); >> +continue; >> +} >> + >> if (evt->u.ue_error.physical_address_provided) { >> unsigned long pfn; >> >> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct >> irq_work *work) >> while (__this_cpu_read(mce_queue_count) > 0) { >> index = __this_cpu_read(mce_queue_count) - 1; >> evt = this_cpu_ptr(_event_queue[index]); >> + >> +if (evt->error_type == MCE_ERROR_TYPE_UE && >> +evt->u.ue_error.ignore_event) { >> +__this_cpu_dec(mce_queue_count); >> +continue; >> +} >> machine_check_print_event_info(evt, false, false); >> __this_cpu_dec(mce_queue_count); >> } >> diff --git a/arch/powerpc/kernel/mce_power.c >> b/arch/powerpc/kernel/mce_power.c >> index e74816f045f8..1dd87f6f5186 100644 >> --- a/arch/powerpc/kernel/mce_power.c >> +++ b/arch/powerpc/kernel/mce_power.c >> @@ -11,6 +11,7 @@ >> >> #include >> #include >> +#include >> #include >> #include >> #include >> @@ -18,6 +19,7 @@ >> #include >> #include >> #include >> +#include >> >> /* >> * Convert an address related to an mm to a physical address. 
>> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, >> return 0; >> } >> >> -static long mce_handle_ue_error(struct pt_regs *regs) >> +static long mce_handle_ue_error(struct pt_regs *regs, >> +struct mce_error_info *mce_err) >> { >> long handled = 0; >> +const struct
[PATCH v10 2/7] powerpc/mce: Fix MCE handling for huge pages
From: Balbir Singh The current code would fail on huge pages addresses, since the shift would be incorrect. Use the correct page shift value returned by __find_linux_pte() to get the correct physical address. The code is more generic and can handle both regular and compound pages. Fixes: ba41e1e1ccb9 ("powerpc/mce: Hookup derror (load/store) UE errors") Signed-off-by: Balbir Singh [ar...@linux.ibm.com: Fixup pseries_do_memory_failure()] Signed-off-by: Reza Arbab Co-developed-by: Santosh Sivaraj Signed-off-by: Santosh Sivaraj Tested-by: Mahesh Salgaonkar Cc: sta...@vger.kernel.org # v4.15+ --- arch/powerpc/include/asm/mce.h | 2 +- arch/powerpc/kernel/mce_power.c | 55 ++-- arch/powerpc/platforms/pseries/ras.c | 9 ++--- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index a4c6a74ad2fb..f3a6036b6bc0 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -209,7 +209,7 @@ extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); -unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); +unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index a814d2dfb5b0..e74816f045f8 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -20,13 +20,14 @@ #include /* - * Convert an address related to an mm to a PFN. NOTE: we are in real - * mode, we could potentially race with page table updates. + * Convert an address related to an mm to a physical address. + * NOTE: we are in real mode, we could potentially race with page table updates. 
*/ -unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) +unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr) { - pte_t *ptep; - unsigned long flags; + pte_t *ptep, pte; + unsigned int shift; + unsigned long flags, phys_addr; struct mm_struct *mm; if (user_mode(regs)) @@ -35,14 +36,21 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) mm = _mm; local_irq_save(flags); - if (mm == current->mm) - ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL); - else - ptep = find_init_mm_pte(addr, NULL); + ptep = __find_linux_pte(mm->pgd, addr, NULL, ); local_irq_restore(flags); + if (!ptep || pte_special(*ptep)) return ULONG_MAX; - return pte_pfn(*ptep); + + pte = *ptep; + if (shift > PAGE_SHIFT) { + unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; + + pte = __pte(pte_val(pte) | (addr & rpnmask)); + } + phys_addr = pte_pfn(pte) << PAGE_SHIFT; + + return phys_addr; } /* flush SLBs and reload */ @@ -344,7 +352,7 @@ static const struct mce_derror_table mce_p9_derror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, false, 0, 0, 0, 0, 0 } }; -static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, +static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) { /* @@ -354,18 +362,16 @@ static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, * faults */ int instr; - unsigned long pfn, instr_addr; + unsigned long instr_addr; struct instruction_op op; struct pt_regs tmp = *regs; - pfn = addr_to_pfn(regs, regs->nip); - if (pfn != ULONG_MAX) { - instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); + instr_addr = addr_to_phys(regs, regs->nip) + (regs->nip & ~PAGE_MASK); + if (instr_addr != ULONG_MAX) { instr = *(unsigned int *)(instr_addr); if (!analyse_instr(, , instr)) { - pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; - *phys_addr = (pfn << PAGE_SHIFT); + *phys_addr = addr_to_phys(regs, op.ea); return 0; } /* @@ -440,15 +446,9 @@ static 
int mce_handle_ierror(struct pt_regs *regs, *addr = regs->nip; if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { - unsigned long pfn; - - if (get_paca()->in_mce < MAX_MCE_DEPTH) { - pfn = addr_to_pfn(regs, regs->nip); -
[PATCH 3/6] powerpc: Convert flush_icache_range & friends to C
From: Alastair D'Silva Similar to commit 22e9c88d486a ("powerpc/64: reuse PPC32 static inline flush_dcache_range()") this patch converts flush_icache_range() to C, and reimplements the following functions as wrappers around it: __flush_dcache_icache __flush_dcache_icache_phys This was done as we discovered a long-standing bug where the length of the range was truncated due to using a 32 bit shift instead of a 64 bit one. By converting these functions to C, it becomes easier to maintain. Signed-off-by: Alastair D'Silva --- arch/powerpc/include/asm/cache.h | 26 +++--- arch/powerpc/include/asm/cacheflush.h | 32 --- arch/powerpc/kernel/misc_32.S | 117 -- arch/powerpc/kernel/misc_64.S | 97 - arch/powerpc/mm/mem.c | 71 +++- 5 files changed, 102 insertions(+), 241 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index f852d5cd746c..728f154204db 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -98,20 +98,7 @@ static inline u32 l1_icache_bytes(void) #endif #endif /* ! __ASSEMBLY__ */ -#if defined(__ASSEMBLY__) -/* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. 
- */ -#define PURGE_PREFETCHED_INS \ - sync; \ - icbi0,r3; \ - sync; \ - isync - -#else +#if !defined(__ASSEMBLY__) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) #ifdef CONFIG_PPC_BOOK3S_32 @@ -145,6 +132,17 @@ static inline void dcbst(void *addr) { __asm__ __volatile__ ("dcbst %y0" : : "Z"(*(u8 *)addr) : "memory"); } + +static inline void icbi(void *addr) +{ + __asm__ __volatile__ ("icbi 0, %0" : : "r"(addr) : "memory"); +} + +static inline void iccci(void) +{ + __asm__ __volatile__ ("iccci 0, r0"); +} + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_CACHE_H */ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index ed57843ef452..4c3377aff8ed 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -42,24 +42,18 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping)do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -extern void flush_icache_range(unsigned long, unsigned long); +void flush_icache_range(unsigned long start, unsigned long stop); extern void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); -extern void __flush_dcache_icache(void *page_va); extern void flush_dcache_icache_page(struct page *page); -#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) -extern void __flush_dcache_icache_phys(unsigned long physaddr); -#else -static inline void __flush_dcache_icache_phys(unsigned long physaddr) -{ - BUG(); -} -#endif -/* - * Write any modified data cache blocks out to memory and invalidate them. +/** + * flush_dcache_range(): Write any modified data cache blocks out to memory and invalidate them. * Does not invalidate the corresponding instruction cache blocks. 
+ * + * @start: the start address + * @stop: the stop address (exclusive) */ static inline void flush_dcache_range(unsigned long start, unsigned long stop) { @@ -82,6 +76,20 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop) isync(); } +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @page: the address of the page to flush + */ +static inline void __flush_dcache_icache(void *page) +{ + unsigned long page_addr = (unsigned long)page; + + flush_icache_range(page_addr, page_addr + PAGE_SIZE); +} + /* * Write any modified data cache blocks out to memory. * Does not invalidate the corresponding cache lines (especially for diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index fe4bd321730e..12b95e6799d4 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -318,123 +318,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) EXPORT_SYMBOL(flush_instruction_cache) #endif /* CONFIG_PPC_8xx */ -/* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * This is a no-op on the 601. - * - * flush_icache_range(unsigned long start, unsigned long stop) - */ -_GLOBAL(flush_icache_range) -BEGIN_FTR_SECTION -
[PATCH 4/6] powerpc: Chunk calls to flush_dcache_range in arch_*_memory
From: Alastair D'Silva When presented with large amounts of memory being hotplugged (in my test case, ~890GB), the call to flush_dcache_range takes a while (~50 seconds), triggering RCU stalls. This patch breaks up the call into 16GB chunks, calling cond_resched() inbetween to allow the scheduler to run. Signed-off-by: Alastair D'Silva --- arch/powerpc/mm/mem.c | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5400da87a804..fb0d5e9aa11b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } +#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull) + int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned long i; int rc; resize_hpt_for_hotplug(memblock_phys_mem_size()); @@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, start, start + size, rc); return -EFAULT; } - flush_dcache_range(start, start + size); + + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) { + flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE)); + cond_resched(); + } return __add_pages(nid, start_pfn, nr_pages, restrictions); } @@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); + unsigned long i; int ret; __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_dcache_range(start, start + size); + for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) { + flush_dcache_range(start + i, min(start + size, start + i + 
FLUSH_CHUNK_SIZE)); + cond_resched(); + } + ret = remove_section_mapping(start, start + size); WARN_ON_ONCE(ret); -- 2.21.0
[PATCH v4 3/3] x86/kasan: support KASAN_VMALLOC
In the case where KASAN directly allocates memory to back vmalloc space, don't map the early shadow page over it. We prepopulate pgds/p4ds for the range that would otherwise be empty. This is required to get it synced to hardware on boot, allowing the lower levels of the page tables to be filled dynamically. Acked-by: Dmitry Vyukov Signed-off-by: Daniel Axtens --- v2: move from faulting in shadow pgds to prepopulating --- arch/x86/Kconfig| 1 + arch/x86/mm/kasan_init_64.c | 61 + 2 files changed, 62 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 222855cc0158..40562cc3771f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -134,6 +134,7 @@ config X86 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if X86_64 + select HAVE_ARCH_KASAN_VMALLOC if X86_64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 296da58f3013..2f57c4ddff61 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -245,6 +245,52 @@ static void __init kasan_map_early_shadow(pgd_t *pgd) } while (pgd++, addr = next, addr != end); } +static void __init kasan_shallow_populate_p4ds(pgd_t *pgd, + unsigned long addr, + unsigned long end, + int nid) +{ + p4d_t *p4d; + unsigned long next; + void *p; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + + if (p4d_none(*p4d)) { + p = early_alloc(PAGE_SIZE, nid, true); + p4d_populate(_mm, p4d, p); + } + } while (p4d++, addr = next, addr != end); +} + +static void __init kasan_shallow_populate_pgds(void *start, void *end) +{ + unsigned long addr, next; + pgd_t *pgd; + void *p; + int nid = early_pfn_to_nid((unsigned long)start); + + addr = (unsigned long)start; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, (unsigned long)end); + + if (pgd_none(*pgd)) { + p = early_alloc(PAGE_SIZE, nid, true); + 
pgd_populate(_mm, pgd, p); + } + + /* +* we need to populate p4ds to be synced when running in +* four level mode - see sync_global_pgds_l4() +*/ + kasan_shallow_populate_p4ds(pgd, addr, next, nid); + } while (pgd++, addr = next, addr != (unsigned long)end); +} + + #ifdef CONFIG_KASAN_INLINE static int kasan_die_handler(struct notifier_block *self, unsigned long val, @@ -352,9 +398,24 @@ void __init kasan_init(void) shadow_cpu_entry_end = (void *)round_up( (unsigned long)shadow_cpu_entry_end, PAGE_SIZE); + /* +* If we're in full vmalloc mode, don't back vmalloc space with early +* shadow pages. Instead, prepopulate pgds/p4ds so they are synced to +* the global table and we can populate the lower levels on demand. +*/ +#ifdef CONFIG_KASAN_VMALLOC + kasan_shallow_populate_pgds( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + kasan_mem_to_shadow((void *)VMALLOC_END)); + + kasan_populate_early_shadow( + kasan_mem_to_shadow((void *)VMALLOC_END + 1), + shadow_cpu_entry_begin); +#else kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), shadow_cpu_entry_begin); +#endif kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, (unsigned long)shadow_cpu_entry_end, 0); -- 2.20.1
[PATCH v10 5/7] powerpc/memcpy: Add memcpy_mcsafe for pmem
From: Balbir Singh The pmem infrastructure uses memcpy_mcsafe in the pmem layer so as to convert machine check exceptions into a return value on failure in case a machine check exception is encountered during the memcpy. The return value is the number of bytes remaining to be copied. This patch largely borrows from the copyuser_power7 logic and does not add the VMX optimizations, largely to keep the patch simple. If needed those optimizations can be folded in. Signed-off-by: Balbir Singh [ar...@linux.ibm.com: Added symbol export] Co-developed-by: Santosh Sivaraj Signed-off-by: Santosh Sivaraj --- arch/powerpc/include/asm/string.h | 2 + arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/memcpy_mcsafe_64.S | 242 3 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/lib/memcpy_mcsafe_64.S diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 9bf6dffb4090..b72692702f35 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -53,7 +53,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t n); #ifndef CONFIG_KASAN #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 +#define __HAVE_ARCH_MEMCPY_MCSAFE +extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz); extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index eebc782d89a5..fa6b1b657b43 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ memcpy_power7.o obj64-y+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o pmem.o + memcpy_64.o pmem.o memcpy_mcsafe_64.o obj64-$(CONFIG_SMP)+= locks.o obj64-$(CONFIG_ALTIVEC)+= vmx-helper.o diff --git 
a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S new file mode 100644 index ..949976dc115d --- /dev/null +++ b/arch/powerpc/lib/memcpy_mcsafe_64.S @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) IBM Corporation, 2011 + * Derived from copyuser_power7.s by Anton Blanchard + * Author - Balbir Singh + */ +#include +#include +#include + + .macro err1 +100: + EX_TABLE(100b,.Ldo_err1) + .endm + + .macro err2 +200: + EX_TABLE(200b,.Ldo_err2) + .endm + + .macro err3 +300: EX_TABLE(300b,.Ldone) + .endm + +.Ldo_err2: + ld r22,STK_REG(R22)(r1) + ld r21,STK_REG(R21)(r1) + ld r20,STK_REG(R20)(r1) + ld r19,STK_REG(R19)(r1) + ld r18,STK_REG(R18)(r1) + ld r17,STK_REG(R17)(r1) + ld r16,STK_REG(R16)(r1) + ld r15,STK_REG(R15)(r1) + ld r14,STK_REG(R14)(r1) + addir1,r1,STACKFRAMESIZE +.Ldo_err1: + /* Do a byte by byte copy to get the exact remaining size */ + mtctr r7 +46: +err3; lbz r0,0(r4) + addir4,r4,1 +err3; stb r0,0(r3) + addir3,r3,1 + bdnz46b + li r3,0 + blr + +.Ldone: + mfctr r3 + blr + + +_GLOBAL(memcpy_mcsafe) + mr r7,r5 + cmpldi r5,16 + blt .Lshort_copy + +.Lcopy: + /* Get the source 8B aligned */ + neg r6,r4 + mtocrf 0x01,r6 + clrldi r6,r6,(64-3) + + bf cr7*4+3,1f +err1; lbz r0,0(r4) + addir4,r4,1 +err1; stb r0,0(r3) + addir3,r3,1 + subir7,r7,1 + +1: bf cr7*4+2,2f +err1; lhz r0,0(r4) + addir4,r4,2 +err1; sth r0,0(r3) + addir3,r3,2 + subir7,r7,2 + +2: bf cr7*4+1,3f +err1; lwz r0,0(r4) + addir4,r4,4 +err1; stw r0,0(r3) + addir3,r3,4 + subir7,r7,4 + +3: sub r5,r5,r6 + cmpldi r5,128 + blt 5f + + mflrr0 + stdur1,-STACKFRAMESIZE(r1) + std r14,STK_REG(R14)(r1) + std r15,STK_REG(R15)(r1) + std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) + std r18,STK_REG(R18)(r1) + std r19,STK_REG(R19)(r1) + std r20,STK_REG(R20)(r1) + std r21,STK_REG(R21)(r1) + std r22,STK_REG(R22)(r1) + std r0,STACKFRAMESIZE+16(r1) + + srdir6,r5,7 + mtctr r6 + + /* Now do cacheline (128B) sized loads and stores. 
*/ + .align 5 +4: +err2; ld r0,0(r4) +err2; ld r6,8(r4) +err2; ld r8,16(r4) +err2; ld r9,24(r4) +err2; ld r10,32(r4) +err2; ld r11,40(r4) +err2; ld r12,48(r4) +err2; ld r14,56(r4) +err2; ld r15,64(r4) +err2; ld
[PATCH 1/6] powerpc: Allow flush_icache_range to work across ranges >4GB
From: Alastair D'Silva When calling flush_icache_range with a size >4GB, we were masking off the upper 32 bits, so we would incorrectly flush a range smaller than intended. This patch replaces the 32 bit shifts with 64 bit ones, so that the full size is accounted for. Signed-off-by: Alastair D'Silva Cc: sta...@vger.kernel.org --- arch/powerpc/kernel/misc_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index b55a7b4cb543..9bc0aa9aeb65 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -82,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subfr8,r6,r4/* compute length */ add r8,r8,r5/* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of cache block size */ - srw.r8,r8,r9/* compute line count */ + srd.r8,r8,r9/* compute line count */ beqlr /* nothing to do? */ mtctr r8 1: dcbst 0,r6 @@ -98,7 +98,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subfr8,r6,r4/* compute length */ add r8,r8,r5 lwz r9,ICACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of Icache block size */ - srw.r8,r8,r9/* compute line count */ + srd.r8,r8,r9/* compute line count */ beqlr /* nothing to do? */ mtctr r8 2: icbi0,r6 -- 2.21.0
[PATCH 0/6] powerpc: convert cache asm to C
From: Alastair D'Silva This series addresses a few issues discovered in how we flush caches: 1. Flushes were truncated at 4GB, so larger flushes were incorrect. 2. Flushing the dcache in arch_add_memory was unnecessary This series also converts much of the cache assembler to C, with the aim of making it easier to maintain. Alastair D'Silva (6): powerpc: Allow flush_icache_range to work across ranges >4GB powerpc: define helpers to get L1 icache sizes powerpc: Convert flush_icache_range & friends to C powerpc: Chunk calls to flush_dcache_range in arch_*_memory powerpc: Remove 'extern' from func prototypes in cache headers powerpc: Don't flush caches when adding memory arch/powerpc/include/asm/cache.h | 63 +- arch/powerpc/include/asm/cacheflush.h | 49 ++- arch/powerpc/kernel/misc_32.S | 117 -- arch/powerpc/kernel/misc_64.S | 97 - arch/powerpc/mm/mem.c | 80 +- 5 files changed, 146 insertions(+), 260 deletions(-) -- 2.21.0
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 --- Comment #25 from Christophe Leroy (christophe.le...@c-s.fr) --- You can use get_zeroed_page(GFP_NOFS) instead of __get_free_page(GFP_NOFS | __GFP_ZERO) -- You are receiving this mail because: You are on the CC list for the bug.
[PATCH v5 06/18] compat_ioctl: move WDIOC handling into wdt drivers
All watchdog drivers implement the same set of ioctl commands, and fortunately all of them are compatible between 32-bit and 64-bit architectures. Modern drivers always go through drivers/watchdog/wdt.c as an abstraction layer, but older ones implement their own file_operations on a character device for this. Move the handling from fs/compat_ioctl.c into the individual drivers. Note that most of the legacy drivers will never be used on 64-bit hardware, because they are for an old 32-bit SoC implementation, but doing them all at once is safer than trying to guess which ones do or do not need the compat_ioctl handling. Signed-off-by: Arnd Bergmann --- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 1 + arch/um/drivers/harddog_kern.c| 1 + drivers/char/ipmi/ipmi_watchdog.c | 1 + drivers/hwmon/fschmd.c| 1 + drivers/rtc/rtc-ds1374.c | 1 + drivers/watchdog/acquirewdt.c | 1 + drivers/watchdog/advantechwdt.c | 1 + drivers/watchdog/alim1535_wdt.c | 1 + drivers/watchdog/alim7101_wdt.c | 1 + drivers/watchdog/ar7_wdt.c| 1 + drivers/watchdog/at91rm9200_wdt.c | 1 + drivers/watchdog/ath79_wdt.c | 1 + drivers/watchdog/bcm63xx_wdt.c| 1 + drivers/watchdog/cpu5wdt.c| 1 + drivers/watchdog/eurotechwdt.c| 1 + drivers/watchdog/f71808e_wdt.c| 1 + drivers/watchdog/gef_wdt.c| 1 + drivers/watchdog/geodewdt.c | 1 + drivers/watchdog/ib700wdt.c | 1 + drivers/watchdog/ibmasr.c | 1 + drivers/watchdog/indydog.c| 1 + drivers/watchdog/intel_scu_watchdog.c | 1 + drivers/watchdog/iop_wdt.c| 1 + drivers/watchdog/it8712f_wdt.c| 1 + drivers/watchdog/ixp4xx_wdt.c | 1 + drivers/watchdog/ks8695_wdt.c | 1 + drivers/watchdog/m54xx_wdt.c | 1 + drivers/watchdog/machzwd.c| 1 + drivers/watchdog/mixcomwd.c | 1 + drivers/watchdog/mtx-1_wdt.c | 1 + drivers/watchdog/mv64x60_wdt.c| 1 + drivers/watchdog/nuc900_wdt.c | 1 + drivers/watchdog/nv_tco.c | 1 + drivers/watchdog/pc87413_wdt.c| 1 + drivers/watchdog/pcwd.c | 1 + drivers/watchdog/pcwd_pci.c | 1 + drivers/watchdog/pcwd_usb.c | 1 + drivers/watchdog/pika_wdt.c | 1 + 
drivers/watchdog/pnx833x_wdt.c| 1 + drivers/watchdog/rc32434_wdt.c| 1 + drivers/watchdog/rdc321x_wdt.c| 1 + drivers/watchdog/riowd.c | 1 + drivers/watchdog/sa1100_wdt.c | 1 + drivers/watchdog/sb_wdog.c| 1 + drivers/watchdog/sbc60xxwdt.c | 1 + drivers/watchdog/sbc7240_wdt.c| 1 + drivers/watchdog/sbc_epx_c3.c | 1 + drivers/watchdog/sbc_fitpc2_wdt.c | 1 + drivers/watchdog/sc1200wdt.c | 1 + drivers/watchdog/sc520_wdt.c | 1 + drivers/watchdog/sch311x_wdt.c| 1 + drivers/watchdog/scx200_wdt.c | 1 + drivers/watchdog/smsc37b787_wdt.c | 1 + drivers/watchdog/w83877f_wdt.c| 1 + drivers/watchdog/w83977f_wdt.c| 1 + drivers/watchdog/wafer5823wdt.c | 1 + drivers/watchdog/watchdog_dev.c | 1 + drivers/watchdog/wdrtas.c | 1 + drivers/watchdog/wdt.c| 1 + drivers/watchdog/wdt285.c | 1 + drivers/watchdog/wdt977.c | 1 + drivers/watchdog/wdt_pci.c| 1 + fs/compat_ioctl.c | 11 --- 63 files changed, 62 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index ba12dc14a3d1..8c0d324f657e 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -650,6 +650,7 @@ static const struct file_operations mpc52xx_wdt_fops = { .llseek = no_llseek, .write = mpc52xx_wdt_write, .unlocked_ioctl = mpc52xx_wdt_ioctl, + .compat_ioctl = compat_ptr_ioctl, .open = mpc52xx_wdt_open, .release= mpc52xx_wdt_release, }; diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 000cb69ba0bc..e6d4f43deba8 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -165,6 +165,7 @@ static const struct file_operations harddog_fops = { .owner = THIS_MODULE, .write = harddog_write, .unlocked_ioctl = harddog_ioctl, + .compat_ioctl = compat_ptr_ioctl, .open = harddog_open, .release= harddog_release, .llseek =
[PATCH 0/6] drm+dma: cache support for arm, etc
From: Rob Clark This is a replacement for previous patches[1] that were adding arm64 support for drm_clflush. I've also added a patch to solve a similar cache issue in vgem. The first few patches just export arch_sync_dma_for_*(). Possibly instead the EXPORT_SYMBOL_GPL() should be somewhere central, rather than per-arch (but where would make sense?) The fourth adds (and exports) these ops for arch/arm. (Arnd Bergmann mentioned on IRC that Christoph Hellwig was working on this already for arch/arm which could replace the fourth patch.) The last two patches actually fix things. [1] https://patchwork.freedesktop.org/series/64732/ Rob Clark (6): arm64: export arch_sync_dma_for_*() mips: export arch_sync_dma_for_*() powerpc: export arch_sync_dma_for_*() arm: add arch_sync_dma_for_*() drm/msm: stop abusing DMA API drm/vgem: fix cache synchronization on arm/arm64 (take two) arch/arm/Kconfig | 2 + arch/arm/mm/dma-mapping-nommu.c | 14 +++ arch/arm/mm/dma-mapping.c | 28 ++ arch/arm64/mm/dma-mapping.c | 2 + arch/arm64/mm/flush.c | 2 + arch/mips/mm/dma-noncoherent.c| 2 + arch/powerpc/mm/dma-noncoherent.c | 2 + drivers/gpu/drm/drm_cache.c | 20 - drivers/gpu/drm/msm/msm_gem.c | 37 +++- drivers/gpu/drm/vgem/vgem_drv.c | 145 -- include/drm/drm_cache.h | 4 + 11 files changed, 182 insertions(+), 76 deletions(-) -- 2.21.0
[PATCH v10 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe
From: Balbir Singh If we take a UE on one of the instructions with a fixup entry, set nip to continue execution at the fixup entry. Stop processing the event further or print it. Co-developed-by: Reza Arbab Signed-off-by: Reza Arbab Signed-off-by: Balbir Singh Signed-off-by: Santosh Sivaraj Reviewed-by: Mahesh Salgaonkar --- arch/powerpc/include/asm/mce.h | 4 +++- arch/powerpc/kernel/mce.c | 16 arch/powerpc/kernel/mce_power.c | 15 +-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index f3a6036b6bc0..e1931c8c2743 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -122,7 +122,8 @@ struct machine_check_event { enum MCE_UeErrorType ue_error_type:8; u8 effective_address_provided; u8 physical_address_provided; - u8 reserved_1[5]; + u8 ignore_event; + u8 reserved_1[4]; u64 effective_address; u64 physical_address; u8 reserved_2[8]; @@ -193,6 +194,7 @@ struct mce_error_info { enum MCE_Initiator initiator:8; enum MCE_ErrorClass error_class:8; boolsync_error; + boolignore_event; }; #define MAX_MC_EVT 100 diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a3b122a685a5..ec4b3e1087be 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, if (phys_addr != ULONG_MAX) { mce->u.ue_error.physical_address_provided = true; mce->u.ue_error.physical_address = phys_addr; + mce->u.ue_error.ignore_event = mce_err->ignore_event; machine_check_ue_event(mce); } } @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work) /* * This should probably queued elsewhere, but * oh! well +* +* Don't report this machine check because the caller has a +* asked us to ignore the event, it has a fixup handler which +* will do the appropriate error handling and reporting. 
*/ if (evt->error_type == MCE_ERROR_TYPE_UE) { + if (evt->u.ue_error.ignore_event) { + __this_cpu_dec(mce_ue_count); + continue; + } + if (evt->u.ue_error.physical_address_provided) { unsigned long pfn; @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work) while (__this_cpu_read(mce_queue_count) > 0) { index = __this_cpu_read(mce_queue_count) - 1; evt = this_cpu_ptr(_event_queue[index]); + + if (evt->error_type == MCE_ERROR_TYPE_UE && + evt->u.ue_error.ignore_event) { + __this_cpu_dec(mce_queue_count); + continue; + } machine_check_print_event_info(evt, false, false); __this_cpu_dec(mce_queue_count); } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index e74816f045f8..1dd87f6f5186 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include /* * Convert an address related to an mm to a physical address. @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, return 0; } -static long mce_handle_ue_error(struct pt_regs *regs) +static long mce_handle_ue_error(struct pt_regs *regs, + struct mce_error_info *mce_err) { long handled = 0; + const struct exception_table_entry *entry; + + entry = search_kernel_exception_table(regs->nip); + if (entry) { + mce_err->ignore_event = true; + regs->nip = extable_fixup(entry); + return 1; + } /* * On specific SCOM read via MMIO we may get a machine check @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs, _addr); if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); + handled = mce_handle_ue_error(regs, _err); save_mce_event(regs,
[PATCH v10 7/7] powerpc: add machine check safe copy_to_user
Use memcpy_mcsafe() implementation to define copy_to_user_mcsafe() Signed-off-by: Santosh Sivaraj --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/uaccess.h | 14 ++ 2 files changed, 15 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 77f6ebf97113..4316e36095a2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 + select ARCH_HAS_UACCESS_MCSAFE if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 8b03eb44e876..15002b51ff18 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -387,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void __user *to, return ret; } +static __always_inline unsigned long __must_check +copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) +{ + if (likely(check_copy_size(from, n, true))) { + if (access_ok(to, n)) { + allow_write_to_user(to, n); + n = memcpy_mcsafe((void *)to, from, n); + prevent_write_to_user(to, n); + } + } + + return n; +} + extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) -- 2.21.0
[PATCH 3/6] powerpc: export arch_sync_dma_for_*()
From: Rob Clark Signed-off-by: Rob Clark --- arch/powerpc/mm/dma-noncoherent.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index c617282d5b2a..80d53b950821 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -401,12 +401,14 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, { __dma_sync_page(paddr, size, dir); } +EXPORT_SYMBOL_GPL(arch_sync_dma_for_device); void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { __dma_sync_page(paddr, size, dir); } +EXPORT_SYMBOL_GPL(arch_sync_dma_for_cpu); /* * Return the PFN for a given cpu virtual address returned by arch_dma_alloc. -- 2.21.0
[PATCH v4 1/3] kasan: support backing vmalloc space with real shadow memory
Hook into vmalloc and vmap, and dynamically allocate real shadow memory to back the mappings. Most mappings in vmalloc space are small, requiring less than a full page of shadow space. Allocating a full shadow page per mapping would therefore be wasteful. Furthermore, to ensure that different mappings use different shadow pages, mappings would have to be aligned to KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE. Instead, share backing space across multiple mappings. Allocate a backing page the first time a mapping in vmalloc space uses a particular page of the shadow region. Keep this page around regardless of whether the mapping is later freed - in the mean time the page could have become shared by another vmalloc mapping. This can in theory lead to unbounded memory growth, but the vmalloc allocator is pretty good at reusing addresses, so the practical memory usage grows at first but then stays fairly stable. This requires architecture support to actually use: arches must stop mapping the read-only zero page over portion of the shadow region that covers the vmalloc space and instead leave it unmapped. This allows KASAN with VMAP_STACK, and will be needed for architectures that do not have a separate module space (e.g. powerpc64, which I am currently working on). It also allows relaxing the module alignment back to PAGE_SIZE. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202009 Acked-by: Vasily Gorbik Signed-off-by: Daniel Axtens [Mark: rework shadow allocation] Signed-off-by: Mark Rutland -- v2: let kasan_unpoison_shadow deal with ranges that do not use a full shadow byte. v3: relax module alignment rename to kasan_populate_vmalloc which is a much better name deal with concurrency correctly v4: Integrate Mark's rework Poision pages on vfree Handle allocation failures. I've tested this by inserting artificial failures and using test_vmalloc to stress it. 
I haven't handled the per-cpu case: it looked like it would require a messy hacking-up of the function to deal with an OOM failure case in a debug feature. --- Documentation/dev-tools/kasan.rst | 60 +++ include/linux/kasan.h | 24 +++ include/linux/moduleloader.h | 2 +- include/linux/vmalloc.h | 12 ++ lib/Kconfig.kasan | 16 lib/test_kasan.c | 26 mm/kasan/common.c | 67 +++ mm/kasan/generic_report.c | 3 ++ mm/kasan/kasan.h | 1 + mm/vmalloc.c | 28 - 10 files changed, 237 insertions(+), 2 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index b72d07d70239..35fda484a672 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -215,3 +215,63 @@ brk handler is used to print bug reports. A potential expansion of this mode is a hardware tag-based mode, which would use hardware memory tagging support instead of compiler instrumentation and manual shadow memory manipulation. + +What memory accesses are sanitised by KASAN? + + +The kernel maps memory in a number of different parts of the address +space. This poses something of a problem for KASAN, which requires +that all addresses accessed by instrumented code have a valid shadow +region. + +The range of kernel virtual addresses is large: there is not enough +real memory to support a real shadow region for every address that +could be accessed by the kernel. + +By default +~~ + +By default, architectures only map real memory over the shadow region +for the linear mapping (and potentially other small areas). For all +other areas - such as vmalloc and vmemmap space - a single read-only +page is mapped over the shadow area. This read-only shadow page +declares all memory accesses as permitted. + +This presents a problem for modules: they do not live in the linear +mapping, but in a dedicated module space. By hooking in to the module +allocator, KASAN can temporarily map real shadow memory to cover +them. 
This allows detection of invalid accesses to module globals, for +example. + +This also creates an incompatibility with ``VMAP_STACK``: if the stack +lives in vmalloc space, it will be shadowed by the read-only page, and +the kernel will fault when trying to set up the shadow data for stack +variables. + +CONFIG_KASAN_VMALLOC + + +With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the +cost of greater memory usage. Currently this is only supported on x86. + +This works by hooking into vmalloc and vmap, and dynamically +allocating real shadow memory to back the mappings. + +Most mappings in vmalloc space are small, requiring less than a full +page of shadow space. Allocating a full shadow page per mapping would +therefore be wasteful. Furthermore, to ensure that different mappings
[PATCH v4 2/3] fork: support VMAP_STACK with KASAN_VMALLOC
Supporting VMAP_STACK with KASAN_VMALLOC is straightforward: - clear the shadow region of vmapped stacks when swapping them in - tweak Kconfig to allow VMAP_STACK to be turned on with KASAN Reviewed-by: Dmitry Vyukov Signed-off-by: Daniel Axtens --- arch/Kconfig | 9 + kernel/fork.c | 4 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a7b57dd42c26..e791196005e1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -825,16 +825,17 @@ config HAVE_ARCH_VMAP_STACK config VMAP_STACK default y bool "Use a virtually-mapped stack" - depends on HAVE_ARCH_VMAP_STACK && !KASAN + depends on HAVE_ARCH_VMAP_STACK + depends on !KASAN || KASAN_VMALLOC ---help--- Enable this if you want the use virtually-mapped kernel stacks with guard pages. This causes kernel stack overflows to be caught immediately rather than causing difficult-to-diagnose corruption. - This is presently incompatible with KASAN because KASAN expects - the stack to map directly to the KASAN shadow map using a formula - that is incorrect if the stack is in vmalloc space. + To use this with KASAN, the architecture must support backing + virtual mappings with real shadow memory, and KASAN_VMALLOC must + be enabled. config ARCH_OPTIONAL_KERNEL_RWX def_bool n diff --git a/kernel/fork.c b/kernel/fork.c index d8ae0f1b4148..ce3150fe8ff2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include @@ -215,6 +216,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; + /* Clear the KASAN shadow of the stack. */ + kasan_unpoison_shadow(s->addr, THREAD_SIZE); + /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); -- 2.20.1
[PATCH v4 0/3] kasan: support backing vmalloc space with real shadow memory
Currently, vmalloc space is backed by the early shadow page. This means that kasan is incompatible with VMAP_STACK, and it also provides a hurdle for architectures that do not have a dedicated module space (like powerpc64). This series provides a mechanism to back vmalloc space with real, dynamically allocated memory. I have only wired up x86, because that's the only currently supported arch I can work with easily, but it's very easy to wire up other architectures. This has been discussed before in the context of VMAP_STACK: - https://bugzilla.kernel.org/show_bug.cgi?id=202009 - https://lkml.org/lkml/2018/7/22/198 - https://lkml.org/lkml/2019/7/19/822 In terms of implementation details: Most mappings in vmalloc space are small, requiring less than a full page of shadow space. Allocating a full shadow page per mapping would therefore be wasteful. Furthermore, to ensure that different mappings use different shadow pages, mappings would have to be aligned to KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE. Instead, share backing space across multiple mappings. Allocate a backing page the first time a mapping in vmalloc space uses a particular page of the shadow region. Keep this page around regardless of whether the mapping is later freed - in the mean time the page could have become shared by another vmalloc mapping. This can in theory lead to unbounded memory growth, but the vmalloc allocator is pretty good at reusing addresses, so the practical memory usage appears to grow at first but then stay fairly stable. If we run into practical memory exhaustion issues, I'm happy to consider hooking into the book-keeping that vmap does, but I am not convinced that it will be an issue. 
v1: https://lore.kernel.org/linux-mm/20190725055503.19507-1-...@axtens.net/ v2: https://lore.kernel.org/linux-mm/20190729142108.23343-1-...@axtens.net/ Address review comments: - Patch 1: use kasan_unpoison_shadow's built-in handling of ranges that do not align to a full shadow byte - Patch 3: prepopulate pgds rather than faulting things in v3: https://lore.kernel.org/linux-mm/20190731071550.31814-1-...@axtens.net/ Address comments from Mark Rutland: - kasan_populate_vmalloc is a better name - handle concurrency correctly - various nits and cleanups - relax module alignment in KASAN_VMALLOC case v4: Changes to patch 1 only: - Integrate Mark's rework, thanks Mark! - handle the case where kasan_populate_shadow might fail - poison shadow on free, allowing the alloc path to just unpoison memory that it uses Daniel Axtens (3): kasan: support backing vmalloc space with real shadow memory fork: support VMAP_STACK with KASAN_VMALLOC x86/kasan: support KASAN_VMALLOC Documentation/dev-tools/kasan.rst | 60 +++ arch/Kconfig | 9 +++-- arch/x86/Kconfig | 1 + arch/x86/mm/kasan_init_64.c | 61 include/linux/kasan.h | 24 +++ include/linux/moduleloader.h | 2 +- include/linux/vmalloc.h | 12 ++ kernel/fork.c | 4 ++ lib/Kconfig.kasan | 16 lib/test_kasan.c | 26 mm/kasan/common.c | 67 +++ mm/kasan/generic_report.c | 3 ++ mm/kasan/kasan.h | 1 + mm/vmalloc.c | 28 - 14 files changed, 308 insertions(+), 6 deletions(-) -- 2.20.1
[PATCH v10 4/7] extable: Add function to search only kernel exception table
Certain architecture specific operating modes (e.g., in powerpc machine check handler that is unable to access vmalloc memory), the search_exception_tables cannot be called because it also searches the module exception tables if entry is not found in the kernel exception table. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Nicholas Piggin Signed-off-by: Santosh Sivaraj Reviewed-by: Nicholas Piggin --- include/linux/extable.h | 2 ++ kernel/extable.c| 11 +-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/extable.h b/include/linux/extable.h index 41c5b3a25f67..81ecfaa83ad3 100644 --- a/include/linux/extable.h +++ b/include/linux/extable.h @@ -19,6 +19,8 @@ void trim_init_extable(struct module *m); /* Given an address, look for it in the exception tables */ const struct exception_table_entry *search_exception_tables(unsigned long add); +const struct exception_table_entry * +search_kernel_exception_table(unsigned long addr); #ifdef CONFIG_MODULES /* For extable.c to search modules' exception tables. */ diff --git a/kernel/extable.c b/kernel/extable.c index e23cce6e6092..f6c9406eec7d 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -40,13 +40,20 @@ void __init sort_main_extable(void) } } +/* Given an address, look for it in the kernel exception table */ +const +struct exception_table_entry *search_kernel_exception_table(unsigned long addr) +{ + return search_extable(__start___ex_table, + __stop___ex_table - __start___ex_table, addr); +} + /* Given an address, look for it in the exception tables. */ const struct exception_table_entry *search_exception_tables(unsigned long addr) { const struct exception_table_entry *e; - e = search_extable(__start___ex_table, - __stop___ex_table - __start___ex_table, addr); + e = search_kernel_exception_table(addr); if (!e) e = search_module_extables(addr); return e; -- 2.21.0
[PATCH v10 3/7] powerpc/mce: Make machine_check_ue_event() static
From: Reza Arbab The function doesn't get used outside this file, so make it static. Signed-off-by: Reza Arbab Signed-off-by: Santosh Sivaraj Reviewed-by: Nicholas Piggin --- arch/powerpc/kernel/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index cff31d4a501f..a3b122a685a5 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -34,7 +34,7 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], static void machine_check_process_queued_event(struct irq_work *work); static void machine_check_ue_irq_work(struct irq_work *work); -void machine_check_ue_event(struct machine_check_event *evt); +static void machine_check_ue_event(struct machine_check_event *evt); static void machine_process_ue_event(struct work_struct *work); static struct irq_work mce_event_process_work = { @@ -212,7 +212,7 @@ static void machine_check_ue_irq_work(struct irq_work *work) /* * Queue up the MCE event which then can be handled later. */ -void machine_check_ue_event(struct machine_check_event *evt) +static void machine_check_ue_event(struct machine_check_event *evt) { int index; -- 2.21.0
Re: [5.3.0-rc4-next][bisected 882632][qla2xxx] WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 qla2x00_status_entry.isra
On 8/14/19 10:18 AM, Abdul Haleem wrote: On Wed, 2019-08-14 at 10:05 -0700, Bart Van Assche wrote: On 8/14/19 9:52 AM, Abdul Haleem wrote: Greeting's Today's linux-next kernel (5.3.0-rc4-next-20190813) booted with warning on my powerpc power 8 lpar The WARN_ON_ONCE() was introduced by commit 88263208 (scsi: qla2xxx: Complain if sp->done() is not...) boot logs: WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 Hi Abdul, Thank you for having reported this. Is that the only warning reported on your setup by the qla2xxx driver? If that warning is commented out, does the qla2xxx driver work as expected? boot warning did not show up when the commit is reverted. should I comment out only the WARN_ON_ONCE() which is causing the issue, and not the other one ? Yes please. Commit 88263208 introduced five kernel warnings but I think only one of these should be removed again, e.g. as follows: diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index cd39ac18c5fd..d81b5ecce24b 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2780,8 +2780,6 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) if (rsp->status_srb == NULL) sp->done(sp, res); - else - WARN_ON_ONCE(true); } /**
[PATCH 6/6] powerpc: Don't flush caches when adding memory
From: Alastair D'Silva This operation takes a significant amount of time when hotplugging large amounts of memory (~50 seconds with 890GB of persistent memory). This was originally in commit fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug") to support memtrace, but the flush on add is not needed as it is flushed on remove. Signed-off-by: Alastair D'Silva --- arch/powerpc/mm/mem.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index fb0d5e9aa11b..43be99de7c9a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -111,7 +111,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - unsigned long i; int rc; resize_hpt_for_hotplug(memblock_phys_mem_size()); @@ -124,11 +123,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return -EFAULT; } - for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) { - flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE)); - cond_resched(); - } - return __add_pages(nid, start_pfn, nr_pages, restrictions); } -- 2.21.0
[PATCH 5/6] powerpc: Remove 'extern' from func prototypes in cache headers
From: Alastair D'Silva The 'extern' keyword does not value-add for function prototypes. Signed-off-by: Alastair D'Silva --- arch/powerpc/include/asm/cache.h | 8 arch/powerpc/include/asm/cacheflush.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 728f154204db..c5c096e968e0 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -102,10 +102,10 @@ static inline u32 l1_icache_bytes(void) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) #ifdef CONFIG_PPC_BOOK3S_32 -extern long _get_L2CR(void); -extern long _get_L3CR(void); -extern void _set_L2CR(unsigned long); -extern void _set_L3CR(unsigned long); +long _get_L2CR(void); +long _get_L3CR(void); +void _set_L2CR(unsigned long val); +void _set_L3CR(unsigned long val); #else #define _get_L2CR()0L #define _get_L3CR()0L diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 4c3377aff8ed..1826bf2cc137 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -38,15 +38,15 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } #endif #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping)do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) void flush_icache_range(unsigned long start, unsigned long stop); -extern void flush_icache_user_range(struct vm_area_struct *vma, +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); -extern void flush_dcache_icache_page(struct page *page); +void flush_dcache_icache_page(struct page *page); /** * flush_dcache_range(): Write any modified data cache blocks out to memory and invalidate them. -- 2.21.0
Re: [PATCH v9 7/7] powerpc: add machine check safe copy_to_user
Hi Balbir, Balbir Singh writes: > On 12/8/19 7:22 pm, Santosh Sivaraj wrote: >> Use memcpy_mcsafe() implementation to define copy_to_user_mcsafe() >> >> Signed-off-by: Santosh Sivaraj >> --- >> arch/powerpc/Kconfig | 1 + >> arch/powerpc/include/asm/uaccess.h | 14 ++ >> 2 files changed, 15 insertions(+) >> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >> index 77f6ebf97113..4316e36095a2 100644 >> --- a/arch/powerpc/Kconfig >> +++ b/arch/powerpc/Kconfig >> @@ -137,6 +137,7 @@ config PPC >> select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && >> !RELOCATABLE && !HIBERNATION) >> select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST >> select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 >> +select ARCH_HAS_UACCESS_MCSAFE if PPC64 >> select ARCH_HAS_UBSAN_SANITIZE_ALL >> select ARCH_HAVE_NMI_SAFE_CMPXCHG >> select ARCH_KEEP_MEMBLOCK >> diff --git a/arch/powerpc/include/asm/uaccess.h >> b/arch/powerpc/include/asm/uaccess.h >> index 8b03eb44e876..15002b51ff18 100644 >> --- a/arch/powerpc/include/asm/uaccess.h >> +++ b/arch/powerpc/include/asm/uaccess.h >> @@ -387,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void >> __user *to, >> return ret; >> } >> >> +static __always_inline unsigned long __must_check >> +copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) >> +{ >> +if (likely(check_copy_size(from, n, true))) { >> +if (access_ok(to, n)) { >> +allow_write_to_user(to, n); >> +n = memcpy_mcsafe((void *)to, from, n); >> +prevent_write_to_user(to, n); >> +} >> +} >> + >> +return n; > > Do we always return n independent of the check_copy_size return value and > access_ok return values? Yes we always return the remaining bytes not copied even if check_copy_size or access_ok fails. Santosh > > Balbir Singh. > >> +} >> + >> extern unsigned long __clear_user(void __user *addr, unsigned long size); >> >> static inline unsigned long clear_user(void __user *addr, unsigned long >> size) >>
[bug report] powerpc/iommu: Implement IOMMU pools to improve multiqueue adapter performance
[ Ancient code. The warning is correct but the bug seems harmless. -- dan ] Hello Anton Blanchard, The patch b4c3a8729ae5: "powerpc/iommu: Implement IOMMU pools to improve multiqueue adapter performance" from Jun 7, 2012, leads to the following static checker warning: arch/powerpc/kernel/iommu.c:377 get_pool() warn: array off by one? '*tbl->pools + pool_nr' arch/powerpc/kernel/iommu.c 364 static struct iommu_pool *get_pool(struct iommu_table *tbl, 365 unsigned long entry) 366 { 367 struct iommu_pool *p; 368 unsigned long largepool_start = tbl->large_pool.start; 369 370 /* The large pool is the last pool at the top of the table */ 371 if (entry >= largepool_start) { 372 p = >large_pool; 373 } else { 374 unsigned int pool_nr = entry / tbl->poolsize; 375 376 BUG_ON(pool_nr > tbl->nr_pools); ^ This should be ">=". The tbl->nr_pools value is either 1 or IOMMU_NR_POOLS and the tbl->pools[] array has IOMMU_NR_POOLS elements. 377 p = >pools[pool_nr]; 378 } 379 380 return p; 381 } regards, dan carpenter
[PATCH v10 1/7] powerpc/mce: Schedule work from irq_work
schedule_work() cannot be called from MCE exception context as MCE can interrupt even in interrupt disabled context. fixes: 733e4a4c ("powerpc/mce: hookup memory_failure for UE errors") Suggested-by: Mahesh Salgaonkar Signed-off-by: Santosh Sivaraj Reviewed-by: Mahesh Salgaonkar Acked-by: Balbir Singh Cc: sta...@vger.kernel.org # v4.15+ --- arch/powerpc/kernel/mce.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index b18df633eae9..cff31d4a501f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -33,6 +33,7 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_ue_event_queue); static void machine_check_process_queued_event(struct irq_work *work); +static void machine_check_ue_irq_work(struct irq_work *work); void machine_check_ue_event(struct machine_check_event *evt); static void machine_process_ue_event(struct work_struct *work); @@ -40,6 +41,10 @@ static struct irq_work mce_event_process_work = { .func = machine_check_process_queued_event, }; +static struct irq_work mce_ue_event_irq_work = { + .func = machine_check_ue_irq_work, +}; + DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static void mce_set_error_info(struct machine_check_event *mce, @@ -199,6 +204,10 @@ void release_mce_event(void) get_mce_event(NULL, true); } +static void machine_check_ue_irq_work(struct irq_work *work) +{ + schedule_work(_ue_event_work); +} /* * Queue up the MCE event which then can be handled later. @@ -216,7 +225,7 @@ void machine_check_ue_event(struct machine_check_event *evt) memcpy(this_cpu_ptr(_ue_event_queue[index]), evt, sizeof(*evt)); /* Queue work to process this event later. */ - schedule_work(_ue_event_work); + irq_work_queue(_ue_event_irq_work); } /* -- 2.21.0
[PATCH v10 0/7] powerpc: implement machine check safe memcpy
During a memcpy from a pmem device, if a machine check exception is generated we end up in a panic. In case of fsdax read, this should only result in a -EIO. Avoid MCE by implementing memcpy_mcsafe. Before this patch series: ``` bash-4.4# mount -o dax /dev/pmem0 /mnt/pmem/ [ 7621.714094] Disabling lock debugging due to kernel taint [ 7621.714099] MCE: CPU0: machine check (Severe) Host UE Load/Store [Not recovered] [ 7621.714104] MCE: CPU0: NIP: [c0088978] memcpy_power7+0x418/0x7e0 [ 7621.714107] MCE: CPU0: Hardware error [ 7621.714112] opal: Hardware platform error: Unrecoverable Machine Check exception [ 7621.714118] CPU: 0 PID: 1368 Comm: mount Tainted: G M 5.2.0-rc5-00239-g241e39004581 #50 [ 7621.714123] NIP: c0088978 LR: c08e16f8 CTR: 01de [ 7621.714129] REGS: c000fffbfd70 TRAP: 0200 Tainted: G M (5.2.0-rc5-00239-g241e39004581) [ 7621.714131] MSR: 92209033 CR: 24428840 XER: 0004 [ 7621.714160] CFAR: c00889a8 DAR: deadbeefdeadbeef DSISR: 8000 IRQMASK: 0 [ 7621.714171] GPR00: 0e00 c000f0b8b1e0 c12cf100 c000ed8e1100 [ 7621.714186] GPR04: c2001100 0001 0200 03fff1272000 [ 7621.714201] GPR08: 8000 0010 0020 0030 [ 7621.714216] GPR12: 0040 7fffb8c6d390 0050 0060 [ 7621.714232] GPR16: 0070 0001 c000f0b8b960 [ 7621.714247] GPR20: 0001 c000f0b8b940 0001 0001 [ 7621.714262] GPR24: c1382560 c00c003b6380 c00c003b6380 0001 [ 7621.714277] GPR28: 0001 c200 0001 [ 7621.714294] NIP [c0088978] memcpy_power7+0x418/0x7e0 [ 7621.714298] LR [c08e16f8] pmem_do_bvec+0xf8/0x430 ... ... ``` After this patch series: ``` bash-4.4# mount -o dax /dev/pmem0 /mnt/pmem/ [25302.883978] Buffer I/O error on dev pmem0, logical block 0, async page read [25303.020816] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your own risk [25303.021236] EXT4-fs (pmem0): Can't read superblock on 2nd try [25303.152515] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your own risk [25303.284031] EXT4-fs (pmem0): DAX enabled. 
Warning: EXPERIMENTAL, use at your own risk [25304.084100] UDF-fs: bad mount option "dax" or missing value mount: /mnt/pmem: wrong fs type, bad option, bad superblock on /dev/pmem0, missing codepage or helper program, or other error. ``` MCE is injected on a pmem address using mambo. The last patch which adds a nop is only for testing on mambo, where r13 is not restored upon hitting vector 200. The memcpy code can be optimised by adding VMX optimizations and GAS macros can be used to enable code reusability, which I will send as another series. -- v10: Fix authorship; add reviewed-bys and acks. v9: * Add a new IRQ work for UE events [mahesh] * Reorder patches, and copy stable v8: * While ignoring UE events, return was used instead of continue. * Checkpatch fixups for commit log v7: * Move schedule_work to be called from irq_work. v6: * Don't return pfn, all callees are expecting physical address anyway [nick] * Patch re-ordering: move exception table patch before memcpy_mcsafe patch [nick] * Reword commit log for search_exception_tables patch [nick] v5: * Don't use search_exception_tables since it searches for module exception tables also [Nicholas] * Fix commit message for patch 2 [Nicholas] v4: * Squash return remaining bytes patch to memcpy_mcsafe implementation patch [christophe] * Access ok should be checked for copy_to_user_mcsafe() [christophe] v3: * Drop patch which enables DR/IR for external modules * Drop notifier call chain, we don't want to do that in real mode * Return remaining bytes from memcpy_mcsafe correctly * We no longer restore r13 for simulator tests, rather use a nop at vector 0x200 [workaround for simulator; not to be merged] v2: * Don't set RI bit explicitly [mahesh] * Re-ordered series to get r13 workaround as the last patch -- Balbir Singh (3): powerpc/mce: Fix MCE handling for huge pages powerpc/memcpy: Add memcpy_mcsafe for pmem powerpc/mce: Handle UE event for memcpy_mcsafe Reza Arbab (1): powerpc/mce: Make machine_check_ue_event() static
Santosh Sivaraj (3): powerpc/mce: Schedule work from irq_work extable: Add function to search only kernel exception table powerpc: add machine check safe copy_to_user arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/mce.h | 6 +- arch/powerpc/include/asm/string.h| 2 + arch/powerpc/include/asm/uaccess.h | 14 ++ arch/powerpc/kernel/mce.c| 31 +++- arch/powerpc/kernel/mce_power.c | 70 arch/powerpc/lib/Makefile| 2 +- arch/powerpc/lib/memcpy_mcsafe_64.S | 242
[PATCH 2/6] powerpc: define helpers to get L1 icache sizes
From: Alastair D'Silva This patch adds helpers to retrieve icache sizes, and renames the existing helpers to make it clear that they are for dcache. Signed-off-by: Alastair D'Silva --- arch/powerpc/include/asm/cache.h | 29 +++ arch/powerpc/include/asm/cacheflush.h | 12 +-- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index b3388d95f451..f852d5cd746c 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -55,25 +55,46 @@ struct ppc64_caches { extern struct ppc64_caches ppc64_caches; -static inline u32 l1_cache_shift(void) +static inline u32 l1_dcache_shift(void) { return ppc64_caches.l1d.log_block_size; } -static inline u32 l1_cache_bytes(void) +static inline u32 l1_dcache_bytes(void) { return ppc64_caches.l1d.block_size; } + +static inline u32 l1_icache_shift(void) +{ + return ppc64_caches.l1i.log_block_size; +} + +static inline u32 l1_icache_bytes(void) +{ + return ppc64_caches.l1i.block_size; +} #else -static inline u32 l1_cache_shift(void) +static inline u32 l1_dcache_shift(void) { return L1_CACHE_SHIFT; } -static inline u32 l1_cache_bytes(void) +static inline u32 l1_dcache_bytes(void) { return L1_CACHE_BYTES; } + +static inline u32 l1_icache_shift(void) +{ + return L1_CACHE_SHIFT; +} + +static inline u32 l1_icache_bytes(void) +{ + return L1_CACHE_BYTES; +} + #endif #endif /* ! 
__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index eef388f2659f..ed57843ef452 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -63,8 +63,8 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr) */ static inline void flush_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; @@ -89,8 +89,8 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop) */ static inline void clean_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; @@ -108,8 +108,8 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop) static inline void invalidate_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; -- 2.21.0
[RFC PATCH v4 2/2] powerpc/xmon: Restrict when kernel is locked down
Xmon should be either fully or partially disabled depending on the kernel lockdown state. Put xmon into read-only mode for lockdown=integrity and completely disable xmon when lockdown=confidentiality. Xmon checks the lockdown state and takes appropriate action: (1) during xmon_setup to prevent early xmon'ing (2) when triggered via sysrq (3) when toggled via debugfs (4) when triggered via a previously enabled breakpoint The following lockdown state transitions are handled: (1) lockdown=none -> lockdown=integrity set xmon read-only mode (2) lockdown=none -> lockdown=confidentiality clear all breakpoints, set xmon read-only mode, prevent re-entry into xmon (3) lockdown=integrity -> lockdown=confidentiality clear all breakpoints, set xmon read-only mode, prevent re-entry into xmon Suggested-by: Andrew Donnellan Signed-off-by: Christopher M. Riedl --- arch/powerpc/xmon/xmon.c | 59 ++-- include/linux/security.h | 2 ++ security/lockdown/lockdown.c | 2 ++ 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index bb63ecc599fd..8fd79369974e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -187,6 +188,9 @@ static void dump_tlb_44x(void); static void dump_tlb_book3e(void); #endif +static void clear_all_bpt(void); +static void xmon_init(int); + #ifdef CONFIG_PPC64 #define REG"%.16lx" #else @@ -283,10 +287,41 @@ Commands:\n\ " U show uptime information\n" " ? 
help\n" " # n limit output to n lines per page (for dp, dpa, dl)\n" -" zr reboot\n\ - zh halt\n" +" zr reboot\n" +" zh halt\n" ; +#ifdef CONFIG_SECURITY +static bool xmon_is_locked_down(void) +{ + static bool lockdown; + + if (!lockdown) { + lockdown = !!security_locked_down(LOCKDOWN_XMON_RW); + if (lockdown) { + printf("xmon: Disabled due to kernel lockdown\n"); + xmon_is_ro = true; + xmon_on = 0; + xmon_init(0); + clear_all_bpt(); + } + } + + if (!xmon_is_ro) { + xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR); + if (xmon_is_ro) + printf("xmon: Read-only due to kernel lockdown\n"); + } + + return lockdown; +} +#else /* CONFIG_SECURITY */ +static inline bool xmon_is_locked_down(void) +{ + return false; +} +#endif + static struct pt_regs *xmon_regs; static inline void sync(void) @@ -704,6 +739,9 @@ static int xmon_bpt(struct pt_regs *regs) struct bpt *bp; unsigned long offset; + if (xmon_is_locked_down()) + return 0; + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; @@ -735,6 +773,9 @@ static int xmon_sstep(struct pt_regs *regs) static int xmon_break_match(struct pt_regs *regs) { + if (xmon_is_locked_down()) + return 0; + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; if (dabr.enabled == 0) @@ -745,6 +786,9 @@ static int xmon_break_match(struct pt_regs *regs) static int xmon_iabr_match(struct pt_regs *regs) { + if (xmon_is_locked_down()) + return 0; + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; if (iabr == NULL) @@ -3750,6 +3794,9 @@ static void xmon_init(int enable) #ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handle_xmon(int key) { + if (xmon_is_locked_down()) + return; + /* ensure xmon is enabled */ xmon_init(1); debugger(get_irq_regs()); @@ -3771,7 +3818,6 @@ static int __init setup_xmon_sysrq(void) device_initcall(setup_xmon_sysrq); #endif /* CONFIG_MAGIC_SYSRQ */ -#ifdef CONFIG_DEBUG_FS static void clear_all_bpt(void) { int i; @@ -3793,8 +3839,12 @@ static void 
clear_all_bpt(void) printf("xmon: All breakpoints cleared\n"); } +#ifdef CONFIG_DEBUG_FS static int xmon_dbgfs_set(void *data, u64 val) { + if (xmon_is_locked_down()) + return 0; + xmon_on = !!val; xmon_init(xmon_on); @@ -3853,6 +3903,9 @@ early_param("xmon", early_parse_xmon); void __init xmon_setup(void) { + if (xmon_is_locked_down()) + return; + if (xmon_on) xmon_init(1); if (xmon_early) diff --git a/include/linux/security.h b/include/linux/security.h index 807dc0d24982..379b74b5d545 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -116,12 +116,14 @@ enum lockdown_reason { LOCKDOWN_MODULE_PARAMETERS, LOCKDOWN_MMIOTRACE, LOCKDOWN_DEBUGFS, + LOCKDOWN_XMON_WR,
[PATCH] powerpc: Allow flush_(inval_)dcache_range to work across ranges >4GB
From: Alastair D'Silva Heads Up: This patch cannot be submitted to Linus's tree, as the affected assembler functions have already been converted to C. When calling flush_(inval_)dcache_range with a size >4GB, we were masking off the upper 32 bits, so we would incorrectly flush a range smaller than intended. This patch replaces the 32 bit shifts with 64 bit ones, so that the full size is accounted for. Signed-off-by: Alastair D'Silva --- arch/powerpc/kernel/misc_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1ad4089dd110..d4d096f80f4b 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -130,7 +130,7 @@ _GLOBAL_TOC(flush_dcache_range) subfr8,r6,r4/* compute length */ add r8,r8,r5/* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ - srw.r8,r8,r9/* compute line count */ + srd.r8,r8,r9/* compute line count */ beqlr /* nothing to do? */ mtctr r8 0: dcbst 0,r6 @@ -148,7 +148,7 @@ _GLOBAL(flush_inval_dcache_range) subfr8,r6,r4/* compute length */ add r8,r8,r5/* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ - srw.r8,r8,r9/* compute line count */ + srd.r8,r8,r9/* compute line count */ beqlr /* nothing to do? */ sync isync -- 2.21.0
[RFC PATCH v4 0/2] Restrict xmon when kernel is locked down
Xmon should be either fully or partially disabled depending on the kernel lockdown state. Put xmon into read-only mode for lockdown=integrity and completely disable xmon when lockdown=confidentiality. Since this can occur dynamically, there may be pre-existing, active breakpoints in xmon when transitioning into read-only mode. These breakpoints will still trigger, so allow them to be listed, but not cleared or altered, using xmon. Changes since v3: - Allow active breakpoints to be shown/listed in read-only mode Changes since v2: - Rebased onto v36 of https://patchwork.kernel.org/cover/11049461/ (based on: f632a8170a6b667ee4e3f552087588f0fe13c4bb) - Do not clear existing breakpoints when transitioning from lockdown=none to lockdown=integrity - Remove line continuation and dangling quote (confuses checkpatch.pl) from the xmon command help/usage string Christopher M. Riedl (2): powerpc/xmon: Allow listing active breakpoints in read-only mode powerpc/xmon: Restrict when kernel is locked down arch/powerpc/xmon/xmon.c | 78 include/linux/security.h | 2 + security/lockdown/lockdown.c | 2 + 3 files changed, 74 insertions(+), 8 deletions(-) -- 2.22.0
[RFC PATCH v4 1/2] powerpc/xmon: Allow listing active breakpoints in read-only mode
Xmon can enter read-only mode dynamically due to changes in kernel lockdown state. This transition does not clear active breakpoints and any these breakpoints should remain visible to the xmon'er. Signed-off-by: Christopher M. Riedl --- arch/powerpc/xmon/xmon.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d0620d762a5a..bb63ecc599fd 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1045,10 +1045,6 @@ cmds(struct pt_regs *excp) set_lpp_cmd(); break; case 'b': - if (xmon_is_ro) { - printf(xmon_ro_msg); - break; - } bpt_cmds(); break; case 'C': @@ -1317,11 +1313,16 @@ bpt_cmds(void) struct bpt *bp; cmd = inchar(); + switch (cmd) { #ifndef CONFIG_PPC_8xx static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; int mode; case 'd': /* bd - hardware data breakpoint */ + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } if (!ppc_breakpoint_available()) { printf("Hardware data breakpoint not supported on this cpu\n"); break; @@ -1349,6 +1350,10 @@ bpt_cmds(void) break; case 'i': /* bi - hardware instr breakpoint */ + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { printf("Hardware instruction breakpoint " "not supported on this cpu\n"); @@ -1372,6 +1377,10 @@ bpt_cmds(void) #endif case 'c': + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } if (!scanhex()) { /* clear all breakpoints */ for (i = 0; i < NBPTS; ++i) @@ -1407,7 +1416,7 @@ bpt_cmds(void) break; } termch = cmd; - if (!scanhex()) { + if (xmon_is_ro || !scanhex()) { /* print all breakpoints */ printf(" typeaddress\n"); if (dabr.enabled) { -- 2.22.0
Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro
Hi Christophe, On Tue, Aug 13, 2019 at 09:59:35AM +, Christophe Leroy wrote: > + rldicr \r, \r, 32, 31 Could you please write this as sldi \r, \r, 32 ? It's much easier to read, imo (it's the exact same instruction). You can do a lot cheaper sequences if you have a temporary reg, as well (longest path of 3 insns instead of 5): lis rt,A ori rt,B lis rd,C ori rd,D rldimi rd,rt,32,0 to load ABCD. Segher
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 --- Comment #24 from Christophe Leroy (christophe.le...@c-s.fr) --- It confirms what I suspected: due to some debug options, kzalloc() doesn't provide aligned areas. In __load_free_space_cache() can you replace e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); By e->bitmap = (void *)__get_free_page(GFP_NOFS | __GFP_ZERO); And same in insert_into_bitmap() Then replace the three kfree() which free bitmaps by something like free_page((unsigned long)entry->bitmap) -- You are receiving this mail because: You are on the CC list for the bug.
Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property in EP driver.
I asked you to remove the period at the end of the patch $SUBJECT and you did not, either you do not read what I write or explain me what's going on. On Wed, Aug 14, 2019 at 10:03:29AM +0800, Xiaowei Bao wrote: > The PCIe controller of layerscape just have 4 BARs, BAR0 and BAR1 > is 32bit, BAR2 and BAR4 is 64bit, this is determined by hardware, > so set the bar_fixed_64bit with 0x14. > > Signed-off-by: Xiaowei Bao Kishon ACK'ed this patch and you have not carried his tag. I will make these changes but that's the last time I do that for you. Lorenzo > --- > v2: > - Replace value 0x14 with a macro. > v3: > - No change. > v4: > - send the patch again with '--to'. > v5: > - fix the commit message. > v6: > - remove the [EXT] tag of the $SUBJECT in email. > > drivers/pci/controller/dwc/pci-layerscape-ep.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c > b/drivers/pci/controller/dwc/pci-layerscape-ep.c > index be61d96..ca9aa45 100644 > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c > @@ -44,6 +44,7 @@ static const struct pci_epc_features ls_pcie_epc_features = > { > .linkup_notifier = false, > .msi_capable = true, > .msix_capable = false, > + .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4), > }; > > static const struct pci_epc_features* > -- > 2.9.5 >
Re: [PATCH v9 7/7] powerpc: add machine check safe copy_to_user
On 12/8/19 7:22 pm, Santosh Sivaraj wrote: > Use memcpy_mcsafe() implementation to define copy_to_user_mcsafe() > > Signed-off-by: Santosh Sivaraj > --- > arch/powerpc/Kconfig | 1 + > arch/powerpc/include/asm/uaccess.h | 14 ++ > 2 files changed, 15 insertions(+) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 77f6ebf97113..4316e36095a2 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -137,6 +137,7 @@ config PPC > select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && > !RELOCATABLE && !HIBERNATION) > select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST > select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 > + select ARCH_HAS_UACCESS_MCSAFE if PPC64 > select ARCH_HAS_UBSAN_SANITIZE_ALL > select ARCH_HAVE_NMI_SAFE_CMPXCHG > select ARCH_KEEP_MEMBLOCK > diff --git a/arch/powerpc/include/asm/uaccess.h > b/arch/powerpc/include/asm/uaccess.h > index 8b03eb44e876..15002b51ff18 100644 > --- a/arch/powerpc/include/asm/uaccess.h > +++ b/arch/powerpc/include/asm/uaccess.h > @@ -387,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void __user > *to, > return ret; > } > > +static __always_inline unsigned long __must_check > +copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) > +{ > + if (likely(check_copy_size(from, n, true))) { > + if (access_ok(to, n)) { > + allow_write_to_user(to, n); > + n = memcpy_mcsafe((void *)to, from, n); > + prevent_write_to_user(to, n); > + } > + } > + > + return n; Do we always return n independent of the check_copy_size return value and access_ok return values? Balbir Singh. > +} > + > extern unsigned long __clear_user(void __user *addr, unsigned long size); > > static inline unsigned long clear_user(void __user *addr, unsigned long size) >
[PATCH v2 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro
Today LOAD_REG_IMMEDIATE() is a basic #define which loads all parts on a value into a register, including the parts that are NUL. This means always 2 instructions on PPC32 and always 5 instructions on PPC64. And those instructions cannot run in parallele as they are updating the same register. Ex: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in: 3c 20 00 00 lis r1,0 60 21 00 00 ori r1,r1,0 78 21 07 c6 rldicr r1,r1,32,31 64 21 00 00 orisr1,r1,0 60 21 40 00 ori r1,r1,16384 Rewrite LOAD_REG_IMMEDIATE() with GAS macro in order to skip the parts that are NUL. Rename existing LOAD_REG_IMMEDIATE() as LOAD_REG_IMMEDIATE_SYM() and use that one for loading value of symbols which are not known at compile time. Now LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in: 38 20 40 00 li r1,16384 Signed-off-by: Christophe Leroy --- v2: Fixed the test from (\x) & 0x to (\x) >= 0x8000 || (\x) < -0x8000 in __LOAD_REG_IMMEDIATE() arch/powerpc/include/asm/ppc_asm.h | 42 +++- arch/powerpc/kernel/exceptions-64e.S | 10 - arch/powerpc/kernel/head_64.S| 2 +- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index e0637730a8e7..bc1385b2f0aa 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -311,13 +311,43 @@ GLUE(.,name): addis reg,reg,(name - 0b)@ha; \ addireg,reg,(name - 0b)@l; -#ifdef __powerpc64__ -#ifdef HAVE_AS_ATHIGH +#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH) #define __AS_ATHIGH high #else #define __AS_ATHIGH h #endif -#define LOAD_REG_IMMEDIATE(reg,expr) \ + +.macro __LOAD_REG_IMMEDIATE_32 r, x + .if (\x) >= 0x8000 || (\x) < -0x8000 + lis \r, (\x)@__AS_ATHIGH + .if (\x) & 0x != 0 + ori \r, \r, (\x)@l + .endif + .else + li \r, (\x)@l + .endif +.endm + +.macro __LOAD_REG_IMMEDIATE r, x + .if (\x) >= 0x8000 || (\x) < -0x8000 + __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32 + rldicr \r, \r, 32, 31 + .if (\x) & 0x != 0 + oris \r, \r, 
(\x)@__AS_ATHIGH + .endif + .if (\x) & 0x != 0 + oris \r, \r, (\x)@l + .endif + .else + __LOAD_REG_IMMEDIATE_32 \r, \x + .endif +.endm + +#ifdef __powerpc64__ + +#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr + +#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \ lis reg,(expr)@highest; \ ori reg,reg,(expr)@higher; \ rldicr reg,reg,32,31; \ @@ -335,11 +365,13 @@ GLUE(.,name): #else /* 32-bit */ -#define LOAD_REG_IMMEDIATE(reg,expr) \ +#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr + +#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \ lis reg,(expr)@ha; \ addireg,reg,(expr)@l; -#define LOAD_REG_ADDR(reg,name)LOAD_REG_IMMEDIATE(reg, name) +#define LOAD_REG_ADDR(reg,name)LOAD_REG_IMMEDIATE_SYM(reg, name) #define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha #define ADDROFF(name) name@l diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 1cfb3da4a84a..898aae6da167 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -751,8 +751,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) ld r14,interrupt_base_book3e@got(r15) ld r15,__end_interrupts@got(r15) #else - LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) - LOAD_REG_IMMEDIATE(r15,__end_interrupts) + LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts) #endif cmpld cr0,r10,r14 cmpld cr1,r10,r15 @@ -821,8 +821,8 @@ kernel_dbg_exc: ld r14,interrupt_base_book3e@got(r15) ld r15,__end_interrupts@got(r15) #else - LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) - LOAD_REG_IMMEDIATE(r15,__end_interrupts) + LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts) #endif cmpld cr0,r10,r14 cmpld cr1,r10,r15 @@ -1449,7 +1449,7 @@ a2_tlbinit_code_start: a2_tlbinit_after_linear_map: /* Now we branch the new virtual address mapped by this entry */ - LOAD_REG_IMMEDIATE(r3,1f) + LOAD_REG_IMMEDIATE_SYM(r3,1f) mtctr r3 bctr diff --git 
a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 91d297e696dd..1fd44761e997 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -635,7 +635,7 @@
Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property in EP driver.
On Wed, Aug 14, 2019 at 09:48:00AM +, Xiaowei Bao wrote: > > > > -Original Message- > > From: Lorenzo Pieralisi > > Sent: 2019年8月14日 17:30 > > To: Xiaowei Bao > > Cc: M.h. Lian ; Mingkai Hu > > ; Roy Zang ; > > bhelg...@google.com; linuxppc-dev@lists.ozlabs.org; > > linux-...@vger.kernel.org; linux-arm-ker...@lists.infradead.org; > > linux-ker...@vger.kernel.org > > Subject: Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property > > in EP driver. Do not quote the email header in your replies. > > I asked you to remove the period at the end of the patch $SUBJECT and you > > did not, either you do not read what I write or explain me what's going on. > Sorry, I didn't understand the meaning of period correctly before. > > > > On Wed, Aug 14, 2019 at 10:03:29AM +0800, Xiaowei Bao wrote: > > > The PCIe controller of layerscape just have 4 BARs, BAR0 and BAR1 is > > > 32bit, BAR2 and BAR4 is 64bit, this is determined by hardware, so set > > > the bar_fixed_64bit with 0x14. > > > > > > Signed-off-by: Xiaowei Bao > > > > Kishon ACK'ed this patch and you have not carried his tag. > > > > I will make these changes but that's the last time I do that for you. > Thanks a lot, your means is that I don't need to send the v7 patch and you > help me to > Correct this patch, yes? Thanks a lot for your help about the rules of the > upstream. I will > Correct this error next time. ^.^ I fixed that up and pushed out, pci/layerscape, for v5.4. Thanks, Lorenzo > > Lorenzo > > > > > --- > > > v2: > > > - Replace value 0x14 with a macro. > > > v3: > > > - No change. > > > v4: > > > - send the patch again with '--to'. > > > v5: > > > - fix the commit message. > > > v6: > > > - remove the [EXT] tag of the $SUBJECT in email. 
> > > > > > drivers/pci/controller/dwc/pci-layerscape-ep.c | 1 + > > > 1 file changed, 1 insertion(+) > > > > > > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > b/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > index be61d96..ca9aa45 100644 > > > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > @@ -44,6 +44,7 @@ static const struct pci_epc_features > > ls_pcie_epc_features = { > > > .linkup_notifier = false, > > > .msi_capable = true, > > > .msix_capable = false, > > > + .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4), > > > }; > > > > > > static const struct pci_epc_features* > > > -- > > > 2.9.5 > > >
Re: [PATCH v4 14/25] powernv/fadump: process the crashdump by exporting it as /proc/vmcore
On 2019-07-16 17:03:38 Tue, Hari Bathini wrote: > Add support in the kernel to process the crash'ed kernel's memory > preserved during MPIPL and export it as /proc/vmcore file for the > userland scripts to filter and analyze it later. > > Signed-off-by: Hari Bathini > --- > arch/powerpc/platforms/powernv/opal-fadump.c | 190 > ++ > 1 file changed, 187 insertions(+), 3 deletions(-) > [...] > + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, ); > + if ((ret != OPAL_SUCCESS) || !addr) { > + pr_err("Failed to get Kernel metadata (%lld)\n", ret); > + return 1; > + } > + > + addr = be64_to_cpu(addr); > + pr_debug("Kernel metadata addr: %llx\n", addr); > + > + opal_fdm_active = __va(addr); > + r_opal_fdm_active = (void *)addr; > + if (r_opal_fdm_active->version != OPAL_FADUMP_VERSION) { > + pr_err("FADump active but version (%u) unsupported!\n", > +r_opal_fdm_active->version); > + return 1; > + } > + > + /* Kernel regions not registered with f/w for MPIPL */ > + if (r_opal_fdm_active->registered_regions == 0) { > + opal_fdm_active = NULL; What about partial dump capture scenario ? What if opal crashes while kernel was in middle of registering ranges ? We may have partial dump captured which won't be useful. e.g. If we have total of 4 ranges to be registered and opal crashes after successful registration of only 2 ranges with 2 pending, we will get a partial dump which needs to be ignored. I think check should be comparing registered_regions against total number of regions. What do you think ? Thanks, -Mahesh. > + return 1; > + } > + > + pr_info("Firmware-assisted dump is active.\n"); > + fadump_conf->dump_active = 1; > + opal_fadump_get_config(fadump_conf, r_opal_fdm_active); > + } > + > return 1; > } > -- Mahesh J Salgaonkar
Re: [PATCH v4 11/25] powernv/fadump: register kernel metadata address with opal
On 8/14/19 12:36 PM, Hari Bathini wrote: > > > On 13/08/19 4:11 PM, Mahesh J Salgaonkar wrote: >> On 2019-07-16 17:03:15 Tue, Hari Bathini wrote: >>> OPAL allows registering address with it in the first kernel and >>> retrieving it after MPIPL. Setup kernel metadata and register its >>> address with OPAL to use it for processing the crash dump. >>> >>> Signed-off-by: Hari Bathini >>> --- >>> arch/powerpc/kernel/fadump-common.h |4 + >>> arch/powerpc/kernel/fadump.c | 65 ++- >>> arch/powerpc/platforms/powernv/opal-fadump.c | 73 >>> ++ >>> arch/powerpc/platforms/powernv/opal-fadump.h | 37 + >>> arch/powerpc/platforms/pseries/rtas-fadump.c | 32 +-- >>> 5 files changed, 177 insertions(+), 34 deletions(-) >>> create mode 100644 arch/powerpc/platforms/powernv/opal-fadump.h >>> >> [...] >>> @@ -346,30 +349,42 @@ int __init fadump_reserve_mem(void) >>> * use memblock_find_in_range() here since it doesn't allocate >>> * from bottom to top. >>> */ >>> - for (base = fw_dump.boot_memory_size; >>> -base <= (memory_boundary - size); >>> -base += size) { >>> + while (base <= (memory_boundary - size)) { >>> if (memblock_is_region_memory(base, size) && >>> !memblock_is_region_reserved(base, size)) >>> break; >>> + >>> + base += size; >>> } >>> - if ((base > (memory_boundary - size)) || >>> - memblock_reserve(base, size)) { >>> + >>> + if (base > (memory_boundary - size)) { >>> + pr_err("Failed to find memory chunk for reservation\n"); >>> + goto error_out; >>> + } >>> + fw_dump.reserve_dump_area_start = base; >>> + >>> + /* >>> +* Calculate the kernel metadata address and register it with >>> +* f/w if the platform supports. >>> +*/ >>> + if (fw_dump.ops->setup_kernel_metadata(_dump) < 0) >>> + goto error_out; >> >> I see setup_kernel_metadata() registers the metadata address with opal >> without >> having any minimum data initialized in it. Secondaly, why can't this wait >> until> registration ? I think we should defer this until fadump registration. 
> > If setting up metadata address fails (it should ideally not fail, but..), > everything else > is useless. That's less likely.. so is true with opal_mpipl_update() as well. > So, we might as well try that early and fall back to KDump in case of an > error.. ok. Yeah but not uninitialized metadata. > >> What if kernel crashes before metadata area is initialized ? > > registered_regions would be '0'. So, it is treated as fadump is not > registered case. > Let me > initialize metadata explicitly before registering the address with f/w to > avoid any assumption... Do you want to do that before memblock reservation ? Should we move this to setup_fadump() ? Thanks, -Mahesh. > >> >>> + >>> + if (memblock_reserve(base, size)) { >>> pr_err("Failed to reserve memory\n"); >>> - return 0; >>> + goto error_out; >>> } >> [...] >>> - >>> static struct fadump_ops rtas_fadump_ops = { >>> - .init_fadump_mem_struct = rtas_fadump_init_mem_struct, >>> - .register_fadump= rtas_fadump_register_fadump, >>> - .unregister_fadump = rtas_fadump_unregister_fadump, >>> - .invalidate_fadump = rtas_fadump_invalidate_fadump, >>> - .process_fadump = rtas_fadump_process_fadump, >>> - .fadump_region_show = rtas_fadump_region_show, >>> - .fadump_trigger = rtas_fadump_trigger, >>> + .init_fadump_mem_struct = rtas_fadump_init_mem_struct, >>> + .get_kernel_metadata_size = rtas_fadump_get_kernel_metadata_size, >>> + .setup_kernel_metadata = rtas_fadump_setup_kernel_metadata, >>> + .register_fadump= rtas_fadump_register_fadump, >>> + .unregister_fadump = rtas_fadump_unregister_fadump, >>> + .invalidate_fadump = rtas_fadump_invalidate_fadump, >>> + .process_fadump = rtas_fadump_process_fadump, >>> + .fadump_region_show = rtas_fadump_region_show, >>> + .fadump_trigger = rtas_fadump_trigger, >> >> Can you make the tab space changes in your previous patch where these >> were initially introduced ? So that this patch can only show new members >> that are added. > > done. > > Thanks > Hari >
[PATCH] powerpc/futex: fix warning: 'oldval' may be used uninitialized in this function
CC kernel/futex.o kernel/futex.c: In function 'do_futex': kernel/futex.c:1676:17: warning: 'oldval' may be used uninitialized in this function [-Wmaybe-uninitialized] return oldval == cmparg; ^ kernel/futex.c:1651:6: note: 'oldval' was declared here int oldval, ret; ^ This is because arch_futex_atomic_op_inuser() only sets *oval if ret is NUL and GCC doesn't see that it will use it only when ret is NUL. Anyway, the non-NUL ret path is an error path that won't suffer from setting *oval, and as *oval is a local var in futex_atomic_op_inuser() it will have no impact. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/futex.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 3a6aa57b9d90..eea28ca679db 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -60,8 +60,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, pagefault_enable(); - if (!ret) - *oval = oldval; + *oval = oldval; prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; -- 2.13.3
Re: [PATCH v9 4/7] extable: Add function to search only kernel exception table
Balbir Singh writes: > On 12/8/19 7:22 pm, Santosh Sivaraj wrote: >> Certain architecture specific operating modes (e.g., in powerpc machine >> check handler that is unable to access vmalloc memory), the >> search_exception_tables cannot be called because it also searches the >> module exception tables if entry is not found in the kernel exception >> table. >> >> Cc: Thomas Gleixner >> Cc: Ingo Molnar >> Cc: Nicholas Piggin >> Signed-off-by: Santosh Sivaraj >> Reviewed-by: Nicholas Piggin >> --- >> include/linux/extable.h | 2 ++ >> kernel/extable.c| 11 +-- >> 2 files changed, 11 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/extable.h b/include/linux/extable.h >> index 41c5b3a25f67..81ecfaa83ad3 100644 >> --- a/include/linux/extable.h >> +++ b/include/linux/extable.h >> @@ -19,6 +19,8 @@ void trim_init_extable(struct module *m); >> >> /* Given an address, look for it in the exception tables */ >> const struct exception_table_entry *search_exception_tables(unsigned long >> add); >> +const struct exception_table_entry * >> +search_kernel_exception_table(unsigned long addr); >> > > Can we find a better name search_kernel still sounds like all of the kernel. > Can we rename it to search_kernel_linear_map_extable? I thought search_kernel_exception_table and search_module_extables were non-ambiguous enough :-) But If you think name will be confusing, I can change that as suggested. Thanks, Santosh > > >> #ifdef CONFIG_MODULES >> /* For extable.c to search modules' exception tables. 
*/ >> diff --git a/kernel/extable.c b/kernel/extable.c >> index e23cce6e6092..f6c9406eec7d 100644 >> --- a/kernel/extable.c >> +++ b/kernel/extable.c >> @@ -40,13 +40,20 @@ void __init sort_main_extable(void) >> } >> } >> >> +/* Given an address, look for it in the kernel exception table */ >> +const >> +struct exception_table_entry *search_kernel_exception_table(unsigned long >> addr) >> +{ >> +return search_extable(__start___ex_table, >> + __stop___ex_table - __start___ex_table, addr); >> +} >> + >> /* Given an address, look for it in the exception tables. */ >> const struct exception_table_entry *search_exception_tables(unsigned long >> addr) >> { >> const struct exception_table_entry *e; >> >> -e = search_extable(__start___ex_table, >> - __stop___ex_table - __start___ex_table, addr); >> +e = search_kernel_exception_table(addr); >> if (!e) >> e = search_module_extables(addr); >> return e; >> -- if (( RANDOM % 2 )); then ~/bin/cookie; else fortune -s; fi #cat ~/notes/quotes | sort -R | head -1 | cut -f2- -d " "
Re: [PATCH v3 08/16] powerpc/pseries/svm: Use shared memory for LPPACA structures
Thiago Jung Bauermann writes: > Michael Ellerman writes: >> Thiago Jung Bauermann writes: >>> From: Anshuman Khandual >>> >>> LPPACA structures need to be shared with the host. Hence they need to be in >>> shared memory. Instead of allocating individual chunks of memory for a >>> given structure from memblock, a contiguous chunk of memory is allocated >>> and then converted into shared memory. Subsequent allocation requests will >>> come from the contiguous chunk which will be always shared memory for all >>> structures. >>> >>> While we are able to use a kmem_cache constructor for the Debug Trace Log, >>> LPPACAs are allocated very early in the boot process (before SLUB is >>> available) so we need to use a simpler scheme here. >>> >>> Introduce helper is_svm_platform() which uses the S bit of the MSR to tell >>> whether we're running as a secure guest. >>> >>> Signed-off-by: Anshuman Khandual >>> Signed-off-by: Thiago Jung Bauermann >>> --- >>> arch/powerpc/include/asm/svm.h | 26 >>> arch/powerpc/kernel/paca.c | 43 +- >>> 2 files changed, 68 insertions(+), 1 deletion(-) >>> >>> diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h >>> new file mode 100644 >>> index ..fef3740f46a6 >>> --- /dev/null >>> +++ b/arch/powerpc/include/asm/svm.h >>> @@ -0,0 +1,26 @@ >>> +/* SPDX-License-Identifier: GPL-2.0+ */ >>> +/* >>> + * SVM helper functions >>> + * >>> + * Copyright 2019 Anshuman Khandual, IBM Corporation. >> >> Are we sure this copyright date is correct? > > I may be confused about which year the copyright refers to. I thought it > was the year when the patch was committed. If it is the first time the > patch was published then this one should be 2018. I'm not a lawyer etc. but AIUI the date above is about the authorship, ie. when it was originally written, not when it was published or committed. 
In general I don't think it matters too much, but in this case I'm pretty sure Anshuman can't have possibly written it in 2019 on behalf of IBM :) So we can either change the date to 2018, or drop his name and just say it's copyright 2019 by IBM. cheers
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 --- Comment #17 from Christophe Leroy (christophe.le...@c-s.fr) --- Created attachment 284379 --> https://bugzilla.kernel.org/attachment.cgi?id=284379=edit Patch to trace misaligned destination in copy_page() on PPC32 Can you try the attached patch to trace misaligned destination on copy_page() ? -- You are receiving this mail because: You are on the CC list for the bug.
Re: [PATCH v9 1/7] powerpc/mce: Schedule work from irq_work
On 12/8/19 7:22 pm, Santosh Sivaraj wrote: > schedule_work() cannot be called from MCE exception context as MCE can > interrupt even in interrupt disabled context. > > fixes: 733e4a4c ("powerpc/mce: hookup memory_failure for UE errors") > Suggested-by: Mahesh Salgaonkar > Signed-off-by: Santosh Sivaraj > Cc: sta...@vger.kernel.org # v4.15+ > --- Acked-by: Balbir Singh
Re: [PATCH v9 4/7] extable: Add function to search only kernel exception table
On 12/8/19 7:22 pm, Santosh Sivaraj wrote: > Certain architecture specific operating modes (e.g., in powerpc machine > check handler that is unable to access vmalloc memory), the > search_exception_tables cannot be called because it also searches the > module exception tables if entry is not found in the kernel exception > table. > > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: Nicholas Piggin > Signed-off-by: Santosh Sivaraj > Reviewed-by: Nicholas Piggin > --- > include/linux/extable.h | 2 ++ > kernel/extable.c| 11 +-- > 2 files changed, 11 insertions(+), 2 deletions(-) > > diff --git a/include/linux/extable.h b/include/linux/extable.h > index 41c5b3a25f67..81ecfaa83ad3 100644 > --- a/include/linux/extable.h > +++ b/include/linux/extable.h > @@ -19,6 +19,8 @@ void trim_init_extable(struct module *m); > > /* Given an address, look for it in the exception tables */ > const struct exception_table_entry *search_exception_tables(unsigned long > add); > +const struct exception_table_entry * > +search_kernel_exception_table(unsigned long addr); > Can we find a better name search_kernel still sounds like all of the kernel. Can we rename it to search_kernel_linear_map_extable? > #ifdef CONFIG_MODULES > /* For extable.c to search modules' exception tables. */ > diff --git a/kernel/extable.c b/kernel/extable.c > index e23cce6e6092..f6c9406eec7d 100644 > --- a/kernel/extable.c > +++ b/kernel/extable.c > @@ -40,13 +40,20 @@ void __init sort_main_extable(void) > } > } > > +/* Given an address, look for it in the kernel exception table */ > +const > +struct exception_table_entry *search_kernel_exception_table(unsigned long > addr) > +{ > + return search_extable(__start___ex_table, > + __stop___ex_table - __start___ex_table, addr); > +} > + > /* Given an address, look for it in the exception tables. 
*/ > const struct exception_table_entry *search_exception_tables(unsigned long > addr) > { > const struct exception_table_entry *e; > > - e = search_extable(__start___ex_table, > -__stop___ex_table - __start___ex_table, addr); > + e = search_kernel_exception_table(addr); > if (!e) > e = search_module_extables(addr); > return e; >
Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe
On 12/8/19 7:22 pm, Santosh Sivaraj wrote: > If we take a UE on one of the instructions with a fixup entry, set nip > to continue execution at the fixup entry. Stop processing the event > further or print it. > > Co-developed-by: Reza Arbab > Signed-off-by: Reza Arbab > Cc: Mahesh Salgaonkar > Signed-off-by: Santosh Sivaraj > --- Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it should still have my author tag and signed-off-by Balbir Singh > arch/powerpc/include/asm/mce.h | 4 +++- > arch/powerpc/kernel/mce.c | 16 > arch/powerpc/kernel/mce_power.c | 15 +-- > 3 files changed, 32 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h > index f3a6036b6bc0..e1931c8c2743 100644 > --- a/arch/powerpc/include/asm/mce.h > +++ b/arch/powerpc/include/asm/mce.h > @@ -122,7 +122,8 @@ struct machine_check_event { > enum MCE_UeErrorType ue_error_type:8; > u8 effective_address_provided; > u8 physical_address_provided; > - u8 reserved_1[5]; > + u8 ignore_event; > + u8 reserved_1[4]; > u64 effective_address; > u64 physical_address; > u8 reserved_2[8]; > @@ -193,6 +194,7 @@ struct mce_error_info { > enum MCE_Initiator initiator:8; > enum MCE_ErrorClass error_class:8; > boolsync_error; > + boolignore_event; > }; > > #define MAX_MC_EVT 100 > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index a3b122a685a5..ec4b3e1087be 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, > if (phys_addr != ULONG_MAX) { > mce->u.ue_error.physical_address_provided = true; > mce->u.ue_error.physical_address = phys_addr; > + mce->u.ue_error.ignore_event = mce_err->ignore_event; > machine_check_ue_event(mce); > } > } > @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct > *work) > /* >* This should probably queued elsewhere, but >* oh! 
well > + * > + * Don't report this machine check because the caller has a > + * asked us to ignore the event, it has a fixup handler which > + * will do the appropriate error handling and reporting. >*/ > if (evt->error_type == MCE_ERROR_TYPE_UE) { > + if (evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_ue_count); > + continue; > + } > + > if (evt->u.ue_error.physical_address_provided) { > unsigned long pfn; > > @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct > irq_work *work) > while (__this_cpu_read(mce_queue_count) > 0) { > index = __this_cpu_read(mce_queue_count) - 1; > evt = this_cpu_ptr(_event_queue[index]); > + > + if (evt->error_type == MCE_ERROR_TYPE_UE && > + evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_queue_count); > + continue; > + } > machine_check_print_event_info(evt, false, false); > __this_cpu_dec(mce_queue_count); > } > diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c > index e74816f045f8..1dd87f6f5186 100644 > --- a/arch/powerpc/kernel/mce_power.c > +++ b/arch/powerpc/kernel/mce_power.c > @@ -11,6 +11,7 @@ > > #include > #include > +#include > #include > #include > #include > @@ -18,6 +19,7 @@ > #include > #include > #include > +#include > > /* > * Convert an address related to an mm to a physical address. > @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, > return 0; > } > > -static long mce_handle_ue_error(struct pt_regs *regs) > +static long mce_handle_ue_error(struct pt_regs *regs, > + struct mce_error_info *mce_err) > { > long handled = 0; > + const struct exception_table_entry *entry; > + > + entry = search_kernel_exception_table(regs->nip); > + if (entry) { > + mce_err->ignore_event = true; > + regs->nip = extable_fixup(entry); > + return 1; > + } > > /* >* On specific SCOM read via MMIO we may get a machine check > @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs
RE: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property in EP driver.
> -Original Message- > From: Lorenzo Pieralisi > Sent: 2019年8月14日 17:30 > To: Xiaowei Bao > Cc: M.h. Lian ; Mingkai Hu > ; Roy Zang ; > bhelg...@google.com; linuxppc-dev@lists.ozlabs.org; > linux-...@vger.kernel.org; linux-arm-ker...@lists.infradead.org; > linux-ker...@vger.kernel.org > Subject: Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property > in EP driver. > > I asked you to remove the period at the end of the patch $SUBJECT and you > did not, either you do not read what I write or explain me what's going on. Sorry, I didn't understand the meaning of period correctly before. > > On Wed, Aug 14, 2019 at 10:03:29AM +0800, Xiaowei Bao wrote: > > The PCIe controller of layerscape just have 4 BARs, BAR0 and BAR1 is > > 32bit, BAR2 and BAR4 is 64bit, this is determined by hardware, so set > > the bar_fixed_64bit with 0x14. > > > > Signed-off-by: Xiaowei Bao > > Kishon ACK'ed this patch and you have not carried his tag. > > I will make these changes but that's the last time I do that for you. Thanks a lot, your means is that I don't need to send the v7 patch and you help me to Correct this patch, yes? Thanks a lot for your help about the rules of the upstream. I will Correct this error next time. ^.^ > > Lorenzo > > > --- > > v2: > > - Replace value 0x14 with a macro. > > v3: > > - No change. > > v4: > > - send the patch again with '--to'. > > v5: > > - fix the commit message. > > v6: > > - remove the [EXT] tag of the $SUBJECT in email. 
> > > > drivers/pci/controller/dwc/pci-layerscape-ep.c | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c > > b/drivers/pci/controller/dwc/pci-layerscape-ep.c > > index be61d96..ca9aa45 100644 > > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c > > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c > > @@ -44,6 +44,7 @@ static const struct pci_epc_features > ls_pcie_epc_features = { > > .linkup_notifier = false, > > .msi_capable = true, > > .msix_capable = false, > > + .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4), > > }; > > > > static const struct pci_epc_features* > > -- > > 2.9.5 > >
Re: [PATCH 1/5] powerpc/ptdump: fix addresses display on PPC32
Michael, Le 14/08/2019 à 14:36, Christophe Leroy a écrit : Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a shift in the displayed addresses. Lets revert that change to resync walk_pagetables()'s addr val and pgd_t pointer for PPC32. Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") Either this patch or patch 2 of the series has to go into fixes. If you prefer next patch for fixes, then this one can be squashed into patch 3 which drops the PPC32 hacked definition of KERN_VIRT_START Christophe Cc: sta...@vger.kernel.org Signed-off-by: Christophe Leroy --- arch/powerpc/mm/ptdump/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 6a88a9f585d4..3ad64fc11419 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -27,7 +27,7 @@ #include "ptdump.h" #ifdef CONFIG_PPC32 -#define KERN_VIRT_STARTPAGE_OFFSET +#define KERN_VIRT_START0 #endif /*
[PATCH 2/5] powerpc/ptdump: fix walk_pagetables() address mismatch
walk_pagetables() always walk the entire pgdir from address 0 but considers PAGE_OFFSET or KERN_VIRT_START as the starting address of the walk, resulting in a possible mismatch in the displayed addresses. Ex: on PPC32, when KERN_VIRT_START was locally defined as PAGE_OFFSET, ptdump displayed 0x80000000 instead of 0xc0000000 for the first kernel page, because 0xc0000000 + 0xc0000000 = 0x80000000. Start the walk at st->start_address instead of starting at 0. Signed-off-by: Christophe Leroy --- arch/powerpc/mm/ptdump/ptdump.c | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 3ad64fc11419..74ff2bff4ea0 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -299,17 +299,15 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) static void walk_pagetables(struct pg_state *st) { - pgd_t *pgd = pgd_offset_k(0UL); unsigned int i; - unsigned long addr; - - addr = st->start_address; + unsigned long addr = st->start_address & PGDIR_MASK; + pgd_t *pgd = pgd_offset_k(addr); /* * Traverse the linux pagetable structure and dump pages that are in * the hash pagetable. */ - for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { + for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); -- 2.13.3
use the generic DMA direct remap code on powerpc
Hi powerpc maintainers, this patch converts powerpc to use the generic dma remapping code for the uncached coherent allocation on non-coherent CPUs. Christophe Leroy tested a slightly earlier version on ppc8xx. Note that I plan to remove the need for the arch to call dma_atomic_pool_init in this cycle, so either this needs to go in through the dma-mapping tree (or a shared stable branch with it), or we will need a small manual fixup in linux-next and when Linus merged the later of the two pull requests.
[PATCH 5/5] powerpc/ptdump: drop non vital #ifdefs
hashpagetable.c is only compiled when CONFIG_PPC_BOOK3S_64 is defined, so drop the test and its 'else' branch. Use IS_ENABLED(CONFIG_PPC_PSERIES) instead of #ifdef, this allows the code to be checked at any build. It is still optimised out by GCC. Use IS_ENABLED(CONFIG_PPC_64K_PAGES) instead of #ifdef. Use IS_ENABLED(CONFIG_SPARSEMEN_VMEMMAP) instead of #ifdef. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/plpar_wrappers.h | 6 ++ arch/powerpc/mm/ptdump/hashpagetable.c| 24 +--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index cff5a411e595..4497c8afb573 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -340,6 +340,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr) { return 0; } + +static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex, + unsigned long *ptes) +{ + return 0; +} #endif /* CONFIG_PPC_PSERIES */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index 72f0e4a3d839..a07278027c6f 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -237,7 +237,6 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 return -1; } -#ifdef CONFIG_PPC_PSERIES static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { struct hash_pte ptes[4]; @@ -274,7 +273,6 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * } return -1; } -#endif static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, unsigned long *lp_bits) @@ -316,10 +314,9 @@ static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { -#ifdef CONFIG_PPC_PSERIES - if 
(firmware_has_feature(FW_FEATURE_LPAR)) + if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR)) return pseries_find(ea, psize, primary, v, r); -#endif + return native_find(ea, psize, primary, v, r); } @@ -386,12 +383,13 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) psize = mmu_vmalloc_psize; else psize = mmu_io_psize; -#ifdef CONFIG_PPC_64K_PAGES + /* check for secret 4K mappings */ - if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) || - ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && + ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO || +(pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) psize = mmu_io_psize; -#endif + /* check for hashpte */ status = hpte_find(st, addr, psize); @@ -469,9 +467,10 @@ static void walk_linearmapping(struct pg_state *st) static void walk_vmemmap(struct pg_state *st) { -#ifdef CONFIG_SPARSEMEM_VMEMMAP struct vmemmap_backing *ptr = vmemmap_list; + if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) + return; /* * Traverse the vmemmaped memory and dump pages that are in the hash * pagetable. @@ -481,7 +480,6 @@ static void walk_vmemmap(struct pg_state *st) ptr = ptr->list; } seq_puts(st->seq, "---[ vmemmap end ]---\n"); -#endif } static void populate_markers(void) @@ -495,11 +493,7 @@ static void populate_markers(void) address_markers[6].start_address = PHB_IO_END; address_markers[7].start_address = IOREMAP_BASE; address_markers[8].start_address = IOREMAP_END; -#ifdef CONFIG_PPC_BOOK3S_64 address_markers[9].start_address = H_VMEMMAP_START; -#else - address_markers[9].start_address = VMEMMAP_BASE; -#endif } static int ptdump_show(struct seq_file *m, void *v) -- 2.13.3
[PATCH] powerpc: use the generic dma coherent remap allocator
This switches to using common code for the DMA allocations, including potential use of the CMA allocator if configured. Switching to the generic code enables DMA allocations from atomic context, which is required by the DMA API documentation, and also adds various other minor features drivers start relying upon. It also makes sure we have on tested code base for all architectures that require uncached pte bits for coherent DMA allocations. Another advantage is that consistent memory allocations now share the general vmalloc pool instead of needing an explicit careout from it. Signed-off-by: Christoph Hellwig --- arch/powerpc/Kconfig | 12 - arch/powerpc/include/asm/book3s/32/pgtable.h | 12 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 12 +- arch/powerpc/mm/dma-noncoherent.c| 318 +-- arch/powerpc/mm/mem.c| 4 - arch/powerpc/mm/ptdump/ptdump.c | 9 - arch/powerpc/platforms/Kconfig.cputype | 2 + 7 files changed, 17 insertions(+), 352 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 77f6ebf97113..7135e47390f3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1139,18 +1139,6 @@ config TASK_SIZE default "0x8000" if PPC_8xx default "0xc000" -config CONSISTENT_SIZE_BOOL - bool "Set custom consistent memory pool size" - depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE - help - This option allows you to set the size of the - consistent memory pool. This pool of virtual memory - is used to make consistent memory allocations. 
- -config CONSISTENT_SIZE - hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL - default "0x0020" if NOT_COHERENT_CACHE - config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" depends on ADVANCED_OPTIONS && PPC_8xx && \ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 838de59f6754..b6c7214113ab 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -148,21 +148,15 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #include -#ifdef CONFIG_HIGHMEM -#define KVIRT_TOP PKMAP_BASE -#else -#define KVIRT_TOP FIXADDR_START -#endif - /* * ioremap_bot starts at that address. Early ioremaps move down from there, * until mem_init() at which point this becomes the top of the vmalloc * and ioremap space */ -#ifdef CONFIG_NOT_COHERENT_CACHE -#define IOREMAP_TOP((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#ifdef CONFIG_HIGHMEM +#define IOREMAP_TOPPKMAP_BASE #else -#define IOREMAP_TOPKVIRT_TOP +#define IOREMAP_TOPFIXADDR_START #endif /* diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 0284f8f5305f..c3764638c27f 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -78,21 +78,15 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #include -#ifdef CONFIG_HIGHMEM -#define KVIRT_TOP PKMAP_BASE -#else -#define KVIRT_TOP FIXADDR_START -#endif - /* * ioremap_bot starts at that address. 
Early ioremaps move down from there, * until mem_init() at which point this becomes the top of the vmalloc * and ioremap space */ -#ifdef CONFIG_NOT_COHERENT_CACHE -#define IOREMAP_TOP((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#ifdef CONFIG_HIGHMEM +#define IOREMAP_TOPPKMAP_BASE #else -#define IOREMAP_TOPKVIRT_TOP +#define IOREMAP_TOPFIXADDR_START #endif /* diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index c617282d5b2a..4272ca5e8159 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -4,310 +4,18 @@ *Copyright (C) 2001 Dan Malek (dma...@jlc.net) * * Copyright (C) 2000 Russell King - * - * Consistent memory allocators. Used for DMA devices that want to - * share uncached memory with the processor core. The function return - * is the virtual address and 'dma_handle' is the physical address. - * Mostly stolen from the ARM port, with some changes for PowerPC. - * -- Dan - * - * Reorganized to get rid of the arch-specific consistent_* functions - * and provide non-coherent implementations for the DMA API. -Matt - * - * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() - * implementation. This is pulled straight from ARM and barely - * modified. -Matt */ -#include -#include #include #include -#include #include #include #include #include -#include #include #include -#include - -/* - * This address range defaults to a value that is safe for all - * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It -
[PATCH 1/3] powerpc/xmon: Check for HV mode when dumping XIVE info from OPAL
Currently, the xmon 'dx' command calls OPAL to dump the XIVE state in the OPAL logs and also outputs some of the fields of the internal XIVE structures in Linux. The OPAL calls can only be done on baremetal (PowerNV) and they crash a pseries machine. Fix by checking the hypervisor feature of the CPU. Signed-off-by: Cédric Le Goater --- arch/powerpc/xmon/xmon.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 14e56c25879f..25d4adccf750 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2534,13 +2534,16 @@ static void dump_pacas(void) static void dump_one_xive(int cpu) { unsigned int hwid = get_hard_smp_processor_id(cpu); - - opal_xive_dump(XIVE_DUMP_TM_HYP, hwid); - opal_xive_dump(XIVE_DUMP_TM_POOL, hwid); - opal_xive_dump(XIVE_DUMP_TM_OS, hwid); - opal_xive_dump(XIVE_DUMP_TM_USER, hwid); - opal_xive_dump(XIVE_DUMP_VP, hwid); - opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid); + bool hv = cpu_has_feature(CPU_FTR_HVMODE); + + if (hv) { + opal_xive_dump(XIVE_DUMP_TM_HYP, hwid); + opal_xive_dump(XIVE_DUMP_TM_POOL, hwid); + opal_xive_dump(XIVE_DUMP_TM_OS, hwid); + opal_xive_dump(XIVE_DUMP_TM_USER, hwid); + opal_xive_dump(XIVE_DUMP_VP, hwid); + opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid); + } if (setjmp(bus_error_jmp) != 0) { catch_memory_errors = 0; -- 2.21.0
Re: [PATCH 6/6] driver core: initialize a default DMA mask for platform device
On 11/08/2019 09:05, Christoph Hellwig wrote: We still treat devices without a DMA mask as defaulting to 32-bits for both mask, but a few releases ago we've started warning about such cases, as they require special cases to work around this sloppyness. Add a dma_mask field to struct platform_object so that we can initialize s/object/device/ the dma_mask pointer in struct device and initialize both masks to 32-bits by default. Architectures can still override this in arch_setup_pdev_archdata if needed. Note that the code looks a little odd with the various conditionals because we have to support platform_device structures that are statically allocated. This would be a good point to also get rid of the long-standing bodge in platform_device_register_full(). Signed-off-by: Christoph Hellwig --- drivers/base/platform.c | 15 +-- include/linux/platform_device.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ec974ba9c0c4..b216fcb0a8af 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -264,6 +264,17 @@ struct platform_object { char name[]; }; +static void setup_pdev_archdata(struct platform_device *pdev) Bikeshed: painting the generic DMA API properties as "archdata" feels a bit off-target :/ +{ + if (!pdev->dev.coherent_dma_mask) + pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); + if (!pdev->dma_mask) + pdev->dma_mask = DMA_BIT_MASK(32); + if (!pdev->dev.dma_mask) + pdev->dev.dma_mask = >dma_mask; + arch_setup_pdev_archdata(pdev); AFAICS m68k's implementation of that arch hook becomes entirely redundant after this change, so may as well go. That would just leave powerpc's actual archdata, which at a glance looks like it could probably be cleaned up with not *too* much trouble. Robin. 
+}; + /** * platform_device_put - destroy a platform device * @pdev: platform device to free @@ -310,7 +321,7 @@ struct platform_device *platform_device_alloc(const char *name, int id) pa->pdev.id = id; device_initialize(>pdev.dev); pa->pdev.dev.release = platform_device_release; - arch_setup_pdev_archdata(>pdev); + setup_pdev_archdata(>pdev); } return pa ? >pdev : NULL; @@ -512,7 +523,7 @@ EXPORT_SYMBOL_GPL(platform_device_del); int platform_device_register(struct platform_device *pdev) { device_initialize(>dev); - arch_setup_pdev_archdata(pdev); + setup_pdev_archdata(pdev); return platform_device_add(pdev); } EXPORT_SYMBOL_GPL(platform_device_register); diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 9bc36b589827..a2abde2aef25 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -24,6 +24,7 @@ struct platform_device { int id; boolid_auto; struct device dev; + u64 dma_mask; u32 num_resources; struct resource *resource;
[PATCH 2/3] kbuild: rebuild modules when module linker scripts are updated
Currently, the timestamp of module linker scripts are not checked. Add them to the dependency of modules so they are correctly rebuilt. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 5 + Makefile | 3 ++- arch/arm/Makefile | 2 +- arch/arm64/Makefile| 2 +- arch/ia64/Makefile | 2 +- arch/m68k/Makefile | 2 +- arch/parisc/Makefile | 2 +- arch/powerpc/Makefile | 2 +- arch/riscv/Makefile| 2 +- scripts/Makefile.modpost | 5 +++-- 10 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index d3448d2c8017..36ba92e199d2 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -999,6 +999,11 @@ When kbuild executes, the following steps are followed (roughly): The linker script with full path. Assigned by the top-level Makefile. +KBUILD_LDS_MODULE + + The module linker script with full path. Assigned by the top-level + Makefile and additionally by the arch Makefile. + KBUILD_VMLINUX_OBJS All object files for vmlinux. 
They are linked to vmlinux in the same diff --git a/Makefile b/Makefile index 164ca615e2f6..af808837a1f2 100644 --- a/Makefile +++ b/Makefile @@ -485,7 +485,8 @@ KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE -KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds +KBUILD_LDFLAGS_MODULE := +export KBUILD_LDS_MODULE := $(srctree)/scripts/module-common.lds KBUILD_LDFLAGS := GCC_PLUGINS_CFLAGS := CLANG_FLAGS := diff --git a/arch/arm/Makefile b/arch/arm/Makefile index c3624ca6c0bc..fbe50eec8f34 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -17,7 +17,7 @@ KBUILD_LDFLAGS_MODULE += --be8 endif ifeq ($(CONFIG_ARM_MODULE_PLTS),y) -KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds +KBUILD_LDS_MODULE += $(srctree)/arch/arm/kernel/module.lds endif GZFLAGS:=-9 diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 61de992bbea3..d4ed1869e536 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -101,7 +101,7 @@ endif CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) -KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +KBUILD_LDS_MODULE += $(srctree)/arch/arm64/kernel/module.lds endif # Default value diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 171290f9f1de..5c3bcaee5980 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -20,7 +20,7 @@ CHECKFLAGS+= -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__ OBJCOPYFLAGS := --strip-all LDFLAGS_vmlinux:= -static -KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds +KBUILD_LDS_MODULE += $(srctree)/arch/ia64/module.lds KBUILD_AFLAGS_KERNEL := -mconstant-gp EXTRA := diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 482513b9af2c..5d9288384096 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -73,7 +73,7 @@ KBUILD_AFLAGS += -D__uClinux__ endif KBUILD_LDFLAGS := -m m68kelf -KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds 
+KBUILD_LDS_MODULE += $(srctree)/arch/m68k/kernel/module.lds ifdef CONFIG_SUN3 LDFLAGS_vmlinux = -N diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 3b77d729057f..36b834f1c933 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -60,7 +60,7 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) -KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/parisc/kernel/module.lds +KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds endif OBJCOPY_FLAGS =-O binary -R .note -R .comment -S diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index c345b79414a9..b2227855de20 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -67,7 +67,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y)) ifdef CONFIG_PPC32 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o else -KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds +KBUILD_LDS_MODULE += $(srctree)/arch/powerpc/kernel/module.lds ifeq ($(call ld-ifversion, -ge, 22500, y),y) # Have the linker provide sfpr if possible. # There is a corresponding test in arch/powerpc/lib/Makefile diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 7a117be8297c..426d989125a8 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -52,7 +52,7 @@ ifeq ($(CONFIG_CMODEL_MEDANY),y) KBUILD_CFLAGS += -mcmodel=medany endif ifeq ($(CONFIG_MODULE_SECTIONS),y) - KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/riscv/kernel/module.lds + KBUILD_LDS_MODULE
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 --- Comment #19 from Christophe Leroy (christophe.le...@c-s.fr) --- Created attachment 284389 --> https://bugzilla.kernel.org/attachment.cgi?id=284389&action=edit Patch to trace misaligned destination in copy_page() in asm on PPC32 Oops. Can you test with this new patch which implements the warning directly in assembly? This time it only modifies misc_32.S and it builds ok. -- You are receiving this mail because: You are on the CC list for the bug.
[PATCH 0/3] powerpc/xmon: Fix dump of XIVE interrupt under pseries
Hello, The xmon 'dx*' commands call OPAL to query information on XIVE but this can only be done on baremetal (PowerNV) and it crashes a pseries machine. This little series fixes support on pseries and extend the 'dxi' command. Thanks, C. Cédric Le Goater (3): powerpc/xmon: Check for HV mode when dumping XIVE info from OPAL powerpc/xive: Fix dump of XIVE interrupt under pseries powerpc/xmon: Add a dump of all XIVE interrupts arch/powerpc/include/asm/xive.h | 2 + arch/powerpc/sysdev/xive/xive-internal.h | 2 + arch/powerpc/sysdev/xive/common.c| 7 arch/powerpc/sysdev/xive/native.c| 15 +++ arch/powerpc/sysdev/xive/spapr.c | 51 arch/powerpc/xmon/xmon.c | 50 +-- 6 files changed, 114 insertions(+), 13 deletions(-) -- 2.21.0
[PATCH 2/3] powerpc/xive: Fix dump of XIVE interrupt under pseries
The xmon 'dxi' command calls OPAL to query the XIVE configuration of a interrupt. This can only be done on baremetal (PowerNV) and it will crash a pseries machine. Introduce a new XIVE get_irq_config() operation which implements a different query depending on the platform, PowerNV or pseries, and modify xmon to use a top level wrapper. Signed-off-by: Cédric Le Goater --- arch/powerpc/include/asm/xive.h | 2 + arch/powerpc/sysdev/xive/xive-internal.h | 2 + arch/powerpc/sysdev/xive/common.c| 7 arch/powerpc/sysdev/xive/native.c| 15 +++ arch/powerpc/sysdev/xive/spapr.c | 51 arch/powerpc/xmon/xmon.c | 12 +++--- 6 files changed, 83 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index efb0e597b272..967d6ab3c977 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -99,6 +99,8 @@ extern void xive_flush_interrupt(void); /* xmon hook */ extern void xmon_xive_do_dump(int cpu); +extern int xmon_xive_get_irq_config(u32 irq, u32 *target, u8 *prio, + u32 *sw_irq); /* APIs used by KVM */ extern u32 xive_native_default_eq_shift(void); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 211725dbf364..59cd366e7933 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -33,6 +33,8 @@ struct xive_cpu { struct xive_ops { int (*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data); int (*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); + int (*get_irq_config)(u32 hw_irq, u32 *target, u8 *prio, + u32 *sw_irq); int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); void(*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); void(*setup_cpu)(unsigned int cpu, struct xive_cpu *xc); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index be86fce1a84e..ed4561e71951 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ 
b/arch/powerpc/sysdev/xive/common.c @@ -257,6 +257,13 @@ notrace void xmon_xive_do_dump(int cpu) } #endif } + +int xmon_xive_get_irq_config(u32 irq, u32 *target, u8 *prio, +u32 *sw_irq) +{ + return xive_ops->get_irq_config(irq, target, prio, sw_irq); +} + #endif /* CONFIG_XMON */ static unsigned int xive_get_irq(void) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 2f26b74f6cfa..4b61e44f0171 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -111,6 +111,20 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq) } EXPORT_SYMBOL_GPL(xive_native_configure_irq); +static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio, + u32 *sw_irq) +{ + s64 rc; + __be64 vp; + __be32 lirq; + + rc = opal_xive_get_irq_config(hw_irq, , prio, ); + + *target = be64_to_cpu(vp); + *sw_irq = be32_to_cpu(lirq); + + return rc == 0 ? 0 : -ENXIO; +} /* This can be called multiple time to change a queue configuration */ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, @@ -442,6 +456,7 @@ EXPORT_SYMBOL_GPL(xive_native_sync_queue); static const struct xive_ops xive_native_ops = { .populate_irq_data = xive_native_populate_irq_data, .configure_irq = xive_native_configure_irq, + .get_irq_config = xive_native_get_irq_config, .setup_queue= xive_native_setup_queue, .cleanup_queue = xive_native_cleanup_queue, .match = xive_native_match, diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 8ef9cf4ebb1c..2a2d209336f7 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -211,6 +211,38 @@ static long plpar_int_set_source_config(unsigned long flags, return 0; } +static long plpar_int_get_source_config(unsigned long flags, + unsigned long lisn, + unsigned long *target, + unsigned long *prio, + unsigned long *sw_irq) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + 
pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn); + + do { + rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn, +target, prio, sw_irq); + } while (plpar_busy_delay(rc)); + + if (rc) { + pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n", +
Re: [5.3.0-rc4-next][bisected 882632][qla2xxx] WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 qla2x00_status_entry.isra
On 8/14/19 9:52 AM, Abdul Haleem wrote: > Greeting's > > Today's linux-next kernel (5.3.0-rc4-next-20190813) booted with warning on > my powerpc power 8 lpar > > The WARN_ON_ONCE() was introduced by commit 88263208 (scsi: qla2xxx: Complain > if sp->done() is not...) > > boot logs: > > WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 Hi Abdul, Thank you for having reported this. Is that the only warning reported on your setup by the qla2xxx driver? If that warning is commented out, does the qla2xxx driver work as expected? Thanks, Bart.
Re: [PATCH v4 18/25] powernv/fadump: process architected register state data provided by firmware
On 2019-07-16 17:04:08 Tue, Hari Bathini wrote: > From: Hari Bathini > > Firmware provides architected register state data at the time of crash. > Process this data and build CPU notes to append to ELF core. > > Signed-off-by: Hari Bathini > Signed-off-by: Vasant Hegde > --- > arch/powerpc/kernel/fadump-common.h |4 + > arch/powerpc/platforms/powernv/opal-fadump.c | 197 > -- > arch/powerpc/platforms/powernv/opal-fadump.h | 39 + > 3 files changed, 228 insertions(+), 12 deletions(-) > [...] > @@ -430,6 +577,32 @@ int __init opal_fadump_dt_scan(struct fw_dump > *fadump_conf, ulong node) > return 1; > } > > + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, ); > + if ((ret != OPAL_SUCCESS) || !addr) { > + pr_err("Failed to get CPU metadata (%lld)\n", ret); > + return 1; > + } > + > + addr = be64_to_cpu(addr); > + pr_debug("CPU metadata addr: %llx\n", addr); > + > + opal_cpu_metadata = __va(addr); > + r_opal_cpu_metadata = (void *)addr; > + fadump_conf->cpu_state_data_version = > + be32_to_cpu(r_opal_cpu_metadata->cpu_data_version); > + if (fadump_conf->cpu_state_data_version != > + HDAT_FADUMP_CPU_DATA_VERSION) { > + pr_err("CPU data format version (%lu) mismatch!\n", > +fadump_conf->cpu_state_data_version); > + return 1; > + } > + fadump_conf->cpu_state_entry_size = > + be32_to_cpu(r_opal_cpu_metadata->cpu_data_size); > + fadump_conf->cpu_state_destination_addr = > + be64_to_cpu(r_opal_cpu_metadata->region[0].dest); > + fadump_conf->cpu_state_data_size = > + be64_to_cpu(r_opal_cpu_metadata->region[0].size); > + opal_fadump_dt_scan isn't the right place to do this. Can you please move above cpu related data processing to opal_fadump_build_cpu_notes() ? Thanks, -Mahesh. > pr_info("Firmware-assisted dump is active.\n"); > fadump_conf->dump_active = 1; > opal_fadump_get_config(fadump_conf, r_opal_fdm_active);
Re: [5.3.0-rc4-next][bisected 882632][qla2xxx] WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 qla2x00_status_entry.isra
On Wed, 2019-08-14 at 10:05 -0700, Bart Van Assche wrote: > On 8/14/19 9:52 AM, Abdul Haleem wrote: > > Greeting's > > > > Today's linux-next kernel (5.3.0-rc4-next-20190813) booted with warning on > > my powerpc power 8 lpar > > > > The WARN_ON_ONCE() was introduced by commit 88263208 (scsi: qla2xxx: > > Complain if sp->done() is not...) > > > > boot logs: > > > > WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 > > Hi Abdul, > > Thank you for having reported this. Is that the only warning reported on your > setup by the qla2xxx > driver? If that warning is commented out, does the qla2xxx driver work as > expected? boot warning did not show up when the commit is reverted. should I comment out only the WARN_ON_ONCE() which is causing the issue, and not the other one ? -- Regard's Abdul Haleem IBM Linux Technology Centre
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 --- Comment #20 from Erhard F. (erhar...@mailbox.org) --- Created attachment 284397 --> https://bugzilla.kernel.org/attachment.cgi?id=284397=edit dmesg (PowerMac G4 DP, kernel 5.3-rc4 + debug patch) /dev/sdb2 mounted after booting, dmesg after unmounting -- You are receiving this mail because: You are on the CC list for the bug.
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 --- Comment #21 from Erhard F. (erhar...@mailbox.org) --- Created attachment 284399 --> https://bugzilla.kernel.org/attachment.cgi?id=284399=edit dmesg (PowerMac G4 DP, kernel 5.3-rc4 + debug patch) /dev/sdb2 mounted at boot, dmesg after unmounting. -- You are receiving this mail because: You are on the CC list for the bug.
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 Erhard F. (erhar...@mailbox.org) changed: What|Removed |Added Attachment #284071|0 |1 is obsolete|| --- Comment #22 from Erhard F. (erhar...@mailbox.org) --- Created attachment 284401 --> https://bugzilla.kernel.org/attachment.cgi?id=284401=edit kernel .config (PowerMac G4 DP, kernel 5.3-rc4) -- You are receiving this mail because: You are on the CC list for the bug.
[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten
https://bugzilla.kernel.org/show_bug.cgi?id=204371 --- Comment #23 from Erhard F. (erhar...@mailbox.org) --- On Wed, 14 Aug 2019 16:10:53 + bugzilla-dae...@bugzilla.kernel.org wrote: > https://bugzilla.kernel.org/show_bug.cgi?id=204371 > > --- Comment #19 from Christophe Leroy (christophe.le...@c-s.fr) --- > Created attachment 284389 > --> https://bugzilla.kernel.org/attachment.cgi?id=284389=edit > Patch to trace misaligned destination in copy_page() in asm on PPC32 > > Oops. > > Can you test with this new patch which implements the warning directly in > assembly.? This time it only modifies misc_32.S and It builds ok. Please find the full dmesg attatched at the kernel bugtracker. [...] Aug 14 19:32:52 T600 kernel: WARNING: CPU: 1 PID: 252 at arch/powerpc/kernel/misc_32.S:457 copy_page+0x4/0x98 Aug 14 19:32:52 T600 kernel: Modules linked in: b43legacy input_leds led_class mac80211 joydev hid_generic usbhid hid cfg80211 snd_aoa_codec_tas snd_aoa_fabric_layout snd_aoa rfkill libarc4 evdev ohci_pci btrfs xor zstd_decompress zstd_compress zlib_deflate radeon raid6_pq zlib_inflate ehci_pci ohci_hcd therm_windtunnel ehci_hcd hwmon i2c_algo_bit firewire_ohci backlight firewire_core sr_mod sungem crc_itu_t drm_kms_helper cdrom sungem_phy usbcore syscopyarea sysfillrect usb_common sysimgblt fb_sys_fops ttm snd_aoa_i2sbus drm snd_aoa_soundbus snd_pcm snd_timer drm_panel_orientation_quirks ssb snd uninorth_agp soundcore agpgart lzo lzo_compress lzo_decompress zram zsmalloc Aug 14 19:32:52 T600 kernel: CPU: 1 PID: 252 Comm: umount Tainted: GW 5.3.0-rc4+ #1 Aug 14 19:32:52 T600 kernel: NIP: c0011524 LR: f1a563f8 CTR: c0011520 Aug 14 19:32:52 T600 kernel: REGS: ed22b810 TRAP: 0700 Tainted: GW (5.3.0-rc4+) Aug 14 19:32:52 T600 kernel: MSR: 00029032 CR: 22048222 XER: 2000 Aug 14 19:32:52 T600 kernel: GPR00: f1a563e0 ed22b8c8 e7348020 e6b442e8 dae3e000 0008 c0596c20 dae3effc GPR08: b2209525 ed22b8c8 c0011520 00745ff4 e8dec1fc GPR16: 0001 c07fe5f8 0001 f1af07f0 c06fd6fc GPR24: 
e8dec178 ed22b8d8 f1af ec13f1e8 ec13f1e8 e8a945e8 Aug 14 19:32:52 T600 kernel: NIP [c0011524] copy_page+0x4/0x98 Aug 14 19:32:52 T600 kernel: LR [f1a563f8] __load_free_space_cache+0x540/0x61c [btrfs] Aug 14 19:32:52 T600 kernel: Call Trace: Aug 14 19:32:52 T600 kernel: [ed22b8c8] [f1a563e0] __load_free_space_cache+0x528/0x61c [btrfs] (unreliable) Aug 14 19:32:52 T600 kernel: [ed22b958] [f1a565bc] load_free_space_cache+0xe8/0x1bc [btrfs] Aug 14 19:32:52 T600 kernel: [ed22b998] [f19e83f4] cache_block_group+0x1cc/0x3b4 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22b9f8] [f19f04c8] find_free_extent+0x56c/0xe70 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bad8] [f19f0eb8] btrfs_reserve_extent+0xec/0x220 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bb48] [f19f1130] btrfs_alloc_tree_block+0x144/0x35c [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bc38] [f19dc9c0] alloc_tree_block_no_bg_flush+0x88/0x98 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bc78] [f19dfce0] __btrfs_cow_block+0x140/0x4d0 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bce8] [f19e021c] btrfs_cow_block+0x144/0x23c [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bd18] [f1a039e4] commit_cowonly_roots+0x50/0x294 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bd68] [f1a062c4] btrfs_commit_transaction+0x5e4/0x994 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bdb8] [f1a01800] close_ctree+0xf4/0x2c4 [btrfs] Aug 14 19:32:52 T600 kernel: [ed22bdf8] [c01ab508] generic_shutdown_super+0x80/0x110 Aug 14 19:32:52 T600 kernel: [ed22be18] [c01ab718] kill_anon_super+0x18/0x30 Aug 14 19:32:53 T600 kernel: [ed22be38] [f19d88b4] btrfs_kill_super+0x18/0x30 [btrfs] Aug 14 19:32:53 T600 kernel: [ed22be58] [c01abdbc] deactivate_locked_super+0x54/0xa4 Aug 14 19:32:53 T600 kernel: [ed22be78] [c01cbcb4] cleanup_mnt+0x6c/0xe4 Aug 14 19:32:53 T600 kernel: [ed22bea8] [c0054f50] task_work_run+0xa0/0xc0 Aug 14 19:32:53 T600 kernel: [ed22bed8] [c000bc44] do_notify_resume+0x160/0x2c8 Aug 14 19:32:53 T600 kernel: [ed22bf38] [c0014800] do_user_signal+0x2c/0x34 Aug 14 19:32:53 
T600 kernel: --- interrupt: c00 at 0x5a93d4 LR = 0x5a93b8 Aug 14 19:32:53 T600 kernel: Instruction dump: Aug 14 19:32:53 T600 kernel: 38630020 4200fff8 7c0004ac 7c8903a6 7c0037ac 38c60020 4200fff8 7c0004ac Aug 14 19:32:53 T600 kernel: 7d400124 4c00012c 4e800020 546506fe <0f05> 3863fffc 3884fffc 38a4 Aug 14 19:32:53 T600 kernel: irq event stamp: 0 Aug 14 19:32:53 T600 kernel: hardirqs last enabled at (0): [<>] 0x0 Aug 14 19:32:53 T600 kernel: hardirqs last disabled at (0): [] copy_process+0x474/0x1368 Aug 14 19:32:53 T600 kernel: softirqs last enabled at (0): [] copy_process+0x474/0x1368 Aug 14 19:32:53 T600 kernel: softirqs last disabled at (0): [<>] 0x0 Aug 14
Re: [PATCH] powerpc: use the generic dma coherent remap allocator
Le 14/08/2019 à 15:22, Christoph Hellwig a écrit : This switches to using common code for the DMA allocations, including potential use of the CMA allocator if configured. Switching to the generic code enables DMA allocations from atomic context, which is required by the DMA API documentation, and also adds various other minor features drivers start relying upon. It also makes sure we have on tested code base for all architectures that require uncached pte bits for coherent DMA allocations. Another advantage is that consistent memory allocations now share the general vmalloc pool instead of needing an explicit careout from it. Signed-off-by: Christoph Hellwig --- arch/powerpc/Kconfig | 12 - arch/powerpc/include/asm/book3s/32/pgtable.h | 12 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 12 +- arch/powerpc/mm/dma-noncoherent.c| 318 +-- arch/powerpc/mm/mem.c| 4 - arch/powerpc/mm/ptdump/ptdump.c | 9 - arch/powerpc/platforms/Kconfig.cputype | 2 + 7 files changed, 17 insertions(+), 352 deletions(-) [...] diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index c617282d5b2a..4272ca5e8159 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c [...] @@ -408,23 +116,15 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, __dma_sync_page(paddr, size, dir); } -/* - * Return the PFN for a given cpu virtual address returned by arch_dma_alloc. - */ -long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr, - dma_addr_t dma_addr) +void arch_dma_prep_coherent(struct page *page, size_t size) { - /* This should always be populated, so we don't test every -* level. 
If that fails, we'll have a nice crash which -* will be as good as a BUG_ON() -*/ - unsigned long cpu_addr = (unsigned long)vaddr; - pgd_t *pgd = pgd_offset_k(cpu_addr); - pud_t *pud = pud_offset(pgd, cpu_addr); - pmd_t *pmd = pmd_offset(pud, cpu_addr); - pte_t *ptep = pte_offset_kernel(pmd, cpu_addr); + unsigned long kaddr = (unsigned long)page_address(page); - if (pte_none(*ptep) || !pte_present(*ptep)) - return 0; - return pte_pfn(*ptep); + flush_dcache_range(kaddr, kaddr + size); +} + +static int __init atomic_pool_init(void) +{ + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); } +postcore_initcall(atomic_pool_init); [...] diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 56a7c814160d..afe71b89dec3 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -450,8 +450,10 @@ config NOT_COHERENT_CACHE depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE select ARCH_HAS_DMA_COHERENT_TO_PFN You drop arch_dma_coherent_to_pfn(), it's surprising to see ARCH_HAS_DMA_COHERENT_TO_PFN remains. At first I thought I'd get a build failure. After looking more, I see there is a arch_dma_coherent_to_pfn() defined in kernel/dma/remap.c when DMA_DIRECT_REMAP is selected. I think the naming is not really consistant and should be fixed some how, because that's misleading to have an arch_something() being common to all. Christophe + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU + select DMA_DIRECT_REMAP default n if PPC_47x default y
[PATCH] powerpc/mm: don't display empty early ioremap area
On the 8xx, the layout displayed at boot is: [0.00] Memory: 121856K/131072K available (5728K kernel code, 592K rwdata, 1248K rodata, 560K init, 448K bss, 9216K reserved, 0K cma-reserved) [0.00] Kernel virtual memory layout: [0.00] * 0xffefc000..0xc000 : fixmap [0.00] * 0xffefc000..0xffefc000 : early ioremap [0.00] * 0xc900..0xffefc000 : vmalloc & ioremap [0.00] SLUB: HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 Remove display of an empty early ioremap. Signed-off-by: Christophe Leroy --- arch/powerpc/mm/mem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3e9e9a051c93..69f99128a8d6 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -306,8 +306,9 @@ void __init mem_init(void) pr_info(" * 0x%08lx..0x%08lx : consistent mem\n", IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE); #endif /* CONFIG_NOT_COHERENT_CACHE */ - pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", - ioremap_bot, IOREMAP_TOP); + if (ioremap_bot != IOREMAP_TOP) + pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", + ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", VMALLOC_START, VMALLOC_END); #endif /* CONFIG_PPC32 */ -- 2.13.3