[PATCH] powerpc: remove the ppc44x ocm.c file

2019-08-14 Thread Christoph Hellwig
The on chip memory allocator is entirely unused in the kernel tree.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/configs/ppc40x_defconfig |   1 -
 arch/powerpc/include/asm/ppc4xx_ocm.h |  31 --
 arch/powerpc/platforms/44x/Kconfig    |   8 -
 arch/powerpc/platforms/4xx/Makefile   |   1 -
 arch/powerpc/platforms/4xx/ocm.c  | 390 --
 5 files changed, 431 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/ppc4xx_ocm.h
 delete mode 100644 arch/powerpc/platforms/4xx/ocm.c

diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 8f136b52198b..a5f683aed328 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -84,4 +84,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-CONFIG_PPC4xx_OCM=y
diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h
deleted file mode 100644
index fc4db6dcde84..000000000000
--- a/arch/powerpc/include/asm/ppc4xx_ocm.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgalla...@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#ifndef __ASM_POWERPC_PPC4XX_OCM_H__
-#define __ASM_POWERPC_PPC4XX_OCM_H__
-
-#define PPC4XX_OCM_NON_CACHED 0
-#define PPC4XX_OCM_CACHED 1
-
-#if defined(CONFIG_PPC4xx_OCM)
-
-void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
- int flags, const char *owner);
-void ppc4xx_ocm_free(const void *virt);
-
-#else
-
-#define ppc4xx_ocm_alloc(phys, size, align, flags, owner)  NULL
-#define ppc4xx_ocm_free(addr)  ((void)0)
-
-#endif /* CONFIG_PPC4xx_OCM */
-
-#endif  /* __ASM_POWERPC_PPC4XX_OCM_H__ */
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index b369ed4e3675..25ebe634a661 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -272,14 +272,6 @@ config PPC4xx_GPIO
help
  Enable gpiolib support for ppc440 based boards
 
-config PPC4xx_OCM
-   bool "PPC4xx On Chip Memory (OCM) support"
-   depends on 4xx
-   select PPC_LIB_RHEAP
-   help
- Enable OCM support for PowerPC 4xx platforms with on chip memory,
- OCM provides the fast place for memory access to improve performance.
-
 # 44x specific CPU modules, selected based on the board above.
 config 440EP
bool
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
index f5ae27ca131b..d009d2e0b9e8 100644
--- a/arch/powerpc/platforms/4xx/Makefile
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y  += uic.o machine_check.o
-obj-$(CONFIG_PPC4xx_OCM)   += ocm.o
 obj-$(CONFIG_4xx_SOC)  += soc.o
 obj-$(CONFIG_PCI)  += pci.o
 obj-$(CONFIG_PPC4xx_HSTA_MSI)  += hsta_msi.o
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
deleted file mode 100644
index ba3257406ced..000000000000
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ /dev/null
@@ -1,390 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgalla...@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#define OCM_DISABLED   0
-#define OCM_ENABLED    1
-
-struct ocm_block {
-   struct list_headlist;
-   void __iomem*addr;
-   int size;
-   const char  *owner;
-};
-
-/* non-cached or cached region */
-struct ocm_region {
-   phys_addr_t phys;
-   void __iomem*virt;
-
-   int memtotal;
-   int memfree;
-
-   rh_info_t   *rh;
-   struct list_headlist;
-};
-
-struct ocm_info {
-   int index;
-   int status;
-   int ready;
-
-   phys_addr_t phys;
-
-   int alignment;
-   int memtotal;
-   int cache_size;
-
-   struct ocm_region   nc; /* non-cached region */
-   struct ocm_region   c;  /* cached region */
-};
-
-static struct ocm_info *ocm_nodes;
-static int ocm_count;
-
-static struct ocm_info *ocm_get_node(unsigned int index)
-{
-   if (index 

Re: [PATCH v4 11/25] powernv/fadump: register kernel metadata address with opal

2019-08-14 Thread Hari Bathini



On 13/08/19 4:11 PM, Mahesh J Salgaonkar wrote:
> On 2019-07-16 17:03:15 Tue, Hari Bathini wrote:
>> OPAL allows registering address with it in the first kernel and
>> retrieving it after MPIPL. Setup kernel metadata and register its
>> address with OPAL to use it for processing the crash dump.
>>
>> Signed-off-by: Hari Bathini 
>> ---
>>  arch/powerpc/kernel/fadump-common.h  |4 +
>>  arch/powerpc/kernel/fadump.c |   65 ++-
>>  arch/powerpc/platforms/powernv/opal-fadump.c |   73 ++
>>  arch/powerpc/platforms/powernv/opal-fadump.h |   37 +
>>  arch/powerpc/platforms/pseries/rtas-fadump.c |   32 +--
>>  5 files changed, 177 insertions(+), 34 deletions(-)
>>  create mode 100644 arch/powerpc/platforms/powernv/opal-fadump.h
>>
> [...]
>> @@ -346,30 +349,42 @@ int __init fadump_reserve_mem(void)
>>   * use memblock_find_in_range() here since it doesn't allocate
>>   * from bottom to top.
>>   */
>> -for (base = fw_dump.boot_memory_size;
>> - base <= (memory_boundary - size);
>> - base += size) {
>> +while (base <= (memory_boundary - size)) {
>>  if (memblock_is_region_memory(base, size) &&
>>  !memblock_is_region_reserved(base, size))
>>  break;
>> +
>> +base += size;
>>  }
>> -if ((base > (memory_boundary - size)) ||
>> -memblock_reserve(base, size)) {
>> +
>> +if (base > (memory_boundary - size)) {
>> +pr_err("Failed to find memory chunk for reservation\n");
>> +goto error_out;
>> +}
>> +fw_dump.reserve_dump_area_start = base;
>> +
>> +/*
>> + * Calculate the kernel metadata address and register it with
>> + * f/w if the platform supports.
>> + */
>> +if (fw_dump.ops->setup_kernel_metadata(&fw_dump) < 0)
>> +goto error_out;
> 
> I see setup_kernel_metadata() registers the metadata address with opal without
> having any minimum data initialized in it. Secondly, why can't this wait
> until registration? I think we should defer this until fadump registration.

If setting up the metadata address fails (it should ideally not fail, but..),
everything else is useless. So, we might as well try that early and fall back
to KDump in case of an error..

> What if kernel crashes before metadata area is initialized ?

registered_regions would be '0', so it is treated as the fadump-not-registered
case. Let me initialize the metadata explicitly before registering the address
with f/w to avoid any assumption...
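
Concretely, the ordering would become something like this (a sketch only;
the metadata pointer/size names here are illustrative, not from the series):

	/* Zero the metadata area before handing its address to f/w */
	memset(fadump_metadata_vaddr, 0, fadump_metadata_size);
	if (fw_dump.ops->setup_kernel_metadata(&fw_dump) < 0)
		goto error_out;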

> 
>> +
>> +if (memblock_reserve(base, size)) {
>>  pr_err("Failed to reserve memory\n");
>> -return 0;
>> +goto error_out;
>>  }
> [...]
>> -
>>  static struct fadump_ops rtas_fadump_ops = {
>> -.init_fadump_mem_struct = rtas_fadump_init_mem_struct,
>> -.register_fadump= rtas_fadump_register_fadump,
>> -.unregister_fadump  = rtas_fadump_unregister_fadump,
>> -.invalidate_fadump  = rtas_fadump_invalidate_fadump,
>> -.process_fadump = rtas_fadump_process_fadump,
>> -.fadump_region_show = rtas_fadump_region_show,
>> -.fadump_trigger = rtas_fadump_trigger,
>> +.init_fadump_mem_struct = rtas_fadump_init_mem_struct,
>> +.get_kernel_metadata_size   = rtas_fadump_get_kernel_metadata_size,
>> +.setup_kernel_metadata  = rtas_fadump_setup_kernel_metadata,
>> +.register_fadump= rtas_fadump_register_fadump,
>> +.unregister_fadump  = rtas_fadump_unregister_fadump,
>> +.invalidate_fadump  = rtas_fadump_invalidate_fadump,
>> +.process_fadump = rtas_fadump_process_fadump,
>> +.fadump_region_show = rtas_fadump_region_show,
>> +.fadump_trigger = rtas_fadump_trigger,
> 
> Can you make the tab space changes in your previous patch where these
> were initially introduced ? So that this patch can only show new members
> that are added.

done.

Thanks
Hari



[PATCH v1 3/4] arm64: dts: ls1028a: fix little-big endian issue for dcfg

2019-08-14 Thread Yinbo Zhu
The dcfg block is little-endian; describe it as such so that SoC
register values are read correctly.

Signed-off-by: Yinbo Zhu 
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index b0d4f8916ede..5538e8e354b2 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -162,7 +162,7 @@
	dcfg: syscon@1e00000 {
compatible = "fsl,ls1028a-dcfg", "syscon";
		reg = <0x0 0x1e00000 0x0 0x10000>;
-   big-endian;
+   little-endian;
};
 
	scfg: syscon@1fc0000 {
-- 
2.17.1



[PATCH 1/3] powerpc/mce: Add MCE notification chain

2019-08-14 Thread Santosh Sivaraj
This is needed to report bad blocks for persistent memory.

Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/include/asm/mce.h |  3 +++
 arch/powerpc/kernel/mce.c  | 15 +++
 2 files changed, 18 insertions(+)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index e1931c8c2743..b1c6363f924c 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -212,6 +212,9 @@ extern void machine_check_queue_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt,
   bool user_mode, bool in_guest);
 unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr);
+int mce_register_notifier(struct notifier_block *nb);
+int mce_unregister_notifier(struct notifier_block *nb);
+
 #ifdef CONFIG_PPC_BOOK3S_64
 void flush_and_reload_slb(void);
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ec4b3e1087be..a78210ca6cd9 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -47,6 +47,20 @@ static struct irq_work mce_ue_event_irq_work = {
 
 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int mce_register_notifier(struct notifier_block *nb)
+{
+   return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+   return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
+
 static void mce_set_error_info(struct machine_check_event *mce,
   struct mce_error_info *mce_err)
 {
@@ -263,6 +277,7 @@ static void machine_process_ue_event(struct work_struct *work)
while (__this_cpu_read(mce_ue_count) > 0) {
index = __this_cpu_read(mce_ue_count) - 1;
	evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+   blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
 #ifdef CONFIG_MEMORY_FAILURE
/*
 * This should probably queued elsewhere, but
-- 
2.21.0
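
A minimal sketch of how a consumer of this chain could look (the handler
and its names below are hypothetical, for illustration only; they are not
part of the patch):

#include <linux/notifier.h>
#include <asm/mce.h>

static int example_mce_cb(struct notifier_block *nb, unsigned long val,
			  void *data)
{
	struct machine_check_event *evt = data;

	if (evt->error_type != MCE_ERROR_TYPE_UE)
		return NOTIFY_DONE;

	/* inspect evt->u.ue_error and record the bad address */
	return NOTIFY_OK;
}

static struct notifier_block example_mce_nb = {
	.notifier_call = example_mce_cb,
};

static int __init example_init(void)
{
	return mce_register_notifier(&example_mce_nb);
}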



[PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges

2019-08-14 Thread Santosh Sivaraj
Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.

Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/platforms/pseries/papr_scm.c | 65 +++
 1 file changed, 65 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..4d25c98a9835 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -12,6 +12,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 
@@ -39,8 +41,12 @@ struct papr_scm_priv {
struct resource res;
struct nd_region *region;
struct nd_interleave_set nd_set;
+   struct list_head list;
 };
 
+LIST_HEAD(papr_nd_regions);
+DEFINE_MUTEX(papr_ndr_lock);
+
 static int drc_pmem_bind(struct papr_scm_priv *p)
 {
unsigned long ret[PLPAR_HCALL_BUFSIZE];
@@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
dev_info(dev, "Region registered with target node %d and online 
node %d",
 target_nid, online_nid);
 
+   mutex_lock(&papr_ndr_lock);
+   list_add_tail(&p->list, &papr_nd_regions);
+   mutex_unlock(&papr_ndr_lock);
+
return 0;
 
 err:   nvdimm_bus_unregister(p->bus);
@@ -371,6 +381,60 @@ err:   nvdimm_bus_unregister(p->bus);
return -ENXIO;
 }
 
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+void *data)
+{
+   struct machine_check_event *evt = data;
+   struct papr_scm_priv *p;
+   u64 phys_addr;
+
+   if (evt->error_type != MCE_ERROR_TYPE_UE)
+   return NOTIFY_DONE;
+
+   if (list_empty(&papr_nd_regions))
+   return NOTIFY_DONE;
+
+   phys_addr = evt->u.ue_error.physical_address +
+   (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+   if (!evt->u.ue_error.physical_address_provided ||
+   !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+   return NOTIFY_DONE;
+
+   mutex_lock(&papr_ndr_lock);
+   list_for_each_entry(p, &papr_nd_regions, list) {
+   struct resource res = p->res;
+   u64 aligned_addr;
+
+   if (res.start > phys_addr)
+   continue;
+
+   if (res.end < phys_addr)
+   continue;
+
+   aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+   pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+   if (nvdimm_bus_add_badrange(p->bus,
+   aligned_addr, L1_CACHE_BYTES))
+   pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
+   aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+   nvdimm_region_notify(p->region,
+NVDIMM_REVALIDATE_POISON);
+
+   break;
+   }
+   mutex_unlock(&papr_ndr_lock);
+
+   return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+   .notifier_call = handle_mce_ue
+};
+
 static int papr_scm_probe(struct platform_device *pdev)
 {
struct device_node *dn = pdev->dev.of_node;
@@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev)
goto err2;
 
platform_set_drvdata(pdev, p);
+   mce_register_notifier(&mce_ue_nb);
 
return 0;
 
-- 
2.21.0



Re: [PATCH v2 1/3] KVM: PPC: Book3S HV: Fix race in re-enabling XIVE escalation interrupts

2019-08-14 Thread Paul Mackerras
On Wed, Aug 14, 2019 at 02:46:38PM +1000, Jordan Niethe wrote:
> On Tue, 2019-08-13 at 20:03 +1000, Paul Mackerras wrote:

[snip]
> > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > index 337e644..2e7e788 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > @@ -2831,29 +2831,39 @@ kvm_cede_prodded:
> >  kvm_cede_exit:
> > ld  r9, HSTATE_KVM_VCPU(r13)
> >  #ifdef CONFIG_KVM_XICS
> > -   /* Abort if we still have a pending escalation */
> > +   /* are we using XIVE with single escalation? */
> > +   ld  r10, VCPU_XIVE_ESC_VADDR(r9)
> > +   cmpdi   r10, 0
> > +   beq 3f
> > +   li  r6, XIVE_ESB_SET_PQ_00
> Would it make sense to put the above instruction down into the 4: label
> instead? If we do not branch to 4, r6 is overwritten anyway. 

Right.

> I think that would save a load when we do not branch to 4. Also it

Well, li is a load immediate rather than a load ("load" would normally
imply a load from memory).  Load-immediate instructions are
essentially free since they can easily be executed in parallel with
other instructions and execute in a single cycle.

> would mean that you could use r5 everywhere instead of changing it to
> r6? 

Yes.  If I have to respin the patch for other reasons then I will
rearrange things as you suggest.  I don't think it's worth respinning
just for this change -- it won't reduce the total number of
instructions, and I strongly doubt there would be any measurable
performance difference.

> > +   /*
> > +* If we still have a pending escalation, abort the cede,
> > +* and we must set PQ to 10 rather than 00 so that we don't
> > +* potentially end up with two entries for the escalation
> > +* interrupt in the XIVE interrupt queue.  In that case
> > +* we also don't want to set xive_esc_on to 1 here in
> > +* case we race with xive_esc_irq().
> > +*/
> > lbz r5, VCPU_XIVE_ESC_ON(r9)
> > cmpwi   r5, 0
> > -   beq 1f
> > +   beq 4f
> > li  r0, 0
> > stb r0, VCPU_CEDED(r9)
> > -1: /* Enable XIVE escalation */
> > -   li  r5, XIVE_ESB_SET_PQ_00
> > +   li  r6, XIVE_ESB_SET_PQ_10
> > +   b   5f
> > +4: li  r0, 1
> > +   stb r0, VCPU_XIVE_ESC_ON(r9)
> > +   /* make sure store to xive_esc_on is seen before xive_esc_irq
> > runs */
> > +   sync
> > +5: /* Enable XIVE escalation */
> > mfmsr   r0
> > andi.   r0, r0, MSR_DR  /* in real mode? */
> > beq 1f
> > -   ld  r10, VCPU_XIVE_ESC_VADDR(r9)
> > -   cmpdi   r10, 0
> > -   beq 3f
> > -   ldx r0, r10, r5
> > +   ldx r0, r10, r6
> > b   2f
> >  1: ld  r10, VCPU_XIVE_ESC_RADDR(r9)
> > -   cmpdi   r10, 0
> > -   beq 3f
> > -   ldcix   r0, r10, r5
> > +   ldcix   r0, r10, r6
> >  2: sync
> > -   li  r0, 1
> > -   stb r0, VCPU_XIVE_ESC_ON(r9)
> >  #endif /* CONFIG_KVM_XICS */
> >  3: b   guest_exit_cont
> >  

Paul.


Re: [PATCH v1 10/10] powerpc/mm: refactor ioremap_range() and use ioremap_page_range()

2019-08-14 Thread Christophe Leroy




On 14/08/2019 at 07:49, Christoph Hellwig wrote:

Somehow this series is missing a cover letter.

While you are touching all this "fun" can you also look into killing
__ioremap?  It seems to be a weird non-standard version of ioremap_prot
(probably predating ioremap_prot) that is missing a few lines of code
setting attributes that might not even be applicable for the two drivers
calling it.



ocm_init_node() [arch/powerpc/platforms/4xx/ocm.c] calls __ioremap() 
with _PAGE_EXEC set while ioremap_prot() clears _PAGE_EXEC
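
The difference boils down to this (a simplified sketch, not the actual
call sites):

	/* __ioremap() passes caller flags through, so _PAGE_EXEC survives: */
	virt = __ioremap(phys, size, _PAGE_EXEC | _PAGE_NO_CACHE);

	/* ioremap_prot() strips execute permission from the flags: */
	virt = ioremap_prot(phys, size, flags);	/* _PAGE_EXEC is cleared */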


Christophe


Re: [PATCH v4 05/25] pseries/fadump: introduce callbacks for platform specific operations

2019-08-14 Thread Hari Bathini



On 12/08/19 3:12 PM, Mahesh J Salgaonkar wrote:
> On 2019-07-16 17:02:30 Tue, Hari Bathini wrote:
>> Introduce callback functions for platform specific operations like
>> register, unregister, invalidate & such. Also, define place-holders
>> for the same on pSeries platform.
>>
>> Signed-off-by: Hari Bathini 
>> ---
>>  arch/powerpc/kernel/fadump-common.h  |   33 ++
>>  arch/powerpc/kernel/fadump.c |   47 +
>>  arch/powerpc/platforms/pseries/Makefile  |1 
>>  arch/powerpc/platforms/pseries/rtas-fadump.c |  134 
>> ++
>>  4 files changed, 171 insertions(+), 44 deletions(-)
>>  create mode 100644 arch/powerpc/platforms/pseries/rtas-fadump.c
>>
>> diff --git a/arch/powerpc/kernel/fadump-common.h 
>> b/arch/powerpc/kernel/fadump-common.h
>> index 09d6161..020d582 100644
>> --- a/arch/powerpc/kernel/fadump-common.h
>> +++ b/arch/powerpc/kernel/fadump-common.h
>> @@ -50,6 +50,12 @@
>>  #define FADUMP_UNREGISTER   2
>>  #define FADUMP_INVALIDATE   3
>>  
>> +/* Firmware-Assited Dump platforms */
>> +enum fadump_platform_type {
>> +FADUMP_PLATFORM_UNKNOWN = 0,
>> +FADUMP_PLATFORM_PSERIES,
>> +};
> 
> Do we really need these ? Aren't we hiding all platform specific things
> under fadump_ops functions ? I see that these values are used only for
> assignements and not making any decision in code flow. Am I missing
> anything here ?

True. This isn't really useful. Will drop it..

Thanks
Hari



[PATCH v1 2/4] soc: fsl: guts: Add definition for LS1028A

2019-08-14 Thread Yinbo Zhu
Adding compatible string "ls1028a-dcfg" to initialize guts driver
for ls1028 and SoC die attribute definition for LS1028A

Signed-off-by: Yinbo Zhu 
---
 drivers/soc/fsl/guts.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c
index 1ef8068c8dd3..34810f9bb2ee 100644
--- a/drivers/soc/fsl/guts.c
+++ b/drivers/soc/fsl/guts.c
@@ -102,6 +102,11 @@ static const struct fsl_soc_die_attr fsl_soc_die[] = {
   .svr  = 0x87360000,
   .mask = 0xff3f0000,
},
+   /* Die: LS1028A, SoC: LS1028A */
+   { .die  = "LS1028A",
+ .svr  = 0x870b0000,
+ .mask = 0xff3f0000,
+   },
{ },
 };
 
@@ -224,6 +229,7 @@ static const struct of_device_id fsl_guts_of_match[] = {
{ .compatible = "fsl,ls1012a-dcfg", },
{ .compatible = "fsl,ls1046a-dcfg", },
{ .compatible = "fsl,lx2160a-dcfg", },
+   { .compatible = "fsl,ls1028a-dcfg", },
{}
 };
 MODULE_DEVICE_TABLE(of, fsl_guts_of_match);
-- 
2.17.1



Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-08-14 Thread Mahesh Jagannath Salgaonkar
On 8/12/19 2:52 PM, Santosh Sivaraj wrote:
> If we take a UE on one of the instructions with a fixup entry, set nip
> to continue execution at the fixup entry. Stop processing the event
> further or print it.
> 
> Co-developed-by: Reza Arbab 
> Signed-off-by: Reza Arbab 
> Cc: Mahesh Salgaonkar 
> Signed-off-by: Santosh Sivaraj 

Looks good to me.

Reviewed-by: Mahesh Salgaonkar 

Thanks,
-Mahesh.

> ---
>  arch/powerpc/include/asm/mce.h  |  4 +++-
>  arch/powerpc/kernel/mce.c   | 16 
>  arch/powerpc/kernel/mce_power.c | 15 +--
>  3 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index f3a6036b6bc0..e1931c8c2743 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -122,7 +122,8 @@ struct machine_check_event {
>   enum MCE_UeErrorType ue_error_type:8;
>   u8  effective_address_provided;
>   u8  physical_address_provided;
> - u8  reserved_1[5];
> + u8  ignore_event;
> + u8  reserved_1[4];
>   u64 effective_address;
>   u64 physical_address;
>   u8  reserved_2[8];
> @@ -193,6 +194,7 @@ struct mce_error_info {
>   enum MCE_Initiator  initiator:8;
>   enum MCE_ErrorClass error_class:8;
>   boolsync_error;
> + boolignore_event;
>  };
>  
>  #define MAX_MC_EVT   100
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index a3b122a685a5..ec4b3e1087be 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>   if (phys_addr != ULONG_MAX) {
>   mce->u.ue_error.physical_address_provided = true;
>   mce->u.ue_error.physical_address = phys_addr;
> + mce->u.ue_error.ignore_event = mce_err->ignore_event;
>   machine_check_ue_event(mce);
>   }
>   }
> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
>   /*
>* This should probably queued elsewhere, but
>* oh! well
> +  *
> +  * Don't report this machine check because the caller has a
> +  * asked us to ignore the event, it has a fixup handler which
> +  * will do the appropriate error handling and reporting.
>*/
>   if (evt->error_type == MCE_ERROR_TYPE_UE) {
> + if (evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_ue_count);
> + continue;
> + }
> +
>   if (evt->u.ue_error.physical_address_provided) {
>   unsigned long pfn;
>  
> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
>   while (__this_cpu_read(mce_queue_count) > 0) {
>   index = __this_cpu_read(mce_queue_count) - 1;
>   evt = this_cpu_ptr(&mce_event_queue[index]);
> +
> + if (evt->error_type == MCE_ERROR_TYPE_UE &&
> + evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_queue_count);
> + continue;
> + }
>   machine_check_print_event_info(evt, false, false);
>   __this_cpu_dec(mce_queue_count);
>   }
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index e74816f045f8..1dd87f6f5186 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -11,6 +11,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -18,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * Convert an address related to an mm to a physical address.
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>   return 0;
>  }
>  
> -static long mce_handle_ue_error(struct pt_regs *regs)
> +static long mce_handle_ue_error(struct pt_regs *regs,
> + struct mce_error_info *mce_err)
>  {
>   long handled = 0;
> + const struct exception_table_entry *entry;
> +
> + entry = search_kernel_exception_table(regs->nip);
> + if (entry) {
> + mce_err->ignore_event = true;
> + regs->nip = extable_fixup(entry);
> + return 1;
> + }
>  
>   /*
>* On specific SCOM read via MMIO we may get a machine check
> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs,
>   &phys_addr);
>  
>   if (!handled && 

[PATCH 0/3] Add bad pmem bad blocks to bad range

2019-08-14 Thread Santosh Sivaraj
This series, which should be based on top of the still un-merged
"powerpc: implement machine check safe memcpy" series, adds support
to add the bad blocks which generated an MCE to the NVDIMM bad blocks.
The next access of the same memory will be blocked by the NVDIMM layer
itself.

Santosh Sivaraj (3):
  powerpc/mce: Add MCE notification chain
  of_pmem: Add memory ranges which took a mce to bad range
  papr/scm: Add bad memory ranges to nvdimm bad ranges

 arch/powerpc/include/asm/mce.h|   3 +
 arch/powerpc/kernel/mce.c |  15 +++
 arch/powerpc/platforms/pseries/papr_scm.c |  65 
 drivers/nvdimm/of_pmem.c  | 122 ++
 4 files changed, 186 insertions(+), 19 deletions(-)

-- 
2.21.0



Re: [PATCH v1 05/10] powerpc/mm: Do early ioremaps from top to bottom on PPC64 too.

2019-08-14 Thread Christoph Hellwig
On Wed, Aug 14, 2019 at 08:10:59AM +0200, Christophe Leroy wrote:
> > Note that while a few other architectures have a magic hack like powerpc
> > to make ioremap work before vmalloc, the normal practice would be
> > to explicitly use early_ioremap.  I guess your change is fine for now,
> > but it might make sense convert powerpc to the explicit early_ioremap
> > scheme as well.
> > 
> 
> I've been looking into early_ioremap(), but IIUC early_ioremap() is for
> ephemeral mappings only, it expects all early mappings to be gone at the end
> of init.

Yes.

> PPC installs definitive early mappings (for instance for PCI). How does that
> have to be handled ?

Good question, and no good answer.  I've just been looking at a generic
ioremap for simple architectures, and been finding all kinds of crap
and inconsistencies, and this is one of the things I noticed.
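
For reference, the ephemeral pattern early_ioremap() is meant for looks
roughly like this (a generic API sketch, not code from this thread):

	#include <asm/early_ioremap.h>

	void __init early_probe_device(phys_addr_t phys)
	{
		void __iomem *regs = early_ioremap(phys, 0x1000);

		if (!regs)
			return;
		/* ... early register accesses ... */
		early_iounmap(regs, 0x1000);	/* gone before init ends */
	}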


Re: [PATCH v1 10/10] powerpc/mm: refactor ioremap_range() and use ioremap_page_range()

2019-08-14 Thread Christoph Hellwig
On Wed, Aug 14, 2019 at 08:23:54AM +0200, Christophe Leroy wrote:
> On 14/08/2019 at 07:49, Christoph Hellwig wrote:
> > Somehow this series is missing a cover letter.
> > 
> > While you are touching all this "fun" can you also look into killing
> > __ioremap?  It seems to be a weird non-standard version of ioremap_prot
> > (probably predating ioremap_prot) that is missing a few lines of code
> > setting attributes that might not even be applicable for the two drivers
> > calling it.
> > 
> 
> ocm_init_node() [arch/powerpc/platforms/4xx/ocm.c] calls __ioremap() with
> _PAGE_EXEC set while ioremap_prot() clears _PAGE_EXEC

Indeed.  But I don't see anything marking this intentional.  Then again
the driver is entirely unused, so we might as well kill it off now.


Re: [PATCH v4 1/2] powerpc/time: Only set CONFIG_ARCH_HAS_SCALED_CPUTIME on PPC64

2019-08-14 Thread Christophe Leroy

Hi Nick,


On 07/06/2018 at 03:43, Nicholas Piggin wrote:

On Wed,  6 Jun 2018 14:21:08 +0000 (UTC)
Christophe Leroy  wrote:


scaled cputime is only meaningful when the processor has
SPURR and/or PURR, which means only on PPC64.



[...]



I wonder if we could make this depend on PPC_PSERIES or even
PPC_SPLPAR as well? (That would be for a later patch)


Can we go further on this ?

Do we know exactly which configurations support scaled cputime, that is,
actually have SPRN_SPURR and/or SPRN_PURR ?
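
(For context, scaled cputime accounting boils down to sampling these SPRs,
which is only possible where they exist. A minimal sketch:)

	/* the raw inputs to scaled cputime, PPC64-only SPRs */
	u64 purr  = mfspr(SPRN_PURR);	/* Processor Utilization of Resources */
	u64 spurr = mfspr(SPRN_SPURR);	/* Scaled PURR, frequency-scaled */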


Ref https://github.com/linuxppc/issues/issues/171

Christophe


Re: [PATCH v4 06/25] pseries/fadump: define register/un-register callback functions

2019-08-14 Thread Hari Bathini



On 12/08/19 9:31 PM, Mahesh J Salgaonkar wrote:
> On 2019-07-16 17:02:38 Tue, Hari Bathini wrote:
>> Make RTAS calls to register and un-register for FADump. Also, update
>> how fadump_region contents are displayed to provide more information.
>>
>> Signed-off-by: Hari Bathini 
>> ---
>>  arch/powerpc/kernel/fadump-common.h  |2 
>>  arch/powerpc/kernel/fadump.c |  164 ++
>>  arch/powerpc/platforms/pseries/rtas-fadump.c |  163 +-
>>  3 files changed, 176 insertions(+), 153 deletions(-)
>>
> [...]
>>  static int rtas_fadump_register_fadump(struct fw_dump *fadump_conf)
>>  {
>> -return -EIO;
>> +int rc, err = -EIO;
>> +unsigned int wait_time;
>> +
>> +/* TODO: Add upper time limit for the delay */
>> +do {
>> +rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
>> +NULL, FADUMP_REGISTER, &fdm,
>> +sizeof(struct rtas_fadump_mem_struct));
>> +
>> +wait_time = rtas_busy_delay_time(rc);
>> +if (wait_time)
>> +mdelay(wait_time);
>> +
>> +} while (wait_time);
>> +
>> +switch (rc) {
>> +case 0:
>> +pr_info("Registration is successful!\n");
>> +fadump_conf->dump_registered = 1;
>> +err = 0;
>> +break;
>> +case -1:
>> +pr_err("Failed to register. Hardware Error(%d).\n", rc);
>> +break;
>> +case -3:
>> +if (!is_fadump_boot_mem_contiguous(fadump_conf))
>> +pr_err("Can't hot-remove boot memory area.\n");
>> +else if (!is_fadump_reserved_mem_contiguous(fadump_conf))
>> +pr_err("Can't hot-remove reserved memory area.\n");
> 
> Any reason why we changed the error messages here? It gives an impression as
> if fadump reservation tried to hot-remove memory and failed.

Yeah, the message is indeed a bit confusing. Will stick with the old message..

Thanks
Hari



Re: [PATCH v1 05/10] powerpc/mm: Do early ioremaps from top to bottom on PPC64 too.

2019-08-14 Thread Christophe Leroy




On 14/08/2019 at 07:55, Christoph Hellwig wrote:

On Tue, Aug 13, 2019 at 08:11:38PM +0000, Christophe Leroy wrote:

Until vmalloc system is up and running, ioremap basically
allocates addresses at the border of the IOREMAP area.


Note that while a few other architectures have a magic hack like powerpc
to make ioremap work before vmalloc, the normal practice would be
to explicitly use early_ioremap.  I guess your change is fine for now,
but it might make sense convert powerpc to the explicit early_ioremap
scheme as well.



I've been looking into early_ioremap(), but IIUC early_ioremap() is for 
ephemeral mappings only, it expects all early mappings to be gone at the 
end of init.


PPC installs definitive early mappings (for instance for PCI). How does 
that have to be handled ?


Christophe


Re: [PATCH] powerpc: remove the ppc44x ocm.c file

2019-08-14 Thread Christophe Leroy




On 14/08/2019 at 08:32, Christoph Hellwig wrote:

The on chip memory allocator is entirely unused in the kernel tree.

Signed-off-by: Christoph Hellwig 


Since this driver was added in Linux 3.9, functions ppc4xx_ocm_alloc() 
and ppc4xx_ocm_free() have never been used in any driver, and are not 
even exported to modules.


Acked-by: Christophe Leroy 


---
  arch/powerpc/configs/ppc40x_defconfig |   1 -
  arch/powerpc/include/asm/ppc4xx_ocm.h |  31 --
  arch/powerpc/platforms/44x/Kconfig    |   8 -
  arch/powerpc/platforms/4xx/Makefile   |   1 -
  arch/powerpc/platforms/4xx/ocm.c  | 390 --
  5 files changed, 431 deletions(-)
  delete mode 100644 arch/powerpc/include/asm/ppc4xx_ocm.h
  delete mode 100644 arch/powerpc/platforms/4xx/ocm.c

diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 8f136b52198b..a5f683aed328 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -84,4 +84,3 @@ CONFIG_CRYPTO_ECB=y
  CONFIG_CRYPTO_PCBC=y
  CONFIG_CRYPTO_MD5=y
  CONFIG_CRYPTO_DES=y
-CONFIG_PPC4xx_OCM=y
diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h
deleted file mode 100644
index fc4db6dcde84..000000000000
--- a/arch/powerpc/include/asm/ppc4xx_ocm.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgalla...@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#ifndef __ASM_POWERPC_PPC4XX_OCM_H__
-#define __ASM_POWERPC_PPC4XX_OCM_H__
-
-#define PPC4XX_OCM_NON_CACHED 0
-#define PPC4XX_OCM_CACHED 1
-
-#if defined(CONFIG_PPC4xx_OCM)
-
-void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
- int flags, const char *owner);
-void ppc4xx_ocm_free(const void *virt);
-
-#else
-
-#define ppc4xx_ocm_alloc(phys, size, align, flags, owner)  NULL
-#define ppc4xx_ocm_free(addr)  ((void)0)
-
-#endif /* CONFIG_PPC4xx_OCM */
-
-#endif  /* __ASM_POWERPC_PPC4XX_OCM_H__ */
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index b369ed4e3675..25ebe634a661 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -272,14 +272,6 @@ config PPC4xx_GPIO
help
  Enable gpiolib support for ppc440 based boards
  
-config PPC4xx_OCM
-   bool "PPC4xx On Chip Memory (OCM) support"
-   depends on 4xx
-   select PPC_LIB_RHEAP
-   help
- Enable OCM support for PowerPC 4xx platforms with on chip memory,
- OCM provides the fast place for memory access to improve performance.
-
  # 44x specific CPU modules, selected based on the board above.
  config 440EP
bool
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
index f5ae27ca131b..d009d2e0b9e8 100644
--- a/arch/powerpc/platforms/4xx/Makefile
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -1,6 +1,5 @@
  # SPDX-License-Identifier: GPL-2.0-only
  obj-y += uic.o machine_check.o
-obj-$(CONFIG_PPC4xx_OCM)   += ocm.o
  obj-$(CONFIG_4xx_SOC) += soc.o
  obj-$(CONFIG_PCI) += pci.o
  obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
deleted file mode 100644
index ba3257406ced..000000000000
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ /dev/null
@@ -1,390 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgalla...@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#define OCM_DISABLED   0
-#define OCM_ENABLED    1
-
-struct ocm_block {
-   struct list_headlist;
-   void __iomem*addr;
-   int size;
-   const char  *owner;
-};
-
-/* non-cached or cached region */
-struct ocm_region {
-   phys_addr_t phys;
-   void __iomem*virt;
-
-   int memtotal;
-   int memfree;
-
-   rh_info_t   *rh;
-   struct list_headlist;
-};
-
-struct ocm_info {
-   int index;
-   int status;
-   int ready;
-
-   phys_addr_t phys;
-
-   int alignment;
-   int memtotal;
-   int 

Re: [PATCH v4 12/25] powernv/fadump: define register/un-register callback functions

2019-08-14 Thread Hari Bathini



On 13/08/19 8:04 PM, Mahesh J Salgaonkar wrote:
> On 2019-07-16 17:03:23 Tue, Hari Bathini wrote:
>> Make OPAL calls to register and un-register with firmware for MPIPL.
>>
>> Signed-off-by: Hari Bathini 
>> ---
>>  arch/powerpc/platforms/powernv/opal-fadump.c |   71 +-
>>  1 file changed, 69 insertions(+), 2 deletions(-)
>>
> [...]
>> @@ -88,12 +104,63 @@ static int opal_fadump_setup_kernel_metadata(struct fw_dump *fadump_conf)
>>  
>>  static int opal_fadump_register_fadump(struct fw_dump *fadump_conf)
>>  {
>> -return -EIO;
>> +int i, err = -EIO;
>> +s64 rc;
>> +
>> +for (i = 0; i < opal_fdm->region_cnt; i++) {
>> +rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
>> +   opal_fdm->rgn[i].src,
>> +   opal_fdm->rgn[i].dest,
>> +   opal_fdm->rgn[i].size);
>> +if (rc != OPAL_SUCCESS)
> 
> You may want to remove ranges which have been added so far on error and reset
> opal_fdm->registered_regions.

Thanks for catching this, Mahesh.
Will update..

> 
>> +break;
>> +
>> +opal_fdm->registered_regions++;
>> +}
>> +
>> +switch (rc) {
>> +case OPAL_SUCCESS:
>> +pr_info("Registration is successful!\n");
>> +fadump_conf->dump_registered = 1;
>> +err = 0;
>> +break;
>> +case OPAL_UNSUPPORTED:
>> +pr_err("Support not available.\n");
>> +fadump_conf->fadump_supported = 0;
>> +fadump_conf->fadump_enabled = 0;
>> +break;
>> +case OPAL_INTERNAL_ERROR:
>> +pr_err("Failed to register. Hardware Error(%lld).\n", rc);
>> +break;
>> +case OPAL_PARAMETER:
>> +pr_err("Failed to register. Parameter Error(%lld).\n", rc);
>> +break;
>> +case OPAL_PERMISSION:
> 
> You may want to remove this check. With latest opal mpipl patches
> opal_mpipl_update() no more returns OPAL_PERMISSION.
> 
> Even if opal does, we can not say fadump already registered just by
> looking at return status of single entry addition.

Sure.

Thanks
Hari



[PATCH v1 4/4] mmc: sdhci-of-esdhc: add erratum A011334 support in ls1028a 1.0 SoC

2019-08-14 Thread Yinbo Zhu
This patch adds erratum A011334 support for the LS1028A 1.0 SoC.

Signed-off-by: Yinbo Zhu 
---
 drivers/mmc/host/sdhci-of-esdhc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index b16f7d440f78..eb2b290447fc 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -1006,6 +1006,7 @@ static struct soc_device_attribute soc_incorrect_hostver[] = {
 static struct soc_device_attribute soc_fixup_sdhc_clkdivs[] = {
{ .family = "QorIQ LX2160A", .revision = "1.0", },
{ .family = "QorIQ LX2160A", .revision = "2.0", },
+   { .family = "QorIQ LS1028A", .revision = "1.0", },
{ },
 };
 
-- 
2.17.1



Re: [PATCH] powerpc/32s: fix boot failure with DEBUG_PAGEALLOC without KASAN.

2019-08-14 Thread Jonathan Neuschäfer
On Wed, Aug 14, 2019 at 05:28:35AM +0000, Christophe Leroy wrote:
> When KASAN is selected, the definitive hash table has to be
> set up later, but there is already an early temporary one.
> 
> When KASAN is not selected, there is no early hash table,
> so the setup of the definitive hash table cannot be delayed.
> 
> Reported-by: Jonathan Neuschafer 
> Fixes: 72f208c6a8f7 ("powerpc/32s: move hash code patching out of 
> MMU_init_hw()")
> Signed-off-by: Christophe Leroy 
> ---

Thanks. This does fix the DEBUG_PAGEALLOC-without-KASAN case.

Tested-by: Jonathan Neuschafer 




[PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range

2019-08-14 Thread Santosh Sivaraj
Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.

Signed-off-by: Santosh Sivaraj 
---
 drivers/nvdimm/of_pmem.c | 122 +--
 1 file changed, 103 insertions(+), 19 deletions(-)

diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index a0c8dcfa0bf9..828dbfe44ca6 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -8,6 +8,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 static const struct attribute_group *region_attr_groups[] = {
	&nd_region_attribute_group,
@@ -25,11 +28,77 @@ struct of_pmem_private {
struct nvdimm_bus *bus;
 };
 
+struct of_pmem_region {
+   struct of_pmem_private *priv;
+   struct nd_region_desc *region_desc;
+   struct nd_region *region;
+   struct list_head list;
+};
+
+LIST_HEAD(pmem_regions);
+DEFINE_MUTEX(pmem_region_lock);
+
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+void *data)
+{
+   struct machine_check_event *evt = data;
+   struct of_pmem_region *pmem_region;
+   u64 phys_addr;
+
+   if (evt->error_type != MCE_ERROR_TYPE_UE)
+   return NOTIFY_DONE;
+
+   if (list_empty(&pmem_regions))
+   return NOTIFY_DONE;
+
+   phys_addr = evt->u.ue_error.physical_address +
+   (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+   if (!evt->u.ue_error.physical_address_provided ||
+   !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+   return NOTIFY_DONE;
+
+   mutex_lock(&pmem_region_lock);
+   list_for_each_entry(pmem_region, &pmem_regions, list) {
+   struct resource *res = pmem_region->region_desc->res;
+   u64 aligned_addr;
+
+   if (res->start > phys_addr)
+   continue;
+
+   if (res->end < phys_addr)
+   continue;
+
+   aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+   pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+   if (nvdimm_bus_add_badrange(pmem_region->priv->bus,
+aligned_addr, L1_CACHE_BYTES))
+   pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
+   aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+   nvdimm_region_notify(pmem_region->region,
+NVDIMM_REVALIDATE_POISON);
+
+   break;
+   }
+   mutex_unlock(&pmem_region_lock);
+
+   return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+   .notifier_call = handle_mce_ue
+};
+
 static int of_pmem_region_probe(struct platform_device *pdev)
 {
struct of_pmem_private *priv;
struct device_node *np;
struct nvdimm_bus *bus;
+   struct of_pmem_region *pmem_region;
+   struct nd_region_desc *ndr_desc;
bool is_volatile;
int i;
 
@@ -58,34 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev)
is_volatile ? "volatile" : "non-volatile",  np);
 
for (i = 0; i < pdev->num_resources; i++) {
-   struct nd_region_desc ndr_desc;
struct nd_region *region;
 
-   /*
-* NB: libnvdimm copies the data from ndr_desc into it's own
-* structures so passing a stack pointer is fine.
-*/
-   memset(&ndr_desc, 0, sizeof(ndr_desc));
-   ndr_desc.attr_groups = region_attr_groups;
-   ndr_desc.numa_node = dev_to_node(>dev);
-   ndr_desc.target_node = ndr_desc.numa_node;
-   ndr_desc.res = &pdev->resource[i];
-   ndr_desc.of_node = np;
-   set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+   ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL);
+   if (!ndr_desc) {
+   nvdimm_bus_unregister(priv->bus);
+   kfree(priv);
+   return -ENOMEM;
+   }
+
+   ndr_desc->attr_groups = region_attr_groups;
+   ndr_desc->numa_node = dev_to_node(>dev);
+   ndr_desc->target_node = ndr_desc->numa_node;
+   ndr_desc->res = &pdev->resource[i];
+   ndr_desc->of_node = np;
+   set_bit(ND_REGION_PAGEMAP, &ndr_desc->flags);
 
if (is_volatile)
-   region = nvdimm_volatile_region_create(bus, &ndr_desc);
+   region = nvdimm_volatile_region_create(bus, ndr_desc);
else
-   region = nvdimm_pmem_region_create(bus, &ndr_desc);
+   region = nvdimm_pmem_region_create(bus, ndr_desc);
 
if (!region)
-   dev_warn(>dev, "Unable 

Re: [PATCH v4 13/25] powernv/fadump: support copying multiple kernel memory regions

2019-08-14 Thread Hari Bathini



On 13/08/19 8:33 PM, Mahesh J Salgaonkar wrote:
> On 2019-07-16 17:03:30 Tue, Hari Bathini wrote:
>> Firmware uses 32-bit field for region size while copying/backing-up
>> memory during MPIPL. So, the maximum copy size for a region would
>> be a page less than 4GB (aligned to pagesize) but FADump capture
>> kernel usually needs more memory than that to be preserved to avoid
>> running into out of memory errors.
>>
>> So, request firmware to copy multiple kernel memory regions instead
>> of just one (which worked fine for pseries as 64-bit field was used
>> for size there). With support to copy multiple kernel memory regions,
>> also handle holes in the memory area to be preserved. Support as many
>> as 128 kernel memory regions. This allows having an adequate FADump
>> capture kernel size for different scenarios.
> 
> Can you split this patch into 2 ? One for handling holes in boot memory
> and the other for handling the 4GB region size ? So that it will be easy to
> review changes.

Sure. Let me split and have the patch that handles holes in boot memory
as the last patch in the series.



[PATCH v1 1/4] arm64: dts: ls1028a-rdb: enable emmc hs400 mode

2019-08-14 Thread Yinbo Zhu
This patch enables eMMC HS400 mode for the LS1028A RDB.

Signed-off-by: Yinbo Zhu 
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
index 8a725409e881..f1e46cc4cea1 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
@@ -92,8 +92,10 @@
};
 
&esdhc {
-   status = "okay";
mmc-hs200-1_8v;
+   mmc-hs400-1_8v;
+   bus-width = <8>;
+   status = "okay";
};
 
  {
-- 
2.17.1



[PATCH 4/5] powerpc/ptdump: get out of note_prot_wx() when CONFIG_PPC_DEBUG_WX is not selected.

2019-08-14 Thread Christophe Leroy
When CONFIG_PPC_DEBUG_WX is not selected, note_prot_wx() is useless.

Get out of it early and unconditionally in that case,
so that GCC can kick all the code out.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/ptdump/ptdump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 9a2186c133e6..ab6a572202b4 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -177,7 +177,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
 
 static void note_prot_wx(struct pg_state *st, unsigned long addr)
 {
-   if (!st->check_wx)
+   if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
return;
 
if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == 
pgprot_val(PAGE_KERNEL_X)))
-- 
2.13.3
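
The trick works because IS_ENABLED() expands to a compile-time constant,
letting the compiler prove the rest of the function dead. A generic sketch
(CONFIG_FOO is a placeholder, not a real config symbol):

	static void maybe_check(void)
	{
		if (!IS_ENABLED(CONFIG_FOO))
			return;		/* folded to plain "return" when FOO is off */

		/* everything below, and any data it references, is eliminated */
	}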



Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro

2019-08-14 Thread Christophe Leroy




On 14/08/2019 at 04:08, Paul Mackerras wrote:

On Tue, Aug 13, 2019 at 09:59:35AM +0000, Christophe Leroy wrote:

[snip]


+.macro __LOAD_REG_IMMEDIATE r, x
+   .if \x & ~0xffffffff != 0
+   __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+   rldicr  \r, \r, 32, 31
+   .if (\x) & 0xffff0000 != 0
+   oris \r, \r, (\x)@__AS_ATHIGH
+   .endif
+   .if (\x) & 0xffff != 0
+   oris \r, \r, (\x)@l
+   .endif
+   .else
+   __LOAD_REG_IMMEDIATE_32 \r, \x
+   .endif
+.endm


Doesn't this force all negative constants, even small ones, to use
the long sequence?  For example, __LOAD_REG_IMMEDIATE r3, -1 will
generate (as far as I can see):

li  r3, -1
rldicr  r3, r3, 32, 31
oris	r3, r3, 0xffff
ori	r3, r3, 0xffff

which seems suboptimal.
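
For a small negative constant a single instruction would do, since li
sign-extends its 16-bit immediate (a sketch):

	li	r3, -1		/* r3 = 0xffffffffffffffff on 64-bit */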


Ah yes, thanks. And it is also buggy when \x is over 0x80000000 because
lis is a signed op


I'll send v2

Christophe


[PATCH v2 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE()

2019-08-14 Thread Christophe Leroy
LOAD_MSR_KERNEL() and LOAD_REG_IMMEDIATE() are doing the same thing
in the same way. Drop LOAD_MSR_KERNEL()

Signed-off-by: Christophe Leroy 
---
 v2: no change

 arch/powerpc/kernel/entry_32.S | 18 +-
 arch/powerpc/kernel/head_32.h  | 21 -
 2 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 54fab22c9a43..972b05504a0a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -230,7 +230,7 @@ transfer_to_handler_cont:
 */
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
-   LOAD_MSR_KERNEL(r0, MSR_KERNEL)
+   LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr   SPRN_SRR0,r12
mtspr   SPRN_SRR1,r0
SYNC
@@ -304,7 +304,7 @@ stack_ovf:
addir1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addir9,r9,StackOverflow@l
-   LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
 #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr   SPRN_NRI, r0
 #endif
@@ -324,7 +324,7 @@ trace_syscall_entry_irq_off:
bl  trace_hardirqs_on
 
/* Now enable for real */
-   LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr   r10
 
REST_GPR(0, r1)
@@ -394,7 +394,7 @@ ret_from_syscall:
 #endif
mr  r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
-   LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)  /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
@@ -824,7 +824,7 @@ ret_from_except:
 * can't change between when we test it and when we return
 * from the interrupt. */
/* Note: We don't bother telling lockdep about it */
-   LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC/* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
 
@@ -991,7 +991,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
 * can restart the exception exit path at the label
 * exc_exit_restart below.  -- paulus
 */
-   LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
@@ -1066,7 +1066,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1);\
lwz r3,_MSR(r1);\
andi.   r3,r3,MSR_PR;   \
-   LOAD_MSR_KERNEL(r10,MSR_KERNEL);\
+   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return;\
lwz r0,GPR0(r1);\
lwz r2,GPR2(r1);\
@@ -1236,7 +1236,7 @@ recheck:
 * neither. Those disable/enable cycles used to peek at
 * TI_FLAGS aren't advertised.
 */
-   LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
lwz r9,TI_FLAGS(r2)
@@ -1329,7 +1329,7 @@ _GLOBAL(enter_rtas)
lwz r4,RTASBASE(r4)
mfmsr   r9
stw r9,8(r1)
-   LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+   LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC/* disable interrupts so SRR0/1 */
MTMSRD(r0)  /* don't get trashed */
li  r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..8abc7783dbe5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,19 +5,6 @@
 #include /* for STACK_FRAME_REGS_MARKER */
 
 /*
- * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
- */
-.macro __LOAD_MSR_KERNEL r, x
-.if \x >= 0x8000
-   lis \r, (\x)@h
-   ori \r, \r, (\x)@l
-.else
-   li \r, (\x)
-.endif
-.endm
-#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
-
-/*
  * Exception entry code.  This code runs with address translation
  * turned off, i.e. using physical addresses.
  * We assume sprg3 has the physical address of the current
@@ -92,7 +79,7 @@
 #ifdef CONFIG_40x
rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?) */
 #else
-   LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take 
exceptions */
+   LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take 
exceptions */
MTMSRD(r10) /* (except 

[PATCH v2] powerpc/32s: fix boot failure with DEBUG_PAGEALLOC without KASAN.

2019-08-14 Thread Christophe Leroy
When KASAN is selected, the definitive hash table has to be
set up later, but there is already an early temporary one.

When KASAN is not selected, there is no early hash table,
so the setup of the definitive hash table cannot be delayed.

Reported-by: Jonathan Neuschafer 
Fixes: 72f208c6a8f7 ("powerpc/32s: move hash code patching out of 
MMU_init_hw()")
Tested-by: Jonathan Neuschafer 
Signed-off-by: Christophe Leroy 
---
 v2: Added a comment in MMU_init_hw()

 arch/powerpc/kernel/head_32.S  | 2 ++
 arch/powerpc/mm/book3s32/mmu.c | 9 +
 2 files changed, 11 insertions(+)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index f255e22184b4..c8b4f7ed318c 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -897,9 +897,11 @@ start_here:
bl  machine_init
bl  __save_cpu_setup
bl  MMU_init
+#ifdef CONFIG_KASAN
 BEGIN_MMU_FTR_SECTION
bl  MMU_init_hw_patch
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
 
 /*
  * Go back to running unmapped so we can load up new values
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index e249fbf6b9c3..8d68f03bf5a4 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -358,6 +358,15 @@ void __init MMU_init_hw(void)
hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
if (lg_n_hpteg > 16)
hash_mb2 = 16 - LG_HPTEG_SIZE;
+
+   /*
+* When KASAN is selected, there is already an early temporary hash
+* table and the switch to the final hash table is done later.
+*/
+   if (IS_ENABLED(CONFIG_KASAN))
+   return;
+
+   MMU_init_hw_patch();
 }
 
 void __init MMU_init_hw_patch(void)
-- 
2.13.3



Re: [PATCH v5 2/7] powerpc/kernel: Add ucall_norets() ultravisor call handler

2019-08-14 Thread Michael Ellerman
Claudio Carvalho  writes:
> diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
> new file mode 100644
> index ..de9133e45d21
> --- /dev/null
> +++ b/arch/powerpc/kernel/ucall.S
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Generic code to perform an ultravisor call.
> + *
> + * Copyright 2019, IBM Corporation.
> + *
> + */
> +#include 
> +#include 
> +
> +_GLOBAL(ucall_norets)
> +EXPORT_SYMBOL_GPL(ucall_norets)
> + mfcr    r0
> + stw r0,8(r1)
> +
> + sc  2   /* Invoke the ultravisor */
> +
> + lwz r0,8(r1)
> + mtcrf   0xff,r0
> + blr /* Return r3 = status */

Paulus points out that we shouldn't need to save CR here. Our caller will
have already saved it if it needed to, and we don't use CR in this
function so we don't need to save it.

That's assuming the Ultravisor follows the hcall ABI in which CR2-4 are
non-volatile (PAPR § 14.5.3).

I know plpar_hcall_norets() does save CR, but it shouldn't need to, that
seems to be historical. aka. no one knows why it does it but it always
has.
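
In other words, the whole sequence could presumably shrink to something
like this (a sketch, assuming the Ultravisor does preserve CR2-CR4):

_GLOBAL(ucall_norets)
EXPORT_SYMBOL_GPL(ucall_norets)
	sc	2	/* Invoke the ultravisor */
	blr	/* Return r3 = status */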

cheers


Re: [PATCH v4 14/25] powernv/fadump: process the crashdump by exporting it as /proc/vmcore

2019-08-14 Thread Hari Bathini



On 14/08/19 3:48 PM, Mahesh J Salgaonkar wrote:
> On 2019-07-16 17:03:38 Tue, Hari Bathini wrote:
>> Add support in the kernel to process the crashed kernel's memory
>> preserved during MPIPL and export it as /proc/vmcore file for the
>> userland scripts to filter and analyze it later.
>>
>> Signed-off-by: Hari Bathini 
>> ---
>>  arch/powerpc/platforms/powernv/opal-fadump.c |  190 ++
>>  1 file changed, 187 insertions(+), 3 deletions(-)
>>
> [...]
>> +ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
>> +if ((ret != OPAL_SUCCESS) || !addr) {
>> +pr_err("Failed to get Kernel metadata (%lld)\n", ret);
>> +return 1;
>> +}
>> +
>> +addr = be64_to_cpu(addr);
>> +pr_debug("Kernel metadata addr: %llx\n", addr);
>> +
>> +opal_fdm_active = __va(addr);
>> +r_opal_fdm_active = (void *)addr;
>> +if (r_opal_fdm_active->version != OPAL_FADUMP_VERSION) {
>> +pr_err("FADump active but version (%u) unsupported!\n",
>> +   r_opal_fdm_active->version);
>> +return 1;
>> +}
>> +
>> +/* Kernel regions not registered with f/w  for MPIPL */
>> +if (r_opal_fdm_active->registered_regions == 0) {
>> +opal_fdm_active = NULL;
> 
> What about the partial dump capture scenario ? What if opal crashes while
> the kernel is in the middle of registering ranges ? We may have a partial dump
> captured which won't be useful.
> e.g. If we have a total of 4 ranges to be registered and opal crashes
> after successful registration of only 2 ranges with 2 pending, we will get a
> partial dump which needs to be ignored.
> 
> I think the check should be comparing registered_regions against the total
> number of regions. What do you think ?

Yes, Mahesh.
Taking care of that in 22/25
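
Something along these lines, presumably (a sketch; the field names are
those used elsewhere in the series):

	/* Treat a partially registered dump as if fadump wasn't registered */
	if (r_opal_fdm_active->registered_regions !=
	    r_opal_fdm_active->region_cnt)
		opal_fdm_active = NULL;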

Thanks
Hari



Re: [PATCH v5 4/7] powerpc/mm: Use UV_WRITE_PATE ucall to register a PATE

2019-08-14 Thread Michael Ellerman
Hi Claudio,

Claudio Carvalho  writes:
> From: Michael Anderson 
>
> In ultravisor enabled systems, the ultravisor creates and maintains the
> partition table in secure memory where the hypervisor cannot access, and
   ^
   which?

> therefore, the hypervisor have to do the UV_WRITE_PATE ucall whenever it
^  ^
hasa
> wants to set a partition table entry (PATE).
>
> This patch adds the UV_WRITE_PATE ucall and uses it to set a PATE if
> ultravisor is enabled. Additionally, this also keeps a copy of the
> partition table because the nestMMU does not have access to secure
> memory. Such copy has entries for nonsecure and hypervisor partition.

I'm having trouble parsing the last sentence there.

Or at least it doesn't seem to match the code, or I don't understand
either the code or the comment. More below.

> diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
> index 85bc81abd286..033731f5dbaa 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -213,34 +223,50 @@ void __init mmu_partition_table_init(void)
>   powernv_set_nmmu_ptcr(ptcr);
>  }
>  
> -void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
> -unsigned long dw1)
> +/*
> + * Global flush of TLBs and partition table caches for this lpid. The type of
> + * flush (hash or radix) depends on what the previous use of this partition 
> ID
> + * was, not the new use.
> + */
> +static void flush_partition(unsigned int lpid, unsigned long old_patb0)

A nicer API would be for the 2nd param to be a "bool radix", and have
the caller worry about the fact that it comes from (patb0 & PATB_HR).
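
i.e. something like this (a sketch of the suggested shape only):

	static void flush_partition(unsigned int lpid, bool radix);
	...
	/* caller does the PATB_HR test: */
	flush_partition(lpid, old & PATB_HR);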

>  {
> - unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
> -
> - partition_tb[lpid].patb0 = cpu_to_be64(dw0);
> - partition_tb[lpid].patb1 = cpu_to_be64(dw1);
> -
> - /*
> -  * Global flush of TLBs and partition table caches for this lpid.
> -  * The type of flush (hash or radix) depends on what the previous
> -  * use of this partition ID was, not the new use.
> -  */
>   asm volatile("ptesync" : : : "memory");
> - if (old & PATB_HR) {
> - asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
> + if (old_patb0 & PATB_HR) {
> + asm volatile(PPC_TLBIE_5(%0, %1, 2, 0, 1) : :
>"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
> - asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
> + asm volatile(PPC_TLBIE_5(%0, %1, 2, 1, 1) : :

That looks like an unrelated whitespace change.

>"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
>   trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
>   } else {
> - asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
> + asm volatile(PPC_TLBIE_5(%0, %1, 2, 0, 0) : :

Ditto.

>"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
>   trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
>   }
>   /* do we need fixup here ?*/
>   asm volatile("eieio; tlbsync; ptesync" : : : "memory");
>  }
> +
> +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
> +   unsigned long dw1)
> +{
> + unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
> +
> + partition_tb[lpid].patb0 = cpu_to_be64(dw0);
> + partition_tb[lpid].patb1 = cpu_to_be64(dw1);

ie. here we always update the copy of the partition table, regardless of
whether we're running under an ultravisor or not. So the copy is a
complete copy isn't it?

> + /*
> +  * In ultravisor enabled systems, the ultravisor maintains the partition
> +  * table in secure memory where we don't have access, therefore, we have
> +  * to do a ucall to set an entry.
> +  */
> + if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
> + uv_register_pate(lpid, dw0, dw1);
> + pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 
> 0x%lx\n",
> + dw0, dw1);
> + } else {
> + flush_partition(lpid, old);
> + }

What is different is whether we flush or not.

And don't we still need to do the flush for the nestMMU? I assume we're
saying the ultravisor will broadcast a flush for us, which will also
handle the nestMMU case?

cheers


Re: [PATCH v5 5/7] powerpc/mm: Write to PTCR only if ultravisor disabled

2019-08-14 Thread Michael Ellerman
Claudio Carvalho  writes:
> In ultravisor enabled systems, PTCR becomes ultravisor privileged only
> for writing and an attempt to write to it will cause a Hypervisor
> Emulation Assistance interrupt.
>
> This patch adds the try_set_ptcr(val) macro as an accessor to
> mtspr(SPRN_PTCR, val), which will be executed only if the ultravisor
> is disabled.
>
> Signed-off-by: Claudio Carvalho 
> ---
>  arch/powerpc/include/asm/reg.h   | 13 +
>  arch/powerpc/mm/book3s64/hash_utils.c|  4 ++--
>  arch/powerpc/mm/book3s64/pgtable.c   |  2 +-
>  arch/powerpc/mm/book3s64/radix_pgtable.c |  6 +++---
>  4 files changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 10caa145f98b..14139b1ebdb8 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 

reg.h is already too big and unwieldy.

Can you put this in ultravisor.h and include that in the appropriate places.

> @@ -1452,6 +1453,18 @@ static inline void update_power8_hid0(unsigned long 
> hid0)
>*/
>   asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
>  }
> +
> +/*
> + * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
> + * writing and an attempt to write to it will cause a Hypervisor Emulation
> + * Assistance interrupt.
> + */
> +#define try_set_ptcr(val)\
> + do {\
> + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))   \
> + mtspr(SPRN_PTCR, val);  \
> + } while (0)

This should be a static inline please, not a macro.

Sorry, I don't like the name; we're not trying to set it, we know when
to set it and when not to.

It is awkward to come up with a good name because we don't have a term
for "hypervisor that's not running under an ultravisor".

Maybe set_ptcr_when_no_uv()

Which is kinda messy, someone feel free to come up with something
better.
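
ie. an untested sketch of what I mean, living in ultravisor.h:

  static inline void set_ptcr_when_no_uv(u64 val)
  {
          if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
                  mtspr(SPRN_PTCR, val);
  }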

I also see some more accesses to the PTCR in
arch/powerpc/platforms/powernv/idle.c which you haven't patched?

cheers


[PATCH 3/5] powerpc/ptdump: drop dummy KERN_VIRT_START on PPC32

2019-08-14 Thread Christophe Leroy
PPC32 doesn't have KERN_VIRT_START. Make PAGE_OFFSET the
default starting address for the dump, and drop the dummy
definition of KERN_VIRT_START. Only use KERN_VIRT_START for
non-radix PPC64.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/ptdump/ptdump.c | 18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 74ff2bff4ea0..9a2186c133e6 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -26,10 +26,6 @@
 
 #include "ptdump.h"
 
-#ifdef CONFIG_PPC32
-#define KERN_VIRT_START0
-#endif
-
 /*
  * To visualise what is happening,
  *
@@ -362,12 +358,13 @@ static int ptdump_show(struct seq_file *m, void *v)
struct pg_state st = {
.seq = m,
.marker = address_markers,
+   .start_address = PAGE_OFFSET,
};
 
-   if (radix_enabled())
-   st.start_address = PAGE_OFFSET;
-   else
+#ifdef CONFIG_PPC64
+   if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
 
/* Traverse kernel page tables */
walk_pagetables();
@@ -405,12 +402,13 @@ void ptdump_check_wx(void)
.seq = NULL,
.marker = address_markers,
.check_wx = true,
+   .start_address = PAGE_OFFSET,
};
 
-   if (radix_enabled())
-   st.start_address = PAGE_OFFSET;
-   else
+#ifdef CONFIG_PPC64
+   if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
 
walk_pagetables();
 
-- 
2.13.3



[PATCH 1/5] powerpc/ptdump: fix addresses display on PPC32

2019-08-14 Thread Christophe Leroy
Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")
wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a
shift in the displayed addresses.

Let's revert that change to resync walk_pagetables()'s addr val and
pgd_t pointer for PPC32.

Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")
Cc: sta...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/ptdump/ptdump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 6a88a9f585d4..3ad64fc11419 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -27,7 +27,7 @@
 #include "ptdump.h"
 
 #ifdef CONFIG_PPC32
-#define KERN_VIRT_STARTPAGE_OFFSET
+#define KERN_VIRT_START0
 #endif
 
 /*
-- 
2.13.3



Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-08-14 Thread Santosh Sivaraj
Hi Balbir,

Balbir Singh  writes:

> On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
>> If we take a UE on one of the instructions with a fixup entry, set nip
>> to continue execution at the fixup entry. Stop processing the event
>> further, and don't print it.
>> 
>> Co-developed-by: Reza Arbab 
>> Signed-off-by: Reza Arbab 
>> Cc: Mahesh Salgaonkar 
>> Signed-off-by: Santosh Sivaraj 
>> ---
>
> Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it
> should still have my author tag and signed-off-by

Originally, when I received the series for posting, it had Reza's authorship
and signed-off-by; since the patch changed significantly, I added
Co-developed-by for Reza. I will update it in the next spin.

https://lore.kernel.org/linuxppc-dev/20190702051932.511-1-sant...@fossix.org/

Santosh
>
> Balbir Singh
>
>>  arch/powerpc/include/asm/mce.h  |  4 +++-
>>  arch/powerpc/kernel/mce.c   | 16 
>>  arch/powerpc/kernel/mce_power.c | 15 +--
>>  3 files changed, 32 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
>> index f3a6036b6bc0..e1931c8c2743 100644
>> --- a/arch/powerpc/include/asm/mce.h
>> +++ b/arch/powerpc/include/asm/mce.h
>> @@ -122,7 +122,8 @@ struct machine_check_event {
>>  enum MCE_UeErrorType ue_error_type:8;
>>  u8  effective_address_provided;
>>  u8  physical_address_provided;
>> -u8  reserved_1[5];
>> +u8  ignore_event;
>> +u8  reserved_1[4];
>>  u64 effective_address;
>>  u64 physical_address;
>>  u8  reserved_2[8];
>> @@ -193,6 +194,7 @@ struct mce_error_info {
>>  enum MCE_Initiator  initiator:8;
>>  enum MCE_ErrorClass error_class:8;
>>  boolsync_error;
>> +boolignore_event;
>>  };
>>  
>>  #define MAX_MC_EVT  100
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index a3b122a685a5..ec4b3e1087be 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>>  if (phys_addr != ULONG_MAX) {
>>  mce->u.ue_error.physical_address_provided = true;
>>  mce->u.ue_error.physical_address = phys_addr;
>> +mce->u.ue_error.ignore_event = mce_err->ignore_event;
>>  machine_check_ue_event(mce);
>>  }
>>  }
>> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct 
>> *work)
>>  /*
>>   * This should probably queued elsewhere, but
>>   * oh! well
>> + *
>> + * Don't report this machine check because the caller has
>> + * asked us to ignore the event; it has a fixup handler which
>> + * will do the appropriate error handling and reporting.
>>   */
>>  if (evt->error_type == MCE_ERROR_TYPE_UE) {
>> +if (evt->u.ue_error.ignore_event) {
>> +__this_cpu_dec(mce_ue_count);
>> +continue;
>> +}
>> +
>>  if (evt->u.ue_error.physical_address_provided) {
>>  unsigned long pfn;
>>  
>> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct 
>> irq_work *work)
>>  while (__this_cpu_read(mce_queue_count) > 0) {
>>  index = __this_cpu_read(mce_queue_count) - 1;
>> evt = this_cpu_ptr(&mce_event_queue[index]);
>> +
>> +if (evt->error_type == MCE_ERROR_TYPE_UE &&
>> +evt->u.ue_error.ignore_event) {
>> +__this_cpu_dec(mce_queue_count);
>> +continue;
>> +}
>>  machine_check_print_event_info(evt, false, false);
>>  __this_cpu_dec(mce_queue_count);
>>  }
>> diff --git a/arch/powerpc/kernel/mce_power.c 
>> b/arch/powerpc/kernel/mce_power.c
>> index e74816f045f8..1dd87f6f5186 100644
>> --- a/arch/powerpc/kernel/mce_power.c
>> +++ b/arch/powerpc/kernel/mce_power.c
>> @@ -11,6 +11,7 @@
>>  
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -18,6 +19,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  /*
>>   * Convert an address related to an mm to a physical address.
>> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>>  return 0;
>>  }
>>  
>> -static long mce_handle_ue_error(struct pt_regs *regs)
>> +static long mce_handle_ue_error(struct pt_regs *regs,
>> +struct mce_error_info *mce_err)
>>  {
>>  long handled = 0;
>> +const struct 

[PATCH v10 2/7] powerpc/mce: Fix MCE handling for huge pages

2019-08-14 Thread Santosh Sivaraj
From: Balbir Singh 

The current code would fail on huge page addresses, since the shift would
be incorrect. Use the correct page shift value returned by
__find_linux_pte() to get the correct physical address. The code is more
generic and can handle both regular and compound pages.

Fixes: ba41e1e1ccb9 ("powerpc/mce: Hookup derror (load/store) UE errors")
Signed-off-by: Balbir Singh 
[ar...@linux.ibm.com: Fixup pseries_do_memory_failure()]
Signed-off-by: Reza Arbab 
Co-developed-by: Santosh Sivaraj 
Signed-off-by: Santosh Sivaraj 
Tested-by: Mahesh Salgaonkar 
Cc: sta...@vger.kernel.org # v4.15+
---
 arch/powerpc/include/asm/mce.h   |  2 +-
 arch/powerpc/kernel/mce_power.c  | 55 ++--
 arch/powerpc/platforms/pseries/ras.c |  9 ++---
 3 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index a4c6a74ad2fb..f3a6036b6bc0 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -209,7 +209,7 @@ extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt,
   bool user_mode, bool in_guest);
-unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
+unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr);
 #ifdef CONFIG_PPC_BOOK3S_64
 void flush_and_reload_slb(void);
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index a814d2dfb5b0..e74816f045f8 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -20,13 +20,14 @@
 #include 
 
 /*
- * Convert an address related to an mm to a PFN. NOTE: we are in real
- * mode, we could potentially race with page table updates.
+ * Convert an address related to an mm to a physical address.
+ * NOTE: we are in real mode, we could potentially race with page table 
updates.
  */
-unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr)
 {
-   pte_t *ptep;
-   unsigned long flags;
+   pte_t *ptep, pte;
+   unsigned int shift;
+   unsigned long flags, phys_addr;
struct mm_struct *mm;
 
if (user_mode(regs))
@@ -35,14 +36,21 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned 
long addr)
mm = &init_mm;
 
local_irq_save(flags);
-   if (mm == current->mm)
-   ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
-   else
-   ptep = find_init_mm_pte(addr, NULL);
+   ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
local_irq_restore(flags);
+
if (!ptep || pte_special(*ptep))
return ULONG_MAX;
-   return pte_pfn(*ptep);
+
+   pte = *ptep;
+   if (shift > PAGE_SHIFT) {
+   unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+
+   pte = __pte(pte_val(pte) | (addr & rpnmask));
+   }
+   phys_addr = pte_pfn(pte) << PAGE_SHIFT;
+
+   return phys_addr;
 }
 
 /* flush SLBs and reload */
@@ -344,7 +352,7 @@ static const struct mce_derror_table mce_p9_derror_table[] 
= {
   MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0, false, 0, 0, 0, 0, 0 } };
 
-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
 {
/*
@@ -354,18 +362,16 @@ static int mce_find_instr_ea_and_pfn(struct pt_regs 
*regs, uint64_t *addr,
 * faults
 */
int instr;
-   unsigned long pfn, instr_addr;
+   unsigned long instr_addr;
struct instruction_op op;
struct pt_regs tmp = *regs;
 
-   pfn = addr_to_pfn(regs, regs->nip);
-   if (pfn != ULONG_MAX) {
-   instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
+   instr_addr = addr_to_phys(regs, regs->nip) + (regs->nip & ~PAGE_MASK);
+   if (instr_addr != ULONG_MAX) {
instr = *(unsigned int *)(instr_addr);
if (!analyse_instr(, , instr)) {
-   pfn = addr_to_pfn(regs, op.ea);
*addr = op.ea;
-   *phys_addr = (pfn << PAGE_SHIFT);
+   *phys_addr = addr_to_phys(regs, op.ea);
return 0;
}
/*
@@ -440,15 +446,9 @@ static int mce_handle_ierror(struct pt_regs *regs,
*addr = regs->nip;
if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
-   unsigned long pfn;
-
-   if (get_paca()->in_mce < MAX_MCE_DEPTH) {
-   pfn = addr_to_pfn(regs, regs->nip);
-  

[PATCH 3/6] powerpc: Convert flush_icache_range & friends to C

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

Similar to commit 22e9c88d486a
("powerpc/64: reuse PPC32 static inline flush_dcache_range()")
this patch converts flush_icache_range() to C, and reimplements the
following functions as wrappers around it:
__flush_dcache_icache
__flush_dcache_icache_phys

This was done because we discovered a long-standing bug where the length of
the range was truncated due to using a 32 bit shift instead of a 64 bit one.

Converting these functions to C makes them easier to maintain.

Signed-off-by: Alastair D'Silva 
---
 arch/powerpc/include/asm/cache.h  |  26 +++---
 arch/powerpc/include/asm/cacheflush.h |  32 ---
 arch/powerpc/kernel/misc_32.S | 117 --
 arch/powerpc/kernel/misc_64.S |  97 -
 arch/powerpc/mm/mem.c |  71 +++-
 5 files changed, 102 insertions(+), 241 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index f852d5cd746c..728f154204db 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -98,20 +98,7 @@ static inline u32 l1_icache_bytes(void)
 #endif
 #endif /* ! __ASSEMBLY__ */
 
-#if defined(__ASSEMBLY__)
-/*
- * For a snooping icache, we still need a dummy icbi to purge all the
- * prefetched instructions from the ifetch buffers. We also need a sync
- * before the icbi to order the the actual stores to memory that might
- * have modified instructions with the icbi.
- */
-#define PURGE_PREFETCHED_INS   \
-   sync;   \
-   icbi0,r3;   \
-   sync;   \
-   isync
-
-#else
+#if !defined(__ASSEMBLY__)
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -145,6 +132,17 @@ static inline void dcbst(void *addr)
 {
__asm__ __volatile__ ("dcbst %y0" : : "Z"(*(u8 *)addr) : "memory");
 }
+
+static inline void icbi(void *addr)
+{
+   __asm__ __volatile__ ("icbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void iccci(void)
+{
+   __asm__ __volatile__ ("iccci 0, r0");
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index ed57843ef452..4c3377aff8ed 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -42,24 +42,18 @@ extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)  do { } while (0)
 
-extern void flush_icache_range(unsigned long, unsigned long);
+void flush_icache_range(unsigned long start, unsigned long stop);
 extern void flush_icache_user_range(struct vm_area_struct *vma,
struct page *page, unsigned long addr,
int len);
-extern void __flush_dcache_icache(void *page_va);
 extern void flush_dcache_icache_page(struct page *page);
-#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
-extern void __flush_dcache_icache_phys(unsigned long physaddr);
-#else
-static inline void __flush_dcache_icache_phys(unsigned long physaddr)
-{
-   BUG();
-}
-#endif
 
-/*
- * Write any modified data cache blocks out to memory and invalidate them.
+/**
+ * flush_dcache_range(): Write any modified data cache blocks out to memory 
and invalidate them.
  * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
  */
 static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 {
@@ -82,6 +76,20 @@ static inline void flush_dcache_range(unsigned long start, 
unsigned long stop)
isync();
 }
 
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @page: the address of the page to flush
+ */
+static inline void __flush_dcache_icache(void *page)
+{
+   unsigned long page_addr = (unsigned long)page;
+
+   flush_icache_range(page_addr, page_addr + PAGE_SIZE);
+}
+
 /*
  * Write any modified data cache blocks out to memory.
  * Does not invalidate the corresponding cache lines (especially for
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index fe4bd321730e..12b95e6799d4 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -318,123 +318,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
 EXPORT_SYMBOL(flush_instruction_cache)
 #endif /* CONFIG_PPC_8xx */
 
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- * This is a no-op on the 601.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_icache_range)
-BEGIN_FTR_SECTION
-   
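
The rest of the conversion is truncated above; a rough sketch of the C
replacement it describes, reusing the dcbst()/icbi() helpers added to
asm/cache.h and assuming the l1_{d,i}cache_bytes()/shift() helpers from
patch 2 of this series (illustrative only, not the literal patch body):

void flush_icache_range(unsigned long start, unsigned long stop)
{
        unsigned long shift = l1_dcache_shift();
        unsigned long bytes = l1_dcache_bytes();
        unsigned long addr = start & ~(bytes - 1);
        unsigned long size = stop - addr + (bytes - 1);
        unsigned long i;

        /* Write out modified data cache blocks so the icache sees them */
        for (i = 0; i < size >> shift; i++, addr += bytes)
                dcbst((void *)addr);
        mb();   /* sync */

        shift = l1_icache_shift();
        bytes = l1_icache_bytes();
        addr = start & ~(bytes - 1);
        size = stop - addr + (bytes - 1);

        /* Invalidate the now-stale instruction cache blocks */
        for (i = 0; i < size >> shift; i++, addr += bytes)
                icbi((void *)addr);
        mb();   /* sync */
        isync();
}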

[PATCH 4/6] powerpc: Chunk calls to flush_dcache_range in arch_*_memory

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

When presented with large amounts of memory being hotplugged
(in my test case, ~890GB), the call to flush_dcache_range takes
a while (~50 seconds), triggering RCU stalls.

This patch breaks up the call into 16GB chunks, calling
cond_resched() in between to allow the scheduler to run.

Signed-off-by: Alastair D'Silva 
---
 arch/powerpc/mm/mem.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5400da87a804..fb0d5e9aa11b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, 
unsigned long end)
return -ENODEV;
 }
 
+#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
struct mhp_restrictions *restrictions)
 {
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+   unsigned long i;
int rc;
 
resize_hpt_for_hotplug(memblock_phys_mem_size());
@@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
start, start + size, rc);
return -EFAULT;
}
-   flush_dcache_range(start, start + size);
+
+   for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+   flush_dcache_range(start + i, min(start + size, start + i + 
FLUSH_CHUNK_SIZE));
+   cond_resched();
+   }
 
return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
@@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64 start, u64 
size,
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+   unsigned long i;
int ret;
 
__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
 
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
-   flush_dcache_range(start, start + size);
+   for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+   flush_dcache_range(start + i, min(start + size, start + i + 
FLUSH_CHUNK_SIZE));
+   cond_resched();
+   }
+
ret = remove_section_mapping(start, start + size);
WARN_ON_ONCE(ret);
 
-- 
2.21.0



[PATCH v4 3/3] x86/kasan: support KASAN_VMALLOC

2019-08-14 Thread Daniel Axtens
In the case where KASAN directly allocates memory to back vmalloc
space, don't map the early shadow page over it.

We prepopulate pgds/p4ds for the range that would otherwise be empty.
This is required to get it synced to hardware on boot, allowing the
lower levels of the page tables to be filled dynamically.

Acked-by: Dmitry Vyukov 
Signed-off-by: Daniel Axtens 

---

v2: move from faulting in shadow pgds to prepopulating
---
 arch/x86/Kconfig|  1 +
 arch/x86/mm/kasan_init_64.c | 61 +
 2 files changed, 62 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 222855cc0158..40562cc3771f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -134,6 +134,7 @@ config X86
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN  if X86_64
+   select HAVE_ARCH_KASAN_VMALLOC  if X86_64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS  if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 296da58f3013..2f57c4ddff61 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -245,6 +245,52 @@ static void __init kasan_map_early_shadow(pgd_t *pgd)
} while (pgd++, addr = next, addr != end);
 }
 
+static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,
+   unsigned long addr,
+   unsigned long end,
+   int nid)
+{
+   p4d_t *p4d;
+   unsigned long next;
+   void *p;
+
+   p4d = p4d_offset(pgd, addr);
+   do {
+   next = p4d_addr_end(addr, end);
+
+   if (p4d_none(*p4d)) {
+   p = early_alloc(PAGE_SIZE, nid, true);
+   p4d_populate(_mm, p4d, p);
+   }
+   } while (p4d++, addr = next, addr != end);
+}
+
+static void __init kasan_shallow_populate_pgds(void *start, void *end)
+{
+   unsigned long addr, next;
+   pgd_t *pgd;
+   void *p;
+   int nid = early_pfn_to_nid((unsigned long)start);
+
+   addr = (unsigned long)start;
+   pgd = pgd_offset_k(addr);
+   do {
+   next = pgd_addr_end(addr, (unsigned long)end);
+
+   if (pgd_none(*pgd)) {
+   p = early_alloc(PAGE_SIZE, nid, true);
+   pgd_populate(_mm, pgd, p);
+   }
+
+   /*
+* we need to populate p4ds to be synced when running in
+* four level mode - see sync_global_pgds_l4()
+*/
+   kasan_shallow_populate_p4ds(pgd, addr, next, nid);
+   } while (pgd++, addr = next, addr != (unsigned long)end);
+}
+
+
 #ifdef CONFIG_KASAN_INLINE
 static int kasan_die_handler(struct notifier_block *self,
 unsigned long val,
@@ -352,9 +398,24 @@ void __init kasan_init(void)
shadow_cpu_entry_end = (void *)round_up(
(unsigned long)shadow_cpu_entry_end, PAGE_SIZE);
 
+   /*
+* If we're in full vmalloc mode, don't back vmalloc space with early
+* shadow pages. Instead, prepopulate pgds/p4ds so they are synced to
+* the global table and we can populate the lower levels on demand.
+*/
+#ifdef CONFIG_KASAN_VMALLOC
+   kasan_shallow_populate_pgds(
+   kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+   kasan_mem_to_shadow((void *)VMALLOC_END));
+
+   kasan_populate_early_shadow(
+   kasan_mem_to_shadow((void *)VMALLOC_END + 1),
+   shadow_cpu_entry_begin);
+#else
kasan_populate_early_shadow(
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
shadow_cpu_entry_begin);
+#endif
 
kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
  (unsigned long)shadow_cpu_entry_end, 0);
-- 
2.20.1



[PATCH v10 5/7] powerpc/memcpy: Add memcpy_mcsafe for pmem

2019-08-14 Thread Santosh Sivaraj
From: Balbir Singh 

The pmem infrastructure uses memcpy_mcsafe in the pmem layer so that a
machine check exception encountered during the memcpy is converted into
a return value on failure. The return value is the number of bytes
remaining to be copied.

This patch largely borrows from the copyuser_power7 logic and does not add
the VMX optimizations, mainly to keep the patch simple. If needed those
optimizations can be folded in.

Signed-off-by: Balbir Singh 
[ar...@linux.ibm.com: Added symbol export]
Co-developed-by: Santosh Sivaraj 
Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/include/asm/string.h   |   2 +
 arch/powerpc/lib/Makefile   |   2 +-
 arch/powerpc/lib/memcpy_mcsafe_64.S | 242 
 3 files changed, 245 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/lib/memcpy_mcsafe_64.S

diff --git a/arch/powerpc/include/asm/string.h 
b/arch/powerpc/include/asm/string.h
index 9bf6dffb4090..b72692702f35 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,7 +53,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t 
n);
 #ifndef CONFIG_KASAN
 #define __HAVE_ARCH_MEMSET32
 #define __HAVE_ARCH_MEMSET64
+#define __HAVE_ARCH_MEMCPY_MCSAFE
 
+extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
 extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
 extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index eebc782d89a5..fa6b1b657b43 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o 
copypage_power7.o \
   memcpy_power7.o
 
 obj64-y+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-  memcpy_64.o pmem.o
+  memcpy_64.o pmem.o memcpy_mcsafe_64.o
 
 obj64-$(CONFIG_SMP)+= locks.o
 obj64-$(CONFIG_ALTIVEC)+= vmx-helper.o
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S 
b/arch/powerpc/lib/memcpy_mcsafe_64.S
new file mode 100644
index ..949976dc115d
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_mcsafe_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard 
+ * Author - Balbir Singh 
+ */
+#include 
+#include 
+#include 
+
+   .macro err1
+100:
+   EX_TABLE(100b,.Ldo_err1)
+   .endm
+
+   .macro err2
+200:
+   EX_TABLE(200b,.Ldo_err2)
+   .endm
+
+   .macro err3
+300:   EX_TABLE(300b,.Ldone)
+   .endm
+
+.Ldo_err2:
+   ld  r22,STK_REG(R22)(r1)
+   ld  r21,STK_REG(R21)(r1)
+   ld  r20,STK_REG(R20)(r1)
+   ld  r19,STK_REG(R19)(r1)
+   ld  r18,STK_REG(R18)(r1)
+   ld  r17,STK_REG(R17)(r1)
+   ld  r16,STK_REG(R16)(r1)
+   ld  r15,STK_REG(R15)(r1)
+   ld  r14,STK_REG(R14)(r1)
+   addir1,r1,STACKFRAMESIZE
+.Ldo_err1:
+   /* Do a byte by byte copy to get the exact remaining size */
+   mtctr   r7
+46:
+err3;  lbz r0,0(r4)
+   addir4,r4,1
+err3;  stb r0,0(r3)
+   addir3,r3,1
+   bdnz46b
+   li  r3,0
+   blr
+
+.Ldone:
+   mfctr   r3
+   blr
+
+
+_GLOBAL(memcpy_mcsafe)
+   mr  r7,r5
+   cmpldi  r5,16
+   blt .Lshort_copy
+
+.Lcopy:
+   /* Get the source 8B aligned */
+   neg r6,r4
+   mtocrf  0x01,r6
+   clrldi  r6,r6,(64-3)
+
+   bf  cr7*4+3,1f
+err1;  lbz r0,0(r4)
+   addir4,r4,1
+err1;  stb r0,0(r3)
+   addir3,r3,1
+   subir7,r7,1
+
+1: bf  cr7*4+2,2f
+err1;  lhz r0,0(r4)
+   addir4,r4,2
+err1;  sth r0,0(r3)
+   addir3,r3,2
+   subir7,r7,2
+
+2: bf  cr7*4+1,3f
+err1;  lwz r0,0(r4)
+   addir4,r4,4
+err1;  stw r0,0(r3)
+   addir3,r3,4
+   subir7,r7,4
+
+3: sub r5,r5,r6
+   cmpldi  r5,128
+   blt 5f
+
+   mflrr0
+   stdur1,-STACKFRAMESIZE(r1)
+   std r14,STK_REG(R14)(r1)
+   std r15,STK_REG(R15)(r1)
+   std r16,STK_REG(R16)(r1)
+   std r17,STK_REG(R17)(r1)
+   std r18,STK_REG(R18)(r1)
+   std r19,STK_REG(R19)(r1)
+   std r20,STK_REG(R20)(r1)
+   std r21,STK_REG(R21)(r1)
+   std r22,STK_REG(R22)(r1)
+   std r0,STACKFRAMESIZE+16(r1)
+
+   srdir6,r5,7
+   mtctr   r6
+
+   /* Now do cacheline (128B) sized loads and stores. */
+   .align  5
+4:
+err2;  ld  r0,0(r4)
+err2;  ld  r6,8(r4)
+err2;  ld  r8,16(r4)
+err2;  ld  r9,24(r4)
+err2;  ld  r10,32(r4)
+err2;  ld  r11,40(r4)
+err2;  ld  r12,48(r4)
+err2;  ld  r14,56(r4)
+err2;  ld  r15,64(r4)
+err2;  ld  
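
The rest of the asm is truncated above; for context, a caller consumes the
bytes-remaining return value roughly like this (hypothetical sketch, not
part of the patch):

        int rem;

        rem = memcpy_mcsafe(dst, src, len);
        if (rem)
                return -EIO;    /* an MCE hit; rem bytes were not copied */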

[PATCH 1/6] powerpc: Allow flush_icache_range to work across ranges >4GB

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

When calling flush_icache_range with a size >4GB, we were masking
off the upper 32 bits, so we would incorrectly flush a range smaller
than intended.

This patch replaces the 32 bit shifts with 64 bit ones, so that
the full size is accounted for.

Signed-off-by: Alastair D'Silva 
Cc: sta...@vger.kernel.org
---
 arch/powerpc/kernel/misc_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..9bc0aa9aeb65 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -82,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subfr8,r6,r4/* compute length */
add r8,r8,r5/* ensure we get enough */
lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of cache block 
size */
-   srw.r8,r8,r9/* compute line count */
+   srd.r8,r8,r9/* compute line count */
beqlr   /* nothing to do? */
mtctr   r8
 1: dcbst   0,r6
@@ -98,7 +98,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subfr8,r6,r4/* compute length */
add r8,r8,r5
lwz r9,ICACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of Icache block 
size */
-   srw.r8,r8,r9/* compute line count */
+   srd.r8,r8,r9/* compute line count */
beqlr   /* nothing to do? */
mtctr   r8
 2: icbi0,r6
-- 
2.21.0



[PATCH 0/6] powerpc: convert cache asm to C

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

This series addresses a few issues discovered in how we flush caches:
1. Flushes were truncated at 4GB, so larger flushes were incorrect.
2. Flushing the dcache in arch_add_memory was unnecessary

This series also converts much of the cache assembler to C, with the
aim of making it easier to maintain.

Alastair D'Silva (6):
  powerpc: Allow flush_icache_range to work across ranges >4GB
  powerpc: define helpers to get L1 icache sizes
  powerpc: Convert flush_icache_range & friends to C
  powerpc: Chunk calls to flush_dcache_range in arch_*_memory
  powerpc: Remove 'extern' from func prototypes in cache headers
  powerpc: Don't flush caches when adding memory

 arch/powerpc/include/asm/cache.h  |  63 +-
 arch/powerpc/include/asm/cacheflush.h |  49 ++-
 arch/powerpc/kernel/misc_32.S | 117 --
 arch/powerpc/kernel/misc_64.S |  97 -
 arch/powerpc/mm/mem.c |  80 +-
 5 files changed, 146 insertions(+), 260 deletions(-)

-- 
2.21.0



[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #25 from Christophe Leroy (christophe.le...@c-s.fr) ---
You can use get_zeroed_page(GFP_NOFS) instead of __get_free_page(GFP_NOFS |
__GFP_ZERO)
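
i.e. the two are equivalent:

        unsigned long p;

        p = __get_free_page(GFP_NOFS | __GFP_ZERO);
        p = get_zeroed_page(GFP_NOFS);  /* same thing, shorter */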

-- 
You are receiving this mail because:
You are on the CC list for the bug.

[PATCH v5 06/18] compat_ioctl: move WDIOC handling into wdt drivers

2019-08-14 Thread Arnd Bergmann
All watchdog drivers implement the same set of ioctl commands, and
fortunately all of them are compatible between 32-bit and 64-bit
architectures.

Modern drivers always go through drivers/watchdog/wdt.c as an abstraction
layer, but older ones implement their own file_operations on a character
device for this.

Move the handling from fs/compat_ioctl.c into the individual drivers.

Note that most of the legacy drivers will never be used on 64-bit
hardware, because they are for an old 32-bit SoC implementation, but
doing them all at once is safer than trying to guess which ones do
or do not need the compat_ioctl handling.
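
For reference, the compat_ptr_ioctl() helper used below is essentially the
following (a sketch of the helper added earlier in this series, not quoted
verbatim):

long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        if (!file->f_op->unlocked_ioctl)
                return -ENOIOCTLCMD;

        return file->f_op->unlocked_ioctl(file, cmd,
                                          (unsigned long)compat_ptr(arg));
}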

Signed-off-by: Arnd Bergmann 
---
 arch/powerpc/platforms/52xx/mpc52xx_gpt.c |  1 +
 arch/um/drivers/harddog_kern.c|  1 +
 drivers/char/ipmi/ipmi_watchdog.c |  1 +
 drivers/hwmon/fschmd.c|  1 +
 drivers/rtc/rtc-ds1374.c  |  1 +
 drivers/watchdog/acquirewdt.c |  1 +
 drivers/watchdog/advantechwdt.c   |  1 +
 drivers/watchdog/alim1535_wdt.c   |  1 +
 drivers/watchdog/alim7101_wdt.c   |  1 +
 drivers/watchdog/ar7_wdt.c|  1 +
 drivers/watchdog/at91rm9200_wdt.c |  1 +
 drivers/watchdog/ath79_wdt.c  |  1 +
 drivers/watchdog/bcm63xx_wdt.c|  1 +
 drivers/watchdog/cpu5wdt.c|  1 +
 drivers/watchdog/eurotechwdt.c|  1 +
 drivers/watchdog/f71808e_wdt.c|  1 +
 drivers/watchdog/gef_wdt.c|  1 +
 drivers/watchdog/geodewdt.c   |  1 +
 drivers/watchdog/ib700wdt.c   |  1 +
 drivers/watchdog/ibmasr.c |  1 +
 drivers/watchdog/indydog.c|  1 +
 drivers/watchdog/intel_scu_watchdog.c |  1 +
 drivers/watchdog/iop_wdt.c|  1 +
 drivers/watchdog/it8712f_wdt.c|  1 +
 drivers/watchdog/ixp4xx_wdt.c |  1 +
 drivers/watchdog/ks8695_wdt.c |  1 +
 drivers/watchdog/m54xx_wdt.c  |  1 +
 drivers/watchdog/machzwd.c|  1 +
 drivers/watchdog/mixcomwd.c   |  1 +
 drivers/watchdog/mtx-1_wdt.c  |  1 +
 drivers/watchdog/mv64x60_wdt.c|  1 +
 drivers/watchdog/nuc900_wdt.c |  1 +
 drivers/watchdog/nv_tco.c |  1 +
 drivers/watchdog/pc87413_wdt.c|  1 +
 drivers/watchdog/pcwd.c   |  1 +
 drivers/watchdog/pcwd_pci.c   |  1 +
 drivers/watchdog/pcwd_usb.c   |  1 +
 drivers/watchdog/pika_wdt.c   |  1 +
 drivers/watchdog/pnx833x_wdt.c|  1 +
 drivers/watchdog/rc32434_wdt.c|  1 +
 drivers/watchdog/rdc321x_wdt.c|  1 +
 drivers/watchdog/riowd.c  |  1 +
 drivers/watchdog/sa1100_wdt.c |  1 +
 drivers/watchdog/sb_wdog.c|  1 +
 drivers/watchdog/sbc60xxwdt.c |  1 +
 drivers/watchdog/sbc7240_wdt.c|  1 +
 drivers/watchdog/sbc_epx_c3.c |  1 +
 drivers/watchdog/sbc_fitpc2_wdt.c |  1 +
 drivers/watchdog/sc1200wdt.c  |  1 +
 drivers/watchdog/sc520_wdt.c  |  1 +
 drivers/watchdog/sch311x_wdt.c|  1 +
 drivers/watchdog/scx200_wdt.c |  1 +
 drivers/watchdog/smsc37b787_wdt.c |  1 +
 drivers/watchdog/w83877f_wdt.c|  1 +
 drivers/watchdog/w83977f_wdt.c|  1 +
 drivers/watchdog/wafer5823wdt.c   |  1 +
 drivers/watchdog/watchdog_dev.c   |  1 +
 drivers/watchdog/wdrtas.c |  1 +
 drivers/watchdog/wdt.c|  1 +
 drivers/watchdog/wdt285.c |  1 +
 drivers/watchdog/wdt977.c |  1 +
 drivers/watchdog/wdt_pci.c|  1 +
 fs/compat_ioctl.c | 11 ---
 63 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c 
b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index ba12dc14a3d1..8c0d324f657e 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -650,6 +650,7 @@ static const struct file_operations mpc52xx_wdt_fops = {
.llseek = no_llseek,
.write  = mpc52xx_wdt_write,
.unlocked_ioctl = mpc52xx_wdt_ioctl,
+   .compat_ioctl   = compat_ptr_ioctl,
.open   = mpc52xx_wdt_open,
.release= mpc52xx_wdt_release,
 };
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 000cb69ba0bc..e6d4f43deba8 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -165,6 +165,7 @@ static const struct file_operations harddog_fops = {
.owner  = THIS_MODULE,
.write  = harddog_write,
.unlocked_ioctl = harddog_ioctl,
+   .compat_ioctl   = compat_ptr_ioctl,
.open   = harddog_open,
.release= harddog_release,
.llseek = 

[PATCH 0/6] drm+dma: cache support for arm, etc

2019-08-14 Thread Rob Clark
From: Rob Clark 

This is a replacement for a previous patch set[1] that added arm64
support for drm_clflush.  I've also added a patch to solve a similar
cache issue in vgem.

The first few patches just export arch_sync_dma_for_*().  Possibly
instead the EXPORT_SYMBOL_GPL() should be somewhere central, rather
than per-arch (but where would make sense?)

The fourth adds (and exports) these ops for arch/arm.  (Arnd Bergmann
mentioned on IRC that Christoph Hellwig was working on this already
for arch/arm which could replace the fourth patch.)

The last two patches actually fix things.

[1] https://patchwork.freedesktop.org/series/64732/

Rob Clark (6):
  arm64: export arch_sync_dma_for_*()
  mips: export arch_sync_dma_for_*()
  powerpc: export arch_sync_dma_for_*()
  arm: add arch_sync_dma_for_*()
  drm/msm: stop abusing DMA API
  drm/vgem: fix cache synchronization on arm/arm64 (take two)

 arch/arm/Kconfig  |   2 +
 arch/arm/mm/dma-mapping-nommu.c   |  14 +++
 arch/arm/mm/dma-mapping.c |  28 ++
 arch/arm64/mm/dma-mapping.c   |   2 +
 arch/arm64/mm/flush.c |   2 +
 arch/mips/mm/dma-noncoherent.c|   2 +
 arch/powerpc/mm/dma-noncoherent.c |   2 +
 drivers/gpu/drm/drm_cache.c   |  20 -
 drivers/gpu/drm/msm/msm_gem.c |  37 +++-
 drivers/gpu/drm/vgem/vgem_drv.c   | 145 --
 include/drm/drm_cache.h   |   4 +
 11 files changed, 182 insertions(+), 76 deletions(-)

-- 
2.21.0



[PATCH v10 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-08-14 Thread Santosh Sivaraj
From: Balbir Singh 

If we take a UE on one of the instructions with a fixup entry, set nip
to continue execution at the fixup entry. Stop processing the event
further, and don't print it.

Co-developed-by: Reza Arbab 
Signed-off-by: Reza Arbab 
Signed-off-by: Balbir Singh 
Signed-off-by: Santosh Sivaraj 
Reviewed-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/mce.h  |  4 +++-
 arch/powerpc/kernel/mce.c   | 16 
 arch/powerpc/kernel/mce_power.c | 15 +--
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index f3a6036b6bc0..e1931c8c2743 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -122,7 +122,8 @@ struct machine_check_event {
enum MCE_UeErrorType ue_error_type:8;
u8  effective_address_provided;
u8  physical_address_provided;
-   u8  reserved_1[5];
+   u8  ignore_event;
+   u8  reserved_1[4];
u64 effective_address;
u64 physical_address;
u8  reserved_2[8];
@@ -193,6 +194,7 @@ struct mce_error_info {
enum MCE_Initiator  initiator:8;
enum MCE_ErrorClass error_class:8;
boolsync_error;
+   boolignore_event;
 };
 
 #define MAX_MC_EVT 100
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index a3b122a685a5..ec4b3e1087be 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
+   mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct 
*work)
/*
 * This should probably queued elsewhere, but
 * oh! well
+*
+* Don't report this machine check because the caller has
+* asked us to ignore the event; it has a fixup handler which
+* will do the appropriate error handling and reporting.
 */
if (evt->error_type == MCE_ERROR_TYPE_UE) {
+   if (evt->u.ue_error.ignore_event) {
+   __this_cpu_dec(mce_ue_count);
+   continue;
+   }
+
if (evt->u.ue_error.physical_address_provided) {
unsigned long pfn;
 
@@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct 
irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
+
+   if (evt->error_type == MCE_ERROR_TYPE_UE &&
+   evt->u.ue_error.ignore_event) {
+   __this_cpu_dec(mce_queue_count);
+   continue;
+   }
machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index e74816f045f8..1dd87f6f5186 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -18,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Convert an address related to an mm to a physical address.
@@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
return 0;
 }
 
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+   struct mce_error_info *mce_err)
 {
long handled = 0;
+   const struct exception_table_entry *entry;
+
+   entry = search_kernel_exception_table(regs->nip);
+   if (entry) {
+   mce_err->ignore_event = true;
+   regs->nip = extable_fixup(entry);
+   return 1;
+   }
 
/*
 * On specific SCOM read via MMIO we may get a machine check
@@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs,
_addr);
 
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
-   handled = mce_handle_ue_error(regs);
+   handled = mce_handle_ue_error(regs, &mce_err);
 
save_mce_event(regs, 

[PATCH v10 7/7] powerpc: add machine check safe copy_to_user

2019-08-14 Thread Santosh Sivaraj
Use  memcpy_mcsafe() implementation to define copy_to_user_mcsafe()

Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/uaccess.h | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 77f6ebf97113..4316e36095a2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
!RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE  if PPC64
+   select ARCH_HAS_UACCESS_MCSAFE  if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index 8b03eb44e876..15002b51ff18 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -387,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void __user 
*to,
return ret;
 }
 
+static __always_inline unsigned long __must_check
+copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
+{
+   if (likely(check_copy_size(from, n, true))) {
+   if (access_ok(to, n)) {
+   allow_write_to_user(to, n);
+   n = memcpy_mcsafe((void *)to, from, n);
+   prevent_write_to_user(to, n);
+   }
+   }
+
+   return n;
+}
+
 extern unsigned long __clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
-- 
2.21.0



[PATCH 3/6] powerpc: export arch_sync_dma_for_*()

2019-08-14 Thread Rob Clark
From: Rob Clark 

Signed-off-by: Rob Clark 
---
 arch/powerpc/mm/dma-noncoherent.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/dma-noncoherent.c 
b/arch/powerpc/mm/dma-noncoherent.c
index c617282d5b2a..80d53b950821 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -401,12 +401,14 @@ void arch_sync_dma_for_device(struct device *dev, 
phys_addr_t paddr,
 {
__dma_sync_page(paddr, size, dir);
 }
+EXPORT_SYMBOL_GPL(arch_sync_dma_for_device);
 
 void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
 {
__dma_sync_page(paddr, size, dir);
 }
+EXPORT_SYMBOL_GPL(arch_sync_dma_for_cpu);
 
 /*
  * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
-- 
2.21.0



[PATCH v4 1/3] kasan: support backing vmalloc space with real shadow memory

2019-08-14 Thread Daniel Axtens
Hook into vmalloc and vmap, and dynamically allocate real shadow
memory to back the mappings.

Most mappings in vmalloc space are small, requiring less than a full
page of shadow space. Allocating a full shadow page per mapping would
therefore be wasteful. Furthermore, to ensure that different mappings
use different shadow pages, mappings would have to be aligned to
KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE.

Instead, share backing space across multiple mappings. Allocate
a backing page the first time a mapping in vmalloc space uses a
particular page of the shadow region. Keep this page around
regardless of whether the mapping is later freed - in the mean time
the page could have become shared by another vmalloc mapping.

This can in theory lead to unbounded memory growth, but the vmalloc
allocator is pretty good at reusing addresses, so the practical memory
usage grows at first but then stays fairly stable.

This requires architecture support to actually use: arches must stop
mapping the read-only zero page over portion of the shadow region that
covers the vmalloc space and instead leave it unmapped.

This allows KASAN with VMAP_STACK, and will be needed for architectures
that do not have a separate module space (e.g. powerpc64, which I am
currently working on). It also allows relaxing the module alignment
back to PAGE_SIZE.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=202009
Acked-by: Vasily Gorbik 
Signed-off-by: Daniel Axtens 
[Mark: rework shadow allocation]
Signed-off-by: Mark Rutland 

--

v2: let kasan_unpoison_shadow deal with ranges that do not use a
full shadow byte.

v3: relax module alignment
rename to kasan_populate_vmalloc which is a much better name
deal with concurrency correctly

v4: Integrate Mark's rework
Poison pages on vfree
Handle allocation failures. I've tested this by inserting artificial
 failures and using test_vmalloc to stress it. I haven't handled the
 per-cpu case: it looked like it would require a messy hacking-up of
 the function to deal with an OOM failure case in a debug feature.

---
 Documentation/dev-tools/kasan.rst | 60 +++
 include/linux/kasan.h | 24 +++
 include/linux/moduleloader.h  |  2 +-
 include/linux/vmalloc.h   | 12 ++
 lib/Kconfig.kasan | 16 
 lib/test_kasan.c  | 26 
 mm/kasan/common.c | 67 +++
 mm/kasan/generic_report.c |  3 ++
 mm/kasan/kasan.h  |  1 +
 mm/vmalloc.c  | 28 -
 10 files changed, 237 insertions(+), 2 deletions(-)

diff --git a/Documentation/dev-tools/kasan.rst 
b/Documentation/dev-tools/kasan.rst
index b72d07d70239..35fda484a672 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -215,3 +215,63 @@ brk handler is used to print bug reports.
 A potential expansion of this mode is a hardware tag-based mode, which would
 use hardware memory tagging support instead of compiler instrumentation and
 manual shadow memory manipulation.
+
+What memory accesses are sanitised by KASAN?
+
+
+The kernel maps memory in a number of different parts of the address
+space. This poses something of a problem for KASAN, which requires
+that all addresses accessed by instrumented code have a valid shadow
+region.
+
+The range of kernel virtual addresses is large: there is not enough
+real memory to support a real shadow region for every address that
+could be accessed by the kernel.
+
+By default
+~~
+
+By default, architectures only map real memory over the shadow region
+for the linear mapping (and potentially other small areas). For all
+other areas - such as vmalloc and vmemmap space - a single read-only
+page is mapped over the shadow area. This read-only shadow page
+declares all memory accesses as permitted.
+
+This presents a problem for modules: they do not live in the linear
+mapping, but in a dedicated module space. By hooking in to the module
+allocator, KASAN can temporarily map real shadow memory to cover
+them. This allows detection of invalid accesses to module globals, for
+example.
+
+This also creates an incompatibility with ``VMAP_STACK``: if the stack
+lives in vmalloc space, it will be shadowed by the read-only page, and
+the kernel will fault when trying to set up the shadow data for stack
+variables.
+
+CONFIG_KASAN_VMALLOC
+
+
+With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
+cost of greater memory usage. Currently this is only supported on x86.
+
+This works by hooking into vmalloc and vmap, and dynamically
+allocating real shadow memory to back the mappings.
+
+Most mappings in vmalloc space are small, requiring less than a full
+page of shadow space. Allocating a full shadow page per mapping would
+therefore be wasteful. Furthermore, to ensure that different mappings
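
The mm/kasan hunks are truncated above; the core of the hook described in
this changelog is a per-PTE callback, walked over the shadow of a new
mapping with apply_to_page_range(), which lazily backs each shadow page and
handles the concurrency noted in the v3 changes. Roughly (the name follows
the patch, but treat the details as an illustrative sketch):

static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
                                      void *unused)
{
        unsigned long page;
        pte_t pte;

        if (likely(!pte_none(*ptep)))
                return 0;       /* this shadow page is already backed */

        page = __get_free_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;

        /* Fresh shadow is poisoned until an allocation unpoisons it */
        memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
        pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);

        spin_lock(&init_mm.page_table_lock);
        if (likely(pte_none(*ptep)))
                set_pte_at(&init_mm, addr, ptep, pte);
        else
                free_page(page);        /* raced with another populate */
        spin_unlock(&init_mm.page_table_lock);

        return 0;
}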

[PATCH v4 2/3] fork: support VMAP_STACK with KASAN_VMALLOC

2019-08-14 Thread Daniel Axtens
Supporting VMAP_STACK with KASAN_VMALLOC is straightforward:

 - clear the shadow region of vmapped stacks when swapping them in
 - tweak Kconfig to allow VMAP_STACK to be turned on with KASAN

Reviewed-by: Dmitry Vyukov 
Signed-off-by: Daniel Axtens 
---
 arch/Kconfig  | 9 +
 kernel/fork.c | 4 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index a7b57dd42c26..e791196005e1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -825,16 +825,17 @@ config HAVE_ARCH_VMAP_STACK
 config VMAP_STACK
default y
bool "Use a virtually-mapped stack"
-   depends on HAVE_ARCH_VMAP_STACK && !KASAN
+   depends on HAVE_ARCH_VMAP_STACK
+   depends on !KASAN || KASAN_VMALLOC
---help---
  Enable this if you want the use virtually-mapped kernel stacks
  with guard pages.  This causes kernel stack overflows to be
  caught immediately rather than causing difficult-to-diagnose
  corruption.
 
- This is presently incompatible with KASAN because KASAN expects
- the stack to map directly to the KASAN shadow map using a formula
- that is incorrect if the stack is in vmalloc space.
+ To use this with KASAN, the architecture must support backing
+ virtual mappings with real shadow memory, and KASAN_VMALLOC must
+ be enabled.
 
 config ARCH_OPTIONAL_KERNEL_RWX
def_bool n
diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1b4148..ce3150fe8ff2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -215,6 +216,9 @@ static unsigned long *alloc_thread_stack_node(struct 
task_struct *tsk, int node)
if (!s)
continue;
 
+   /* Clear the KASAN shadow of the stack. */
+   kasan_unpoison_shadow(s->addr, THREAD_SIZE);
+
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
 
-- 
2.20.1



[PATCH v4 0/3] kasan: support backing vmalloc space with real shadow memory

2019-08-14 Thread Daniel Axtens
Currently, vmalloc space is backed by the early shadow page. This
means that kasan is incompatible with VMAP_STACK, and it also provides
a hurdle for architectures that do not have a dedicated module space
(like powerpc64).

This series provides a mechanism to back vmalloc space with real,
dynamically allocated memory. I have only wired up x86, because that's
the only currently supported arch I can work with easily, but it's
very easy to wire up other architectures.

This has been discussed before in the context of VMAP_STACK:
 - https://bugzilla.kernel.org/show_bug.cgi?id=202009
 - https://lkml.org/lkml/2018/7/22/198
 - https://lkml.org/lkml/2019/7/19/822

In terms of implementation details:

Most mappings in vmalloc space are small, requiring less than a full
page of shadow space. Allocating a full shadow page per mapping would
therefore be wasteful. Furthermore, to ensure that different mappings
use different shadow pages, mappings would have to be aligned to
KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE.

Instead, share backing space across multiple mappings. Allocate
a backing page the first time a mapping in vmalloc space uses a
particular page of the shadow region. Keep this page around
regardless of whether the mapping is later freed - in the mean time
the page could have become shared by another vmalloc mapping.

This can in theory lead to unbounded memory growth, but the vmalloc
allocator is pretty good at reusing addresses, so the practical memory
usage appears to grow at first but then stay fairly stable.

If we run into practical memory exhaustion issues, I'm happy to
consider hooking into the book-keeping that vmap does, but I am not
convinced that it will be an issue.

v1: https://lore.kernel.org/linux-mm/20190725055503.19507-1-...@axtens.net/
v2: https://lore.kernel.org/linux-mm/20190729142108.23343-1-...@axtens.net/
 Address review comments:
 - Patch 1: use kasan_unpoison_shadow's built-in handling of
ranges that do not align to a full shadow byte
 - Patch 3: prepopulate pgds rather than faulting things in
v3: https://lore.kernel.org/linux-mm/20190731071550.31814-1-...@axtens.net/
 Address comments from Mark Rutland:
 - kasan_populate_vmalloc is a better name
 - handle concurrency correctly
 - various nits and cleanups
 - relax module alignment in KASAN_VMALLOC case
v4: Changes to patch 1 only:
 - Integrate Mark's rework, thanks Mark!
 - handle the case where kasan_populate_shadow might fail
 - poison shadow on free, allowing the alloc path to just
 unpoison memory that it uses

Daniel Axtens (3):
  kasan: support backing vmalloc space with real shadow memory
  fork: support VMAP_STACK with KASAN_VMALLOC
  x86/kasan: support KASAN_VMALLOC

 Documentation/dev-tools/kasan.rst | 60 +++
 arch/Kconfig  |  9 +++--
 arch/x86/Kconfig  |  1 +
 arch/x86/mm/kasan_init_64.c   | 61 
 include/linux/kasan.h | 24 +++
 include/linux/moduleloader.h  |  2 +-
 include/linux/vmalloc.h   | 12 ++
 kernel/fork.c |  4 ++
 lib/Kconfig.kasan | 16 
 lib/test_kasan.c  | 26 
 mm/kasan/common.c | 67 +++
 mm/kasan/generic_report.c |  3 ++
 mm/kasan/kasan.h  |  1 +
 mm/vmalloc.c  | 28 -
 14 files changed, 308 insertions(+), 6 deletions(-)

-- 
2.20.1



[PATCH v10 4/7] extable: Add function to search only kernel exception table

2019-08-14 Thread Santosh Sivaraj
In certain architecture specific operating modes (e.g., the powerpc
machine check handler, which is unable to access vmalloc memory),
search_exception_tables cannot be called because it also searches the
module exception tables if the entry is not found in the kernel
exception table.

Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Nicholas Piggin 
Signed-off-by: Santosh Sivaraj 
Reviewed-by: Nicholas Piggin 
---
 include/linux/extable.h |  2 ++
 kernel/extable.c| 11 +--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/linux/extable.h b/include/linux/extable.h
index 41c5b3a25f67..81ecfaa83ad3 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h
@@ -19,6 +19,8 @@ void trim_init_extable(struct module *m);
 
 /* Given an address, look for it in the exception tables */
 const struct exception_table_entry *search_exception_tables(unsigned long add);
+const struct exception_table_entry *
+search_kernel_exception_table(unsigned long addr);
 
 #ifdef CONFIG_MODULES
 /* For extable.c to search modules' exception tables. */
diff --git a/kernel/extable.c b/kernel/extable.c
index e23cce6e6092..f6c9406eec7d 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,13 +40,20 @@ void __init sort_main_extable(void)
}
 }
 
+/* Given an address, look for it in the kernel exception table */
+const
+struct exception_table_entry *search_kernel_exception_table(unsigned long addr)
+{
+   return search_extable(__start___ex_table,
+ __stop___ex_table - __start___ex_table, addr);
+}
+
 /* Given an address, look for it in the exception tables. */
 const struct exception_table_entry *search_exception_tables(unsigned long addr)
 {
const struct exception_table_entry *e;
 
-   e = search_extable(__start___ex_table,
-  __stop___ex_table - __start___ex_table, addr);
+   e = search_kernel_exception_table(addr);
if (!e)
e = search_module_extables(addr);
return e;
-- 
2.21.0



[PATCH v10 3/7] powerpc/mce: Make machine_check_ue_event() static

2019-08-14 Thread Santosh Sivaraj
From: Reza Arbab 

The function doesn't get used outside this file, so make it static.

Signed-off-by: Reza Arbab 
Signed-off-by: Santosh Sivaraj 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/kernel/mce.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index cff31d4a501f..a3b122a685a5 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -34,7 +34,7 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
 
 static void machine_check_process_queued_event(struct irq_work *work);
 static void machine_check_ue_irq_work(struct irq_work *work);
-void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_check_ue_event(struct machine_check_event *evt);
 static void machine_process_ue_event(struct work_struct *work);
 
 static struct irq_work mce_event_process_work = {
@@ -212,7 +212,7 @@ static void machine_check_ue_irq_work(struct irq_work *work)
 /*
  * Queue up the MCE event which then can be handled later.
  */
-void machine_check_ue_event(struct machine_check_event *evt)
+static void machine_check_ue_event(struct machine_check_event *evt)
 {
int index;
 
-- 
2.21.0



Re: [5.3.0-rc4-next][bisected 882632][qla2xxx] WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 qla2x00_status_entry.isra

2019-08-14 Thread Bart Van Assche

On 8/14/19 10:18 AM, Abdul Haleem wrote:

On Wed, 2019-08-14 at 10:05 -0700, Bart Van Assche wrote:

On 8/14/19 9:52 AM, Abdul Haleem wrote:

Greeting's

Today's linux-next kernel (5.3.0-rc4-next-20190813)  booted with warning on my 
powerpc power 8 lpar

The WARN_ON_ONCE() was introduced by commit 88263208 (scsi: qla2xxx: Complain if 
sp->done() is not...)

boot logs:

WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784


Hi Abdul,

Thank you for having reported this. Is that the only warning reported on your 
setup by the qla2xxx
driver? If that warning is commented out, does the qla2xxx driver work as 
expected?


boot warning did not show up when the commit is reverted.

should I comment out only the WARN_ON_ONCE() which is causing the issue,
and not the other one ?


Yes please. Commit 88263208 introduced five kernel warnings but I think 
only one of these should be removed again, e.g. as follows:


diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index cd39ac18c5fd..d81b5ecce24b 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2780,8 +2780,6 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct 
rsp_que *rsp, void *pkt)


if (rsp->status_srb == NULL)
sp->done(sp, res);
-   else
-   WARN_ON_ONCE(true);
 }

 /**


[PATCH 6/6] powerpc: Don't flush caches when adding memory

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

This operation takes a significant amount of time when hotplugging
large amounts of memory (~50 seconds with 890GB of persistent memory).

This was originally added in commit fb5924fddf9e
("powerpc/mm: Flush cache on memory hot(un)plug") to support memtrace,
but the flush on add is not needed as it is flushed on remove.

Signed-off-by: Alastair D'Silva 
---
 arch/powerpc/mm/mem.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index fb0d5e9aa11b..43be99de7c9a 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -111,7 +111,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
 {
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
-   unsigned long i;
int rc;
 
resize_hpt_for_hotplug(memblock_phys_mem_size());
@@ -124,11 +123,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
return -EFAULT;
}
 
-   for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
-   flush_dcache_range(start + i, min(start + size, start + i + 
FLUSH_CHUNK_SIZE));
-   cond_resched();
-   }
-
return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
 
-- 
2.21.0



[PATCH 5/6] powerpc: Remove 'extern' from func prototypes in cache headers

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

The 'extern' keyword adds no value to function prototypes.

Signed-off-by: Alastair D'Silva 
---
 arch/powerpc/include/asm/cache.h  | 8 
 arch/powerpc/include/asm/cacheflush.h | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 728f154204db..c5c096e968e0 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -102,10 +102,10 @@ static inline u32 l1_icache_bytes(void)
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #ifdef CONFIG_PPC_BOOK3S_32
-extern long _get_L2CR(void);
-extern long _get_L3CR(void);
-extern void _set_L2CR(unsigned long);
-extern void _set_L3CR(unsigned long);
+long _get_L2CR(void);
+long _get_L3CR(void);
+void _set_L2CR(unsigned long val);
+void _set_L3CR(unsigned long val);
 #else
 #define _get_L2CR()	0L
 #define _get_L3CR()	0L
diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index 4c3377aff8ed..1826bf2cc137 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -38,15 +38,15 @@ static inline void flush_cache_vmap(unsigned long start, 
unsigned long end) { }
 #endif
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
 void flush_icache_range(unsigned long start, unsigned long stop);
-extern void flush_icache_user_range(struct vm_area_struct *vma,
+void flush_icache_user_range(struct vm_area_struct *vma,
struct page *page, unsigned long addr,
int len);
-extern void flush_dcache_icache_page(struct page *page);
+void flush_dcache_icache_page(struct page *page);
 
 /**
  * flush_dcache_range(): Write any modified data cache blocks out to memory 
and invalidate them.
-- 
2.21.0



Re: [PATCH v9 7/7] powerpc: add machine check safe copy_to_user

2019-08-14 Thread Santosh Sivaraj
Hi Balbir,

Balbir Singh  writes:

> On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
>> Use  memcpy_mcsafe() implementation to define copy_to_user_mcsafe()
>> 
>> Signed-off-by: Santosh Sivaraj 
>> ---
>>  arch/powerpc/Kconfig   |  1 +
>>  arch/powerpc/include/asm/uaccess.h | 14 ++
>>  2 files changed, 15 insertions(+)
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 77f6ebf97113..4316e36095a2 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -137,6 +137,7 @@ config PPC
>>  select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
>> !RELOCATABLE && !HIBERNATION)
>>  select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
>>  select ARCH_HAS_UACCESS_FLUSHCACHE  if PPC64
>> +select ARCH_HAS_UACCESS_MCSAFE  if PPC64
>>  select ARCH_HAS_UBSAN_SANITIZE_ALL
>>  select ARCH_HAVE_NMI_SAFE_CMPXCHG
>>  select ARCH_KEEP_MEMBLOCK
>> diff --git a/arch/powerpc/include/asm/uaccess.h 
>> b/arch/powerpc/include/asm/uaccess.h
>> index 8b03eb44e876..15002b51ff18 100644
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -387,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void 
>> __user *to,
>>  return ret;
>>  }
>>  
>> +static __always_inline unsigned long __must_check
>> +copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
>> +{
>> +if (likely(check_copy_size(from, n, true))) {
>> +if (access_ok(to, n)) {
>> +allow_write_to_user(to, n);
>> +n = memcpy_mcsafe((void *)to, from, n);
>> +prevent_write_to_user(to, n);
>> +}
>> +}
>> +
>> +return n;
>
> Do we always return n independent of the check_copy_size return value and
> access_ok return values?

Yes, we always return the remaining bytes not copied, even if
check_copy_size or access_ok fails.
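To make the convention concrete, a sketch of how a caller treats the
return value (the exact error code chosen is illustrative):

```
unsigned long left;

left = copy_to_user_mcsafe(to, from, n);
if (left)		/* number of bytes that were NOT copied */
	return -EFAULT;	/* the pmem/mcsafe path maps this to -EIO */
```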

Santosh

>
> Balbir Singh.
>
>> +}
>> +
>>  extern unsigned long __clear_user(void __user *addr, unsigned long size);
>>  
>>  static inline unsigned long clear_user(void __user *addr, unsigned long 
>> size)
>> 


[bug report] powerpc/iommu: Implement IOMMU pools to improve multiqueue adapter performance

2019-08-14 Thread Dan Carpenter
[ Ancient code.  The warning is correct but the bug seems harmless.
  -- dan ]

Hello Anton Blanchard,

The patch b4c3a8729ae5: "powerpc/iommu: Implement IOMMU pools to
improve multiqueue adapter performance" from Jun 7, 2012, leads to
the following static checker warning:

arch/powerpc/kernel/iommu.c:377 get_pool()
warn: array off by one? '*tbl->pools + pool_nr'

arch/powerpc/kernel/iommu.c
   364  static struct iommu_pool *get_pool(struct iommu_table *tbl,
   365 unsigned long entry)
   366  {
   367  struct iommu_pool *p;
   368  unsigned long largepool_start = tbl->large_pool.start;
   369  
   370  /* The large pool is the last pool at the top of the table */
   371  if (entry >= largepool_start) {
   372  p = &tbl->large_pool;
   373  } else {
   374  unsigned int pool_nr = entry / tbl->poolsize;
   375  
   376  BUG_ON(pool_nr > tbl->nr_pools);
   ^
This should be ">=".  The tbl->nr_pools value is either 1 or
IOMMU_NR_POOLS and the tbl->pools[] array has IOMMU_NR_POOLS elements.

   377  p = &tbl->pools[pool_nr];
   378  }
   379  
   380  return p;
   381  }
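For clarity, the suggested one-character fix would look like this
(illustrative, not a submitted patch):

```
-	BUG_ON(pool_nr > tbl->nr_pools);
+	BUG_ON(pool_nr >= tbl->nr_pools);	/* pools[] has nr_pools entries */
```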

regards,
dan carpenter


[PATCH v10 1/7] powerpc/mce: Schedule work from irq_work

2019-08-14 Thread Santosh Sivaraj
schedule_work() cannot be called from MCE exception context as an MCE
can interrupt even interrupt-disabled context.

Fixes: 733e4a4c ("powerpc/mce: hookup memory_failure for UE errors")
Suggested-by: Mahesh Salgaonkar 
Signed-off-by: Santosh Sivaraj 
Reviewed-by: Mahesh Salgaonkar 
Acked-by: Balbir Singh 
Cc: sta...@vger.kernel.org # v4.15+
---
 arch/powerpc/kernel/mce.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b18df633eae9..cff31d4a501f 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -33,6 +33,7 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
mce_ue_event_queue);
 
 static void machine_check_process_queued_event(struct irq_work *work);
+static void machine_check_ue_irq_work(struct irq_work *work);
 void machine_check_ue_event(struct machine_check_event *evt);
 static void machine_process_ue_event(struct work_struct *work);
 
@@ -40,6 +41,10 @@ static struct irq_work mce_event_process_work = {
 .func = machine_check_process_queued_event,
 };
 
+static struct irq_work mce_ue_event_irq_work = {
+   .func = machine_check_ue_irq_work,
+};
+
 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 
 static void mce_set_error_info(struct machine_check_event *mce,
@@ -199,6 +204,10 @@ void release_mce_event(void)
get_mce_event(NULL, true);
 }
 
+static void machine_check_ue_irq_work(struct irq_work *work)
+{
+   schedule_work(&mce_ue_event_work);
+}
 
 /*
  * Queue up the MCE event which then can be handled later.
@@ -216,7 +225,7 @@ void machine_check_ue_event(struct machine_check_event *evt)
memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
 
/* Queue work to process this event later. */
-   schedule_work(&mce_ue_event_work);
+   irq_work_queue(&mce_ue_event_irq_work);
 }
 
 /*
-- 
2.21.0



[PATCH v10 0/7] powerpc: implement machine check safe memcpy

2019-08-14 Thread Santosh Sivaraj
During a memcpy from a pmem device, if a machine check exception is
generated we end up in a panic. In the case of an fsdax read, this should
only result in -EIO. Implement memcpy_mcsafe so the machine check is
handled gracefully instead of panicking.
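A condensed sketch of the semantics this series gives memcpy_mcsafe(),
and how a pmem read path might use it (names and the exact error mapping
are illustrative):

```
/* memcpy_mcsafe() returns 0 on success, or the number of bytes
 * not copied if a machine check was taken during the copy. */
unsigned long rem;

rem = memcpy_mcsafe(dst, src, len);
if (rem)
	return -EIO;	/* fail the fsdax read instead of panicking */
```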

Before this patch series:

```
bash-4.4# mount -o dax /dev/pmem0 /mnt/pmem/
[ 7621.714094] Disabling lock debugging due to kernel taint
[ 7621.714099] MCE: CPU0: machine check (Severe) Host UE Load/Store [Not 
recovered]
[ 7621.714104] MCE: CPU0: NIP: [c0088978] memcpy_power7+0x418/0x7e0
[ 7621.714107] MCE: CPU0: Hardware error
[ 7621.714112] opal: Hardware platform error: Unrecoverable Machine Check 
exception
[ 7621.714118] CPU: 0 PID: 1368 Comm: mount Tainted: G   M  
5.2.0-rc5-00239-g241e39004581
#50
[ 7621.714123] NIP:  c0088978 LR: c08e16f8 CTR: 01de
[ 7621.714129] REGS: c000fffbfd70 TRAP: 0200   Tainted: G   M  
(5.2.0-rc5-00239-g241e39004581)
[ 7621.714131] MSR:  92209033   CR: 
24428840  XER: 0004
[ 7621.714160] CFAR: c00889a8 DAR: deadbeefdeadbeef DSISR: 8000 
IRQMASK: 0
[ 7621.714171] GPR00: 0e00 c000f0b8b1e0 c12cf100 
c000ed8e1100 
[ 7621.714186] GPR04: c2001100 0001 0200 
03fff1272000 
[ 7621.714201] GPR08: 8000 0010 0020 
0030 
[ 7621.714216] GPR12: 0040 7fffb8c6d390 0050 
0060 
[ 7621.714232] GPR16: 0070  0001 
c000f0b8b960 
[ 7621.714247] GPR20: 0001 c000f0b8b940 0001 
0001 
[ 7621.714262] GPR24: c1382560 c00c003b6380 c00c003b6380 
0001 
[ 7621.714277] GPR28:  0001 c200 
0001 
[ 7621.714294] NIP [c0088978] memcpy_power7+0x418/0x7e0
[ 7621.714298] LR [c08e16f8] pmem_do_bvec+0xf8/0x430
...  ...
```

After this patch series:

```
bash-4.4# mount -o dax /dev/pmem0 /mnt/pmem/
[25302.883978] Buffer I/O error on dev pmem0, logical block 0, async page read
[25303.020816] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your 
own risk
[25303.021236] EXT4-fs (pmem0): Can't read superblock on 2nd try
[25303.152515] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your 
own risk
[25303.284031] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your 
own risk
[25304.084100] UDF-fs: bad mount option "dax" or missing value
mount: /mnt/pmem: wrong fs type, bad option, bad superblock on /dev/pmem0, 
missing codepage or helper
program, or other error.
```

MCE is injected on a pmem address using mambo. The last patch which adds a
nop is only for testing on mambo, where r13 is not restored upon hitting
vector 200.

The memcpy code can be further optimised by adding VMX support, and GAS
macros can be used to improve code reusability; I will send that as
another series.
--
v10: Fix authorship; add reviewed-bys and acks.

v9:
* Add a new IRQ work for UE events [mahesh]
* Reorder patches, and copy stable

v8:
* While ignoring UE events, return was used instead of continue.
* Checkpatch fixups for commit log

v7:
* Move schedule_work to be called from irq_work.

v6:
* Don't return pfn, all callees are expecting physical address anyway [nick]
* Patch re-ordering: move exception table patch before memcpy_mcsafe patch 
[nick]
* Reword commit log for search_exception_tables patch [nick]

v5:
* Don't use search_exception_tables since it searches for module exception 
tables
  also [Nicholas]
* Fix commit message for patch 2 [Nicholas]

v4:
* Squash the return remaining bytes patch into the memcpy_mcsafe
  implementation patch [christophe]
* Access ok should be checked for copy_to_user_mcsafe() [christophe]

v3:
* Drop patch which enables DR/IR for external modules
* Drop notifier call chain, we don't want to do that in real mode
* Return remaining bytes from memcpy_mcsafe correctly
* We no longer restore r13 for simulator tests, rather use a nop at 
  vector 0x200 [workaround for simulator; not to be merged]

v2:
* Don't set RI bit explicitly [mahesh]
* Re-ordered series to get r13 workaround as the last patch

--
Balbir Singh (3):
  powerpc/mce: Fix MCE handling for huge pages
  powerpc/memcpy: Add memcpy_mcsafe for pmem
  powerpc/mce: Handle UE event for memcpy_mcsafe

Reza Arbab (1):
  powerpc/mce: Make machine_check_ue_event() static

Santosh Sivaraj (3):
  powerpc/mce: Schedule work from irq_work
  extable: Add function to search only kernel exception table
  powerpc: add machine check safe copy_to_user

 arch/powerpc/Kconfig |   1 +
 arch/powerpc/include/asm/mce.h   |   6 +-
 arch/powerpc/include/asm/string.h|   2 +
 arch/powerpc/include/asm/uaccess.h   |  14 ++
 arch/powerpc/kernel/mce.c|  31 +++-
 arch/powerpc/kernel/mce_power.c  |  70 
 arch/powerpc/lib/Makefile|   2 +-
 arch/powerpc/lib/memcpy_mcsafe_64.S  | 242 

[PATCH 2/6] powerpc: define helpers to get L1 icache sizes

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

This patch adds helpers to retrieve icache sizes, and renames the existing
helpers to make it clear that they are for dcache.

Signed-off-by: Alastair D'Silva 
---
 arch/powerpc/include/asm/cache.h  | 29 +++
 arch/powerpc/include/asm/cacheflush.h | 12 +--
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index b3388d95f451..f852d5cd746c 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -55,25 +55,46 @@ struct ppc64_caches {
 
 extern struct ppc64_caches ppc64_caches;
 
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
 {
return ppc64_caches.l1d.log_block_size;
 }
 
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
 {
return ppc64_caches.l1d.block_size;
 }
+
+static inline u32 l1_icache_shift(void)
+{
+   return ppc64_caches.l1i.log_block_size;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+   return ppc64_caches.l1i.block_size;
+}
 #else
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
 {
return L1_CACHE_SHIFT;
 }
 
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
 {
return L1_CACHE_BYTES;
 }
+
+static inline u32 l1_icache_shift(void)
+{
+   return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+   return L1_CACHE_BYTES;
+}
+
 #endif
 #endif /* ! __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index eef388f2659f..ed57843ef452 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -63,8 +63,8 @@ static inline void __flush_dcache_icache_phys(unsigned long 
physaddr)
  */
 static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-   unsigned long shift = l1_cache_shift();
-   unsigned long bytes = l1_cache_bytes();
+   unsigned long shift = l1_dcache_shift();
+   unsigned long bytes = l1_dcache_bytes();
void *addr = (void *)(start & ~(bytes - 1));
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
@@ -89,8 +89,8 @@ static inline void flush_dcache_range(unsigned long start, 
unsigned long stop)
  */
 static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 {
-   unsigned long shift = l1_cache_shift();
-   unsigned long bytes = l1_cache_bytes();
+   unsigned long shift = l1_dcache_shift();
+   unsigned long bytes = l1_dcache_bytes();
void *addr = (void *)(start & ~(bytes - 1));
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
@@ -108,8 +108,8 @@ static inline void clean_dcache_range(unsigned long start, 
unsigned long stop)
 static inline void invalidate_dcache_range(unsigned long start,
   unsigned long stop)
 {
-   unsigned long shift = l1_cache_shift();
-   unsigned long bytes = l1_cache_bytes();
+   unsigned long shift = l1_dcache_shift();
+   unsigned long bytes = l1_dcache_bytes();
void *addr = (void *)(start & ~(bytes - 1));
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
-- 
2.21.0



[RFC PATCH v4 2/2] powerpc/xmon: Restrict when kernel is locked down

2019-08-14 Thread Christopher M. Riedl
Xmon should be either fully or partially disabled depending on the
kernel lockdown state.

Put xmon into read-only mode for lockdown=integrity and completely
disable xmon when lockdown=confidentiality. Xmon checks the lockdown
state and takes appropriate action:

 (1) during xmon_setup to prevent early xmon'ing

 (2) when triggered via sysrq

 (3) when toggled via debugfs

 (4) when triggered via a previously enabled breakpoint

The following lockdown state transitions are handled:

 (1) lockdown=none -> lockdown=integrity
 set xmon read-only mode

 (2) lockdown=none -> lockdown=confidentiality
 clear all breakpoints, set xmon read-only mode,
 prevent re-entry into xmon

 (3) lockdown=integrity -> lockdown=confidentiality
 clear all breakpoints, set xmon read-only mode,
 prevent re-entry into xmon

Suggested-by: Andrew Donnellan 
Signed-off-by: Christopher M. Riedl 
---
 arch/powerpc/xmon/xmon.c | 59 ++--
 include/linux/security.h |  2 ++
 security/lockdown/lockdown.c |  2 ++
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index bb63ecc599fd..8fd79369974e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -187,6 +188,9 @@ static void dump_tlb_44x(void);
 static void dump_tlb_book3e(void);
 #endif
 
+static void clear_all_bpt(void);
+static void xmon_init(int);
+
 #ifdef CONFIG_PPC64
 #define REG"%.16lx"
 #else
@@ -283,10 +287,41 @@ Commands:\n\
 "  U   show uptime information\n"
 "  ?   help\n"
 "  # n limit output to n lines per page (for dp, dpa, dl)\n"
-"  zr  reboot\n\
-  zh   halt\n"
+"  zr  reboot\n"
+"  zh  halt\n"
 ;
 
+#ifdef CONFIG_SECURITY
+static bool xmon_is_locked_down(void)
+{
+   static bool lockdown;
+
+   if (!lockdown) {
+   lockdown = !!security_locked_down(LOCKDOWN_XMON_RW);
+   if (lockdown) {
+   printf("xmon: Disabled due to kernel lockdown\n");
+   xmon_is_ro = true;
+   xmon_on = 0;
+   xmon_init(0);
+   clear_all_bpt();
+   }
+   }
+
+   if (!xmon_is_ro) {
+   xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR);
+   if (xmon_is_ro)
+   printf("xmon: Read-only due to kernel lockdown\n");
+   }
+
+   return lockdown;
+}
+#else /* CONFIG_SECURITY */
+static inline bool xmon_is_locked_down(void)
+{
+   return false;
+}
+#endif
+
 static struct pt_regs *xmon_regs;
 
 static inline void sync(void)
@@ -704,6 +739,9 @@ static int xmon_bpt(struct pt_regs *regs)
struct bpt *bp;
unsigned long offset;
 
+   if (xmon_is_locked_down())
+   return 0;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
 
@@ -735,6 +773,9 @@ static int xmon_sstep(struct pt_regs *regs)
 
 static int xmon_break_match(struct pt_regs *regs)
 {
+   if (xmon_is_locked_down())
+   return 0;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
if (dabr.enabled == 0)
@@ -745,6 +786,9 @@ static int xmon_break_match(struct pt_regs *regs)
 
 static int xmon_iabr_match(struct pt_regs *regs)
 {
+   if (xmon_is_locked_down())
+   return 0;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
if (iabr == NULL)
@@ -3750,6 +3794,9 @@ static void xmon_init(int enable)
 #ifdef CONFIG_MAGIC_SYSRQ
 static void sysrq_handle_xmon(int key)
 {
+   if (xmon_is_locked_down())
+   return;
+
/* ensure xmon is enabled */
xmon_init(1);
debugger(get_irq_regs());
@@ -3771,7 +3818,6 @@ static int __init setup_xmon_sysrq(void)
 device_initcall(setup_xmon_sysrq);
 #endif /* CONFIG_MAGIC_SYSRQ */
 
-#ifdef CONFIG_DEBUG_FS
 static void clear_all_bpt(void)
 {
int i;
@@ -3793,8 +3839,12 @@ static void clear_all_bpt(void)
printf("xmon: All breakpoints cleared\n");
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int xmon_dbgfs_set(void *data, u64 val)
 {
+   if (xmon_is_locked_down())
+   return 0;
+
xmon_on = !!val;
xmon_init(xmon_on);
 
@@ -3853,6 +3903,9 @@ early_param("xmon", early_parse_xmon);
 
 void __init xmon_setup(void)
 {
+   if (xmon_is_locked_down())
+   return;
+
if (xmon_on)
xmon_init(1);
if (xmon_early)
diff --git a/include/linux/security.h b/include/linux/security.h
index 807dc0d24982..379b74b5d545 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -116,12 +116,14 @@ enum lockdown_reason {
LOCKDOWN_MODULE_PARAMETERS,
LOCKDOWN_MMIOTRACE,
LOCKDOWN_DEBUGFS,
+   LOCKDOWN_XMON_WR,

[PATCH] powerpc: Allow flush_(inval_)dcache_range to work across ranges >4GB

2019-08-14 Thread Alastair D'Silva
From: Alastair D'Silva 

Heads Up: This patch cannot be submitted to Linus's tree, as the affected
assembler functions have already been converted to C.

When calling flush_(inval_)dcache_range with a size >4GB, we were masking
off the upper 32 bits, so we would incorrectly flush a range smaller
than intended.

This patch replaces the 32 bit shifts with 64 bit ones, so that
the full size is accounted for.
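A worked example of the truncation (numbers are illustrative):

```
/* length = 0x100000080 (4 GiB + 128 B), dcache block = 128 B (log2 = 7)
 *
 * srw. shifts only the low 32 bits of r8:
 *	0x00000080 >> 7 = 1		-> one cache line flushed
 * srd. shifts the full 64-bit value:
 *	0x100000080 >> 7 = 0x2000001	-> every line flushed, as intended
 */
```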

Signed-off-by: Alastair D'Silva 
---
 arch/powerpc/kernel/misc_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1ad4089dd110..d4d096f80f4b 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -130,7 +130,7 @@ _GLOBAL_TOC(flush_dcache_range)
	subf	r8,r6,r4	/* compute length */
	add	r8,r8,r5	/* ensure we get enough */
	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)	/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9	/* compute line count */
+	srd.	r8,r8,r9	/* compute line count */
beqlr   /* nothing to do? */
mtctr   r8
 0: dcbst   0,r6
@@ -148,7 +148,7 @@ _GLOBAL(flush_inval_dcache_range)
	subf	r8,r6,r4	/* compute length */
	add	r8,r8,r5	/* ensure we get enough */
	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)	/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9	/* compute line count */
+	srd.	r8,r8,r9	/* compute line count */
beqlr   /* nothing to do? */
sync
isync
-- 
2.21.0



[RFC PATCH v4 0/2] Restrict xmon when kernel is locked down

2019-08-14 Thread Christopher M. Riedl
Xmon should be either fully or partially disabled depending on the
kernel lockdown state.

Put xmon into read-only mode for lockdown=integrity and completely
disable xmon when lockdown=confidentiality. Since this can occur
dynamically, there may be pre-existing, active breakpoints in xmon when
transitioning into read-only mode. These breakpoints will still trigger,
so allow them to be listed, but not cleared or altered, using xmon.

Changes since v3:
 - Allow active breakpoints to be shown/listed in read-only mode

Changes since v2:
 - Rebased onto v36 of https://patchwork.kernel.org/cover/11049461/
   (based on: f632a8170a6b667ee4e3f552087588f0fe13c4bb)
 - Do not clear existing breakpoints when transitioning from
   lockdown=none to lockdown=integrity
 - Remove line continuation and dangling quote (confuses checkpatch.pl)
   from the xmon command help/usage string

Christopher M. Riedl (2):
  powerpc/xmon: Allow listing active breakpoints in read-only mode
  powerpc/xmon: Restrict when kernel is locked down

 arch/powerpc/xmon/xmon.c | 78 
 include/linux/security.h |  2 +
 security/lockdown/lockdown.c |  2 +
 3 files changed, 74 insertions(+), 8 deletions(-)

-- 
2.22.0



[RFC PATCH v4 1/2] powerpc/xmon: Allow listing active breakpoints in read-only mode

2019-08-14 Thread Christopher M. Riedl
Xmon can enter read-only mode dynamically due to changes in kernel
lockdown state. This transition does not clear active breakpoints, and
any of these breakpoints should remain visible to the xmon'er.

Signed-off-by: Christopher M. Riedl 
---
 arch/powerpc/xmon/xmon.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d0620d762a5a..bb63ecc599fd 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1045,10 +1045,6 @@ cmds(struct pt_regs *excp)
set_lpp_cmd();
break;
case 'b':
-   if (xmon_is_ro) {
-   printf(xmon_ro_msg);
-   break;
-   }
bpt_cmds();
break;
case 'C':
@@ -1317,11 +1313,16 @@ bpt_cmds(void)
struct bpt *bp;
 
cmd = inchar();
+
switch (cmd) {
 #ifndef CONFIG_PPC_8xx
static const char badaddr[] = "Only kernel addresses are permitted for 
breakpoints\n";
int mode;
case 'd':   /* bd - hardware data breakpoint */
+   if (xmon_is_ro) {
+   printf(xmon_ro_msg);
+   break;
+   }
if (!ppc_breakpoint_available()) {
printf("Hardware data breakpoint not supported on this 
cpu\n");
break;
@@ -1349,6 +1350,10 @@ bpt_cmds(void)
break;
 
case 'i':   /* bi - hardware instr breakpoint */
+   if (xmon_is_ro) {
+   printf(xmon_ro_msg);
+   break;
+   }
if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
printf("Hardware instruction breakpoint "
   "not supported on this cpu\n");
@@ -1372,6 +1377,10 @@ bpt_cmds(void)
 #endif
 
case 'c':
+   if (xmon_is_ro) {
+   printf(xmon_ro_msg);
+   break;
+   }
if (!scanhex(&a)) {
/* clear all breakpoints */
for (i = 0; i < NBPTS; ++i)
@@ -1407,7 +1416,7 @@ bpt_cmds(void)
break;
}
termch = cmd;
-   if (!scanhex(&a)) {
+   if (xmon_is_ro || !scanhex(&a)) {
/* print all breakpoints */
printf("   typeaddress\n");
if (dabr.enabled) {
-- 
2.22.0



Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro

2019-08-14 Thread Segher Boessenkool
Hi Christophe,

On Tue, Aug 13, 2019 at 09:59:35AM +, Christophe Leroy wrote:
> + rldicr  \r, \r, 32, 31

Could you please write this as
	sldi	\r, \r, 32
?  It's much easier to read, imo (it's the exact same instruction).

You can do a lot cheaper sequences if you have a temporary reg, as well
(longest path of 3 insns instead of 5):
	lis	rt,A
	ori	rt,rt,B
	lis	rd,C
	ori	rd,rd,D
	rldimi	rd,rt,32,0
to load ABCD.


Segher


[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #24 from Christophe Leroy (christophe.le...@c-s.fr) ---
It confirms what I suspected: due to some debug options, kzalloc() doesn't
provide aligned areas.

In __load_free_space_cache(), can you replace
e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
by
e->bitmap = (void *)__get_free_page(GFP_NOFS | __GFP_ZERO);

and do the same in insert_into_bitmap()?

Then replace the three kfree() calls which free bitmaps with something like
free_page((unsigned long)entry->bitmap)

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property in EP driver.

2019-08-14 Thread Lorenzo Pieralisi
I asked you to remove the period at the end of the patch $SUBJECT and
you did not; either you do not read what I write, or you should explain
to me what's going on.

On Wed, Aug 14, 2019 at 10:03:29AM +0800, Xiaowei Bao wrote:
> The PCIe controller of layerscape just have 4 BARs, BAR0 and BAR1
> is 32bit, BAR2 and BAR4 is 64bit, this is determined by hardware,
> so set the bar_fixed_64bit with 0x14.
> 
> Signed-off-by: Xiaowei Bao 

Kishon ACK'ed this patch and you have not carried his tag.

I will make these changes but that's the last time I do that
for you.

Lorenzo

> ---
> v2:
>  - Replace value 0x14 with a macro.
> v3:
>  - No change.
> v4:
>  - send the patch again with '--to'.
> v5:
>  - fix the commit message.
> v6:
>  - remove the [EXT] tag of the $SUBJECT in email.
> 
>  drivers/pci/controller/dwc/pci-layerscape-ep.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
> b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> index be61d96..ca9aa45 100644
> --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> @@ -44,6 +44,7 @@ static const struct pci_epc_features ls_pcie_epc_features = 
> {
>   .linkup_notifier = false,
>   .msi_capable = true,
>   .msix_capable = false,
> + .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
>  };
>  
>  static const struct pci_epc_features*
> -- 
> 2.9.5
> 


Re: [PATCH v9 7/7] powerpc: add machine check safe copy_to_user

2019-08-14 Thread Balbir Singh



On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
> Use  memcpy_mcsafe() implementation to define copy_to_user_mcsafe()
> 
> Signed-off-by: Santosh Sivaraj 
> ---
>  arch/powerpc/Kconfig   |  1 +
>  arch/powerpc/include/asm/uaccess.h | 14 ++
>  2 files changed, 15 insertions(+)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 77f6ebf97113..4316e36095a2 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -137,6 +137,7 @@ config PPC
>   select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
> !RELOCATABLE && !HIBERNATION)
>   select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
>   select ARCH_HAS_UACCESS_FLUSHCACHE  if PPC64
> + select ARCH_HAS_UACCESS_MCSAFE  if PPC64
>   select ARCH_HAS_UBSAN_SANITIZE_ALL
>   select ARCH_HAVE_NMI_SAFE_CMPXCHG
>   select ARCH_KEEP_MEMBLOCK
> diff --git a/arch/powerpc/include/asm/uaccess.h 
> b/arch/powerpc/include/asm/uaccess.h
> index 8b03eb44e876..15002b51ff18 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -387,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void __user 
> *to,
>   return ret;
>  }
>  
> +static __always_inline unsigned long __must_check
> +copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
> +{
> + if (likely(check_copy_size(from, n, true))) {
> + if (access_ok(to, n)) {
> + allow_write_to_user(to, n);
> + n = memcpy_mcsafe((void *)to, from, n);
> + prevent_write_to_user(to, n);
> + }
> + }
> +
> + return n;

Do we always return n independent of the check_copy_size return value and 
access_ok return values?

Balbir Singh.

> +}
> +
>  extern unsigned long __clear_user(void __user *addr, unsigned long size);
>  
>  static inline unsigned long clear_user(void __user *addr, unsigned long size)
> 


[PATCH v2 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro

2019-08-14 Thread Christophe Leroy
Today LOAD_REG_IMMEDIATE() is a basic #define which loads all
parts of a value into a register, including the parts that are zero.

This means always 2 instructions on PPC32 and always 5 instructions
on PPC64. And those instructions cannot run in parallel as they are
all updating the same register.

Ex: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:

3c 20 00 00 lis r1,0
60 21 00 00 ori r1,r1,0
78 21 07 c6 rldicr  r1,r1,32,31
64 21 00 00 oris    r1,r1,0
60 21 40 00 ori r1,r1,16384

Rewrite LOAD_REG_IMMEDIATE() with GAS macros in order to skip
the parts that are zero.

Rename existing LOAD_REG_IMMEDIATE() as LOAD_REG_IMMEDIATE_SYM()
and use that one for loading value of symbols which are not known
at compile time.

Now LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:

38 20 40 00 li  r1,16384

Signed-off-by: Christophe Leroy 
---
 v2: Fixed the test from (\x) & 0xffffffff00000000 to (\x) >= 0x80000000 || (\x) <
-0x80000000 in __LOAD_REG_IMMEDIATE()

 arch/powerpc/include/asm/ppc_asm.h   | 42 +++-
 arch/powerpc/kernel/exceptions-64e.S | 10 -
 arch/powerpc/kernel/head_64.S|  2 +-
 3 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index e0637730a8e7..bc1385b2f0aa 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -311,13 +311,43 @@ GLUE(.,name):
addis   reg,reg,(name - 0b)@ha; \
	addi	reg,reg,(name - 0b)@l;
 
-#ifdef __powerpc64__
-#ifdef HAVE_AS_ATHIGH
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
 #define __AS_ATHIGH high
 #else
 #define __AS_ATHIGH h
 #endif
-#define LOAD_REG_IMMEDIATE(reg,expr)   \
+
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+   .if (\x) >= 0x8000 || (\x) < -0x8000
+   lis \r, (\x)@__AS_ATHIGH
+   .if (\x) & 0xffff != 0
+   ori \r, \r, (\x)@l
+   .endif
+   .else
+   li \r, (\x)@l
+   .endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+   .if (\x) >= 0x80000000 || (\x) < -0x80000000
+   __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+   rldicr  \r, \r, 32, 31
+   .if (\x) & 0xffff0000 != 0
+   oris \r, \r, (\x)@__AS_ATHIGH
+   .endif
+   .if (\x) & 0xffff != 0
+   ori \r, \r, (\x)@l
+   .endif
+   .else
+   __LOAD_REG_IMMEDIATE_32 \r, \x
+   .endif
+.endm
+
+#ifdef __powerpc64__
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr)   \
lis reg,(expr)@highest; \
ori reg,reg,(expr)@higher;  \
rldicr  reg,reg,32,31;  \
@@ -335,11 +365,13 @@ GLUE(.,name):
 
 #else /* 32-bit */
 
-#define LOAD_REG_IMMEDIATE(reg,expr)   \
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr)   \
lis reg,(expr)@ha;  \
	addi	reg,reg,(expr)@l;
 
-#define LOAD_REG_ADDR(reg,name)	LOAD_REG_IMMEDIATE(reg, name)
+#define LOAD_REG_ADDR(reg,name)	LOAD_REG_IMMEDIATE_SYM(reg, name)
 
 #define LOAD_REG_ADDRBASE(reg, name)   lis reg,name@ha
 #define ADDROFF(name)  name@l
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..898aae6da167 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -751,8 +751,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld  r14,interrupt_base_book3e@got(r15)
ld  r15,__end_interrupts@got(r15)
 #else
-   LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
-   LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+   LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+   LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
 #endif
cmpld   cr0,r10,r14
cmpld   cr1,r10,r15
@@ -821,8 +821,8 @@ kernel_dbg_exc:
ld  r14,interrupt_base_book3e@got(r15)
ld  r15,__end_interrupts@got(r15)
 #else
-   LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
-   LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+   LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+   LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
 #endif
cmpld   cr0,r10,r14
cmpld   cr1,r10,r15
@@ -1449,7 +1449,7 @@ a2_tlbinit_code_start:
 a2_tlbinit_after_linear_map:
 
/* Now we branch the new virtual address mapped by this entry */
-   LOAD_REG_IMMEDIATE(r3,1f)
+   LOAD_REG_IMMEDIATE_SYM(r3,1f)
mtctr   r3
bctr
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 91d297e696dd..1fd44761e997 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -635,7 +635,7 @@ 

Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property in EP driver.

2019-08-14 Thread Lorenzo Pieralisi
On Wed, Aug 14, 2019 at 09:48:00AM +, Xiaowei Bao wrote:
> 
> 
> > -Original Message-
> > From: Lorenzo Pieralisi 
> > Sent: 2019年8月14日 17:30
> > To: Xiaowei Bao 
> > Cc: M.h. Lian ; Mingkai Hu
> > ; Roy Zang ;
> > bhelg...@google.com; linuxppc-dev@lists.ozlabs.org;
> > linux-...@vger.kernel.org; linux-arm-ker...@lists.infradead.org;
> > linux-ker...@vger.kernel.org
> > Subject: Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property
> > in EP driver.

Do not quote the email header in your replies.

> > I asked you to remove the period at the end of the patch $SUBJECT and you
> > did not; either you do not read what I write, or you should explain to me
> > what's going on.
> Sorry, I didn't understand the meaning of period correctly before. 
> > 
> > On Wed, Aug 14, 2019 at 10:03:29AM +0800, Xiaowei Bao wrote:
> > > The PCIe controller of layerscape just have 4 BARs, BAR0 and BAR1 is
> > > 32bit, BAR2 and BAR4 is 64bit, this is determined by hardware, so set
> > > the bar_fixed_64bit with 0x14.
> > >
> > > Signed-off-by: Xiaowei Bao 
> > 
> > Kishon ACK'ed this patch and you have not carried his tag.
> > 
> > I will make these changes but that's the last time I do that for you.
> Thanks a lot, you mean that I don't need to send the v7 patch and you will
> help me to correct this patch, yes? Thanks a lot for your help about the
> rules of the upstream. I will correct this error next time. ^.^

I fixed that up and pushed out, pci/layerscape, for v5.4.

Thanks,
Lorenzo

> > Lorenzo
> > 
> > > ---
> > > v2:
> > >  - Replace value 0x14 with a macro.
> > > v3:
> > >  - No change.
> > > v4:
> > >  - send the patch again with '--to'.
> > > v5:
> > >  - fix the commit message.
> > > v6:
> > >  - remove the [EXT] tag of the $SUBJECT in email.
> > >
> > >  drivers/pci/controller/dwc/pci-layerscape-ep.c | 1 +
> > >  1 file changed, 1 insertion(+)
> > >
> > > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > > b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > > index be61d96..ca9aa45 100644
> > > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > > @@ -44,6 +44,7 @@ static const struct pci_epc_features
> > ls_pcie_epc_features = {
> > >   .linkup_notifier = false,
> > >   .msi_capable = true,
> > >   .msix_capable = false,
> > > + .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
> > >  };
> > >
> > >  static const struct pci_epc_features*
> > > --
> > > 2.9.5
> > >


Re: [PATCH v4 14/25] powernv/fadump: process the crashdump by exporting it as /proc/vmcore

2019-08-14 Thread Mahesh J Salgaonkar
On 2019-07-16 17:03:38 Tue, Hari Bathini wrote:
> Add support in the kernel to process the crash'ed kernel's memory
> preserved during MPIPL and export it as /proc/vmcore file for the
> userland scripts to filter and analyze it later.
> 
> Signed-off-by: Hari Bathini 
> ---
>  arch/powerpc/platforms/powernv/opal-fadump.c |  190 
> ++
>  1 file changed, 187 insertions(+), 3 deletions(-)
> 
[...]
> + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
> + if ((ret != OPAL_SUCCESS) || !addr) {
> + pr_err("Failed to get Kernel metadata (%lld)\n", ret);
> + return 1;
> + }
> +
> + addr = be64_to_cpu(addr);
> + pr_debug("Kernel metadata addr: %llx\n", addr);
> +
> + opal_fdm_active = __va(addr);
> + r_opal_fdm_active = (void *)addr;
> + if (r_opal_fdm_active->version != OPAL_FADUMP_VERSION) {
> + pr_err("FADump active but version (%u) unsupported!\n",
> +r_opal_fdm_active->version);
> + return 1;
> + }
> +
> + /* Kernel regions not registered with f/w  for MPIPL */
> + if (r_opal_fdm_active->registered_regions == 0) {
> + opal_fdm_active = NULL;

What about the partial dump capture scenario? What if opal crashes while
the kernel was in the middle of registering ranges? We may have a partial
dump captured which won't be useful.
e.g. if we have a total of 4 ranges to be registered and opal crashes
after successful registration of only 2 ranges with 2 pending, we will get
a partial dump which needs to be ignored.

I think the check should compare registered_regions against the total
number of regions. What do you think?
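A sketch of the check being suggested (the name of the total-region-count
field is hypothetical):

```
/* Treat a partially registered dump as no dump at all. */
if (r_opal_fdm_active->registered_regions !=
    r_opal_fdm_active->region_cnt) {	/* region_cnt: assumed field name */
	opal_fdm_active = NULL;
	return 1;
}
```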

Thanks,
-Mahesh.

> + return 1;
> + }
> +
> + pr_info("Firmware-assisted dump is active.\n");
> + fadump_conf->dump_active = 1;
> + opal_fadump_get_config(fadump_conf, r_opal_fdm_active);
> + }
> +
>   return 1;
>  }
> 

-- 
Mahesh J Salgaonkar



Re: [PATCH v4 11/25] powernv/fadump: register kernel metadata address with opal

2019-08-14 Thread Mahesh Jagannath Salgaonkar
On 8/14/19 12:36 PM, Hari Bathini wrote:
> 
> 
> On 13/08/19 4:11 PM, Mahesh J Salgaonkar wrote:
>> On 2019-07-16 17:03:15 Tue, Hari Bathini wrote:
>>> OPAL allows registering address with it in the first kernel and
>>> retrieving it after MPIPL. Setup kernel metadata and register its
>>> address with OPAL to use it for processing the crash dump.
>>>
>>> Signed-off-by: Hari Bathini 
>>> ---
>>>  arch/powerpc/kernel/fadump-common.h  |4 +
>>>  arch/powerpc/kernel/fadump.c |   65 ++-
>>>  arch/powerpc/platforms/powernv/opal-fadump.c |   73 
>>> ++
>>>  arch/powerpc/platforms/powernv/opal-fadump.h |   37 +
>>>  arch/powerpc/platforms/pseries/rtas-fadump.c |   32 +--
>>>  5 files changed, 177 insertions(+), 34 deletions(-)
>>>  create mode 100644 arch/powerpc/platforms/powernv/opal-fadump.h
>>>
>> [...]
>>> @@ -346,30 +349,42 @@ int __init fadump_reserve_mem(void)
>>>  * use memblock_find_in_range() here since it doesn't allocate
>>>  * from bottom to top.
>>>  */
>>> -   for (base = fw_dump.boot_memory_size;
>>> -base <= (memory_boundary - size);
>>> -base += size) {
>>> +   while (base <= (memory_boundary - size)) {
>>> if (memblock_is_region_memory(base, size) &&
>>> !memblock_is_region_reserved(base, size))
>>> break;
>>> +
>>> +   base += size;
>>> }
>>> -   if ((base > (memory_boundary - size)) ||
>>> -   memblock_reserve(base, size)) {
>>> +
>>> +   if (base > (memory_boundary - size)) {
>>> +   pr_err("Failed to find memory chunk for reservation\n");
>>> +   goto error_out;
>>> +   }
>>> +   fw_dump.reserve_dump_area_start = base;
>>> +
>>> +   /*
>>> +* Calculate the kernel metadata address and register it with
>>> +* f/w if the platform supports.
>>> +*/
> +   if (fw_dump.ops->setup_kernel_metadata(&fw_dump) < 0)
>>> +   goto error_out;
>>
>> I see setup_kernel_metadata() registers the metadata address with opal
>> without having any minimum data initialized in it. Secondly, why can't
>> this wait until registration? I think we should defer this until fadump
>> registration.
> 
> If setting up metadata address fails (it should ideally not fail, but..), 
> everything else
> is useless. 

That's less likely... but the same is true of opal_mpipl_update() as well.

> So, we might as well try that early and fall back to KDump in case of an 
> error..

ok. Yeah but not uninitialized metadata.

> 
>> What if kernel crashes before metadata area is initialized ?
> 
> registered_regions would be '0'. So, it is treated as fadump is not 
> registered case.
> Let me
> initialize metadata explicitly before registering the address with f/w to 
> avoid any assumption...

Do you want to do that before memblock reservation ? Should we move this
to setup_fadump() ?

Thanks,
-Mahesh.

> 
>>
>>> +
>>> +   if (memblock_reserve(base, size)) {
>>> pr_err("Failed to reserve memory\n");
>>> -   return 0;
>>> +   goto error_out;
>>> }
>> [...]
>>> -
>>>  static struct fadump_ops rtas_fadump_ops = {
>>> -   .init_fadump_mem_struct = rtas_fadump_init_mem_struct,
>>> -   .register_fadump= rtas_fadump_register_fadump,
>>> -   .unregister_fadump  = rtas_fadump_unregister_fadump,
>>> -   .invalidate_fadump  = rtas_fadump_invalidate_fadump,
>>> -   .process_fadump = rtas_fadump_process_fadump,
>>> -   .fadump_region_show = rtas_fadump_region_show,
>>> -   .fadump_trigger = rtas_fadump_trigger,
>>> +   .init_fadump_mem_struct = rtas_fadump_init_mem_struct,
>>> +   .get_kernel_metadata_size   = rtas_fadump_get_kernel_metadata_size,
>>> +   .setup_kernel_metadata  = rtas_fadump_setup_kernel_metadata,
>>> +   .register_fadump= rtas_fadump_register_fadump,
>>> +   .unregister_fadump  = rtas_fadump_unregister_fadump,
>>> +   .invalidate_fadump  = rtas_fadump_invalidate_fadump,
>>> +   .process_fadump = rtas_fadump_process_fadump,
>>> +   .fadump_region_show = rtas_fadump_region_show,
>>> +   .fadump_trigger = rtas_fadump_trigger,
>>
>> Can you make the tab space changes in your previous patch where these
>> were initially introduced ? So that this patch can only show new members
>> that are added.
> 
> done.
> 
> Thanks
> Hari
> 



[PATCH] powerpc/futex: fix warning: 'oldval' may be used uninitialized in this function

2019-08-14 Thread Christophe Leroy
  CC  kernel/futex.o
kernel/futex.c: In function 'do_futex':
kernel/futex.c:1676:17: warning: 'oldval' may be used uninitialized in this 
function [-Wmaybe-uninitialized]
   return oldval == cmparg;
 ^
kernel/futex.c:1651:6: note: 'oldval' was declared here
  int oldval, ret;
  ^

This is because arch_futex_atomic_op_inuser() only sets *oval
when ret is zero, and GCC doesn't see that the caller uses oldval only
when ret is zero.

Anyway, the non-zero ret path is an error path that won't suffer from
setting *oval, and as *oval is a local variable in
futex_atomic_op_inuser() it will have no impact.
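For reference, a condensed sketch of the call site in kernel/futex.c that
trips the warning (simplified from the lines quoted above):

```
int oldval, ret;

ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
if (ret)
	return ret;

/* GCC cannot prove oldval was written whenever ret == 0. */
return oldval == cmparg;
```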

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/futex.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 3a6aa57b9d90..eea28ca679db 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -60,8 +60,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int 
oparg, int *oval,
 
pagefault_enable();
 
-   if (!ret)
-   *oval = oldval;
+   *oval = oldval;
 
prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
-- 
2.13.3



Re: [PATCH v9 4/7] extable: Add function to search only kernel exception table

2019-08-14 Thread Santosh Sivaraj
Balbir Singh  writes:

> On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
>> Certain architecture specific operating modes (e.g., in powerpc machine
>> check handler that is unable to access vmalloc memory), the
>> search_exception_tables cannot be called because it also searches the
>> module exception tables if entry is not found in the kernel exception
>> table.
>> 
>> Cc: Thomas Gleixner 
>> Cc: Ingo Molnar 
>> Cc: Nicholas Piggin 
>> Signed-off-by: Santosh Sivaraj 
>> Reviewed-by: Nicholas Piggin 
>> ---
>>  include/linux/extable.h |  2 ++
>>  kernel/extable.c| 11 +--
>>  2 files changed, 11 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/linux/extable.h b/include/linux/extable.h
>> index 41c5b3a25f67..81ecfaa83ad3 100644
>> --- a/include/linux/extable.h
>> +++ b/include/linux/extable.h
>> @@ -19,6 +19,8 @@ void trim_init_extable(struct module *m);
>>  
>>  /* Given an address, look for it in the exception tables */
>>  const struct exception_table_entry *search_exception_tables(unsigned long 
>> add);
>> +const struct exception_table_entry *
>> +search_kernel_exception_table(unsigned long addr);
>> 
>
> Can we find a better name? search_kernel still sounds like all of the
> kernel. Can we rename it to search_kernel_linear_map_extable?

I thought search_kernel_exception_table and search_module_extables were
unambiguous enough :-) But if you think the name will be confusing, I can
change that as suggested.

Thanks,
Santosh

>
>  
>>  #ifdef CONFIG_MODULES
>>  /* For extable.c to search modules' exception tables. */
>> diff --git a/kernel/extable.c b/kernel/extable.c
>> index e23cce6e6092..f6c9406eec7d 100644
>> --- a/kernel/extable.c
>> +++ b/kernel/extable.c
>> @@ -40,13 +40,20 @@ void __init sort_main_extable(void)
>>  }
>>  }
>>  
>> +/* Given an address, look for it in the kernel exception table */
>> +const
>> +struct exception_table_entry *search_kernel_exception_table(unsigned long 
>> addr)
>> +{
>> +return search_extable(__start___ex_table,
>> +  __stop___ex_table - __start___ex_table, addr);
>> +}
>> +
>>  /* Given an address, look for it in the exception tables. */
>>  const struct exception_table_entry *search_exception_tables(unsigned long 
>> addr)
>>  {
>>  const struct exception_table_entry *e;
>>  
>> -e = search_extable(__start___ex_table,
>> -   __stop___ex_table - __start___ex_table, addr);
>> +e = search_kernel_exception_table(addr);
>>  if (!e)
>>  e = search_module_extables(addr);
>>  return e;
>> 

-- 
if (( RANDOM % 2 )); then ~/bin/cookie; else fortune -s; fi
#cat ~/notes/quotes | sort -R | head -1 | cut -f2- -d " "


Re: [PATCH v3 08/16] powerpc/pseries/svm: Use shared memory for LPPACA structures

2019-08-14 Thread Michael Ellerman
Thiago Jung Bauermann  writes:
> Michael Ellerman  writes:
>> Thiago Jung Bauermann  writes:
>>> From: Anshuman Khandual 
>>>
>>> LPPACA structures need to be shared with the host. Hence they need to be in
>>> shared memory. Instead of allocating individual chunks of memory for a
>>> given structure from memblock, a contiguous chunk of memory is allocated
>>> and then converted into shared memory. Subsequent allocation requests will
>>> come from the contiguous chunk which will be always shared memory for all
>>> structures.
>>>
>>> While we are able to use a kmem_cache constructor for the Debug Trace Log,
>>> LPPACAs are allocated very early in the boot process (before SLUB is
>>> available) so we need to use a simpler scheme here.
>>>
>>> Introduce helper is_svm_platform() which uses the S bit of the MSR to tell
>>> whether we're running as a secure guest.
>>>
>>> Signed-off-by: Anshuman Khandual 
>>> Signed-off-by: Thiago Jung Bauermann 
>>> ---
>>>  arch/powerpc/include/asm/svm.h | 26 
>>>  arch/powerpc/kernel/paca.c | 43 +-
>>>  2 files changed, 68 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
>>> new file mode 100644
>>> index ..fef3740f46a6
>>> --- /dev/null
>>> +++ b/arch/powerpc/include/asm/svm.h
>>> @@ -0,0 +1,26 @@
>>> +/* SPDX-License-Identifier: GPL-2.0+ */
>>> +/*
>>> + * SVM helper functions
>>> + *
>>> + * Copyright 2019 Anshuman Khandual, IBM Corporation.
>>
>> Are we sure this copyright date is correct?
>
> I may be confused about which year the copyright refers to. I thought it
> was the year when the patch was committed. If it is the first time the
> patch was published then this one should be 2018.

I'm not a lawyer etc. but AIUI the date above is about the authorship,
ie. when it was originally written, not when it was published or
committed.

In general I don't think it matters too much, but in this case I'm
pretty sure Anshuman can't have possibly written it in 2019 on behalf of
IBM :)

So we can either change the date to 2018, or drop his name and just say
it's copyright 2019 by IBM.

cheers


[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #17 from Christophe Leroy (christophe.le...@c-s.fr) ---
Created attachment 284379
  --> https://bugzilla.kernel.org/attachment.cgi?id=284379&action=edit
Patch to trace misaligned destination in copy_page() on PPC32

Can you try the attached patch to trace misaligned destination on copy_page() ?

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: [PATCH v9 1/7] powerpc/mce: Schedule work from irq_work

2019-08-14 Thread Balbir Singh



On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
> schedule_work() cannot be called from MCE exception context as MCE can
> interrupt even in interrupt disabled context.
> 
> fixes: 733e4a4c ("powerpc/mce: hookup memory_failure for UE errors")
> Suggested-by: Mahesh Salgaonkar 
> Signed-off-by: Santosh Sivaraj 
> Cc: sta...@vger.kernel.org # v4.15+
> ---

Acked-by: Balbir Singh 


Re: [PATCH v9 4/7] extable: Add function to search only kernel exception table

2019-08-14 Thread Balbir Singh



On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
> Certain architecture specific operating modes (e.g., in powerpc machine
> check handler that is unable to access vmalloc memory), the
> search_exception_tables cannot be called because it also searches the
> module exception tables if entry is not found in the kernel exception
> table.
> 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: Nicholas Piggin 
> Signed-off-by: Santosh Sivaraj 
> Reviewed-by: Nicholas Piggin 
> ---
>  include/linux/extable.h |  2 ++
>  kernel/extable.c| 11 +--
>  2 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/extable.h b/include/linux/extable.h
> index 41c5b3a25f67..81ecfaa83ad3 100644
> --- a/include/linux/extable.h
> +++ b/include/linux/extable.h
> @@ -19,6 +19,8 @@ void trim_init_extable(struct module *m);
>  
>  /* Given an address, look for it in the exception tables */
>  const struct exception_table_entry *search_exception_tables(unsigned long 
> add);
> +const struct exception_table_entry *
> +search_kernel_exception_table(unsigned long addr);
> 

Can we find a better name? search_kernel still sounds like all of the
kernel. Can we rename it to search_kernel_linear_map_extable?

 
>  #ifdef CONFIG_MODULES
>  /* For extable.c to search modules' exception tables. */
> diff --git a/kernel/extable.c b/kernel/extable.c
> index e23cce6e6092..f6c9406eec7d 100644
> --- a/kernel/extable.c
> +++ b/kernel/extable.c
> @@ -40,13 +40,20 @@ void __init sort_main_extable(void)
>   }
>  }
>  
> +/* Given an address, look for it in the kernel exception table */
> +const
> +struct exception_table_entry *search_kernel_exception_table(unsigned long 
> addr)
> +{
> + return search_extable(__start___ex_table,
> +   __stop___ex_table - __start___ex_table, addr);
> +}
> +
>  /* Given an address, look for it in the exception tables. */
>  const struct exception_table_entry *search_exception_tables(unsigned long 
> addr)
>  {
>   const struct exception_table_entry *e;
>  
> - e = search_extable(__start___ex_table,
> -__stop___ex_table - __start___ex_table, addr);
> + e = search_kernel_exception_table(addr);
>   if (!e)
>   e = search_module_extables(addr);
>   return e;
> 


Re: [PATCH v9 6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-08-14 Thread Balbir Singh



On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
> If we take a UE on one of the instructions with a fixup entry, set nip
> to continue execution at the fixup entry. Stop processing the event
> further or print it.
> 
> Co-developed-by: Reza Arbab 
> Signed-off-by: Reza Arbab 
> Cc: Mahesh Salgaonkar 
> Signed-off-by: Santosh Sivaraj 
> ---

Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it should 
still have my author tag and signed-off-by

Balbir Singh

>  arch/powerpc/include/asm/mce.h  |  4 +++-
>  arch/powerpc/kernel/mce.c   | 16 
>  arch/powerpc/kernel/mce_power.c | 15 +--
>  3 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index f3a6036b6bc0..e1931c8c2743 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -122,7 +122,8 @@ struct machine_check_event {
>   enum MCE_UeErrorType ue_error_type:8;
>   u8  effective_address_provided;
>   u8  physical_address_provided;
> - u8  reserved_1[5];
> + u8  ignore_event;
> + u8  reserved_1[4];
>   u64 effective_address;
>   u64 physical_address;
>   u8  reserved_2[8];
> @@ -193,6 +194,7 @@ struct mce_error_info {
>   enum MCE_Initiator  initiator:8;
>   enum MCE_ErrorClass error_class:8;
>   boolsync_error;
> + boolignore_event;
>  };
>  
>  #define MAX_MC_EVT   100
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index a3b122a685a5..ec4b3e1087be 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>   if (phys_addr != ULONG_MAX) {
>   mce->u.ue_error.physical_address_provided = true;
>   mce->u.ue_error.physical_address = phys_addr;
> + mce->u.ue_error.ignore_event = mce_err->ignore_event;
>   machine_check_ue_event(mce);
>   }
>   }
> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct 
> *work)
>   /*
>* This should probably queued elsewhere, but
>* oh! well
> +  *
> +  * Don't report this machine check because the caller has
> +  * asked us to ignore the event; it has a fixup handler which
> +  * will do the appropriate error handling and reporting.
>*/
>   if (evt->error_type == MCE_ERROR_TYPE_UE) {
> + if (evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_ue_count);
> + continue;
> + }
> +
>   if (evt->u.ue_error.physical_address_provided) {
>   unsigned long pfn;
>  
> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct 
> irq_work *work)
>   while (__this_cpu_read(mce_queue_count) > 0) {
>   index = __this_cpu_read(mce_queue_count) - 1;
>   evt = this_cpu_ptr(&mce_event_queue[index]);
> +
> + if (evt->error_type == MCE_ERROR_TYPE_UE &&
> + evt->u.ue_error.ignore_event) {
> + __this_cpu_dec(mce_queue_count);
> + continue;
> + }
>   machine_check_print_event_info(evt, false, false);
>   __this_cpu_dec(mce_queue_count);
>   }
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index e74816f045f8..1dd87f6f5186 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -11,6 +11,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -18,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * Convert an address related to an mm to a physical address.
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>   return 0;
>  }
>  
> -static long mce_handle_ue_error(struct pt_regs *regs)
> +static long mce_handle_ue_error(struct pt_regs *regs,
> + struct mce_error_info *mce_err)
>  {
>   long handled = 0;
> + const struct exception_table_entry *entry;
> +
> + entry = search_kernel_exception_table(regs->nip);
> + if (entry) {
> + mce_err->ignore_event = true;
> + regs->nip = extable_fixup(entry);
> + return 1;
> + }
>  
>   /*
>* On specific SCOM read via MMIO we may get a machine check
> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs 
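
A rough, self-contained sketch of the fixup mechanism the patch relies on
(simplified stand-ins, not the kernel's actual structures): each exception
table entry pairs a possibly-faulting instruction address with a recovery
address, and the handler redirects nip to the latter instead of reporting
the event.

#include <stddef.h>
#include <stdint.h>

struct extable_entry { uintptr_t insn, fixup; };	/* simplified */
struct regs { uintptr_t nip; };

static const struct extable_entry *
find_entry(const struct extable_entry *tbl, size_t n, uintptr_t addr)
{
	for (size_t i = 0; i < n; i++)
		if (tbl[i].insn == addr)
			return &tbl[i];
	return NULL;
}

/* Mirrors the patch: on a UE with a fixup, resume at the fixup and mark
 * the event so the queued reporting paths skip it. */
static int handle_ue(struct regs *regs, const struct extable_entry *tbl,
		     size_t n, int *ignore_event)
{
	const struct extable_entry *e = find_entry(tbl, n, regs->nip);

	if (!e)
		return 0;		/* not handled, report as usual */
	*ignore_event = 1;
	regs->nip = e->fixup;		/* continue at the recovery code */
	return 1;
}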

RE: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property in EP driver.

2019-08-14 Thread Xiaowei Bao


> -Original Message-
> From: Lorenzo Pieralisi 
> Sent: 2019年8月14日 17:30
> To: Xiaowei Bao 
> Cc: M.h. Lian ; Mingkai Hu
> ; Roy Zang ;
> bhelg...@google.com; linuxppc-dev@lists.ozlabs.org;
> linux-...@vger.kernel.org; linux-arm-ker...@lists.infradead.org;
> linux-ker...@vger.kernel.org
> Subject: Re: [PATCHv6 1/2] PCI: layerscape: Add the bar_fixed_64bit property
> in EP driver.
> 
> I asked you to remove the period at the end of the patch $SUBJECT and you
> did not, either you do not read what I write or explain me what's going on.
Sorry, I didn't understand the meaning of "period" correctly before. 
> 
> On Wed, Aug 14, 2019 at 10:03:29AM +0800, Xiaowei Bao wrote:
> > The layerscape PCIe controller has just 4 BARs: BAR0 and BAR1 are
> > 32-bit, BAR2 and BAR4 are 64-bit. This is determined by hardware, so set
> > bar_fixed_64bit to 0x14.
> >
> > Signed-off-by: Xiaowei Bao 
> 
> Kishon ACK'ed this patch and you have not carried his tag.
> 
> I will make these changes but that's the last time I do that for you.
Thanks a lot. You mean that I don't need to send the v7 patch and you will 
help me correct this patch, yes? Thanks a lot for your help with the rules 
of upstream. I will correct this error next time. ^.^ 
> 
> Lorenzo
> 
> > ---
> > v2:
> >  - Replace value 0x14 with a macro.
> > v3:
> >  - No change.
> > v4:
> >  - send the patch again with '--to'.
> > v5:
> >  - fix the commit message.
> > v6:
> >  - remove the [EXT] tag of the $SUBJECT in email.
> >
> >  drivers/pci/controller/dwc/pci-layerscape-ep.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > index be61d96..ca9aa45 100644
> > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > @@ -44,6 +44,7 @@ static const struct pci_epc_features
> ls_pcie_epc_features = {
> > .linkup_notifier = false,
> > .msi_capable = true,
> > .msix_capable = false,
> > +   .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
> >  };
> >
> >  static const struct pci_epc_features*
> > --
> > 2.9.5
> >
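
For reference, a quick standalone check that the macro form matches the raw
0x14 used in earlier revisions, assuming the usual enum pci_barno numbering
(BAR_2 == 2, BAR_4 == 4):

#include <assert.h>

enum pci_barno { BAR_0, BAR_1, BAR_2, BAR_3, BAR_4, BAR_5 };	/* assumed */

int main(void)
{
	/* bits 2 and 4 set: 0b10100 == 0x14 */
	assert(((1 << BAR_2) | (1 << BAR_4)) == 0x14);
	return 0;
}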


Re: [PATCH 1/5] powerpc/ptdump: fix addresses display on PPC32

2019-08-14 Thread Christophe Leroy

Michael,

Le 14/08/2019 à 14:36, Christophe Leroy a écrit :

Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")
wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a
shift in the displayed addresses.

Let's revert that change to resync walk_pagetables()'s addr val and
pgd_t pointer for PPC32.

Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")


Either this patch or patch 2 of the series has to go into fixes.

If you prefer the next patch for fixes, then this one can be squashed into 
patch 3, which drops the PPC32 hacked definition of KERN_VIRT_START.


Christophe


Cc: sta...@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
  arch/powerpc/mm/ptdump/ptdump.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 6a88a9f585d4..3ad64fc11419 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -27,7 +27,7 @@
  #include "ptdump.h"
  
  #ifdef CONFIG_PPC32

-#define KERN_VIRT_STARTPAGE_OFFSET
+#define KERN_VIRT_START0
  #endif
  
  /*




[PATCH 2/5] powerpc/ptdump: fix walk_pagetables() address mismatch

2019-08-14 Thread Christophe Leroy
walk_pagetables() always walk the entire pgdir from address 0
but considers PAGE_OFFSET or KERN_VIRT_START as the starting
address of the walk, resulting in a possible mismatch in the
displayed addresses.

Ex: on PPC32, when KERN_VIRT_START was locally defined as
PAGE_OFFSET, ptdump displayed 0x80000000
instead of 0xc0000000 for the first kernel page,
because 0xc0000000 + 0xc0000000 = 0x80000000 when truncated to 32 bits.

Start the walk at st->start_address instead of starting at 0.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/ptdump/ptdump.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 3ad64fc11419..74ff2bff4ea0 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -299,17 +299,15 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, 
unsigned long start)
 
 static void walk_pagetables(struct pg_state *st)
 {
-   pgd_t *pgd = pgd_offset_k(0UL);
unsigned int i;
-   unsigned long addr;
-
-   addr = st->start_address;
+   unsigned long addr = st->start_address & PGDIR_MASK;
+   pgd_t *pgd = pgd_offset_k(addr);
 
/*
 * Traverse the linux pagetable structure and dump pages that are in
 * the hash pagetable.
 */
-   for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
+   for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += 
PGDIR_SIZE) {
if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd))
/* pgd exists */
walk_pud(st, pgd, addr);
-- 
2.13.3
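
The mismatch described above is plain 32-bit wraparound; a tiny standalone
check (the 0xc0000000 value is the usual PPC32 PAGE_OFFSET, assumed here):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t page_offset = 0xc0000000u;

	/* Walking from 0 but labelling from PAGE_OFFSET double-counts it:
	 * 0xc0000000 + 0xc0000000 wraps to 0x80000000 in 32 bits. */
	assert((uint32_t)(page_offset + page_offset) == 0x80000000u);
	return 0;
}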



use the generic DMA direct remap code on powerpc

2019-08-14 Thread Christoph Hellwig
Hi powerpc maintainers,

this patch converts powerpc to use the generic dma remapping code
for the uncached coherent allocation on non-coherent CPUs.

Christophe Leroy tested a slightly earlier version on ppc8xx.

Note that I plan to remove the need for the arch to call
dma_atomic_pool_init in this cycle, so either this needs to go in
through the dma-mapping tree (or a shared stable branch with it), or
we will need a small manual fixup in linux-next and when Linus merges
the latter of the two pull requests.


[PATCH 5/5] powerpc/ptdump: drop non vital #ifdefs

2019-08-14 Thread Christophe Leroy
hashpagetable.c is only compiled when CONFIG_PPC_BOOK3S_64 is
defined, so drop the test and its 'else' branch.

Use IS_ENABLED(CONFIG_PPC_PSERIES) instead of #ifdef, this allows the
code to be checked at any build. It is still optimised out by GCC.

Use IS_ENABLED(CONFIG_PPC_64K_PAGES) instead of #ifdef.

Use IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) instead of #ifdef.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/plpar_wrappers.h |  6 ++
 arch/powerpc/mm/ptdump/hashpagetable.c| 24 +---
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/plpar_wrappers.h 
b/arch/powerpc/include/asm/plpar_wrappers.h
index cff5a411e595..4497c8afb573 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -340,6 +340,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr)
 {
return 0;
 }
+
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+   unsigned long *ptes)
+{
+   return 0;
+}
 #endif /* CONFIG_PPC_PSERIES */
 
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c 
b/arch/powerpc/mm/ptdump/hashpagetable.c
index 72f0e4a3d839..a07278027c6f 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -237,7 +237,6 @@ static int native_find(unsigned long ea, int psize, bool 
primary, u64 *v, u64
return -1;
 }
 
-#ifdef CONFIG_PPC_PSERIES
 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 
*r)
 {
struct hash_pte ptes[4];
@@ -274,7 +273,6 @@ static int pseries_find(unsigned long ea, int psize, bool 
primary, u64 *v, u64 *
}
return -1;
 }
-#endif
 
 static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
unsigned long *lp_bits)
@@ -316,10 +314,9 @@ static void decode_r(int bps, unsigned long r, unsigned 
long *rpn, int *aps,
 static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
  u64 *r)
 {
-#ifdef CONFIG_PPC_PSERIES
-   if (firmware_has_feature(FW_FEATURE_LPAR))
+   if (IS_ENABLED(CONFIG_PPC_PSERIES) && 
firmware_has_feature(FW_FEATURE_LPAR))
return pseries_find(ea, psize, primary, v, r);
-#endif
+
return native_find(ea, psize, primary, v, r);
 }
 
@@ -386,12 +383,13 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, 
unsigned long start)
psize = mmu_vmalloc_psize;
else
psize = mmu_io_psize;
-#ifdef CONFIG_PPC_64K_PAGES
+
/* check for secret 4K mappings */
-   if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) ||
-   ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
+   if (IS_ENABLED(CONFIG_PPC_64K_PAGES) &&
+   ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO ||
+(pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
psize = mmu_io_psize;
-#endif
+
/* check for hashpte */
status = hpte_find(st, addr, psize);
 
@@ -469,9 +467,10 @@ static void walk_linearmapping(struct pg_state *st)
 
 static void walk_vmemmap(struct pg_state *st)
 {
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
struct vmemmap_backing *ptr = vmemmap_list;
 
+   if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+   return;
/*
 * Traverse the vmemmaped memory and dump pages that are in the hash
 * pagetable.
@@ -481,7 +480,6 @@ static void walk_vmemmap(struct pg_state *st)
ptr = ptr->list;
}
seq_puts(st->seq, "---[ vmemmap end ]---\n");
-#endif
 }
 
 static void populate_markers(void)
@@ -495,11 +493,7 @@ static void populate_markers(void)
address_markers[6].start_address = PHB_IO_END;
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_BOOK3S_64
address_markers[9].start_address =  H_VMEMMAP_START;
-#else
-   address_markers[9].start_address =  VMEMMAP_BASE;
-#endif
 }
 
 static int ptdump_show(struct seq_file *m, void *v)
-- 
2.13.3
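
A minimal sketch of the #ifdef-to-IS_ENABLED() conversion pattern this patch
applies (generic example, not the ptdump code itself): with IS_ENABLED() both
arms are parsed and type-checked on every build, and the dead arm is folded
away because the macro expands to a constant 0 or 1.

#include <linux/kconfig.h>	/* IS_ENABLED() */

/* Before: the pseries-only path is invisible to the compiler unless the
 * option is set, so breakage there goes unnoticed on other builds. */
static int lookup_old(void)
{
#ifdef CONFIG_PPC_PSERIES
	return 1;
#else
	return 0;
#endif
}

/* After: compiled everywhere, optimised out where the option is off. */
static int lookup_new(void)
{
	if (IS_ENABLED(CONFIG_PPC_PSERIES))
		return 1;
	return 0;
}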



[PATCH] powerpc: use the generic dma coherent remap allocator

2019-08-14 Thread Christoph Hellwig
This switches to using common code for the DMA allocations, including
potential use of the CMA allocator if configured.

Switching to the generic code enables DMA allocations from atomic
context, which is required by the DMA API documentation, and also
adds various other minor features drivers start relying upon.  It
also makes sure we have one tested code base for all architectures
that require uncached pte bits for coherent DMA allocations.

Another advantage is that consistent memory allocations now share
the general vmalloc pool instead of needing an explicit carve-out
from it.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/Kconfig |  12 -
 arch/powerpc/include/asm/book3s/32/pgtable.h |  12 +-
 arch/powerpc/include/asm/nohash/32/pgtable.h |  12 +-
 arch/powerpc/mm/dma-noncoherent.c| 318 +--
 arch/powerpc/mm/mem.c|   4 -
 arch/powerpc/mm/ptdump/ptdump.c  |   9 -
 arch/powerpc/platforms/Kconfig.cputype   |   2 +
 7 files changed, 17 insertions(+), 352 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 77f6ebf97113..7135e47390f3 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1139,18 +1139,6 @@ config TASK_SIZE
default "0x8000" if PPC_8xx
default "0xc000"
 
-config CONSISTENT_SIZE_BOOL
-   bool "Set custom consistent memory pool size"
-   depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
-   help
- This option allows you to set the size of the
- consistent memory pool.  This pool of virtual memory
- is used to make consistent memory allocations.
-
-config CONSISTENT_SIZE
-   hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
-   default "0x0020" if NOT_COHERENT_CACHE
-
 config PIN_TLB
bool "Pinned Kernel TLBs (860 ONLY)"
depends on ADVANCED_OPTIONS && PPC_8xx && \
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 838de59f6754..b6c7214113ab 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -148,21 +148,15 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, 
pgprot_t prot);
  */
 #include 
 
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP  PKMAP_BASE
-#else
-#define KVIRT_TOP  FIXADDR_START
-#endif
-
 /*
  * ioremap_bot starts at that address. Early ioremaps move down from there,
  * until mem_init() at which point this becomes the top of the vmalloc
  * and ioremap space
  */
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOPPKMAP_BASE
 #else
-#define IOREMAP_TOPKVIRT_TOP
+#define IOREMAP_TOPFIXADDR_START
 #endif
 
 /*
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0284f8f5305f..c3764638c27f 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -78,21 +78,15 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, 
pgprot_t prot);
  */
 #include 
 
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP  PKMAP_BASE
-#else
-#define KVIRT_TOP  FIXADDR_START
-#endif
-
 /*
  * ioremap_bot starts at that address. Early ioremaps move down from there,
  * until mem_init() at which point this becomes the top of the vmalloc
  * and ioremap space
  */
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOPPKMAP_BASE
 #else
-#define IOREMAP_TOPKVIRT_TOP
+#define IOREMAP_TOPFIXADDR_START
 #endif
 
 /*
diff --git a/arch/powerpc/mm/dma-noncoherent.c 
b/arch/powerpc/mm/dma-noncoherent.c
index c617282d5b2a..4272ca5e8159 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -4,310 +4,18 @@
  *Copyright (C) 2001 Dan Malek (dma...@jlc.net)
  *
  *  Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators.  Used for DMA devices that want to
- * share uncached memory with the processor core.  The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- * -- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
  */
 
-#include 
-#include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
 
-#include 
-
-/*
- * This address range defaults to a value that is safe for all
- * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
- 

[PATCH 1/3] powerpc/xmon: Check for HV mode when dumping XIVE info from OPAL

2019-08-14 Thread Cédric Le Goater
Currently, the xmon 'dx' command calls OPAL to dump the XIVE state in
the OPAL logs and also outputs some of the fields of the internal XIVE
structures in Linux. The OPAL calls can only be done on baremetal
(PowerNV) and they crash a pseries machine. Fix by checking the
hypervisor feature of the CPU.

Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/xmon/xmon.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 14e56c25879f..25d4adccf750 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2534,13 +2534,16 @@ static void dump_pacas(void)
 static void dump_one_xive(int cpu)
 {
unsigned int hwid = get_hard_smp_processor_id(cpu);
-
-   opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
-   opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
-   opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
-   opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
-   opal_xive_dump(XIVE_DUMP_VP, hwid);
-   opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
+   bool hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+   if (hv) {
+   opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
+   opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
+   opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
+   opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
+   opal_xive_dump(XIVE_DUMP_VP, hwid);
+   opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
+   }
 
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
-- 
2.21.0



Re: [PATCH 6/6] driver core: initialize a default DMA mask for platform device

2019-08-14 Thread Robin Murphy

On 11/08/2019 09:05, Christoph Hellwig wrote:

We still treat devices without a DMA mask as defaulting to 32-bits for
both masks, but a few releases ago we've started warning about such
cases, as they require special cases to work around this sloppiness.
Add a dma_mask field to struct platform_object so that we can initialize


s/object/device/


the dma_mask pointer in struct device and initialize both masks to
32-bits by default.  Architectures can still override this in
arch_setup_pdev_archdata if needed.

Note that the code looks a little odd with the various conditionals
because we have to support platform_device structures that are
statically allocated.


This would be a good point to also get rid of the long-standing bodge in 
platform_device_register_full().



Signed-off-by: Christoph Hellwig 
---
  drivers/base/platform.c | 15 +--
  include/linux/platform_device.h |  1 +
  2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index ec974ba9c0c4..b216fcb0a8af 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -264,6 +264,17 @@ struct platform_object {
char name[];
  };
  
+static void setup_pdev_archdata(struct platform_device *pdev)


Bikeshed: painting the generic DMA API properties as "archdata" feels a 
bit off-target :/



+{
+   if (!pdev->dev.coherent_dma_mask)
+   pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+   if (!pdev->dma_mask)
+   pdev->dma_mask = DMA_BIT_MASK(32);
+   if (!pdev->dev.dma_mask)
+   pdev->dev.dma_mask = &pdev->dma_mask;
+   arch_setup_pdev_archdata(pdev);


AFAICS m68k's implementation of that arch hook becomes entirely 
redundant after this change, so may as well go. That would just leave 
powerpc's actual archdata, which at a glance looks like it could 
probably be cleaned up with not *too* much trouble.


Robin.


+};
+
  /**
   * platform_device_put - destroy a platform device
   * @pdev: platform device to free
@@ -310,7 +321,7 @@ struct platform_device *platform_device_alloc(const char 
*name, int id)
pa->pdev.id = id;
device_initialize(&pa->pdev.dev);
pa->pdev.dev.release = platform_device_release;
-   arch_setup_pdev_archdata(&pa->pdev);
+   setup_pdev_archdata(&pa->pdev);
}
  
   	return pa ? &pa->pdev : NULL;

@@ -512,7 +523,7 @@ EXPORT_SYMBOL_GPL(platform_device_del);
  int platform_device_register(struct platform_device *pdev)
  {
device_initialize(&pdev->dev);
-   arch_setup_pdev_archdata(pdev);
+   setup_pdev_archdata(pdev);
return platform_device_add(pdev);
  }
  EXPORT_SYMBOL_GPL(platform_device_register);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 9bc36b589827..a2abde2aef25 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -24,6 +24,7 @@ struct platform_device {
int id;
boolid_auto;
struct device   dev;
+   u64 dma_mask;
u32 num_resources;
struct resource *resource;
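
With the default in place, a driver that needs wider masks still opts in
explicitly at probe time; a hedged sketch of the usual call (hypothetical
driver, dma_set_mask_and_coherent() is the real API):

#include <linux/dma-mapping.h>
#include <linux/platform_device.h>

/* Hypothetical probe: the core now guarantees dev.dma_mask points at valid
 * storage (defaulting to 32 bits), so this call can simply widen it. */
static int example_probe(struct platform_device *pdev)
{
	int ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));

	if (ret)	/* fall back to the default 32-bit masks */
		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
	return ret;
}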
  



[PATCH 2/3] kbuild: rebuild modules when module linker scripts are updated

2019-08-14 Thread Masahiro Yamada
Currently, the timestamps of module linker scripts are not checked.
Add them to the dependencies of modules so modules are correctly rebuilt.

Signed-off-by: Masahiro Yamada 
---

 Documentation/kbuild/makefiles.rst | 5 +
 Makefile   | 3 ++-
 arch/arm/Makefile  | 2 +-
 arch/arm64/Makefile| 2 +-
 arch/ia64/Makefile | 2 +-
 arch/m68k/Makefile | 2 +-
 arch/parisc/Makefile   | 2 +-
 arch/powerpc/Makefile  | 2 +-
 arch/riscv/Makefile| 2 +-
 scripts/Makefile.modpost   | 5 +++--
 10 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/Documentation/kbuild/makefiles.rst 
b/Documentation/kbuild/makefiles.rst
index d3448d2c8017..36ba92e199d2 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -999,6 +999,11 @@ When kbuild executes, the following steps are followed 
(roughly):
 
The linker script with full path. Assigned by the top-level Makefile.
 
+KBUILD_LDS_MODULE
+
+   The module linker script with full path. Assigned by the top-level
+   Makefile and additionally by the arch Makefile.
+
 KBUILD_VMLINUX_OBJS
 
All object files for vmlinux. They are linked to vmlinux in the same
diff --git a/Makefile b/Makefile
index 164ca615e2f6..af808837a1f2 100644
--- a/Makefile
+++ b/Makefile
@@ -485,7 +485,8 @@ KBUILD_AFLAGS_KERNEL :=
 KBUILD_CFLAGS_KERNEL :=
 KBUILD_AFLAGS_MODULE  := -DMODULE
 KBUILD_CFLAGS_MODULE  := -DMODULE
-KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
+KBUILD_LDFLAGS_MODULE :=
+export KBUILD_LDS_MODULE := $(srctree)/scripts/module-common.lds
 KBUILD_LDFLAGS :=
 GCC_PLUGINS_CFLAGS :=
 CLANG_FLAGS :=
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c3624ca6c0bc..fbe50eec8f34 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -17,7 +17,7 @@ KBUILD_LDFLAGS_MODULE += --be8
 endif
 
 ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
-KBUILD_LDFLAGS_MODULE  += -T $(srctree)/arch/arm/kernel/module.lds
+KBUILD_LDS_MODULE  += $(srctree)/arch/arm/kernel/module.lds
 endif
 
 GZFLAGS:=-9
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 61de992bbea3..d4ed1869e536 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -101,7 +101,7 @@ endif
 CHECKFLAGS += -D__aarch64__
 
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
-KBUILD_LDFLAGS_MODULE  += -T $(srctree)/arch/arm64/kernel/module.lds
+KBUILD_LDS_MODULE  += $(srctree)/arch/arm64/kernel/module.lds
 endif
 
 # Default value
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 171290f9f1de..5c3bcaee5980 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -20,7 +20,7 @@ CHECKFLAGS+= -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
 
 OBJCOPYFLAGS   := --strip-all
 LDFLAGS_vmlinux:= -static
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds
+KBUILD_LDS_MODULE += $(srctree)/arch/ia64/module.lds
 KBUILD_AFLAGS_KERNEL := -mconstant-gp
 EXTRA  :=
 
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index 482513b9af2c..5d9288384096 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -73,7 +73,7 @@ KBUILD_AFLAGS += -D__uClinux__
 endif
 
 KBUILD_LDFLAGS := -m m68kelf
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds
+KBUILD_LDS_MODULE += $(srctree)/arch/m68k/kernel/module.lds
 
 ifdef CONFIG_SUN3
 LDFLAGS_vmlinux = -N
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 3b77d729057f..36b834f1c933 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -60,7 +60,7 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \
 -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT)
 
 CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo 
$$(($(NOP_COUNT)-1)))
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/parisc/kernel/module.lds
+KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds
 endif
 
 OBJCOPY_FLAGS =-O binary -R .note -R .comment -S
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index c345b79414a9..b2227855de20 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -67,7 +67,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
 ifdef CONFIG_PPC32
 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
 else
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds
+KBUILD_LDS_MODULE += $(srctree)/arch/powerpc/kernel/module.lds
 ifeq ($(call ld-ifversion, -ge, 22500, y),y)
 # Have the linker provide sfpr if possible.
 # There is a corresponding test in arch/powerpc/lib/Makefile
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 7a117be8297c..426d989125a8 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -52,7 +52,7 @@ ifeq ($(CONFIG_CMODEL_MEDANY),y)
KBUILD_CFLAGS += -mcmodel=medany
 endif
 ifeq ($(CONFIG_MODULE_SECTIONS),y)
-   KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/riscv/kernel/module.lds
+   KBUILD_LDS_MODULE 

[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #19 from Christophe Leroy (christophe.le...@c-s.fr) ---
Created attachment 284389
  --> https://bugzilla.kernel.org/attachment.cgi?id=284389&action=edit
Patch to trace misaligned destination in copy_page() in asm on PPC32

Oops.

Can you test with this new patch, which implements the warning directly in
assembly? This time it only modifies misc_32.S and it builds ok.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

[PATCH 0/3] powerpc/xmon: Fix dump of XIVE interrupt under pseries

2019-08-14 Thread Cédric Le Goater
Hello,

The xmon 'dx*' commands call OPAL to query information on XIVE but
this can only be done on baremetal (PowerNV) and it crashes a pseries
machine. This little series fixes support on pseries and extends the
'dxi' command.

Thanks,

C.

Cédric Le Goater (3):
  powerpc/xmon: Check for HV mode when dumping XIVE info from OPAL
  powerpc/xive: Fix dump of XIVE interrupt under pseries
  powerpc/xmon: Add a dump of all XIVE interrupts

 arch/powerpc/include/asm/xive.h  |  2 +
 arch/powerpc/sysdev/xive/xive-internal.h |  2 +
 arch/powerpc/sysdev/xive/common.c|  7 
 arch/powerpc/sysdev/xive/native.c| 15 +++
 arch/powerpc/sysdev/xive/spapr.c | 51 
 arch/powerpc/xmon/xmon.c | 50 +--
 6 files changed, 114 insertions(+), 13 deletions(-)

-- 
2.21.0



[PATCH 2/3] powerpc/xive: Fix dump of XIVE interrupt under pseries

2019-08-14 Thread Cédric Le Goater
The xmon 'dxi' command calls OPAL to query the XIVE configuration of an
interrupt. This can only be done on baremetal (PowerNV) and it will
crash a pseries machine.

Introduce a new XIVE get_irq_config() operation which implements a
different query depending on the platform, PowerNV or pseries, and
modify xmon to use a top level wrapper.

Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/include/asm/xive.h  |  2 +
 arch/powerpc/sysdev/xive/xive-internal.h |  2 +
 arch/powerpc/sysdev/xive/common.c|  7 
 arch/powerpc/sysdev/xive/native.c| 15 +++
 arch/powerpc/sysdev/xive/spapr.c | 51 
 arch/powerpc/xmon/xmon.c | 12 +++---
 6 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index efb0e597b272..967d6ab3c977 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -99,6 +99,8 @@ extern void xive_flush_interrupt(void);
 
 /* xmon hook */
 extern void xmon_xive_do_dump(int cpu);
+extern int xmon_xive_get_irq_config(u32 irq, u32 *target, u8 *prio,
+   u32 *sw_irq);
 
 /* APIs used by KVM */
 extern u32 xive_native_default_eq_shift(void);
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h 
b/arch/powerpc/sysdev/xive/xive-internal.h
index 211725dbf364..59cd366e7933 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -33,6 +33,8 @@ struct xive_cpu {
 struct xive_ops {
int (*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data);
int (*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+   int (*get_irq_config)(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq);
int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
void(*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 
prio);
void(*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
diff --git a/arch/powerpc/sysdev/xive/common.c 
b/arch/powerpc/sysdev/xive/common.c
index be86fce1a84e..ed4561e71951 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -257,6 +257,13 @@ notrace void xmon_xive_do_dump(int cpu)
}
 #endif
 }
+
+int xmon_xive_get_irq_config(u32 irq, u32 *target, u8 *prio,
+u32 *sw_irq)
+{
+   return xive_ops->get_irq_config(irq, target, prio, sw_irq);
+}
+
 #endif /* CONFIG_XMON */
 
 static unsigned int xive_get_irq(void)
diff --git a/arch/powerpc/sysdev/xive/native.c 
b/arch/powerpc/sysdev/xive/native.c
index 2f26b74f6cfa..4b61e44f0171 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -111,6 +111,20 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 
prio, u32 sw_irq)
 }
 EXPORT_SYMBOL_GPL(xive_native_configure_irq);
 
+static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq)
+{
+   s64 rc;
+   __be64 vp;
+   __be32 lirq;
+
+   rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq);
+
+   *target = be64_to_cpu(vp);
+   *sw_irq = be32_to_cpu(lirq);
+
+   return rc == 0 ? 0 : -ENXIO;
+}
 
 /* This can be called multiple time to change a queue configuration */
 int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@@ -442,6 +456,7 @@ EXPORT_SYMBOL_GPL(xive_native_sync_queue);
 static const struct xive_ops xive_native_ops = {
.populate_irq_data  = xive_native_populate_irq_data,
.configure_irq  = xive_native_configure_irq,
+   .get_irq_config = xive_native_get_irq_config,
.setup_queue= xive_native_setup_queue,
.cleanup_queue  = xive_native_cleanup_queue,
.match  = xive_native_match,
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 8ef9cf4ebb1c..2a2d209336f7 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -211,6 +211,38 @@ static long plpar_int_set_source_config(unsigned long 
flags,
return 0;
 }
 
+static long plpar_int_get_source_config(unsigned long flags,
+   unsigned long lisn,
+   unsigned long *target,
+   unsigned long *prio,
+   unsigned long *sw_irq)
+{
+   unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+   long rc;
+
+   pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn);
+
+   do {
+   rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn,
+target, prio, sw_irq);
+   } while (plpar_busy_delay(rc));
+
+   if (rc) {
+   pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n",
+  

Re: [5.3.0-rc4-next][bisected 882632][qla2xxx] WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 qla2x00_status_entry.isra

2019-08-14 Thread Bart Van Assche
On 8/14/19 9:52 AM, Abdul Haleem wrote:
> Greetings,
> 
> Today's linux-next kernel (5.3.0-rc4-next-20190813) booted with a warning on 
> my powerpc POWER8 LPAR.
> 
> The WARN_ON_ONCE() was introduced by commit 88263208 (scsi: qla2xxx: Complain 
> if sp->done() is not...)
> 
> boot logs:
> 
> WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784

Hi Abdul,

Thank you for having reported this. Is that the only warning reported on your 
setup by the qla2xxx
driver? If that warning is commented out, does the qla2xxx driver work as 
expected?

Thanks,

Bart.



Re: [PATCH v4 18/25] powernv/fadump: process architected register state data provided by firmware

2019-08-14 Thread Mahesh J Salgaonkar
On 2019-07-16 17:04:08 Tue, Hari Bathini wrote:
> From: Hari Bathini 
> 
> Firmware provides architected register state data at the time of crash.
> Process this data and build CPU notes to append to ELF core.
> 
> Signed-off-by: Hari Bathini 
> Signed-off-by: Vasant Hegde 
> ---
>  arch/powerpc/kernel/fadump-common.h  |4 +
>  arch/powerpc/platforms/powernv/opal-fadump.c |  197 
> --
>  arch/powerpc/platforms/powernv/opal-fadump.h |   39 +
>  3 files changed, 228 insertions(+), 12 deletions(-)
> 
[...]
> @@ -430,6 +577,32 @@ int __init opal_fadump_dt_scan(struct fw_dump 
> *fadump_conf, ulong node)
>   return 1;
>   }
>  
> + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
> + if ((ret != OPAL_SUCCESS) || !addr) {
> + pr_err("Failed to get CPU metadata (%lld)\n", ret);
> + return 1;
> + }
> +
> + addr = be64_to_cpu(addr);
> + pr_debug("CPU metadata addr: %llx\n", addr);
> +
> + opal_cpu_metadata = __va(addr);
> + r_opal_cpu_metadata = (void *)addr;
> + fadump_conf->cpu_state_data_version =
> + be32_to_cpu(r_opal_cpu_metadata->cpu_data_version);
> + if (fadump_conf->cpu_state_data_version !=
> + HDAT_FADUMP_CPU_DATA_VERSION) {
> + pr_err("CPU data format version (%lu) mismatch!\n",
> +fadump_conf->cpu_state_data_version);
> + return 1;
> + }
> + fadump_conf->cpu_state_entry_size =
> + be32_to_cpu(r_opal_cpu_metadata->cpu_data_size);
> + fadump_conf->cpu_state_destination_addr =
> + be64_to_cpu(r_opal_cpu_metadata->region[0].dest);
> + fadump_conf->cpu_state_data_size =
> + be64_to_cpu(r_opal_cpu_metadata->region[0].size);
> +

opal_fadump_dt_scan isn't the right place to do this. Can you please move the
above CPU-related data processing to opal_fadump_build_cpu_notes()?

Thanks,
-Mahesh.

>   pr_info("Firmware-assisted dump is active.\n");
>   fadump_conf->dump_active = 1;
>   opal_fadump_get_config(fadump_conf, r_opal_fdm_active);
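
The conversions themselves follow the usual pattern for firmware-provided
data, sketched below (illustrative struct layout, not the actual HDAT
metadata format; be32_to_cpu()/be64_to_cpu() are the real helpers): every
big-endian field is converted before use.

#include <linux/types.h>
#include <asm/byteorder.h>

/* Illustrative layout only. */
struct fw_cpu_metadata {
	__be32 version;
	__be32 entry_size;
	__be64 dest;
	__be64 size;
};

static void read_metadata(const struct fw_cpu_metadata *md, u32 *version,
			  u32 *entry_size, u64 *dest, u64 *size)
{
	*version    = be32_to_cpu(md->version);
	*entry_size = be32_to_cpu(md->entry_size);
	*dest       = be64_to_cpu(md->dest);
	*size       = be64_to_cpu(md->size);
}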



Re: [5.3.0-rc4-next][bisected 882632][qla2xxx] WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784 qla2x00_status_entry.isra

2019-08-14 Thread Abdul Haleem
On Wed, 2019-08-14 at 10:05 -0700, Bart Van Assche wrote:
> On 8/14/19 9:52 AM, Abdul Haleem wrote:
> > Greetings,
> > 
> > Today's linux-next kernel (5.3.0-rc4-next-20190813) booted with a warning on 
> > my powerpc POWER8 LPAR.
> > 
> > The WARN_ON_ONCE() was introduced by commit 88263208 (scsi: qla2xxx: 
> > Complain if sp->done() is not...)
> > 
> > boot logs:
> > 
> > WARNING: CPU: 10 PID: 425 at drivers/scsi/qla2xxx/qla_isr.c:2784
> 
> Hi Abdul,
> 
> Thank you for having reported this. Is that the only warning reported on your 
> setup by the qla2xxx
> driver? If that warning is commented out, does the qla2xxx driver work as 
> expected?

boot warning did not show up when the commit is reverted.

Should I comment out only the WARN_ON_ONCE() which is causing the issue,
and not the other one?

-- 
Regard's

Abdul Haleem
IBM Linux Technology Centre





[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #20 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 284397
  --> https://bugzilla.kernel.org/attachment.cgi?id=284397&action=edit
dmesg (PowerMac G4 DP, kernel 5.3-rc4 + debug patch)

/dev/sdb2 mounted after booting, dmesg after unmounting

-- 
You are receiving this mail because:
You are on the CC list for the bug.

[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #21 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 284399
  --> https://bugzilla.kernel.org/attachment.cgi?id=284399&action=edit
dmesg (PowerMac G4 DP, kernel 5.3-rc4 + debug patch)

/dev/sdb2 mounted at boot, dmesg after unmounting.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

Erhard F. (erhar...@mailbox.org) changed:

   What|Removed |Added

 Attachment #284071|0   |1
is obsolete||

--- Comment #22 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 284401
  --> https://bugzilla.kernel.org/attachment.cgi?id=284401&action=edit
kernel .config (PowerMac G4 DP, kernel 5.3-rc4)

-- 
You are receiving this mail because:
You are on the CC list for the bug.

[Bug 204371] BUG kmalloc-4k (Tainted: G W ): Object padding overwritten

2019-08-14 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=204371

--- Comment #23 from Erhard F. (erhar...@mailbox.org) ---
On Wed, 14 Aug 2019 16:10:53 +
bugzilla-dae...@bugzilla.kernel.org wrote:

> https://bugzilla.kernel.org/show_bug.cgi?id=204371
> 
> --- Comment #19 from Christophe Leroy (christophe.le...@c-s.fr) ---
> Created attachment 284389
>   --> https://bugzilla.kernel.org/attachment.cgi?id=284389&action=edit
> Patch to trace misaligned destination in copy_page() in asm on PPC32
> 
> Oops.
> 
> Can you test with this new patch which implements the warning directly in
> assembly.? This time it only modifies misc_32.S and It builds ok.
Please find the full dmesg attached at the kernel bugtracker.

[...]
Aug 14 19:32:52 T600 kernel: WARNING: CPU: 1 PID: 252 at
arch/powerpc/kernel/misc_32.S:457 copy_page+0x4/0x98
Aug 14 19:32:52 T600 kernel: Modules linked in: b43legacy input_leds led_class
mac80211 joydev hid_generic usbhid hid cfg80211 snd_aoa_codec_tas
snd_aoa_fabric_layout snd_aoa rfkill libarc4 evdev ohci_pci btrfs xor
zstd_decompress zstd_compress zlib_deflate radeon raid6_pq zlib_inflate
ehci_pci ohci_hcd therm_windtunnel ehci_hcd hwmon i2c_algo_bit firewire_ohci
backlight firewire_core sr_mod sungem crc_itu_t drm_kms_helper cdrom sungem_phy
usbcore syscopyarea sysfillrect usb_common sysimgblt fb_sys_fops ttm
snd_aoa_i2sbus drm snd_aoa_soundbus snd_pcm snd_timer
drm_panel_orientation_quirks ssb snd uninorth_agp soundcore agpgart lzo
lzo_compress lzo_decompress zram zsmalloc
Aug 14 19:32:52 T600 kernel: CPU: 1 PID: 252 Comm: umount Tainted: GW  
  5.3.0-rc4+ #1
Aug 14 19:32:52 T600 kernel: NIP:  c0011524 LR: f1a563f8 CTR: c0011520
Aug 14 19:32:52 T600 kernel: REGS: ed22b810 TRAP: 0700   Tainted: GW   
  (5.3.0-rc4+)
Aug 14 19:32:52 T600 kernel: MSR:  00029032   CR: 22048222 
XER: 2000
Aug 14 19:32:52 T600 kernel: 
 GPR00: f1a563e0 ed22b8c8 e7348020 e6b442e8
dae3e000 0008 c0596c20 dae3effc 
 GPR08:  b2209525  ed22b8c8
c0011520 00745ff4  e8dec1fc 
 GPR16: 0001  c07fe5f8 0001
  f1af07f0 c06fd6fc 
 GPR24: e8dec178  ed22b8d8 f1af
ec13f1e8  ec13f1e8 e8a945e8 
Aug 14 19:32:52 T600 kernel: NIP [c0011524] copy_page+0x4/0x98
Aug 14 19:32:52 T600 kernel: LR [f1a563f8] __load_free_space_cache+0x540/0x61c
[btrfs]
Aug 14 19:32:52 T600 kernel: Call Trace:
Aug 14 19:32:52 T600 kernel: [ed22b8c8] [f1a563e0]
__load_free_space_cache+0x528/0x61c [btrfs] (unreliable)
Aug 14 19:32:52 T600 kernel: [ed22b958] [f1a565bc]
load_free_space_cache+0xe8/0x1bc [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22b998] [f19e83f4]
cache_block_group+0x1cc/0x3b4 [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22b9f8] [f19f04c8] find_free_extent+0x56c/0xe70
[btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bad8] [f19f0eb8]
btrfs_reserve_extent+0xec/0x220 [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bb48] [f19f1130]
btrfs_alloc_tree_block+0x144/0x35c [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bc38] [f19dc9c0]
alloc_tree_block_no_bg_flush+0x88/0x98 [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bc78] [f19dfce0]
__btrfs_cow_block+0x140/0x4d0 [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bce8] [f19e021c] btrfs_cow_block+0x144/0x23c
[btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bd18] [f1a039e4]
commit_cowonly_roots+0x50/0x294 [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bd68] [f1a062c4]
btrfs_commit_transaction+0x5e4/0x994 [btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bdb8] [f1a01800] close_ctree+0xf4/0x2c4
[btrfs]
Aug 14 19:32:52 T600 kernel: [ed22bdf8] [c01ab508]
generic_shutdown_super+0x80/0x110
Aug 14 19:32:52 T600 kernel: [ed22be18] [c01ab718] kill_anon_super+0x18/0x30
Aug 14 19:32:53 T600 kernel: [ed22be38] [f19d88b4] btrfs_kill_super+0x18/0x30
[btrfs]
Aug 14 19:32:53 T600 kernel: [ed22be58] [c01abdbc]
deactivate_locked_super+0x54/0xa4
Aug 14 19:32:53 T600 kernel: [ed22be78] [c01cbcb4] cleanup_mnt+0x6c/0xe4
Aug 14 19:32:53 T600 kernel: [ed22bea8] [c0054f50] task_work_run+0xa0/0xc0
Aug 14 19:32:53 T600 kernel: [ed22bed8] [c000bc44] do_notify_resume+0x160/0x2c8
Aug 14 19:32:53 T600 kernel: [ed22bf38] [c0014800] do_user_signal+0x2c/0x34
Aug 14 19:32:53 T600 kernel: --- interrupt: c00 at 0x5a93d4
 LR = 0x5a93b8
Aug 14 19:32:53 T600 kernel: Instruction dump:
Aug 14 19:32:53 T600 kernel: 38630020 4200fff8 7c0004ac 7c8903a6 7c0037ac
38c60020 4200fff8 7c0004ac 
Aug 14 19:32:53 T600 kernel: 7d400124 4c00012c 4e800020 546506fe <0f05>
3863fffc 3884fffc 38a4 
Aug 14 19:32:53 T600 kernel: irq event stamp: 0
Aug 14 19:32:53 T600 kernel: hardirqs last  enabled at (0): [<>] 0x0
Aug 14 19:32:53 T600 kernel: hardirqs last disabled at (0): []
copy_process+0x474/0x1368
Aug 14 19:32:53 T600 kernel: softirqs last  enabled at (0): []
copy_process+0x474/0x1368
Aug 14 19:32:53 T600 kernel: softirqs last disabled at (0): [<>] 0x0
Aug 14 

Re: [PATCH] powerpc: use the generic dma coherent remap allocator

2019-08-14 Thread Christophe Leroy




Le 14/08/2019 à 15:22, Christoph Hellwig a écrit :

This switches to using common code for the DMA allocations, including
potential use of the CMA allocator if configured.

Switching to the generic code enables DMA allocations from atomic
context, which is required by the DMA API documentation, and also
adds various other minor features drivers start relying upon.  It
also makes sure we have one tested code base for all architectures
that require uncached pte bits for coherent DMA allocations.

Another advantage is that consistent memory allocations now share
the general vmalloc pool instead of needing an explicit carve-out
from it.

Signed-off-by: Christoph Hellwig 
---
  arch/powerpc/Kconfig |  12 -
  arch/powerpc/include/asm/book3s/32/pgtable.h |  12 +-
  arch/powerpc/include/asm/nohash/32/pgtable.h |  12 +-
  arch/powerpc/mm/dma-noncoherent.c| 318 +--
  arch/powerpc/mm/mem.c|   4 -
  arch/powerpc/mm/ptdump/ptdump.c  |   9 -
  arch/powerpc/platforms/Kconfig.cputype   |   2 +
  7 files changed, 17 insertions(+), 352 deletions(-)



[...]


diff --git a/arch/powerpc/mm/dma-noncoherent.c 
b/arch/powerpc/mm/dma-noncoherent.c
index c617282d5b2a..4272ca5e8159 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c


[...]


@@ -408,23 +116,15 @@ void arch_sync_dma_for_cpu(struct device *dev, 
phys_addr_t paddr,
__dma_sync_page(paddr, size, dir);
  }
  
-/*

- * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
- */
-long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
-   dma_addr_t dma_addr)
+void arch_dma_prep_coherent(struct page *page, size_t size)
  {
-   /* This should always be populated, so we don't test every
-* level. If that fails, we'll have a nice crash which
-* will be as good as a BUG_ON()
-*/
-   unsigned long cpu_addr = (unsigned long)vaddr;
-   pgd_t *pgd = pgd_offset_k(cpu_addr);
-   pud_t *pud = pud_offset(pgd, cpu_addr);
-   pmd_t *pmd = pmd_offset(pud, cpu_addr);
-   pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
+   unsigned long kaddr = (unsigned long)page_address(page);
  
-	if (pte_none(*ptep) || !pte_present(*ptep))

-   return 0;
-   return pte_pfn(*ptep);
+   flush_dcache_range(kaddr, kaddr + size);
+}
+
+static int __init atomic_pool_init(void)
+{
+   return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
  }
+postcore_initcall(atomic_pool_init);


[...]


diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index 56a7c814160d..afe71b89dec3 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -450,8 +450,10 @@ config NOT_COHERENT_CACHE
depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
GAMECUBE_COMMON || AMIGAONE
select ARCH_HAS_DMA_COHERENT_TO_PFN


You drop arch_dma_coherent_to_pfn(), so it's surprising to see 
ARCH_HAS_DMA_COHERENT_TO_PFN remain. At first I thought I'd get a build 
failure.


After looking more, I see there is a arch_dma_coherent_to_pfn()
defined in kernel/dma/remap.c when DMA_DIRECT_REMAP is selected.

I think the naming is not really consistent and should be fixed 
somehow, because it's misleading to have an arch_something() being common 
to all.


Christophe


+   select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
+   select DMA_DIRECT_REMAP
default n if PPC_47x
default y
  



[PATCH] powerpc/mm: don't display empty early ioremap area

2019-08-14 Thread Christophe Leroy
On the 8xx, the layout displayed at boot is:

[    0.000000] Memory: 121856K/131072K available (5728K kernel code, 592K 
rwdata, 1248K rodata, 560K init, 448K bss, 9216K reserved, 0K cma-reserved)
[    0.000000] Kernel virtual memory layout:
[    0.000000]   * 0xffefc000..0xffffc000  : fixmap
[    0.000000]   * 0xffefc000..0xffefc000  : early ioremap
[    0.000000]   * 0xc9000000..0xffefc000  : vmalloc & ioremap
[    0.000000] SLUB: HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1

Remove display of an empty early ioremap.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/mem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 3e9e9a051c93..69f99128a8d6 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -306,8 +306,9 @@ void __init mem_init(void)
pr_info("  * 0x%08lx..0x%08lx  : consistent mem\n",
IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
 #endif /* CONFIG_NOT_COHERENT_CACHE */
-   pr_info("  * 0x%08lx..0x%08lx  : early ioremap\n",
-   ioremap_bot, IOREMAP_TOP);
+   if (ioremap_bot != IOREMAP_TOP)
+   pr_info("  * 0x%08lx..0x%08lx  : early ioremap\n",
+   ioremap_bot, IOREMAP_TOP);
pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
VMALLOC_START, VMALLOC_END);
 #endif /* CONFIG_PPC32 */
-- 
2.13.3



  1   2   >