[PATCH] headers: untangle kmemleak.h from mm.h

2018-02-11 Thread Randy Dunlap
From: Randy Dunlap 

Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious
reason. It looks like it's only a convenience, so remove kmemleak.h
from slab.h and add <linux/kmemleak.h> to any users of kmemleak_*
that don't already #include it.
Also remove <linux/kmemleak.h> from source files that do not use it.

This is tested on i386 allmodconfig and x86_64 allmodconfig. It
would be good to run it through the 0day bot for other $ARCHes.
I have neither the horsepower nor the storage space for the other
$ARCHes.

[slab.h is the second most used header file after module.h; kernel.h
is right there with slab.h. There could be some minor error in the
counting due to some #includes having comments after them and I
didn't combine all of those.]

This is Lingchi patch #1 (death by a thousand cuts, applied to kernel
header files).

Signed-off-by: Randy Dunlap 
---

Fengguang, can you have this patch run thru 0day builds, please?

 arch/powerpc/sysdev/dart_iommu.c  |1 +
 arch/powerpc/sysdev/msi_bitmap.c  |1 +
 arch/s390/kernel/nmi.c|2 +-
 arch/s390/kernel/smp.c|1 -
 arch/sparc/kernel/irq_64.c|1 -
 arch/x86/kernel/pci-dma.c |1 -
 drivers/iommu/exynos-iommu.c  |1 +
 drivers/iommu/mtk_iommu_v1.c  |1 -
 drivers/net/ethernet/ti/cpsw.c|1 +
 drivers/net/wireless/realtek/rtlwifi/pci.c|1 -
 drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c |1 -
 drivers/staging/rtl8188eu/hal/fw.c|2 +-
 drivers/staging/rtlwifi/pci.c |1 -
 drivers/virtio/virtio_ring.c  |1 -
 include/linux/slab.h  |1 -
 kernel/ucount.c   |1 +
 mm/cma.c  |1 +
 mm/memblock.c |1 +
 net/core/sysctl_net_core.c|1 -
 net/ipv4/route.c  |1 -
 security/apparmor/lsm.c   |1 -
 21 files changed, 9 insertions(+), 14 deletions(-)

--- lnx-416-rc1.orig/include/linux/slab.h
+++ lnx-416-rc1/include/linux/slab.h
@@ -125,7 +125,6 @@
 #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
(unsigned long)ZERO_SIZE_PTR)
 
-#include 
 #include 
 
 struct mem_cgroup;
--- lnx-416-rc1.orig/kernel/ucount.c
+++ lnx-416-rc1/kernel/ucount.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define UCOUNTS_HASHTABLE_BITS 10
--- lnx-416-rc1.orig/mm/memblock.c
+++ lnx-416-rc1/mm/memblock.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
--- lnx-416-rc1.orig/mm/cma.c
+++ lnx-416-rc1/mm/cma.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "cma.h"
--- lnx-416-rc1.orig/drivers/staging/rtl8188eu/hal/fw.c
+++ lnx-416-rc1/drivers/staging/rtl8188eu/hal/fw.c
@@ -30,7 +30,7 @@
 #include "rtl8188e_hal.h"
 
 #include 
-#include 
+#include 
 
 static void _rtl88e_enable_fw_download(struct adapter *adapt, bool enable)
 {
--- lnx-416-rc1.orig/drivers/iommu/exynos-iommu.c
+++ lnx-416-rc1/drivers/iommu/exynos-iommu.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
--- lnx-416-rc1.orig/arch/s390/kernel/nmi.c
+++ lnx-416-rc1/arch/s390/kernel/nmi.c
@@ -15,7 +15,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
--- lnx-416-rc1.orig/arch/powerpc/sysdev/dart_iommu.c
+++ lnx-416-rc1/arch/powerpc/sysdev/dart_iommu.c
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
--- lnx-416-rc1.orig/arch/powerpc/sysdev/msi_bitmap.c
+++ lnx-416-rc1/arch/powerpc/sysdev/msi_bitmap.c
@@ -10,6 +10,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
--- lnx-416-rc1.orig/drivers/net/ethernet/ti/cpsw.c
+++ lnx-416-rc1/drivers/net/ethernet/ti/cpsw.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
--- lnx-416-rc1.orig/drivers/virtio/virtio_ring.c
+++ lnx-416-rc1/drivers/virtio/virtio_ring.c
@@ -23,7 +23,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
--- lnx-416-rc1.orig/security/apparmor/lsm.c
+++ lnx-416-rc1/security/apparmor/lsm.c
@@ -23,7 +23,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include "include/apparmor.h"
--- lnx-416-rc1.orig/drivers/iommu/mtk_iommu_v1.c
+++ lnx-416-rc1/drivers/iommu/mtk_iommu_v1.c
@@ -25,7 +25,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
--- lnx-416-rc1.orig/drivers/staging/rtlwifi/pci.c
+++ 

Re: [PATCH] headers: untangle kmemleak.h from mm.h

2018-02-11 Thread Ingo Molnar

* Randy Dunlap  wrote:

> From: Randy Dunlap 
> 
> Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious
> reason. It looks like it's only a convenience, so remove kmemleak.h
> from slab.h and add <linux/kmemleak.h> to any users of kmemleak_*
> that don't already #include it.
> Also remove <linux/kmemleak.h> from source files that do not use it.
> 
> This is tested on i386 allmodconfig and x86_64 allmodconfig. It
> would be good to run it through the 0day bot for other $ARCHes.
> I have neither the horsepower nor the storage space for the other
> $ARCHes.
> 
> [slab.h is the second most used header file after module.h; kernel.h
> is right there with slab.h. There could be some minor error in the
> counting due to some #includes having comments after them and I
> didn't combine all of those.]
> 
> This is Lingchi patch #1 (death by a thousand cuts, applied to kernel
> header files).
> 
> Signed-off-by: Randy Dunlap 

Nice find:

Reviewed-by: Ingo Molnar 

I agree that it needs to go through 0-day to find any hidden dependencies we 
might 
have grown due to this.

Thanks,

Ingo


Re: [PATCH v1] PCI: Make PCI_SCAN_ALL_PCIE_DEVS work for Root as well as Downstream Ports

2018-02-11 Thread Christian Zigotzky
Hi Bjorn,

Sorry for my late answer. The X1000 boots and works since yesterday. I think 
the following patch solved the issue: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c591c2e36ccc9a08f265841d2fd68e35327ab3c4

Cheers,
Christian

Sent from my iPhone

> On 10. Feb 2018, at 16:43, Bjorn Helgaas  wrote:
> 
>> On Sat, Feb 10, 2018 at 09:05:40AM +0100, Christian Zigotzky wrote:
>> Hi All,
>> 
>> The AmigaOne X1000 doesn’t boot anymore since the PCI updates. I
>> have seen, that the PCI updates are different to the updates below.
>> The code below works but the latest not. Is there a problem with the
>> latest PCI updates currently?
> 
> I'm not aware of a problem, and it *looks* like the patch below is in
> Linus' tree (I'm looking at 9a61df9e5f74 ("Merge tag 'kbuild-v4.16-2'
> of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild")).
> 
> I assume you're still booting with "pci=pcie_scan_all", since I don't
> think we ever got a quirk to set PCI_SCAN_ALL_PCIE_DEVS automatically.
> 
> If AmigaOne X1000 doesn't boot with "pci=pcie_scan_all", can you diff
> the working only_one_child() with the current upstream?  I compared
> the version in my pci/enumeration branch with what's upstream, and
> they're identical.  So maybe the original patch I applied was wrong?
> 
> If you have a patch that works, can you post it and maybe I can sort
> out what's different?
> 
>> On 2. Dec 2017, at 20:18, Bjorn Helgaas  wrote:
>> 
>> On Fri, Dec 01, 2017 at 06:27:10PM -0600, Bjorn Helgaas wrote:
>> From: Bjorn Helgaas 
>> 
>> PCIe Downstream Ports normally have only a Device 0 below them.  To
>> optimize enumeration, we don't scan for other devices *unless* the
>> PCI_SCAN_ALL_PCIE_DEVS flag is set by quirks or the
>> "pci=pcie_scan_all" kernel parameter.
>> 
>> Previously PCI_SCAN_ALL_PCIE_DEVS only affected scanning below Switch
>> Downstream Ports, not Root Ports.
>> 
>> But the "Nemo" system, also known as the AmigaOne X1000, has a PA Semi Root
>> Port whose link leads to an AMD/ATI SB600 South Bridge.  The Root Port is a
>> PCIe device, of course, but the SB600 contains only conventional PCI
>> devices with no visible PCIe port.
>> 
>> Simplify and restructure only_one_child() so that we scan for all possible
>> devices below Root Ports as well as Switch Downstream Ports when
>> PCI_SCAN_ALL_PCIE_DEVS is set.
>> 
>> This is enough to make Nemo work with "pci=pcie_scan_all".  We would also
>> like to add a quirk to set PCI_SCAN_ALL_PCIE_DEVS automatically on Nemo so
>> users wouldn't have to use the "pci=pcie_scan_all" parameter, but we don't
>> have that yet.
>> 
>> Link: 
>> https://lkml.kernel.org/r/CAErSpo55Q8Q=5p6_+uu7ahnw+53ibvdnrxxrzrv9qnur_9e...@mail.gmail.com
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=198057
>> Reported-and-Tested-by: Christian Zigotzky 
>> Signed-off-by: Bjorn Helgaas 
>> 
>> Applied to pci/enumeration for v4.16.
>> 
>> ---
>> drivers/pci/probe.c |   25 +++--
>> 1 file changed, 15 insertions(+), 10 deletions(-)
>> 
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index 14e0ea1ff38b..303c0cb0550c 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -2215,22 +2215,27 @@ static unsigned next_fn(struct pci_bus *bus, struct 
>> pci_dev *dev, unsigned fn)
>> 
>> static int only_one_child(struct pci_bus *bus)
>> {
>> -struct pci_dev *parent = bus->self;
>> +struct pci_dev *bridge = bus->self;
>> 
>> -if (!parent || !pci_is_pcie(parent))
>> +/*
>> + * Systems with unusual topologies set PCI_SCAN_ALL_PCIE_DEVS so
>> + * we scan for all possible devices, not just Device 0.
>> + */
>> +if (pci_has_flag(PCI_SCAN_ALL_PCIE_DEVS))
>>   return 0;
>> -if (pci_pcie_type(parent) == PCI_EXP_TYPE_ROOT_PORT)
>> -return 1;
>> 
>>   /*
>> - * PCIe downstream ports are bridges that normally lead to only a
>> - * device 0, but if PCI_SCAN_ALL_PCIE_DEVS is set, scan all
>> - * possible devices, not just device 0.  See PCIe spec r3.0,
>> - * sec 7.3.1.
>> + * A PCIe Downstream Port normally leads to a Link with only Device
>> + * 0 on it (PCIe spec r3.1, sec 7.3.1).  As an optimization, scan
>> + * only for Device 0 in that situation.
>> + *
>> + * Checking has_secondary_link is a hack to identify Downstream
>> + * Ports because sometimes Switches are configured such that the
>> + * PCIe Port Type labels are backwards.
>>*/
>> -if (parent->has_secondary_link &&
>> -!pci_has_flag(PCI_SCAN_ALL_PCIE_DEVS))
>> +if (bridge && pci_is_pcie(bridge) && bridge->has_secondary_link)
>>   return 1;
>> +
>>   return 0;
>> }
>> 


Re: [PATCH v4 4/5] powerpc/mm/slice: Allow up to 64 low slices

2018-02-11 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> While the implementation of the "slices" address space allows
> a significant amount of high slices, it limits the number of
> low slices to 16 due to the use of a single u64 low_slices_psize
> element in struct mm_context_t
>
> On the 8xx, the minimum slice size is the size of the area
> covered by a single PMD entry, ie 4M in 4K pages mode and 64M in
> 16K pages mode. This means we could have at least 64 slices.
>
> In order to override this limitation, this patch switches the
> handling of low_slices_psize to char array as done already for
> high_slices_psize.
>

Reviewed-by: Aneesh Kumar K.V 

> Signed-off-by: Christophe Leroy 
> ---
>  v2: Using slice_bitmap_xxx() macros instead of bitmap_xxx() functions.
>  v3: keep low_slices as a u64, this allows 64 slices which is enough.
>  v4: Moved the 8xx specifics to next patch
>  
>  arch/powerpc/include/asm/book3s/64/mmu.h |  3 +-
>  arch/powerpc/include/asm/mmu-8xx.h   |  7 +++-
>  arch/powerpc/include/asm/paca.h  |  2 +-
>  arch/powerpc/kernel/paca.c   |  3 +-
>  arch/powerpc/mm/hash_utils_64.c  | 13 
>  arch/powerpc/mm/slb_low.S|  8 +++--
>  arch/powerpc/mm/slice.c  | 57 
> +---
>  7 files changed, 52 insertions(+), 41 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h 
> b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 0abeb0e2d616..bef6e39ed63a 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -91,7 +91,8 @@ typedef struct {
>   struct npu_context *npu_context;
>  
>  #ifdef CONFIG_PPC_MM_SLICES
> - u64 low_slices_psize;   /* SLB page size encodings */
> +  /* SLB page size encodings*/
> + unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
>   unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
>   unsigned long slb_addr_limit;
>  #else
> diff --git a/arch/powerpc/include/asm/mmu-8xx.h 
> b/arch/powerpc/include/asm/mmu-8xx.h
> index b324ab46d838..d3d7e79140c6 100644
> --- a/arch/powerpc/include/asm/mmu-8xx.h
> +++ b/arch/powerpc/include/asm/mmu-8xx.h
> @@ -186,6 +186,11 @@
>  #define M_APG2   0x0040
>  #define M_APG3   0x0060
>  
> +#ifdef CONFIG_PPC_MM_SLICES
> +#include 
> +#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
> +#endif
> +
>  #ifndef __ASSEMBLY__
>  typedef struct {
>   unsigned int id;
> @@ -193,7 +198,7 @@ typedef struct {
>   unsigned long vdso_base;
>  #ifdef CONFIG_PPC_MM_SLICES
>   u16 user_psize; /* page size index */
> - u64 low_slices_psize;   /* page size encodings */
> + unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
>   unsigned char high_slices_psize[0];
>   unsigned long slb_addr_limit;
>  #endif
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index b62c31037cad..d2bf71dddbef 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -141,7 +141,7 @@ struct paca_struct {
>  #ifdef CONFIG_PPC_BOOK3S
>   mm_context_id_t mm_ctx_id;
>  #ifdef CONFIG_PPC_MM_SLICES
> - u64 mm_ctx_low_slices_psize;
> + unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
>   unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
>   unsigned long mm_ctx_slb_addr_limit;
>  #else
> diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
> index 95ffedf14885..2fd563d05831 100644
> --- a/arch/powerpc/kernel/paca.c
> +++ b/arch/powerpc/kernel/paca.c
> @@ -265,7 +265,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
>  #ifdef CONFIG_PPC_MM_SLICES
>   VM_BUG_ON(!mm->context.slb_addr_limit);
>   get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
> - get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
> + memcpy(_paca()->mm_ctx_low_slices_psize,
> +>low_slices_psize, sizeof(context->low_slices_psize));
>   memcpy(_paca()->mm_ctx_high_slices_psize,
>  >high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
>  #else /* CONFIG_PPC_MM_SLICES */
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 7d07c7e17db6..2c1f4dac1098 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -1109,19 +1109,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int 
> pp, pte_t pte, int trap)
>  #ifdef CONFIG_PPC_MM_SLICES
>  static unsigned int get_paca_psize(unsigned long addr)
>  {
> - u64 lpsizes;
> - unsigned char *hpsizes;
> + unsigned char *psizes;
>   unsigned long index, mask_index;
>  
>   if (addr < SLICE_LOW_TOP) {
> - lpsizes = get_paca()->mm_ctx_low_slices_psize;
> + psizes = get_paca()->mm_ctx_low_slices_psize;
>   index = GET_LOW_SLICE_INDEX(addr);
> -  

Re: [PATCH 1/1] powerpc/pseries: Enable RAS hotplug events late

2018-02-11 Thread Balbir Singh
On Mon, Feb 12, 2018 at 11:19 AM, Sam Bobroff  wrote:
> Currently if the kernel receives a memory hot-unplug event early
> enough, it may get stuck in an infinite loop in
> dissolve_free_huge_pages(). This appears as a stall just after:
>
> pseries-hotplug-mem: Attempting to hot-remove XX LMB(s) at 
>
> It appears to be caused by "minimum_order" being uninitialized, due to
> init_ras_IRQ() executing before hugetlb_init().
>
> To correct this, extract the part of init_ras_IRQ() that enables
> hotplug event processing and place it in the machine_late_initcall
> phase, which is guaranteed to be after hugetlb_init() is called.
>
> Signed-off-by: Sam Bobroff 
> ---
>  arch/powerpc/platforms/pseries/ras.c | 29 +
>  1 file changed, 21 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/ras.c 
> b/arch/powerpc/platforms/pseries/ras.c
> index 81d8614e7379..ba284949af06 100644
> --- a/arch/powerpc/platforms/pseries/ras.c
> +++ b/arch/powerpc/platforms/pseries/ras.c
> @@ -66,6 +66,26 @@ static int __init init_ras_IRQ(void)
> of_node_put(np);
> }
>
> +   /* EPOW Events */
> +   np = of_find_node_by_path("/event-sources/epow-events");
> +   if (np != NULL) {
> +   request_event_sources_irqs(np, ras_epow_interrupt, 
> "RAS_EPOW");
> +   of_node_put(np);
> +   }
> +
> +   return 0;
> +}
> +machine_subsys_initcall(pseries, init_ras_IRQ);
> +
> +/*
> + * Enable the hotplug interrupts late because processing them may touch other
> + * devices or systems (e.g. hugepages) that have not been initialized at the
> + * subsys stage.
> + */
> +int __init init_ras_hotplug_IRQ(void)
> +{
> +   struct device_node *np;
> +
> /* Hotplug Events */
> np = of_find_node_by_path("/event-sources/hot-plug-events");
> if (np != NULL) {
> @@ -75,16 +95,9 @@ static int __init init_ras_IRQ(void)
> of_node_put(np);
> }
>
> -   /* EPOW Events */
> -   np = of_find_node_by_path("/event-sources/epow-events");
> -   if (np != NULL) {
> -   request_event_sources_irqs(np, ras_epow_interrupt, 
> "RAS_EPOW");
> -   of_node_put(np);
> -   }
> -
> return 0;
>  }
> -machine_subsys_initcall(pseries, init_ras_IRQ);
> +machine_late_initcall(pseries, init_ras_hotplug_IRQ);
>

Seems reasonable to me, the other RAS events internal error and epow
seem like they are in the right place.

Acked-by: Balbir Singh 


Re: [PATCH v4 2/5] powerpc/mm/slice: Enhance for supporting PPC32

2018-02-11 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> In preparation for the following patch which will fix an issue on
> the 8xx by re-using the 'slices', this patch enhances the
> 'slices' implementation to support 32-bit CPUs.
>
> On PPC32, the address space is limited to 4Gbytes, hence only the low
> slices will be used.
>
> This patch moves "slices" functions prototypes from page64.h to slice.h
>
> The high slices use bitmaps. As bitmap functions are not prepared to
> handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
> slice_bitmap_xxx() functions which are no-ops on PPC32
>

Reviewed-by: Aneesh Kumar K.V 

> Signed-off-by: Christophe Leroy 
> ---
>  v2: First patch of v1 serie split in two parts ; added slice_bitmap_xxx() 
> macros.
>  v3: Moving slice related stuff in slice.h and slice_32/64.h
>  slice_bitmap_xxx() are now static inline functions and platform dependent
>  SLICE_LOW_TOP declared ull on PPC32 with correct casts allows to keep it 
> 0x1
>  v4: Moved slice_32.h and slice_64.h to respective subarch dirs
>  Moved some #ifdefs from asm/slice.h to respective subarch slice.h
>  SLICE_LOW_ details distributed in respective subarch slices although 
> they are identical for the moment
>
>  arch/powerpc/include/asm/book3s/64/slice.h | 79 
> ++
>  arch/powerpc/include/asm/nohash/32/slice.h | 65 
>  arch/powerpc/include/asm/nohash/64/slice.h | 12 +
>  arch/powerpc/include/asm/page.h|  1 +
>  arch/powerpc/include/asm/page_64.h | 59 --
>  arch/powerpc/include/asm/slice.h   | 42 
>  arch/powerpc/mm/slice.c| 38 --
>  7 files changed, 221 insertions(+), 75 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/book3s/64/slice.h
>  create mode 100644 arch/powerpc/include/asm/nohash/32/slice.h
>  create mode 100644 arch/powerpc/include/asm/nohash/64/slice.h
>  create mode 100644 arch/powerpc/include/asm/slice.h
>
> diff --git a/arch/powerpc/include/asm/book3s/64/slice.h 
> b/arch/powerpc/include/asm/book3s/64/slice.h
> new file mode 100644
> index ..f9a2c8bd7a77
> --- /dev/null
> +++ b/arch/powerpc/include/asm/book3s/64/slice.h
> @@ -0,0 +1,79 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
> +#define _ASM_POWERPC_BOOK3S_64_SLICE_H
> +
> +#ifdef CONFIG_PPC_MM_SLICES
> +
> +#define SLICE_LOW_SHIFT  28
> +#define SLICE_LOW_TOP(0x1ul)
> +#define SLICE_NUM_LOW(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
> +#define GET_LOW_SLICE_INDEX(addr)((addr) >> SLICE_LOW_SHIFT)
> +
> +#define SLICE_HIGH_SHIFT 40
> +#define SLICE_NUM_HIGH   (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
> +#define GET_HIGH_SLICE_INDEX(addr)   ((addr) >> SLICE_HIGH_SHIFT)
> +
> +#ifndef __ASSEMBLY__
> +
> +#include 
> +
> +static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
> +{
> + bitmap_zero(dst, nbits);
> +}
> +
> +static inline int slice_bitmap_and(unsigned long *dst,
> +const unsigned long *src1,
> +const unsigned long *src2,
> +unsigned int nbits)
> +{
> + return bitmap_and(dst, src1, src2, nbits);
> +}
> +
> +static inline void slice_bitmap_or(unsigned long *dst,
> +const unsigned long *src1,
> +const unsigned long *src2,
> +unsigned int nbits)
> +{
> + bitmap_or(dst, src1, src2, nbits);
> +}
> +
> +static inline int slice_bitmap_andnot(unsigned long *dst,
> +   const unsigned long *src1,
> +   const unsigned long *src2,
> +   unsigned int nbits)
> +{
> + return bitmap_andnot(dst, src1, src2, nbits);
> +}
> +
> +static inline int slice_bitmap_equal(const unsigned long *src1,
> +  const unsigned long *src2,
> +  unsigned int nbits)
> +{
> + return bitmap_equal(src1, src2, nbits);
> +}
> +
> +static inline int slice_bitmap_empty(const unsigned long *src, unsigned 
> nbits)
> +{
> + return bitmap_empty(src, nbits);
> +}
> +
> +static inline void slice_bitmap_set(unsigned long *map, unsigned int start,
> + unsigned int nbits)
> +{
> + bitmap_set(map, start, nbits);
> +}
> +#endif /* __ASSEMBLY__ */
> +
> +#else /* CONFIG_PPC_MM_SLICES */
> +
> +#define get_slice_psize(mm, addr)((mm)->context.user_psize)
> +#define slice_set_user_psize(mm, psize)  \
> +do { \
> + (mm)->context.user_psize = (psize); \
> + (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
> +} while 

Re: [PATCH 2/4] powerpc/vas: Fix cleanup when VAS is not configured

2018-02-11 Thread Michael Ellerman
Sukadev Bhattiprolu  writes:

> When VAS is not configured in the system, make sure to remove
> the VAS debugfs directory and unregister the platform driver.
>
> Signed-off-by: Sukadev Bhattiprolu 
...
> diff --git a/arch/powerpc/platforms/powernv/vas.c 
> b/arch/powerpc/platforms/powernv/vas.c
> index aebbe95..f83e27d8 100644
> --- a/arch/powerpc/platforms/powernv/vas.c
> +++ b/arch/powerpc/platforms/powernv/vas.c
> @@ -169,8 +169,11 @@ static int __init vas_init(void)
>   found++;
>   }
>  
> - if (!found)
> + if (!found) {
> + platform_driver_unregister(_driver);
> + vas_cleanup_dbgdir();
>   return -ENODEV;
> + }

The better patch would be to move the call to vas_init_dbgdir() down
here, where we know we have successfully registered the driver.

cheers


Re: [PATCH kernel v3] powerpc/pci: Fix broken INTx configuration via OF

2018-02-11 Thread Michael Ellerman
Bjorn Helgaas  writes:

> On Fri, Feb 09, 2018 at 12:07:41PM -0600, Bjorn Helgaas wrote:
>> On Fri, Feb 09, 2018 at 05:23:58PM +1100, Alexey Kardashevskiy wrote:
>> > Commit 59f47eff03a0 ("powerpc/pci: Use of_irq_parse_and_map_pci() helper")
>> > replaced of_irq_parse_pci() + irq_create_of_mapping() with
>> > of_irq_parse_and_map_pci() but this change lost virq returned by
>> > irq_create_of_mapping() so virq remained zero causing INTx
>> > misconfiguration.
>> > 
>> > This fixes pci_read_irq_line() not to loose a virq returned by
>> > of_irq_parse_and_map_pci().
>> 
>> s/not to loose a/to not lose the/
>> 
>> > Fixes: 59f47eff03a0 "powerpc/pci: Use of_irq_parse_and_map_pci() helper"
>> > Signed-off-by: Alexey Kardashevskiy 
>> 
>> I'm fine with this version.
>> 
>> Since you started applying a previous version, Michael, I'll assume
>> you will handle this unless you tell me otherwise.  One way or another
>> it would be good to get this in before -rc1.
>
> I went ahead and applied this and asked Linus to pull it.

Thanks.

cheers


[PATCH 1/1] powerpc/pseries: Enable RAS hotplug events late

2018-02-11 Thread Sam Bobroff
Currently if the kernel receives a memory hot-unplug event early
enough, it may get stuck in an infinite loop in
dissolve_free_huge_pages(). This appears as a stall just after:

pseries-hotplug-mem: Attempting to hot-remove XX LMB(s) at 

It appears to be caused by "minimum_order" being uninitialized, due to
init_ras_IRQ() executing before hugetlb_init().

To correct this, extract the part of init_ras_IRQ() that enables
hotplug event processing and place it in the machine_late_initcall
phase, which is guaranteed to be after hugetlb_init() is called.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/platforms/pseries/ras.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/ras.c 
b/arch/powerpc/platforms/pseries/ras.c
index 81d8614e7379..ba284949af06 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -66,6 +66,26 @@ static int __init init_ras_IRQ(void)
of_node_put(np);
}
 
+   /* EPOW Events */
+   np = of_find_node_by_path("/event-sources/epow-events");
+   if (np != NULL) {
+   request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
+   of_node_put(np);
+   }
+
+   return 0;
+}
+machine_subsys_initcall(pseries, init_ras_IRQ);
+
+/*
+ * Enable the hotplug interrupts late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+   struct device_node *np;
+
/* Hotplug Events */
np = of_find_node_by_path("/event-sources/hot-plug-events");
if (np != NULL) {
@@ -75,16 +95,9 @@ static int __init init_ras_IRQ(void)
of_node_put(np);
}
 
-   /* EPOW Events */
-   np = of_find_node_by_path("/event-sources/epow-events");
-   if (np != NULL) {
-   request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
-   of_node_put(np);
-   }
-
return 0;
 }
-machine_subsys_initcall(pseries, init_ras_IRQ);
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
 
 #define EPOW_SHUTDOWN_NORMAL   1
 #define EPOW_SHUTDOWN_ON_UPS   2
-- 
2.16.1.74.g9b0b1f47b



Re: [PATCH v4 2/5] powerpc/mm/slice: Enhance for supporting PPC32

2018-02-11 Thread Nicholas Piggin
On Sun, 11 Feb 2018 21:04:42 +0530
"Aneesh Kumar K.V"  wrote:

> On 02/11/2018 07:29 PM, Nicholas Piggin wrote:
> > On Sat, 10 Feb 2018 13:54:27 +0100 (CET)
> > Christophe Leroy  wrote:
> >   
> >> In preparation for the following patch which will fix an issue on
> >> the 8xx by re-using the 'slices', this patch enhances the
> >> 'slices' implementation to support 32 bits CPUs.
> >>
> >> On PPC32, the address space is limited to 4Gbytes, hence only the low
> >> slices will be used.
> >>
> >> This patch moves "slices" functions prototypes from page64.h to slice.h
> >>
> >> The high slices use bitmaps. As bitmap functions are not prepared to
> >> handling bitmaps of size 0, the bitmap_xxx() calls are wrapped into
> >> slice_bitmap_xxx() functions which will void on PPC32  
> > 
> > On this last point, I think it would be better to put these with the
> > existing slice bitmap functions in slice.c and just have a few #ifdefs
> > for SLICE_NUM_HIGH == 0.
> >   
> 
> We went back and forth with that. IMHO, we should avoid as much #ifdef 
> as possible across platforms. It helps to understand the platform 
> restrictions better as we have less and less access to these platforms. 
> The above change indicates that nohash 32 wants to use the slice code 
> and they have different restrictions. With that we now know that 
> book3s64 and nohash 32 are the two different configs using slice code.

I don't think it's the right place to put it. It's not platform dependent
so much as it just depends on whether or not you have 0 high slices as
a workaround for bitmap API not accepting 0 length.

Another platform that uses the slice code would just have to copy and
paste either the nop or the bitmap implementation depending if it has
high slices. So I don't think it's the right abstraction. And it
implies a bitmap operation but it very specifically only works for
struct slice_mask.high_slices bitmap, which is not clear. Better to
just work with struct slice_mask.

Some ifdefs inside .c code for small helper functions like this IMO isn't
really a big deal -- it's not worse than having it in headers. You just
want to avoid ifdef mess when looking at non-trivial logic.

static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
{
dst->low_slices |= src->low_slices;
#if SLICE_NUM_HIGH > 0
bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
#endif
}

I think that's pretty fine. If you have a singular hatred for ifdef in .c,
then if() works just as well.

Thanks,
Nick


Re: [PATCH V2 3/4] powerpc/mm/hash64: Store the slot information at the right offset.

2018-02-11 Thread Ram Pai
On Sun, Feb 11, 2018 at 08:30:08PM +0530, Aneesh Kumar K.V wrote:
> The hugetlb pte entries are at the PMD and PUD level. Use the right offset
> for them to get the second half of the table.
> 
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/include/asm/book3s/64/hash-4k.h  |  3 ++-
>  arch/powerpc/include/asm/book3s/64/hash-64k.h |  9 +
>  arch/powerpc/include/asm/book3s/64/pgtable.h  |  2 +-
>  arch/powerpc/mm/hash64_4k.c   |  4 ++--
>  arch/powerpc/mm/hash64_64k.c  |  8 
>  arch/powerpc/mm/hugetlbpage-hash64.c  | 10 +++---
>  arch/powerpc/mm/tlb_hash64.c  |  9 +++--
>  7 files changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h 
> b/arch/powerpc/include/asm/book3s/64/hash-4k.h
> index 949d691094a4..67c5475311ee 100644
> diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c

snip...

> index 881ebd53ffc2..9b23f12e863c 100644
> --- a/arch/powerpc/mm/tlb_hash64.c
> +++ b/arch/powerpc/mm/tlb_hash64.c
> @@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long 
> addr,
>   unsigned int psize;
>   int ssize;
>   real_pte_t rpte;
> - int i;
> + int i, offset;
> 
>   i = batch->index;
> 
> @@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long 
> addr,
>   psize = get_slice_psize(mm, addr);
>   /* Mask the address for the correct page size */
>   addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
> + if (unlikely(psize == MMU_PAGE_16G))
> + offset = PTRS_PER_PUD;
> + else
> + offset = PTRS_PER_PMD;

I prefer to encapsulate this under some function/macro; somewhere in hugetlb.h, 
which returns
the offset given a mmu_size.  But no big deal..


Reviewed-by: Ram Pai 

RP



Re: [PATCH V2 2/4] powerpc/mm/hash64: Allocate larger PMD table if hugetlb config is enabled.

2018-02-11 Thread Ram Pai
On Sun, Feb 11, 2018 at 08:30:07PM +0530, Aneesh Kumar K.V wrote:
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/include/asm/book3s/64/hash-64k.h | 2 +-
>  arch/powerpc/include/asm/book3s/64/hash.h | 3 ++-
>  2 files changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
> b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> index c08b3b032ec0..ee440fb3d240 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> @@ -140,7 +140,7 @@ static inline int hash__remap_4k_pfn(struct 
> vm_area_struct *vma, unsigned long a
>  }
> 
>  #define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
>  #define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
>(sizeof(unsigned long) << PMD_INDEX_SIZE))
>  #else
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
> b/arch/powerpc/include/asm/book3s/64/hash.h
> index 234f141fb151..0851c328bea6 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -23,7 +23,8 @@
>H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + 
> PAGE_SHIFT)
>  #define H_PGTABLE_RANGE  (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
> 
> -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES)
> +#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) 
> && \
> + defined(CONFIG_PPC_64K_PAGES)
>  /*
>   * only with hash 64k we need to use the second half of pmd page table
>   * to store pointer to deposited pgtable_t

A small nitpick.  the definition of  H_PMD_CACHE_INDEX and
H_PUD_CACHE_INDEX (introduced in the previous patch)
can be nested under #ifdef CONFIG_PPC_64K_PAGES


Reviewed-by: Ram Pai 



Re: [PATCH V2 1/4] powerpc/mm: Fix crashes with PUD level hugetlb config

2018-02-11 Thread Ram Pai
On Sun, Feb 11, 2018 at 08:30:06PM +0530, Aneesh Kumar K.V wrote:
> To support memory keys, we moved the hash pte slot information to the second
> half of the page table. This was ok with PTE entries at level 4 and level 3.
> We already allocate larger page table pages at those level to accommodate extra
> details. For level 4 we already have the extra space which was used to track
> 4k hash page table entry details and at pmd level the extra space was 
> allocated
> to track the THP details.
> 
> With hugetlbfs PTE, we used this extra space at the PMD level to store the
> slot details. But we also support hugetlbfs PTE at PUD level and PUD level page
> didn't allocate extra space. This resulted in memory corruption.
> 
> Fix this by allocating extra space at PUD level when HUGETLB is enabled. We
> may need further changes to allocate larger space at PMD level when we enable
> HUGETLB. That will be done in next patch.
> 
> Fixes:bf9a95f9a6481bc6e(" powerpc: Free up four 64K PTE bits in 64K backed 
> HPTE pages")
> 
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/include/asm/book3s/32/pgtable.h  |  1 +
>  arch/powerpc/include/asm/book3s/64/hash-64k.h |  5 +
>  arch/powerpc/include/asm/book3s/64/hash.h | 10 ++
>  arch/powerpc/include/asm/book3s/64/pgalloc.h  |  6 +++---
>  arch/powerpc/include/asm/book3s/64/pgtable.h  |  2 ++
>  arch/powerpc/include/asm/nohash/32/pgtable.h  |  1 +
>  arch/powerpc/include/asm/nohash/64/pgtable.h  |  1 +
>  arch/powerpc/mm/hash_utils_64.c   |  1 +
>  arch/powerpc/mm/init-common.c |  4 ++--
>  arch/powerpc/mm/pgtable-radix.c   |  1 +
>  arch/powerpc/mm/pgtable_64.c  |  2 ++
>  11 files changed, 29 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
> b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 30a155c0a6b0..c615abdce119 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -16,6 +16,7 @@
>  #define PGD_INDEX_SIZE   (32 - PGDIR_SHIFT)
> 
>  #define PMD_CACHE_INDEX  PMD_INDEX_SIZE
> +#define PUD_CACHE_INDEX  PUD_INDEX_SIZE
> 
>  #ifndef __ASSEMBLY__
>  #define PTE_TABLE_SIZE   (sizeof(pte_t) << PTE_INDEX_SIZE)
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
> b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> index 338b7da468ce..c08b3b032ec0 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> @@ -146,7 +146,12 @@ static inline int hash__remap_4k_pfn(struct 
> vm_area_struct *vma, unsigned long a
>  #else
>  #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
>  #endif
> +#ifdef CONFIG_HUGETLB_PAGE
> +#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) +\
> +  (sizeof(unsigned long) << PUD_INDEX_SIZE))
> +#else
>  #define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
> +#endif
>  #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
> 
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
> b/arch/powerpc/include/asm/book3s/64/hash.h
> index 0920eff731b3..234f141fb151 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -32,6 +32,16 @@
>  #else
>  #define H_PMD_CACHE_INDEXH_PMD_INDEX_SIZE
>  #endif
> +/*
> + * We not store the slot details in the second half of page table.

s/not//
We store


Reviewed-by: Ram Pai 



Re: [PATCH 2/3] cxl: Introduce module parameter 'enable_psltrace'

2018-02-11 Thread Vaibhav Jain
Thanks for reviewing the patch Christophe,

christophe lombard  writes:
>> +bool cxl_enable_psltrace = true;
>> +module_param_named(enable_psltrace, cxl_enable_psltrace, bool, 0600);
>> +MODULE_PARM_DESC(enable_psltrace, "Set PSL traces on probe. default: on");
>> +
> I am not too agree to add a new parameter. This can cause doubts.
> PSL team has confirmed that enabling traces has no impact.
> Do you see any reason to disable the traces ?

Traces on PSL follow a 'set and fetch' model. So once the trace buffer for
a specific array is full it will stop and switch to 'FIN' state and at
that point we need to fetch the trace-data and reinit the array to
re-arm it.

There might be some circumstances where this model may lead to confusion
specifically when AFU developers assume that the trace arrays are
already armed and don't re-arm it, causing loss of trace data.

So this module param is a compromise to keep the old behaviour of traces
array intact where in the arming/disarming of the trace arrays is
controlled completely by userspace tooling and not by cxl.

-- 
Vaibhav Jain 
Linux Technology Center, IBM India Pvt. Ltd.



Re: [PATCH 1/3] cxl: Introduce various enums/defines for PSL9 trace arrays

2018-02-11 Thread Vaibhav Jain
Thanks for reviewing the patch Christophe,
christophe lombard  writes:

>> +for (traceid = 0; traceid < CXL_PSL9_TRACEID_MAX; ++traceid) {
>> +trace_state = CXL_PSL9_TRACE_STATE(trace_cfg, traceid);
>> +dev_dbg(>dev, "Traceid-%d trace_state=0x%0llX\n",
>>  traceid, trace_state);
>> 
> any reason to use dev_dbg instead of pr_devel ?
Wanted to distinguish among multiple cxl cards in the system.

-- 
Vaibhav Jain 
Linux Technology Center, IBM India Pvt. Ltd.



KVM compile error

2018-02-11 Thread Christian Zigotzky
Just for info: KVM doesn’t compile currently.

Error messages:

CC  arch/powerpc/kvm/powerpc.o
arch/powerpc/kvm/powerpc.c: In function 'kvm_arch_vcpu_ioctl_run':
arch/powerpc/kvm/powerpc.c:1611:1: error: label 'out' defined but not used 
[-Werror=unused-label]
 out:
 ^
cc1: all warnings being treated as errors

— Christian


Re: [PATCH v4 2/5] powerpc/mm/slice: Enhance for supporting PPC32

2018-02-11 Thread Aneesh Kumar K.V



On 02/11/2018 07:29 PM, Nicholas Piggin wrote:

On Sat, 10 Feb 2018 13:54:27 +0100 (CET)
Christophe Leroy  wrote:


In preparation for the following patch which will fix an issue on
the 8xx by re-using the 'slices', this patch enhances the
'slices' implementation to support 32 bits CPUs.

On PPC32, the address space is limited to 4Gbytes, hence only the low
slices will be used.

This patch moves "slices" functions prototypes from page64.h to slice.h

The high slices use bitmaps. As bitmap functions are not prepared to
handling bitmaps of size 0, the bitmap_xxx() calls are wrapped into
slice_bitmap_xxx() functions which will void on PPC32


On this last point, I think it would be better to put these with the
existing slice bitmap functions in slice.c and just have a few #ifdefs
for SLICE_NUM_HIGH == 0.



We went back and forth with that. IMHO, we should avoid as much #ifdef 
as possible across platforms. It helps to understand the platform 
restrictions better as we have less and less access to these platforms. 
The above change indicates that nohash 32 wants to use the slice code 
and they have different restrictions. With that we now know that 
book3s64 and nohash 32 are the two different configs using slice code.


-aneesh



[PATCH V2 4/4] powerpc/mm/hash64: memset the pagetable pages on allocation.

2018-02-11 Thread Aneesh Kumar K.V
Now that we are using second half of the table to store slot details and we
don't clear them in the huge_pte_get_and_clear, we need to make sure we zero
out the range on allocation.

Simplify this by calling the object initialization after kmem_cache_alloc and
update the constructor do nothing.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/32/pgalloc.h | 16 +
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 34 +++-
 arch/powerpc/include/asm/nohash/pgalloc.h| 16 +
 arch/powerpc/mm/init-common.c| 15 
 4 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h 
b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 5073cc75f1c8..9f5c411bce1b 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -139,4 +139,20 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, 
pgtable_t table,
pgtable_page_dtor(table);
pgtable_free_tlb(tlb, page_address(table), 0);
 }
+
+static inline void pgd_ctor(void *addr)
+{
+   memset(addr, 0, PGD_TABLE_SIZE);
+}
+
+static inline void pud_ctor(void *addr)
+{
+   memset(addr, 0, PUD_TABLE_SIZE);
+}
+
+static inline void pmd_ctor(void *addr)
+{
+   memset(addr, 0, PMD_TABLE_SIZE);
+}
+
 #endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h 
b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 53df86d3cfce..d6ee7563b09d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -73,10 +73,13 @@ static inline void radix__pgd_free(struct mm_struct *mm, 
pgd_t *pgd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+   pgd_t *pgd;
if (radix_enabled())
return radix__pgd_alloc(mm);
-   return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-   pgtable_gfp_flags(mm, GFP_KERNEL));
+   pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+  pgtable_gfp_flags(mm, GFP_KERNEL));
+   memset(pgd, 0, PGD_TABLE_SIZE);
+   return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,8 +96,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t 
*pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-   return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
-   pgtable_gfp_flags(mm, GFP_KERNEL));
+   pud_t *pud;
+   pud = kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
+  pgtable_gfp_flags(mm, GFP_KERNEL));
+   memset(pud, 0, PUD_TABLE_SIZE);
+   return pud;
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -120,8 +126,12 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, 
pud_t *pud,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-   return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-   pgtable_gfp_flags(mm, GFP_KERNEL));
+   pmd_t *pmd;
+   pmd = kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+  pgtable_gfp_flags(mm, GFP_KERNEL));
+   memset(pmd, 0, PMD_TABLE_SIZE);
+   return pmd;
+
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
@@ -218,4 +228,16 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, 
pgtable_t table,
 
 #define check_pgt_cache()  do { } while (0)
 
+static inline void pgd_ctor(void *addr)
+{
+}
+
+static inline void pud_ctor(void *addr)
+{
+}
+
+static inline void pmd_ctor(void *addr)
+{
+}
+
 #endif /* _ASM_POWERPC_BOOK3S_64_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h 
b/arch/powerpc/include/asm/nohash/pgalloc.h
index 0634f2949438..df3be548ff97 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -21,4 +21,20 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
 #else
 #include 
 #endif
+
+static inline void pgd_ctor(void *addr)
+{
+   memset(addr, 0, PGD_TABLE_SIZE);
+}
+
+static inline void pud_ctor(void *addr)
+{
+   memset(addr, 0, PUD_TABLE_SIZE);
+}
+
+static inline void pmd_ctor(void *addr)
+{
+   memset(addr, 0, PMD_TABLE_SIZE);
+}
+
 #endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 2b656e67f2ea..f92dd8cee3c5 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -25,21 +25,6 @@
 #include 
 #include 
 
-static void pgd_ctor(void *addr)
-{
-   memset(addr, 0, PGD_TABLE_SIZE);
-}
-
-static void pud_ctor(void *addr)
-{
-   memset(addr, 0, PUD_TABLE_SIZE);
-}
-
-static void pmd_ctor(void *addr)
-{
-   memset(addr, 0, PMD_TABLE_SIZE);
-}
-
 struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
 

[PATCH V2 3/4] powerpc/mm/hash64: Store the slot information at the right offset.

2018-02-11 Thread Aneesh Kumar K.V
The hugetlb pte entries are at the PMD and PUD level. Use the right offset
for them to get the second half of the table.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  3 ++-
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  9 +
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  2 +-
 arch/powerpc/mm/hash64_4k.c   |  4 ++--
 arch/powerpc/mm/hash64_64k.c  |  8 
 arch/powerpc/mm/hugetlbpage-hash64.c  | 10 +++---
 arch/powerpc/mm/tlb_hash64.c  |  9 +++--
 7 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h 
b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 949d691094a4..67c5475311ee 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
  * keeping the prototype consistent across the two formats.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-   unsigned int subpg_index, unsigned long hidx)
+unsigned int subpg_index, unsigned 
long hidx,
+int offset)
 {
return (hidx << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index ee440fb3d240..3bcf269f8f55 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -45,7 +45,7 @@
  * generic accessors and iterators here
  */
 #define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
 {
real_pte_t rpte;
unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
 */
smp_rmb();
 
-   hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+   hidxp = (unsigned long *)(ptep + offset);
rpte.hidx = *hidxp;
return rpte;
 }
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, 
unsigned long index)
  * expected to modify the PTE bits accordingly and commit the PTE to memory.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-   unsigned int subpg_index, unsigned long hidx)
+unsigned int subpg_index,
+unsigned long hidx, int offset)
 {
-   unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+   unsigned long *hidxp = (unsigned long *)(ptep + offset);
 
rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
*hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 1c8c88e90553..a6b9f1d74600 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -350,7 +350,7 @@ extern unsigned long pci_io_base;
  */
 #ifndef __real_pte
 
-#define __real_pte(e,p)((real_pte_t){(e)})
+#define __real_pte(e, p, o)((real_pte_t){(e)})
 #define __rpte_to_pte(r)   ((r).pte)
 #define __rpte_to_hidx(r,index)(pte_val(__rpte_to_pte(r)) >> 
H_PAGE_F_GIX_SHIFT)
 
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 5a69b51d08a3..d573d7d07f25 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, 
unsigned long vsid,
 * need to add in 0x1 if it's a read-only user page
 */
rflags = htab_convert_pte_flags(new_pte);
-   rpte = __real_pte(__pte(old_pte), ptep);
+   rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, 
unsigned long vsid,
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-   new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+   new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 2253bbc6a599..e601d95c3b20 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, 
unsigned long vsid,
 
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn  = hpt_vpn(ea, vsid, ssize);
-   rpte = 

[PATCH V2 2/4] powerpc/mm/hash64: Allocate larger PMD table if hugetlb config is enabled.

2018-02-11 Thread Aneesh Kumar K.V
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 2 +-
 arch/powerpc/include/asm/book3s/64/hash.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index c08b3b032ec0..ee440fb3d240 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -140,7 +140,7 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct 
*vma, unsigned long a
 }
 
 #define H_PTE_TABLE_SIZE   PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
 #define H_PMD_TABLE_SIZE   ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
 (sizeof(unsigned long) << PMD_INDEX_SIZE))
 #else
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 234f141fb151..0851c328bea6 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -23,7 +23,8 @@
 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + 
PAGE_SHIFT)
 #define H_PGTABLE_RANGE(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+   defined(CONFIG_PPC_64K_PAGES)
 /*
  * only with hash 64k we need to use the second half of pmd page table
  * to store pointer to deposited pgtable_t
-- 
2.14.3



[PATCH V2 1/4] powerpc/mm: Fix crashes with PUD level hugetlb config

2018-02-11 Thread Aneesh Kumar K.V
To support memory keys, we moved the hash pte slot information to the second
half of the page table. This was ok with PTE entries at level 4 and level 3.
We already allocate larger page table pages at those level to accommodate extra
details. For level 4 we already have the extra space which was used to track
4k hash page table entry details and at pmd level the extra space was allocated
to track the THP details.

With hugetlbfs PTE, we used this extra space at the PMD level to store the
slot details. But we also support hugetlbfs PTE at PUD level and PUD level page
didn't allocate extra space. This resulted in memory corruption.

Fix this by allocating extra space at PUD level when HUGETLB is enabled. We
may need further changes to allocate larger space at PMD level when we enable
HUGETLB. That will be done in next patch.

Fixes:bf9a95f9a6481bc6e(" powerpc: Free up four 64K PTE bits in 64K backed HPTE 
pages")

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h  |  1 +
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  5 +
 arch/powerpc/include/asm/book3s/64/hash.h | 10 ++
 arch/powerpc/include/asm/book3s/64/pgalloc.h  |  6 +++---
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  2 ++
 arch/powerpc/include/asm/nohash/32/pgtable.h  |  1 +
 arch/powerpc/include/asm/nohash/64/pgtable.h  |  1 +
 arch/powerpc/mm/hash_utils_64.c   |  1 +
 arch/powerpc/mm/init-common.c |  4 ++--
 arch/powerpc/mm/pgtable-radix.c   |  1 +
 arch/powerpc/mm/pgtable_64.c  |  2 ++
 11 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 30a155c0a6b0..c615abdce119 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -16,6 +16,7 @@
 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEXPMD_INDEX_SIZE
+#define PUD_CACHE_INDEXPUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 338b7da468ce..c08b3b032ec0 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -146,7 +146,12 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct 
*vma, unsigned long a
 #else
 #define H_PMD_TABLE_SIZE   (sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE   ((sizeof(pud_t) << PUD_INDEX_SIZE) +\
+(sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
 #define H_PUD_TABLE_SIZE   (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
 #define H_PGD_TABLE_SIZE   (sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 0920eff731b3..234f141fb151 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -32,6 +32,16 @@
 #else
 #define H_PMD_CACHE_INDEX  H_PMD_INDEX_SIZE
 #endif
+/*
+ * We not store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX  (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX  (H_PUD_INDEX_SIZE)
+#endif
 /*
  * Define the address range of the kernel non-linear virtual area
  */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h 
b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 1fcfa425cefa..53df86d3cfce 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -93,13 +93,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t 
*pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-   return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+   return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
pgtable_gfp_flags(mm, GFP_KERNEL));
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-   kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+   kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +115,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, 
pud_t *pud,
 * ahead and flush the page walk cache
 */
flush_tlb_pgtable(tlb, address);
-pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git 

Re: [PATCH v4 2/5] powerpc/mm/slice: Enhance for supporting PPC32

2018-02-11 Thread Nicholas Piggin
On Sat, 10 Feb 2018 13:54:27 +0100 (CET)
Christophe Leroy  wrote:

> In preparation for the following patch which will fix an issue on
> the 8xx by re-using the 'slices', this patch enhances the
> 'slices' implementation to support 32 bits CPUs.
> 
> On PPC32, the address space is limited to 4Gbytes, hence only the low
> slices will be used.
> 
> This patch moves "slices" functions prototypes from page64.h to slice.h
> 
> The high slices use bitmaps. As bitmap functions are not prepared to
> handling bitmaps of size 0, the bitmap_xxx() calls are wrapped into
> slice_bitmap_xxx() functions which will void on PPC32

On this last point, I think it would be better to put these with the
existing slice bitmap functions in slice.c and just have a few #ifdefs
for SLICE_NUM_HIGH == 0.

Thanks,
Nick