[PATCH 08/12] parisc: use pgtable-nopXd instead of 4level-fixup

2019-10-23 Thread Mike Rapoport
From: Mike Rapoport 

parisc has two or three levels of page tables and can use appropriate
pgtable-nopXd and folding of the upper layers.

Replace usage of include/asm-generic/4level-fixup.h and explicit
definitions of __PAGETABLE_PxD_FOLDED in parisc with
include/asm-generic/pgtable-nopmd.h for two-level configurations and with
include/asm-generic/pgtable-nopud.h for three-level configurations, and
adjust page table manipulation macros and functions accordingly.
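
For orientation, here is a minimal standalone sketch of the folding idea
(illustrative userspace C, not the verbatim kernel headers -- the real
pgtable-nopXd headers define many more helpers):

	#include <stdio.h>

	typedef struct { unsigned long pgd; } pgd_t;
	/* Folded level: a pud_t is just a wrapper around the pgd entry. */
	typedef struct { pgd_t pgd; } pud_t;

	#define PTRS_PER_PUD 1

	static pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
	{
		(void)addr;
		return (pud_t *)pgd;	/* walking pgd->pud costs nothing */
	}

	int main(void)
	{
		pgd_t pgd = { 0x1000 };
		pud_t *pud = pud_offset(&pgd, 0);

		printf("pud entry = %#lx (same storage as the pgd entry)\n",
		       pud->pgd.pgd);
		return 0;
	}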

Signed-off-by: Mike Rapoport 
---
 arch/parisc/include/asm/page.h| 30 +-
 arch/parisc/include/asm/pgalloc.h | 41 +++---
 arch/parisc/include/asm/pgtable.h | 52 +++
 arch/parisc/include/asm/tlb.h |  2 ++
 arch/parisc/kernel/cache.c| 13 ++
 arch/parisc/kernel/pci-dma.c  |  9 +--
 arch/parisc/mm/fixmap.c   | 10 +---
 7 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 93caf17..1d339ee 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -42,48 +42,54 @@ typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */
 
 /* NOTE: even on 64 bits, these entries are __u32 because we allocate
  * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
-typedef struct { __u32 pmd; } pmd_t;
 typedef struct { __u32 pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
 
-#define pte_val(x) ((x).pte)
-/* These do not work lvalues, so make sure we don't use them as such. */
+#if CONFIG_PGTABLE_LEVELS == 3
+typedef struct { __u32 pmd; } pmd_t;
+#define __pmd(x)   ((pmd_t) { (x) } )
+/* pXd_val() do not work lvalues, so make sure we don't use them as such. */
 #define pmd_val(x) ((x).pmd + 0)
+#endif
+
+#define pte_val(x) ((x).pte)
 #define pgd_val(x) ((x).pgd + 0)
 #define pgprot_val(x)  ((x).pgprot)
 
 #define __pte(x)   ((pte_t) { (x) } )
-#define __pmd(x)   ((pmd_t) { (x) } )
 #define __pgd(x)   ((pgd_t) { (x) } )
 #define __pgprot(x)((pgprot_t) { (x) } )
 
-#define __pmd_val_set(x,n) (x).pmd = (n)
-#define __pgd_val_set(x,n) (x).pgd = (n)
-
 #else
 /*
  * .. while these make it easier on the compiler
  */
 typedef unsigned long pte_t;
+
+#if CONFIG_PGTABLE_LEVELS == 3
 typedef __u32 pmd_t;
+#define pmd_val(x)  (x)
+#define __pmd(x)   (x)
+#endif
+
 typedef __u32 pgd_t;
 typedef unsigned long pgprot_t;
 
 #define pte_val(x)  (x)
-#define pmd_val(x)  (x)
 #define pgd_val(x)  (x)
 #define pgprot_val(x)   (x)
 
 #define __pte(x)(x)
-#define __pmd(x)   (x)
 #define __pgd(x)(x)
 #define __pgprot(x) (x)
 
-#define __pmd_val_set(x,n) (x) = (n)
-#define __pgd_val_set(x,n) (x) = (n)
-
 #endif /* STRICT_MM_TYPECHECKS */
 
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#if CONFIG_PGTABLE_LEVELS == 3
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+#endif
+
 typedef struct page *pgtable_t;
 
 typedef struct __physmem_range {
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index d98647c..9ac74da 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -34,13 +34,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
/* Populate first pmd with allocated memory.  We mark it
 * with PxD_FLAG_ATTACHED as a signal to the system that this
 * pmd entry may not be cleared. */
-   __pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT | 
-   PxD_FLAG_VALID | 
-   PxD_FLAG_ATTACHED) 
-   + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
+   set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
+   PxD_FLAG_VALID |
+   PxD_FLAG_ATTACHED)
+   + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
 * a signal that this pmd may not be freed */
-   __pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
+   set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
 #endif
}
spin_lock_init(pgd_spinlock(actual_pgd));
@@ -59,10 +59,10 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 /* Three Level Page Table Support for pmd's */
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-   __pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
-   (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
+   set_pud(pud, __pud((PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
+   (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
+}

[PATCH 09/12] sparc32: use pgtable-nopud instead of 4level-fixup

2019-10-23 Thread Mike Rapoport
From: Mike Rapoport 

32-bit version of sparc has three-level page tables and can use
pgtable-nopud and folding of the upper layers.

Replace usage of include/asm-generic/4level-fixup.h with
include/asm-generic/pgtable-nopud.h and adjust page table manipulation
macros and functions accordingly.
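
For orientation, the fixed-up walk in the fault path ends up with the
canonical shape below (a hedged sketch using the generic page-table API;
with pgtable-nopud the p4d/pud steps compile away to pointer casts):

	static int walk_example(struct mm_struct *mm, unsigned long address)
	{
		pgd_t *pgd = pgd_offset(mm, address);
		p4d_t *p4d = p4d_offset(pgd, address);	/* no-op: p4d is folded */
		pud_t *pud = pud_offset(p4d, address);	/* no-op: pud is folded */
		pmd_t *pmd = pmd_offset(pud, address);

		return pmd_present(*pmd);
	}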

Signed-off-by: Mike Rapoport 
---
 arch/sparc/include/asm/pgalloc_32.h |  6 ++---
 arch/sparc/include/asm/pgtable_32.h | 28 ++--
 arch/sparc/mm/fault_32.c| 11 ++--
 arch/sparc/mm/highmem.c |  6 -
 arch/sparc/mm/io-unit.c |  6 -
 arch/sparc/mm/iommu.c   |  6 -
 arch/sparc/mm/srmmu.c   | 51 +
 7 files changed, 81 insertions(+), 33 deletions(-)

diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 10538a4..eae0c92 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -26,14 +26,14 @@ static inline void free_pgd_fast(pgd_t *pgd)
 #define pgd_free(mm, pgd)  free_pgd_fast(pgd)
 #define pgd_alloc(mm)  get_pgd_fast()
 
-static inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
+static inline void pud_set(pud_t * pudp, pmd_t * pmdp)
 {
unsigned long pa = __nocache_pa(pmdp);
 
-   set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (pa >> 4))));
+   set_pte((pte_t *)pudp, __pte((SRMMU_ET_PTD | (pa >> 4))));
 }
 
-#define pgd_populate(MM, PGD, PMD)  pgd_set(PGD, PMD)
+#define pud_populate(MM, PGD, PMD)  pud_set(PGD, PMD)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm,
   unsigned long address)
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 31da448..6d6f44c 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -12,7 +12,7 @@
 #include 
 
 #ifndef __ASSEMBLY__
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 #include 
 #include 
@@ -132,12 +132,12 @@ static inline struct page *pmd_page(pmd_t pmd)
return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
 }
 
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+static inline unsigned long pud_page_vaddr(pud_t pud)
 {
-   if (srmmu_device_memory(pgd_val(pgd))) {
+   if (srmmu_device_memory(pud_val(pud))) {
return ~0;
} else {
-   unsigned long v = pgd_val(pgd) & SRMMU_PTD_PMASK;
+   unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK;
return (unsigned long)__nocache_va(v << 4);
}
 }
@@ -184,24 +184,24 @@ static inline void pmd_clear(pmd_t *pmdp)
	set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
 }
 
-static inline int pgd_none(pgd_t pgd)  
+static inline int pud_none(pud_t pud)
 {
-   return !(pgd_val(pgd) & 0xFFF);
+   return !(pud_val(pud) & 0xFFF);
 }
 
-static inline int pgd_bad(pgd_t pgd)
+static inline int pud_bad(pud_t pud)
 {
-   return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
+   return (pud_val(pud) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
 }
 
-static inline int pgd_present(pgd_t pgd)
+static inline int pud_present(pud_t pud)
 {
-   return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
+   return ((pud_val(pud) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
 }
 
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void pud_clear(pud_t *pudp)
 {
-   set_pte((pte_t *)pgdp, __pte(0));
+   set_pte((pte_t *)pudp, __pte(0));
 }
 
 /*
@@ -319,9 +319,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 /* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t * dir, unsigned long address)
 {
-   return (pmd_t *) pgd_page_vaddr(*dir) +
+   return (pmd_t *) pud_page_vaddr(*dir) +
((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
 }
 
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 8d69de1..89976c9 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -351,6 +351,8 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 */
int offset = pgd_index(address);
pgd_t *pgd, *pgd_k;
+   p4d_t *p4d, *p4d_k;
+   pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
 
pgd = tsk->active_mm->pgd + offset;
@@ -363,8 +365,13 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
return;
}
 
-   pmd = pmd_offset(pgd, address);
-   pmd_k = pmd_offset(pgd_k, address);
+   p4d = p4d_offset(pgd, address);
+   pud = pud_offset(p4d, address);
+   pmd = pmd_offset(pud, address);
+   pmd_k = pmd_offset(pud_k, address);

[PATCH 03/12] c6x: use pgtable-nopud instead of 4level-fixup

2019-10-23 Thread Mike Rapoport
From: Mike Rapoport 

c6x is a nommu architecture and does not require fixup for the upper
layers of the page tables because they are already handled by the generic
nommu implementation.

Replace usage of include/asm-generic/4level-fixup.h with
include/asm-generic/pgtable-nopud.h

Signed-off-by: Mike Rapoport 
---
 arch/c6x/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 0b6919c..197c473 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -8,7 +8,7 @@
 #ifndef _ASM_C6X_PGTABLE_H
 #define _ASM_C6X_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 #include 
 #include 
-- 
2.7.4



Re: [PATCH v3 7/8] mips: numa: make node_to_cpumask_map() NUMA_NO_NODE aware for mips

2019-09-15 Thread Mike Rapoport
On Sun, Sep 15, 2019 at 02:13:51PM +0800, Yunsheng Lin wrote:
> On 2019/9/15 13:49, Mike Rapoport wrote:
> > Hi,
> > 
> > On Thu, Sep 12, 2019 at 06:15:33PM +0800, Yunsheng Lin wrote:
> >> When passing the return value of dev_to_node() to cpumask_of_node()
> >> without checking the node id if the node id is NUMA_NO_NODE, there is
> >> global-out-of-bounds detected by KASAN.
> >>
> >> From the discussion [1], NUMA_NO_NODE really means no node affinity,
> >> which also means all cpus should be usable. So the cpumask_of_node()
> >> should always return all cpus online when user passes the node id
> >> as NUMA_NO_NODE, just like similar semantic that page allocator handles
> >> NUMA_NO_NODE.
> >>
> >> But we cannot really copy the page allocator logic. Simply because the
> >> page allocator doesn't enforce the near node affinity. It just picks it
> >> up as a preferred node but then it is free to fallback to any other numa
> >> node. This is not the case here and node_to_cpumask_map will only restrict
> >> to the particular node's cpus which would have really non deterministic
> >> behavior depending on where the code is executed. So in fact we really
> >> want to return cpu_online_mask for NUMA_NO_NODE.
> >>
> >> Since this arch was already NUMA_NO_NODE aware, this patch only changes
> >> it to return cpu_online_mask and use NUMA_NO_NODE instead of "-1".
> >>
> >> [1] https://lore.kernel.org/patchwork/patch/1125789/
> >> Signed-off-by: Yunsheng Lin 
> >> Suggested-by: Michal Hocko 
> >> ---
> >> V3: Change to only handle NUMA_NO_NODE, and return cpu_online_mask
> >> for NUMA_NO_NODE case, and change the commit log to better justify
> >> the change.
> >> ---
> >>  arch/mips/include/asm/mach-ip27/topology.h | 4 ++--
> > 
> > Nit: the subject says "mips:", but this patch only touches sgi-ip27 and
> > loongson is updated as a separate patch. I don't see why both patches
> > cannot be merged. Moreover, the whole set can be made as a single patch,
> > IMHO.
> 
> Thanks for reviewing.
> 
> As this patchset touches a few files that may have different maintainers,
> I am not sure whether separate patches per arch or a single patch would
> make the merging process easier.

The set makes the same logical change to several definitions of
cpumask_of_node(). It's appropriate to have all these changes in a single
patch.
 
> It can be made as a single patch if a single patch will make the merging
> process easy.
> 
> > 
> >>  1 file changed, 2 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/mips/include/asm/mach-ip27/topology.h 
> >> b/arch/mips/include/asm/mach-ip27/topology.h
> >> index 965f079..04505e6 100644
> >> --- a/arch/mips/include/asm/mach-ip27/topology.h
> >> +++ b/arch/mips/include/asm/mach-ip27/topology.h
> >> @@ -15,8 +15,8 @@ struct cpuinfo_ip27 {
> >>  extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
> >>  
> >>  #define cpu_to_node(cpu)  (sn_cpu_info[(cpu)].p_nodeid)
> >> -#define cpumask_of_node(node) ((node) == -1 ? \
> >> -   cpu_all_mask : \
> >> +#define cpumask_of_node(node) ((node) == NUMA_NO_NODE ? \
> >> +   cpu_online_mask :  \
> >> &hub_data(node)->h_cpus)
> >>  struct pci_bus;
> >>  extern int pcibus_to_node(struct pci_bus *);
> >> -- 
> >> 2.8.1
> >>
> > 
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH v3 7/8] mips: numa: make node_to_cpumask_map() NUMA_NO_NODE aware for mips

2019-09-14 Thread Mike Rapoport
Hi,

On Thu, Sep 12, 2019 at 06:15:33PM +0800, Yunsheng Lin wrote:
> When passing the return value of dev_to_node() to cpumask_of_node()
> without checking the node id if the node id is NUMA_NO_NODE, there is
> global-out-of-bounds detected by KASAN.
> 
> From the discussion [1], NUMA_NO_NODE really means no node affinity,
> which also means all cpus should be usable. So the cpumask_of_node()
> should always return all cpus online when user passes the node id
> as NUMA_NO_NODE, just like similar semantic that page allocator handles
> NUMA_NO_NODE.
> 
> But we cannot really copy the page allocator logic. Simply because the
> page allocator doesn't enforce the near node affinity. It just picks it
> up as a preferred node but then it is free to fallback to any other numa
> node. This is not the case here and node_to_cpumask_map will only restrict
> to the particular node's cpus which would have really non deterministic
> behavior depending on where the code is executed. So in fact we really
> want to return cpu_online_mask for NUMA_NO_NODE.
> 
> Since this arch was already NUMA_NO_NODE aware, this patch only changes
> it to return cpu_online_mask and use NUMA_NO_NODE instead of "-1".
> 
> [1] https://lore.kernel.org/patchwork/patch/1125789/
> Signed-off-by: Yunsheng Lin 
> Suggested-by: Michal Hocko 
> ---
> V3: Change to only handle NUMA_NO_NODE, and return cpu_online_mask
> for NUMA_NO_NODE case, and change the commit log to better justify
> the change.
> ---
>  arch/mips/include/asm/mach-ip27/topology.h | 4 ++--

Nit: the subject says "mips:", but this patch only touches sgi-ip27 and
loongson is updated as a separate patch. I don't see why both patches
cannot be merged. Moreover, the whole set can be made as a single patch,
IMHO.

>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/mips/include/asm/mach-ip27/topology.h 
> b/arch/mips/include/asm/mach-ip27/topology.h
> index 965f079..04505e6 100644
> --- a/arch/mips/include/asm/mach-ip27/topology.h
> +++ b/arch/mips/include/asm/mach-ip27/topology.h
> @@ -15,8 +15,8 @@ struct cpuinfo_ip27 {
>  extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
>  
>  #define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid)
> -#define cpumask_of_node(node) ((node) == -1 ? \
> -  cpu_all_mask : \
> +#define cpumask_of_node(node) ((node) == NUMA_NO_NODE ? \
> +  cpu_online_mask :  \
> &hub_data(node)->h_cpus)
>  struct pci_bus;
>  extern int pcibus_to_node(struct pci_bus *);
> -- 
> 2.8.1
> 

-- 
Sincerely yours,
Mike.



[PATCH v2 12/14] riscv: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
The only difference between the generic and RISC-V implementation of PTE
allocation is the usage of __GFP_RETRY_MAYFAIL for both kernel and user
PTEs and the absence of __GFP_ACCOUNT for the user PTEs.

The conversion to the generic version removes __GFP_RETRY_MAYFAIL and
ensures that __GFP_ACCOUNT is used for the user PTE allocations.

The pte_free() and pte_free_kernel() versions are identical to the generic
ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
Reviewed-by: Palmer Dabbelt 
---
 arch/riscv/include/asm/pgalloc.h | 29 ++---
 1 file changed, 2 insertions(+), 27 deletions(-)

diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 94043cf..48f28bb 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -18,6 +18,8 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
 {
@@ -82,33 +84,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   return (pte_t *)__get_free_page(
-   GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
-}
-
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
-   if (likely(pte != NULL))
-   pgtable_page_ctor(pte);
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 #define __pte_free_tlb(tlb, pte, buf)   \
 do {\
pgtable_page_dtor(pte); \
-- 
2.7.4



[PATCH v2 00/14] introduce generic pte_{alloc,free}_one[_kernel]

2019-05-08 Thread Mike Rapoport
Hi,

Many architectures have similar, if not identical implementation of
pte_alloc_one_kernel(), pte_alloc_one(), pte_free_kernel() and pte_free().

A while ago Anshuman suggested introducing a common definition of
GFP_PGTABLE, and during the discussion it was suggested to consolidate the
allocators themselves instead.

These patches introduce generic version of PTE allocation and free and
enable their use on several architectures.

The conversion introduces some changes for some of the architectures.
Here's the executive summary and the details are described at each patch.

* Most architectures do not set __GFP_ACCOUNT for the user page tables.
Switching to the generic functions spreads that goodness to all other
architectures.
* arm, arm64 and unicore32 used to check that the pte is not NULL before
freeing its memory in pte_free_kernel(). This check is dropped during the
conversion as it seems superfluous.
* x86 used to BUG_ON() if the pte was not page aligned during
pte_free_kernel(); the generic version simply frees the memory without any
checks (see the sketch below).
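
For example, the generic pte_free_kernel() boils down to the following (a
sketch consistent with the per-arch versions removed by this series):

	static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
	{
		free_page((unsigned long)pte);	/* no NULL check, no BUG_ON() */
	}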

This set only performs the straightforward conversion, the architectures
with different logic in pte_alloc_one() and pte_alloc_one_kernel() are not
touched, as well as architectures that have custom page table allocators.

v2 changes:
* rebase on the current upstream
* fix copy-paste error in the description of pte_free()
* fix changelog for MIPS to match actual changes
* drop powerpc changes
* add Acked/Reviewed tags

[1] 
https://lore.kernel.org/lkml/1547619692-7946-1-git-send-email-anshuman.khand...@arm.com

Mike Rapoport (14):
  asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel]
  alpha: switch to generic version of pte allocation
  arm: switch to generic version of pte allocation
  arm64: switch to generic version of pte allocation
  csky: switch to generic version of pte allocation
  hexagon: switch to generic version of pte allocation
  m68k: sun3: switch to generic version of pte allocation
  mips: switch to generic version of pte allocation
  nds32: switch to generic version of pte allocation
  nios2: switch to generic version of pte allocation
  parisc: switch to generic version of pte allocation
  riscv: switch to generic version of pte allocation
  um: switch to generic version of pte allocation
  unicore32: switch to generic version of pte allocation

 arch/alpha/include/asm/pgalloc.h |  40 +
 arch/arm/include/asm/pgalloc.h   |  41 +-
 arch/arm/mm/mmu.c|   2 +-
 arch/arm64/include/asm/pgalloc.h |  47 +++
 arch/arm64/mm/mmu.c  |   2 +-
 arch/arm64/mm/pgd.c  |   9 ++-
 arch/csky/include/asm/pgalloc.h  |  30 +-
 arch/hexagon/include/asm/pgalloc.h   |  34 +--
 arch/m68k/include/asm/sun3_pgalloc.h |  41 +-
 arch/mips/include/asm/pgalloc.h  |  33 +--
 arch/nds32/include/asm/pgalloc.h |  31 ++
 arch/nios2/include/asm/pgalloc.h |  37 +---
 arch/parisc/include/asm/pgalloc.h|  33 +--
 arch/riscv/include/asm/pgalloc.h |  29 +-
 arch/um/include/asm/pgalloc.h|  16 +-
 arch/um/kernel/mem.c |  22 ---
 arch/unicore32/include/asm/pgalloc.h |  36 +++-
 arch/x86/include/asm/pgalloc.h   |  19 +--
 arch/x86/mm/pgtable.c|  33 +++
 include/asm-generic/pgalloc.h| 107 +--
 virt/kvm/arm/mmu.c   |   2 +-
 21 files changed, 178 insertions(+), 466 deletions(-)

-- 
2.7.4



[PATCH v2 11/14] parisc: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
parisc allocates PTE pages with __get_free_page() and uses
GFP_KERNEL | __GFP_ZERO for the allocations.

Switch it to the generic version that does exactly the same thing for the
kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The pte_free_kernel() and pte_free() versions on parisc are identical to the
generic ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/parisc/include/asm/pgalloc.h | 33 ++---
 1 file changed, 2 insertions(+), 31 deletions(-)

diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index ea75cc9..4f2059a 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
 
 #include 
 
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 /* Allocate the top level pgd (page directory)
  *
  * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
@@ -122,37 +124,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
pmd_populate_kernel(mm, pmd, page_address(pte_page))
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
-   if (!page)
-   return NULL;
-   if (!pgtable_page_ctor(page)) {
-   __free_page(page);
-   return NULL;
-   }
-   return page;
-}
-
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-   pgtable_page_dtor(pte);
-   pte_free_kernel(mm, page_address(pte));
-}
-
 #define check_pgt_cache()  do { } while (0)
 
 #endif
-- 
2.7.4



[PATCH v2 09/14] nds32: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
The nds32 implementation of pte_alloc_one_kernel() differs from the generic
in the use of __GFP_RETRY_MAYFAIL flag, which is removed after the
conversion.

The nds32 version of pte_alloc_one() missed the call to pgtable_page_ctor()
and also used __GFP_RETRY_MAYFAIL. Switching it to use generic
__pte_alloc_one() for the PTE page allocation ensures that page table
constructor is run and the user page tables are allocated with
__GFP_ACCOUNT.

The conversion to the generic version of pte_free_kernel() removes the NULL
check for pte.

The pte_free() version on nds32 is identical to the generic one and can be
simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/nds32/include/asm/pgalloc.h | 31 ---
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 3c5fee5..954696c 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -9,6 +9,9 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 /*
  * Since we have only two-level page tables, these are trivial
  */
@@ -22,22 +25,11 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()  do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte;
-
-   pte =
-   (pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- __GFP_ZERO);
-
-   return pte;
-}
-
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
pgtable_t pte;
 
-   pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0);
+   pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (pte)
cpu_dcache_wb_page((unsigned long)page_address(pte));
 
@@ -45,21 +37,6 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 }
 
 /*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte)
-{
-   if (pte) {
-   free_page((unsigned long)pte);
-   }
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   __free_page(pte);
-}
-
-/*
  * Populate the pmdp entry with a pointer to the pte.  This pmd is part
  * of the mm address space.
  *
-- 
2.7.4



[PATCH v2 06/14] hexagon: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
The hexagon implementation of pte_alloc_one(), pte_alloc_one_kernel(),
pte_free_kernel() and pte_free() is identical to the generic one except
for the lack of __GFP_ACCOUNT for the user PTE allocations.

Switch hexagon to use generic version of these functions.

Signed-off-by: Mike Rapoport 
---
 arch/hexagon/include/asm/pgalloc.h | 34 ++
 1 file changed, 2 insertions(+), 32 deletions(-)

diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index d361838..7661a26 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -24,6 +24,8 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 #define check_pgt_cache() do {} while (0)
 
 extern unsigned long long kmap_generation;
@@ -59,38 +61,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_page(GFP_KERNEL | __GFP_ZERO);
-   if (!pte)
-   return NULL;
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
-/* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
-   return (pte_t *) __get_free_page(flags);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte)
 {
-- 
2.7.4



[PATCH v2 02/14] alpha: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
alpha allocates PTE pages with __get_free_page() and uses
GFP_KERNEL | __GFP_ZERO for the allocations.

Switch it to the generic version that does exactly the same thing for the
kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The alpha pte_free() and pte_free_kernel() versions are identical to the
generic ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgalloc.h | 40 +++-
 1 file changed, 3 insertions(+), 37 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 02f9f91..71ded3b 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -5,6 +5,8 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 /*  
  * Allocate and free page tables. The xxx_kernel() versions are
  * used to allocate a kernel page table - this turns on ASN bits
@@ -41,7 +43,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pmd_t *
 pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-   pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+   pmd_t *ret = (pmd_t *)__get_free_page(GFP_PGTABLE_USER);
return ret;
 }
 
@@ -51,42 +53,6 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_page((unsigned long)pmd);
 }
 
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-   return pte;
-}
-
-static inline void
-pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
-   pte_t *pte = pte_alloc_one_kernel(mm);
-   struct page *page;
-
-   if (!pte)
-   return NULL;
-   page = virt_to_page(pte);
-   if (!pgtable_page_ctor(page)) {
-   __free_page(page);
-   return NULL;
-   }
-   return page;
-}
-
-static inline void
-pte_free(struct mm_struct *mm, pgtable_t page)
-{
-   pgtable_page_dtor(page);
-   __free_page(page);
-}
-
 #define check_pgt_cache()  do { } while (0)
 
 #endif /* _ALPHA_PGALLOC_H */
-- 
2.7.4



[PATCH v2 04/14] arm64: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
The PTE allocations in arm64 are identical to the generic ones modulo the
GFP flags.

Using the generic pte_alloc_one() functions ensures that the user page
tables are allocated with __GFP_ACCOUNT set.

The arm64 definition of PGALLOC_GFP is removed and replaced with
GFP_PGTABLE_USER in p[gum]d_alloc_one() for the user page tables and with
GFP_PGTABLE_KERNEL for the kernel page tables. The KVM memory cache is now
using GFP_PGTABLE_USER.

The mappings created with create_pgd_mapping() are now using
GFP_PGTABLE_KERNEL.

The conversion to the generic version of pte_free_kernel() removes the NULL
check for pte.

The pte_free() version on arm64 is identical to the generic one and
can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/include/asm/pgalloc.h | 47 +++-
 arch/arm64/mm/mmu.c  |  2 +-
 arch/arm64/mm/pgd.c  |  9 ++--
 virt/kvm/arm/mmu.c   |  2 +-
 4 files changed, 17 insertions(+), 43 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index dabba4b..07be429 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -24,18 +24,23 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 #define check_pgt_cache()  do { } while (0)
 
-#define PGALLOC_GFP(GFP_KERNEL | __GFP_ZERO)
 #define PGD_SIZE   (PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
+   gfp_t gfp = GFP_PGTABLE_USER;
struct page *page;
 
-   page = alloc_page(PGALLOC_GFP);
+   if (mm == _mm)
+   gfp = GFP_PGTABLE_KERNEL;
+
+   page = alloc_page(gfp);
if (!page)
return NULL;
if (!pgtable_pmd_page_ctor(page)) {
@@ -72,7 +77,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-   return (pud_t *)__get_free_page(PGALLOC_GFP);
+   return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
@@ -100,42 +105,6 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   return (pte_t *)__get_free_page(PGALLOC_GFP);
-}
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_pages(PGALLOC_GFP, 0);
-   if (!pte)
-   return NULL;
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
-/*
- * Free a PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
-{
-   if (ptep)
-   free_page((unsigned long)ptep);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
  pmdval_t prot)
 {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ef82312..bf42f07 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -373,7 +373,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 
 static phys_addr_t __pgd_pgtable_alloc(int shift)
 {
-   void *ptr = (void *)__get_free_page(PGALLOC_GFP);
+   void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
BUG_ON(!ptr);
 
/* Ensure the zeroed page is visible to the page table walker */
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 289f911..769516c 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -30,10 +30,15 @@ static struct kmem_cache *pgd_cache __ro_after_init;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+   gfp_t gfp = GFP_PGTABLE_USER;
+
+   if (unlikely(mm == _mm))
+   gfp = GFP_PGTABLE_KERNEL;
+
if (PGD_SIZE == PAGE_SIZE)
-   return (pgd_t *)__get_free_page(PGALLOC_GFP);
+   return (pgd_t *)__get_free_page(gfp);
else
-   return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+   return kmem_cache_alloc(pgd_cache, gfp);
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 74b6582..17aa4ac 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -141,7 +141,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < max) {
-   page = (void *)__get_free_page(PGALLOC_GFP);
+   page = (void *)__get_free_page(GFP_PGTABLE_USER);
  

[PATCH v2 01/14] asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel]

2019-05-08 Thread Mike Rapoport
Most architectures have identical or very similar implementation of
pte_alloc_one_kernel(), pte_alloc_one(), pte_free_kernel() and pte_free().

Add a generic implementation that can be reused across architectures and
enable its use on x86.

The generic implementation uses

GFP_KERNEL | __GFP_ZERO

for the kernel page tables and

GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT

for the user page tables.
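
(Throughout this series these combinations appear under the names
GFP_PGTABLE_KERNEL and GFP_PGTABLE_USER; presumably they are defined along
these lines -- the exact location of the definitions is not shown in this
excerpt:

	#define GFP_PGTABLE_KERNEL	(GFP_KERNEL | __GFP_ZERO)
	#define GFP_PGTABLE_USER	(GFP_PGTABLE_KERNEL | __GFP_ACCOUNT)
)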

The "base" functions for PTE allocation, namely __pte_alloc_one_kernel()
and __pte_alloc_one() are intended for the architectures that require
additional actions after actual memory allocation or must use non-default
GFP flags.

x86 is switched to use generic pte_alloc_one_kernel(), pte_free_kernel() and
pte_free().

x86 still implements pte_alloc_one() to allow run-time control of GFP flags
required for "userpte" command line option.
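
The shape of the generic "base" helper, as a hedged sketch assembled from
the per-arch implementations this series deletes (not the verbatim new
code):

	static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
	{
		struct page *pte;

		pte = alloc_page(gfp);		/* zeroed via __GFP_ZERO in gfp */
		if (!pte)
			return NULL;
		if (!pgtable_page_ctor(pte)) {	/* init page-table metadata */
			__free_page(pte);
			return NULL;
		}
		return pte;
	}

pte_alloc_one() then simply passes the default GFP flags, while x86 passes
__userpte_alloc_gfp, as the conversion below shows.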

Signed-off-by: Mike Rapoport 
---
 arch/x86/include/asm/pgalloc.h |  19 ++--
 arch/x86/mm/pgtable.c  |  33 -
 include/asm-generic/pgalloc.h  | 107 +++--
 3 files changed, 115 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index a281e61..29aa785 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -6,6 +6,9 @@
 #include <linux/mm.h>   /* for struct page */
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 static inline int  __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
 
 #ifdef CONFIG_PARAVIRT_XXL
@@ -47,24 +50,8 @@ extern gfp_t __userpte_alloc_gfp;
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
 extern pgtable_t pte_alloc_one(struct mm_struct *);
 
-/* Should really implement gc for free page table pages. This could be
-   done with a reference count in struct page. */
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1f67b1e..44816ff 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -13,33 +13,17 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
 EXPORT_SYMBOL(physical_mask);
 #endif
 
-#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
-
 #ifdef CONFIG_HIGHPTE
-#define PGALLOC_USER_GFP __GFP_HIGHMEM
+#define PGTABLE_HIGHMEM __GFP_HIGHMEM
 #else
-#define PGALLOC_USER_GFP 0
+#define PGTABLE_HIGHMEM 0
 #endif
 
-gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
-
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
-}
+gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
 
 pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-   struct page *pte;
-
-   pte = alloc_pages(__userpte_alloc_gfp, 0);
-   if (!pte)
-   return NULL;
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
+   return __pte_alloc_one(mm, __userpte_alloc_gfp);
 }
 
 static int __init setup_userpte(char *arg)
@@ -235,7 +219,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
 {
int i;
bool failed = false;
-   gfp_t gfp = PGALLOC_GFP;
+   gfp_t gfp = GFP_PGTABLE_USER;
 
if (mm == _mm)
gfp &= ~__GFP_ACCOUNT;
@@ -399,14 +383,14 @@ static inline pgd_t *_pgd_alloc(void)
 * We allocate one page for pgd.
 */
if (!SHARED_KERNEL_PMD)
-   return (pgd_t *)__get_free_pages(PGALLOC_GFP,
+   return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
 PGD_ALLOCATION_ORDER);
 
/*
 * Now PAE kernel is not running as a Xen domain. We can allocate
 * a 32-byte slab for pgd to save memory space.
 */
-   return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+   return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
 }
 
 static inline void _pgd_free(pgd_t *pgd)
@@ -424,7 +408,8 @@ void __init pgd_cache_init(void)
 
 static inline pgd_t *_pgd_alloc(void)
 {
-   return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+   return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
+PGD_ALLOCATION_ORDER);
 }
 
 static inline void _pgd_free(pgd_t *pgd)
diff --git a/include/asm-generic/pgalloc.h b/include/asm-ge

[PATCH v2 14/14] unicore32: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
Replace __get_free_page() and alloc_pages() calls with the generic
__pte_alloc_one_kernel() and __pte_alloc_one().

There is no functional change for the kernel PTE allocation.

The difference for the user PTEs is that clean_pte_table() is now called
after pgtable_page_ctor() and that __GFP_ACCOUNT is added to the GFP
flags.

The pte_free() and pte_free_kernel() versions are identical to the generic
ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/unicore32/include/asm/pgalloc.h | 36 
 1 file changed, 8 insertions(+), 28 deletions(-)

diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index 7cceabe..dd09af6 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -17,6 +17,10 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
+
 #define check_pgt_cache()  do { } while (0)
 
 #define _PAGE_USER_TABLE   (PMD_TYPE_TABLE | PMD_PRESENT)
@@ -28,17 +32,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
 #define pgd_alloc(mm)  get_pgd_slow(mm)
 #define pgd_free(mm, pgd)  free_pgd_slow(mm, pgd)
 
-#define PGALLOC_GFP(GFP_KERNEL | __GFP_ZERO)
-
 /*
  * Allocate one PTE table.
  */
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm)
 {
-   pte_t *pte;
+   pte_t *pte = __pte_alloc_one_kernel(mm);
 
-   pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t));
 
@@ -50,35 +51,14 @@ pte_alloc_one(struct mm_struct *mm)
 {
struct page *pte;
 
-   pte = alloc_pages(PGALLOC_GFP, 0);
+   pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (!pte)
return NULL;
-   if (!PageHighMem(pte)) {
-   void *page = page_address(pte);
-   clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t));
-   }
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   }
-
+   if (!PageHighMem(pte))
+   clean_pte_table(page_address(pte));
return pte;
 }
 
-/*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   if (pte)
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
 {
set_pmd(pmdp, __pmd(pmdval));
-- 
2.7.4



[PATCH v2 03/14] arm: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
Replace __get_free_page() and alloc_pages() calls with the generic
__pte_alloc_one_kernel() and __pte_alloc_one().

There is no functional change for the kernel PTE allocation.

The difference for the user PTEs is that clean_pte_table() is now called
after pgtable_page_ctor() and that __GFP_ACCOUNT is added to the GFP
flags.

The conversion to the generic version of pte_free_kernel() removes the NULL
check for pte.

The pte_free() version on arm is identical to the generic one and can be
simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/arm/include/asm/pgalloc.h | 41 +
 arch/arm/mm/mmu.c  |  2 +-
 2 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 17ab72f..13c5a9d 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -57,8 +57,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-#define PGALLOC_GFP(GFP_KERNEL | __GFP_ZERO)
-
 static inline void clean_pte_table(pte_t *pte)
 {
clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE);
@@ -80,54 +78,41 @@ static inline void clean_pte_table(pte_t *pte)
  *  |  h/w pt 1  |
  *  ++
  */
+
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
+
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm)
 {
-   pte_t *pte;
+   pte_t *pte = __pte_alloc_one_kernel(mm);
 
-   pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_pte_table(pte);
 
return pte;
 }
 
+#ifdef CONFIG_HIGHPTE
+#define PGTABLE_HIGHMEM __GFP_HIGHMEM
+#else
+#define PGTABLE_HIGHMEM 0
+#endif
+
 static inline pgtable_t
 pte_alloc_one(struct mm_struct *mm)
 {
struct page *pte;
 
-#ifdef CONFIG_HIGHPTE
-   pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
-#else
-   pte = alloc_pages(PGALLOC_GFP, 0);
-#endif
+   pte = __pte_alloc_one(mm, GFP_PGTABLE_USER | PGTABLE_HIGHMEM);
if (!pte)
return NULL;
if (!PageHighMem(pte))
clean_pte_table(page_address(pte));
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
return pte;
 }
 
-/*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   if (pte)
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
  pmdval_t prot)
 {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index f3ce341..e8e0382 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -732,7 +732,7 @@ static void __init *early_alloc(unsigned long sz)
 
 static void *__init late_alloc(unsigned long sz)
 {
-   void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
+   void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
 
if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
BUG();
-- 
2.7.4



[PATCH v2 13/14] um: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
um allocates PTE pages with __get_free_page() and uses
GFP_KERNEL | __GFP_ZERO for the allocations.

Switch it to the generic version that does exactly the same thing for the
kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The pte_free() and pte_free_kernel() versions are identical to the generic
ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
Reviewed-by: Anton Ivanov 
Acked-by: Anton Ivanov 
---
 arch/um/include/asm/pgalloc.h | 16 ++--
 arch/um/kernel/mem.c  | 22 --
 2 files changed, 2 insertions(+), 36 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 99eb568..d7b282e 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
 
 #include 
 
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 #define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
 
@@ -25,20 +27,6 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
-extern pgtable_t pte_alloc_one(struct mm_struct *);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long) pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 #define __pte_free_tlb(tlb,pte, address)   \
 do {   \
pgtable_page_dtor(pte); \
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 99aa11b..2280374 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -215,28 +215,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte;
-
-   pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-   return pte;
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
-   if (!pte)
-   return NULL;
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
 #ifdef CONFIG_3_LEVEL_PGTABLES
 pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-- 
2.7.4



[PATCH v2 05/14] csky: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
The csky implementation of pte_alloc_one(), pte_free_kernel() and
pte_free() is identical to the generic one except for the lack of
__GFP_ACCOUNT for the user PTE allocations.

Switch csky to use generic version of these functions.

The csky implementation of pte_alloc_one_kernel() is not replaced because
it does not clear the allocated page but rather sets each PTE in it to a
non-zero value.

The pte_free_kernel() and pte_free() versions on csky are identical to the
generic ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
Acked-by: Guo Ren 
---
 arch/csky/include/asm/pgalloc.h | 30 +++---
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index d213bb4..98c571670 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -8,6 +8,9 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
 {
@@ -39,33 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
return pte;
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
-   if (!pte)
-   return NULL;
-
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_pages(pte, PTE_ORDER);
-}
-
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
free_pages((unsigned long)pgd, PGD_ORDER);
-- 
2.7.4



[PATCH v2 08/14] mips: switch to generic version of pte allocation

2019-05-08 Thread Mike Rapoport
MIPS allocates kernel PTE pages with

__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER)

and user PTE pages with

pte = alloc_pages(GFP_KERNEL, PTE_ORDER)

and then uses clear_highpage(pte) to zero out the allocated page for the
user page tables.

The PTE_ORDER is hardwired to zero, which makes the MIPS implementation
almost identical to the generic one; a sketch of the equivalence follows.
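
With PTE_ORDER == 0 a single page is allocated either way, so the two
forms below are equivalent (an illustrative fragment, not the patch
itself):

	/* old MIPS code: allocate, then zero explicitly */
	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
	if (pte)
		clear_highpage(pte);

	/* generic code: let the page allocator zero the page */
	pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);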

Switch MIPS to the generic version that does exactly the same thing for the
kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The pte_free_kernel() and pte_free() versions on mips are identical to the
generic ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
Acked-by: Paul Burton 
---
 arch/mips/include/asm/pgalloc.h | 33 ++---
 1 file changed, 2 insertions(+), 31 deletions(-)

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 27808d9..aa16b85 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
 {
@@ -50,37 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
-}
-
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
-   if (!pte)
-   return NULL;
-   clear_highpage(pte);
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_pages(pte, PTE_ORDER);
-}
-
 #define __pte_free_tlb(tlb,pte,address)\
 do {   \
pgtable_page_dtor(pte); \
-- 
2.7.4



[RFC/RFT PATCH] alpha: switch from DISCONTIGMEM to SPARSEMEM

2019-05-07 Thread Mike Rapoport
Enable SPARSEMEM support on alpha and deprecate DISCONTIGMEM.

The required changes are mostly around moving duplicated definitions of
page access and address conversion macros to a common place and making sure
they are available for all memory models.

The DISCONTIGMEM support is marked as BROKEN and will be removed in a
couple of releases.
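
The new arch/alpha/include/asm/sparsemem.h is cut off in this excerpt; it
presumably follows the usual pattern, roughly as below, where the section
size and physical-address bound are illustrative assumptions rather than
the posted values:

	#ifndef _ASM_ALPHA_SPARSEMEM_H
	#define _ASM_ALPHA_SPARSEMEM_H

	#ifdef CONFIG_SPARSEMEM

	#define SECTION_SIZE_BITS	27	/* assumed: 128M sections */
	#define MAX_PHYSMEM_BITS	48	/* assumed: address-space bound */

	#endif /* CONFIG_SPARSEMEM */
	#endif /* _ASM_ALPHA_SPARSEMEM_H */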

Signed-off-by: Mike Rapoport 
---
 arch/alpha/Kconfig |  8 
 arch/alpha/include/asm/mmzone.h| 17 ++---
 arch/alpha/include/asm/page.h  |  7 ---
 arch/alpha/include/asm/pgtable.h   | 12 +---
 arch/alpha/include/asm/sparsemem.h | 22 ++
 arch/alpha/kernel/setup.c  |  2 ++
 6 files changed, 43 insertions(+), 25 deletions(-)
 create mode 100644 arch/alpha/include/asm/sparsemem.h

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 584a6e1..6be7bec 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -36,6 +36,7 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+   select SPARSEMEM_STATIC if SPARSEMEM
help
  The Alpha is a 64-bit general-purpose processor designed and
  marketed by the Digital Equipment Corporation of blessed memory,
@@ -554,12 +555,19 @@ config NR_CPUS
 
 config ARCH_DISCONTIGMEM_ENABLE
bool "Discontiguous Memory Support"
+   depends on BROKEN
help
  Say Y to support efficient handling of discontiguous physical memory,
  for architectures which are either NUMA (Non-Uniform Memory Access)
  or have huge holes in the physical address space for other reasons.
  See <file:Documentation/vm/numa> for more.
 
+config ARCH_SPARSEMEM_ENABLE
+   bool "Sparse Memory Support"
+   help
+ Say Y to support efficient handling of discontiguous physical memory,
+ for systems that have huge holes in the physical address space.
+
 config NUMA
bool "NUMA Support (EXPERIMENTAL)"
depends on DISCONTIGMEM && BROKEN
diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index 889b5d3..8664460 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -6,9 +6,9 @@
 #ifndef _ASM_MMZONE_H_
 #define _ASM_MMZONE_H_
 
-#include <asm/smp.h>
+#ifdef CONFIG_DISCONTIGMEM
 
-struct bootmem_data_t; /* stupid forward decl. */
+#include <asm/smp.h>
 
 /*
  * Following are macros that are specific to this numa platform.
@@ -47,8 +47,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 }
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-
 /*
  * Following are macros that each numa implementation must define.
  */
@@ -70,12 +68,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 /* XXX: FIXME -- nyc */
 #define kern_addr_valid(kaddr) (0)
 
-#define virt_to_page(kaddr)pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-
-#define pmd_page(pmd)  (pfn_to_page(pmd_val(pmd) >> 32))
-#define pgd_page(pgd)  (pfn_to_page(pgd_val(pgd) >> 32))
-#define pte_pfn(pte)   (pte_val(pte) >> 32)
-
 #define mk_pte(page, pgprot)\
 ({  \
pte_t pte;   \
@@ -98,16 +90,11 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
__xx;   \
 })
 
-#define page_to_pa(page)   \
-   (page_to_pfn(page) << PAGE_SHIFT)
-
 #define pfn_to_nid(pfn)pa_to_nid(((u64)(pfn) << PAGE_SHIFT))
 #define pfn_valid(pfn) \
(((pfn) - node_start_pfn(pfn_to_nid(pfn))) <\
 node_spanned_pages(pfn_to_nid(pfn)))
 
-#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT))
-
 #endif /* CONFIG_DISCONTIGMEM */
 
 #endif /* _ASM_MMZONE_H_ */
diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index f3fb284..f89eef3 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -83,12 +83,13 @@ typedef struct page *pgtable_t;
 
 #define __pa(x)((unsigned long) (x) - PAGE_OFFSET)
 #define __va(x)((void *)((unsigned long) (x) + PAGE_OFFSET))
-#ifndef CONFIG_DISCONTIGMEM
+
 #define virt_to_page(kaddr)pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT))
 
+#ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn) ((pfn) < max_mapnr)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-#endif /* CONFIG_DISCONTIGMEM */
+#endif /* CONFIG_FLATMEM */
 
 #define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | VM_EXEC | \
 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

Re: [PATCH 12/15] powerpc/nohash/64: switch to generic version of pte allocation

2019-05-05 Thread Mike Rapoport
On Thu, May 02, 2019 at 06:56:07PM +0200, Christophe Leroy wrote:
> 
> 
> On 02/05/2019 at 17:28, Mike Rapoport wrote:
> >The 64-bit book-E powerpc implements pte_alloc_one(),
> >pte_alloc_one_kernel(), pte_free_kernel() and pte_free() the same way as
> >the generic version.
> 
> Will soon be converted to the same as the 3 other PPC subarches, see
> https://patchwork.ozlabs.org/patch/1091590/
 
Thanks for the heads up. I'll drop this from the next re-spin.

> Christophe
> 
> >
> >Switch it to the generic version that does exactly the same thing.
> >
> >Signed-off-by: Mike Rapoport 
> >---
> >  arch/powerpc/include/asm/nohash/64/pgalloc.h | 35 ++--
> >  1 file changed, 2 insertions(+), 33 deletions(-)
> >
> >diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> >index 66d086f..bfb53a0 100644
> >--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
> >+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
> >@@ -11,6 +11,8 @@
> >  #include 
> >  #include 
> >+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
> >+
> >  struct vmemmap_backing {
> > struct vmemmap_backing *list;
> > unsigned long phys;
> >@@ -92,39 +94,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
> > kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
> >  }
> >-
> >-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
> >-{
> >-return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
> >-}
> >-
> >-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
> >-{
> >-struct page *page;
> >-pte_t *pte;
> >-
> >-pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
> >-if (!pte)
> >-return NULL;
> >-page = virt_to_page(pte);
> >-if (!pgtable_page_ctor(page)) {
> >-__free_page(page);
> >-return NULL;
> >-}
> >-return page;
> >-}
> >-
> >-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> >-{
> >-free_page((unsigned long)pte);
> >-}
> >-
> >-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
> >-{
> >-pgtable_page_dtor(ptepage);
> >-__free_page(ptepage);
> >-}
> >-
> >  static inline void pgtable_free(void *table, int shift)
> >  {
> > if (!shift) {
> >
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH 04/15] arm64: switch to generic version of pte allocation

2019-05-05 Thread Mike Rapoport
On Fri, May 03, 2019 at 11:05:09AM +0100, Mark Rutland wrote:
> Hi,
> 
> On Thu, May 02, 2019 at 06:28:31PM +0300, Mike Rapoport wrote:
> > The PTE allocations in arm64 are identical to the generic ones modulo the
> > GFP flags.
> > 
> > Using the generic pte_alloc_one() functions ensures that the user page
> > tables are allocated with __GFP_ACCOUNT set.
> > 
> > The arm64 definition of PGALLOC_GFP is removed and replaced with
> > GFP_PGTABLE_USER for p[gum]d_alloc_one() and for KVM memory cache.
> > 
> > The mappings created with create_pgd_mapping() are now using
> > GFP_PGTABLE_KERNEL.
> > 
> > The conversion to the generic version of pte_free_kernel() removes the NULL
> > check for pte.
> > 
> > The pte_free() version on arm64 is identical to the generic one and
> > can be simply dropped.
> > 
> > Signed-off-by: Mike Rapoport 
> > ---
> >  arch/arm64/include/asm/pgalloc.h | 43 
> > 
> >  arch/arm64/mm/mmu.c  |  2 +-
> >  arch/arm64/mm/pgd.c  |  4 ++--
> >  virt/kvm/arm/mmu.c   |  2 +-
> >  4 files changed, 8 insertions(+), 43 deletions(-)
> 
> [...]
> 
> > diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
> > index 289f911..2ef1a53 100644
> > --- a/arch/arm64/mm/pgd.c
> > +++ b/arch/arm64/mm/pgd.c
> > @@ -31,9 +31,9 @@ static struct kmem_cache *pgd_cache __ro_after_init;
> >  pgd_t *pgd_alloc(struct mm_struct *mm)
> >  {
> > if (PGD_SIZE == PAGE_SIZE)
> > -   return (pgd_t *)__get_free_page(PGALLOC_GFP);
> > +   return (pgd_t *)__get_free_page(GFP_PGTABLE_USER);
> > else
> > -   return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
> > +   return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
> >  }
> 
> In efi_virtmap_init() we use pgd_alloc() to allocate a pgd for EFI
> runtime services, which we map with a special kernel page table.
> 
> I'm not sure if accounting that is problematic, as it's allocated in a
> kernel thread off the back of an early_initcall.

The accounting bypasses kernel threads so there should be no problem.
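
For reference, the two GFP flag sets used across the series differ only in
__GFP_ACCOUNT; since the accounting charges the current task's memcg and
kernel threads have none, the EFI allocation is effectively unaccounted
either way. A sketch of the definitions, from memory rather than verbatim:

	#define GFP_PGTABLE_KERNEL	(GFP_KERNEL | __GFP_ZERO)
	#define GFP_PGTABLE_USER	(GFP_PGTABLE_KERNEL | __GFP_ACCOUNT)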
 
> Just to check, Is that sound, or do we need a pgd_alloc_kernel()?
> 
> Thanks,
> Mark.
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH 08/15] mips: switch to generic version of pte allocation

2019-05-05 Thread Mike Rapoport
On Thu, May 02, 2019 at 07:09:47PM +, Paul Burton wrote:
> Hi Mike,
> 
> On Thu, May 02, 2019 at 06:28:35PM +0300, Mike Rapoport wrote:
> > MIPS allocates kernel PTE pages with
> > 
> > __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER)
> > 
> > and user PTE pages with
> > 
> > alloc_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER)
> 
> That bit isn't quite true - we don't use __GFP_ZERO in pte_alloc_one() &
> instead call clear_highpage() on the allocated page. Not that I have a
> problem with using __GFP_ZERO - it seems like the more optimal choice.
> It just might be worth mentioning the change & expected equivalent
> behavior.

You are right, I'll fix the changelog.
 
> Otherwise:
> 
> Acked-by: Paul Burton 

Thanks.

> Thanks,
> Paul
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH 01/15] asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel]

2019-05-05 Thread Mike Rapoport
On Thu, May 02, 2019 at 07:03:11PM +, Paul Burton wrote:
> Hi Mike,
> 
> On Thu, May 02, 2019 at 06:28:28PM +0300, Mike Rapoport wrote:
> > +/**
> > + * pte_free_kernel - free PTE-level user page table page
> > + * @mm: the mm_struct of the current context
> > + * @pte_page: the `struct page` representing the page table
> > + */
> > +static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
> > +{
> > +   pgtable_page_dtor(pte_page);
> > +   __free_page(pte_page);
> > +}
> 
> Nit: the comment names the wrong function (s/pte_free_kernel/pte_free/).

Argh, evil copy-paste :)
Thanks!
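
With the copy-paste fixed, the comment would presumably read:

	/**
	 * pte_free - free PTE-level user page table page
	 * @mm: the mm_struct of the current context
	 * @pte_page: the `struct page` representing the page table
	 */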
 
> Thanks,
> Paul
> 

-- 
Sincerely yours,
Mike.



[PATCH 03/15] arm: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
Replace __get_free_page() and alloc_pages() calls with the generic
__pte_alloc_one_kernel() and __pte_alloc_one().

There is no functional change for the kernel PTE allocation.

The difference for the user PTEs is that clean_pte_table() is now called
after pgtable_page_ctor() and that __GFP_ACCOUNT is added to the GFP flags.

The conversion to the generic version of pte_free_kernel() removes the NULL
check for pte.

The pte_free() version on arm is identical to the generic one and can be
simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/arm/include/asm/pgalloc.h | 41 +
 arch/arm/mm/mmu.c  |  2 +-
 2 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 17ab72f..13c5a9d 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -57,8 +57,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t 
*pud, pmd_t *pmd)
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-#define PGALLOC_GFP(GFP_KERNEL | __GFP_ZERO)
-
 static inline void clean_pte_table(pte_t *pte)
 {
clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE);
@@ -80,54 +78,41 @@ static inline void clean_pte_table(pte_t *pte)
  *  |  h/w pt 1  |
  *  ++
  */
+
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
+
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm)
 {
-   pte_t *pte;
+   pte_t *pte = __pte_alloc_one_kernel(mm);
 
-   pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_pte_table(pte);
 
return pte;
 }
 
+#ifdef CONFIG_HIGHPTE
+#define PGTABLE_HIGHMEM __GFP_HIGHMEM
+#else
+#define PGTABLE_HIGHMEM 0
+#endif
+
 static inline pgtable_t
 pte_alloc_one(struct mm_struct *mm)
 {
struct page *pte;
 
-#ifdef CONFIG_HIGHPTE
-   pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
-#else
-   pte = alloc_pages(PGALLOC_GFP, 0);
-#endif
+   pte = __pte_alloc_one(mm, GFP_PGTABLE_USER | PGTABLE_HIGHMEM);
if (!pte)
return NULL;
if (!PageHighMem(pte))
clean_pte_table(page_address(pte));
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
return pte;
 }
 
-/*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   if (pte)
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
  pmdval_t prot)
 {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index f3ce341..e8e0382 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -732,7 +732,7 @@ static void __init *early_alloc(unsigned long sz)
 
 static void *__init late_alloc(unsigned long sz)
 {
-   void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
+   void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
 
if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
BUG();
-- 
2.7.4



[PATCH 05/15] csky: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
The csky implementation of pte_alloc_one(), pte_free_kernel() and pte_free()
is identical to the generic one except for the lack of __GFP_ACCOUNT in the
user PTE allocation.

Switch csky to use generic version of these functions.

The csky implementation of pte_alloc_one_kernel() is not replaced because
it does not clear the allocated page but rather sets each PTE in it to a
non-zero value.

The pte_free_kernel() and pte_free() versions on csky are identical to the
generic ones and can be simply dropped.
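
A rough sketch of the pattern the changelog refers to; this is illustrative
only, not the csky source, and the _PAGE_GLOBAL constant and the loop shape
are assumptions:

	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
	unsigned long i;

	if (!pte)
		return NULL;
	/* fill the table with a non-zero "invalid" value instead of zeroing it */
	for (i = 0; i < PTRS_PER_PTE; i++)
		set_pte(pte + i, __pte(_PAGE_GLOBAL));
	return pte;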

Signed-off-by: Mike Rapoport 
---
 arch/csky/include/asm/pgalloc.h | 30 +++---
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index d213bb4..98c571670 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -8,6 +8,9 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
 {
@@ -39,33 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct 
*mm)
return pte;
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
-   if (!pte)
-   return NULL;
-
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_pages(pte, PTE_ORDER);
-}
-
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
free_pages((unsigned long)pgd, PGD_ORDER);
-- 
2.7.4



[PATCH 04/15] arm64: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
The PTE allocations in arm64 are identical to the generic ones modulo the
GFP flags.

Using the generic pte_alloc_one() functions ensures that the user page
tables are allocated with __GFP_ACCOUNT set.

The arm64 definition of PGALLOC_GFP is removed and replaced with
GFP_PGTABLE_USER for p[gum]d_alloc_one() and for KVM memory cache.

The mappings created with create_pgd_mapping() are now using
GFP_PGTABLE_KERNEL.

The conversion to the generic version of pte_free_kernel() removes the NULL
check for pte.

The pte_free() version on arm64 is identical to the generic one and
can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/include/asm/pgalloc.h | 43 
 arch/arm64/mm/mmu.c  |  2 +-
 arch/arm64/mm/pgd.c  |  4 ++--
 virt/kvm/arm/mmu.c   |  2 +-
 4 files changed, 8 insertions(+), 43 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 52fa47c..3293b8b 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -24,16 +24,17 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 #define check_pgt_cache()  do { } while (0)
 
-#define PGALLOC_GFP(GFP_KERNEL | __GFP_ZERO)
 #define PGD_SIZE   (PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-   return (pmd_t *)__get_free_page(PGALLOC_GFP);
+   return (pmd_t *)__get_free_page(GFP_PGTABLE_USER);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
@@ -62,7 +63,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t 
pmdp, pudval_t prot)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-   return (pud_t *)__get_free_page(PGALLOC_GFP);
+   return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
@@ -90,42 +91,6 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t 
pudp, pgdval_t prot)
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   return (pte_t *)__get_free_page(PGALLOC_GFP);
-}
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_pages(PGALLOC_GFP, 0);
-   if (!pte)
-   return NULL;
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
-/*
- * Free a PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
-{
-   if (ptep)
-   free_page((unsigned long)ptep);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
  pmdval_t prot)
 {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e97f018..d5178c5 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -373,7 +373,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t 
phys,
 
 static phys_addr_t pgd_pgtable_alloc(void)
 {
-   void *ptr = (void *)__get_free_page(PGALLOC_GFP);
+   void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
BUG();
 
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 289f911..2ef1a53 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -31,9 +31,9 @@ static struct kmem_cache *pgd_cache __ro_after_init;
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
if (PGD_SIZE == PAGE_SIZE)
-   return (pgd_t *)__get_free_page(PGALLOC_GFP);
+   return (pgd_t *)__get_free_page(GFP_PGTABLE_USER);
else
-   return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+   return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 27c9583..9f6f638 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -141,7 +141,7 @@ static int mmu_topup_memory_cache(struct 
kvm_mmu_memory_cache *cache,
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < max) {
-   page = (void *)__get_free_page(PGALLOC_GFP);
+   page = (void *)__get_free_page(GFP_PGTABLE_USER);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
-- 
2.7.4



[PATCH 08/15] mips: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
MIPS allocates kernel PTE pages with

__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER)

and user PTE pages with

alloc_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER)

The PTE_ORDER is hardwired to zero, which makes the MIPS implementation
almost identical to the generic one.

Switch MIPS to the generic version that does exactly the same thing for the
kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The pte_free_kernel() and pte_free() versions on mips are identical to the
generic ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/mips/include/asm/pgalloc.h | 33 ++---
 1 file changed, 2 insertions(+), 31 deletions(-)

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 27808d9..aa16b85 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
 {
@@ -50,37 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
-}
-
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
-   if (!pte)
-   return NULL;
-   clear_highpage(pte);
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_pages(pte, PTE_ORDER);
-}
-
 #define __pte_free_tlb(tlb,pte,address)\
 do {   \
pgtable_page_dtor(pte); \
-- 
2.7.4



[PATCH 07/15] m68k: sun3: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
The sun3 MMU variant of m68k uses GFP_KERNEL to allocate a PTE page and
then memset(0) or clear_highpage() to clear it.

This is equivalent to allocating the page with GFP_KERNEL | __GFP_ZERO,
which allows replacing the sun3 implementations of pte_alloc_one() and
pte_alloc_one_kernel() with the generic ones.

The pte_free() and pte_free_kernel() versions are identical to the generic
ones and can be simply dropped.
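
The equivalence the changelog relies on, spelled out (illustrative only):

	unsigned long page = __get_free_page(GFP_KERNEL);
	memset((void *)page, 0, PAGE_SIZE);

behaves the same as

	unsigned long page = __get_free_page(GFP_KERNEL | __GFP_ZERO);

which is what allows the open-coded clearing on sun3 to be replaced by the
generic allocator.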

Signed-off-by: Mike Rapoport 
---
 arch/m68k/include/asm/sun3_pgalloc.h | 41 ++--
 1 file changed, 2 insertions(+), 39 deletions(-)

diff --git a/arch/m68k/include/asm/sun3_pgalloc.h 
b/arch/m68k/include/asm/sun3_pgalloc.h
index 1456c5e..1a8ddbd 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -13,55 +13,18 @@
 
 #include 
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 extern const char bad_pmd_string[];
 
 #define pmd_alloc_one(mm,address)   ({ BUG(); ((pmd_t *)2); })
 
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-free_page((unsigned long) pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t page)
-{
-   pgtable_page_dtor(page);
-__free_page(page);
-}
-
 #define __pte_free_tlb(tlb,pte,addr)   \
 do {   \
pgtable_page_dtor(pte); \
tlb_remove_page((tlb), pte);\
 } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   unsigned long page = __get_free_page(GFP_KERNEL);
-
-   if (!page)
-   return NULL;
-
-   memset((void *)page, 0, PAGE_SIZE);
-   return (pte_t *) (page);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-struct page *page = alloc_pages(GFP_KERNEL, 0);
-
-   if (page == NULL)
-   return NULL;
-
-   clear_highpage(page);
-   if (!pgtable_page_ctor(page)) {
-   __free_page(page);
-   return NULL;
-   }
-   return page;
-
-}
-
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t 
*pte)
 {
pmd_val(*pmd) = __pa((unsigned long)pte);
-- 
2.7.4



[PATCH 09/15] nds32: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
The nds32 implementation of pte_alloc_one_kernel() differs from the generic
one in the use of the __GFP_RETRY_MAYFAIL flag, which is removed after the
conversion.

The nds32 version of pte_alloc_one() missed the call to pgtable_page_ctor()
and also used __GFP_RETRY_MAYFAIL. Switching it to use generic
__pte_alloc_one() for the PTE page allocation ensures that page table
constructor is run and the user page tables are allocated with
__GFP_ACCOUNT.

The conversion to the generic version of pte_free_kernel() removes the NULL
check for pte.

The pte_free() version on nds32 is identical to the generic one and can be
simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/nds32/include/asm/pgalloc.h | 31 ---
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 3c5fee5..954696c 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -9,6 +9,9 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 /*
  * Since we have only two-level page tables, these are trivial
  */
@@ -22,22 +25,11 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
 #define check_pgt_cache()  do { } while (0)
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte;
-
-   pte =
-   (pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- __GFP_ZERO);
-
-   return pte;
-}
-
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
pgtable_t pte;
 
-   pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0);
+   pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (pte)
cpu_dcache_wb_page((unsigned long)page_address(pte));
 
@@ -45,21 +37,6 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 }
 
 /*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte)
-{
-   if (pte) {
-   free_page((unsigned long)pte);
-   }
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   __free_page(pte);
-}
-
-/*
  * Populate the pmdp entry with a pointer to the pte.  This pmd is part
  * of the mm address space.
  *
-- 
2.7.4



[PATCH 10/15] nios2: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
nios2 allocates kernel PTE pages with

__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);

and user page tables with

pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
if (pte)
clear_highpage(pte);

The PTE_ORDER is hardwired to zero, which makes the nios2 implementation
almost identical to the generic one.

Switch nios2 to the generic version that does exactly the same thing for
the kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The pte_free_kernel() and pte_free() versions on nios2 are identical to the
generic ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/nios2/include/asm/pgalloc.h | 37 ++---
 1 file changed, 2 insertions(+), 35 deletions(-)

diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 3a149ea..4bc8cf7 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -12,6 +12,8 @@
 
 #include 
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
 {
@@ -37,41 +39,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte;
-
-   pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
-
-   return pte;
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
-   if (pte) {
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   clear_highpage(pte);
-   }
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_pages((unsigned long)pte, PTE_ORDER);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-   pgtable_page_dtor(pte);
-   __free_pages(pte, PTE_ORDER);
-}
-
 #define __pte_free_tlb(tlb, pte, addr) \
do {\
pgtable_page_dtor(pte); \
-- 
2.7.4



[PATCH 12/15] powerpc/nohash/64: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
The 64-bit book-E powerpc implements pte_alloc_one(),
pte_alloc_one_kernel(), pte_free_kernel() and pte_free() the same way as
the generic version.

Switch it to the generic version that does exactly the same thing.

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/include/asm/nohash/64/pgalloc.h | 35 ++--
 1 file changed, 2 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h 
b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 66d086f..bfb53a0 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -11,6 +11,8 @@
 #include 
 #include 
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 struct vmemmap_backing {
struct vmemmap_backing *list;
unsigned long phys;
@@ -92,39 +94,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
 }
 
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *page;
-   pte_t *pte;
-
-   pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
-   if (!pte)
-   return NULL;
-   page = virt_to_page(pte);
-   if (!pgtable_page_ctor(page)) {
-   __free_page(page);
-   return NULL;
-   }
-   return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
-   pgtable_page_dtor(ptepage);
-   __free_page(ptepage);
-}
-
 static inline void pgtable_free(void *table, int shift)
 {
if (!shift) {
-- 
2.7.4



[PATCH 15/15] unicore32: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
Replace __get_free_page() and alloc_pages() calls with the generic
__pte_alloc_one_kernel() and __pte_alloc_one().

There is no functional change for the kernel PTE allocation.

The difference for the user PTEs is that clean_pte_table() is now called
after pgtable_page_ctor() and that __GFP_ACCOUNT is added to the GFP flags.

The pte_free() and pte_free_kernel() versions are identical to the generic
ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/unicore32/include/asm/pgalloc.h | 36 
 1 file changed, 8 insertions(+), 28 deletions(-)

diff --git a/arch/unicore32/include/asm/pgalloc.h 
b/arch/unicore32/include/asm/pgalloc.h
index 7cceabe..dd09af6 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -17,6 +17,10 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>
+
 #define check_pgt_cache()  do { } while (0)
 
 #define _PAGE_USER_TABLE   (PMD_TYPE_TABLE | PMD_PRESENT)
@@ -28,17 +32,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
 #define pgd_alloc(mm)  get_pgd_slow(mm)
 #define pgd_free(mm, pgd)  free_pgd_slow(mm, pgd)
 
-#define PGALLOC_GFP(GFP_KERNEL | __GFP_ZERO)
-
 /*
  * Allocate one PTE table.
  */
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm)
 {
-   pte_t *pte;
+   pte_t *pte = __pte_alloc_one_kernel(mm);
 
-   pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t));
 
@@ -50,35 +51,14 @@ pte_alloc_one(struct mm_struct *mm)
 {
struct page *pte;
 
-   pte = alloc_pages(PGALLOC_GFP, 0);
+   pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (!pte)
return NULL;
-   if (!PageHighMem(pte)) {
-   void *page = page_address(pte);
-   clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t));
-   }
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   }
-
+   if (!PageHighMem(pte))
+   clean_pte_table(page_address(pte));
return pte;
 }
 
-/*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   if (pte)
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
 {
set_pmd(pmdp, __pmd(pmdval));
-- 
2.7.4



[PATCH 14/15] um: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
um allocates PTE pages with __get_free_page() and uses
GFP_KERNEL | __GFP_ZERO for the allocations.

Switch it to the generic version that does exactly the same thing for the
kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The pte_free() and pte_free_kernel() versions are identical to the generic
ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/um/include/asm/pgalloc.h | 16 ++--
 arch/um/kernel/mem.c  | 22 --
 2 files changed, 2 insertions(+), 36 deletions(-)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 99eb568..d7b282e 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
 
 #include 
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 #define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
 
@@ -25,20 +27,6 @@
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
-extern pgtable_t pte_alloc_one(struct mm_struct *);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long) pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_page_dtor(pte);
-   __free_page(pte);
-}
-
 #define __pte_free_tlb(tlb,pte, address)   \
 do {   \
pgtable_page_dtor(pte); \
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 99aa11b..2280374 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -215,28 +215,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte;
-
-   pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-   return pte;
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-
-   pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
-   if (!pte)
-   return NULL;
-   if (!pgtable_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
 #ifdef CONFIG_3_LEVEL_PGTABLES
 pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-- 
2.7.4



[PATCH 11/15] parisc: switch to generic version of pte allocation

2019-05-02 Thread Mike Rapoport
parisc allocates PTE pages with __get_free_page() and uses
GFP_KERNEL | __GFP_ZERO for the allocations.

Switch it to the generic version that does exactly the same thing for the
kernel page tables and adds __GFP_ACCOUNT for the user PTEs.

The pte_free_kernel() and pte_free() versions on parisc are identical to the
generic ones and can be simply dropped.

Signed-off-by: Mike Rapoport 
---
 arch/parisc/include/asm/pgalloc.h | 33 ++---
 1 file changed, 2 insertions(+), 31 deletions(-)

diff --git a/arch/parisc/include/asm/pgalloc.h 
b/arch/parisc/include/asm/pgalloc.h
index d05c678c..265ec42 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
 
 #include 
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 /* Allocate the top level pgd (page directory)
  *
  * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
@@ -121,37 +123,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, 
pte_t *pte)
pmd_populate_kernel(mm, pmd, page_address(pte_page))
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
-   if (!page)
-   return NULL;
-   if (!pgtable_page_ctor(page)) {
-   __free_page(page);
-   return NULL;
-   }
-   return page;
-}
-
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
-   pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-   pgtable_page_dtor(pte);
-   pte_free_kernel(mm, page_address(pte));
-}
-
 #define check_pgt_cache()  do { } while (0)
 
 #endif
-- 
2.7.4



[PATCH 00/15] introduce generic pte_{alloc,free}_one[_kernel]

2019-05-02 Thread Mike Rapoport
Hi,

I've tried to trim down the recipients list, but it's still quite long, so
sorry for the spam.

Many architectures have similar, if not identical, implementations of
pte_alloc_one_kernel(), pte_alloc_one(), pte_free_kernel() and pte_free().

A while ago Anshuman suggested introducing a common definition of
GFP_PGTABLE [1] and during the discussion it was suggested to rather
consolidate the allocators.

These patches introduce generic versions of the PTE allocation and freeing
functions and enable their use on several architectures.

The conversion introduces some changes for some of the architectures.
Here's the executive summary; the details are described in each patch.

* Most architectures do not set __GFP_ACCOUNT for the user page tables.
Switching to the generic functions is "spreading that goodness to all other
architectures".
* arm, arm64 and unicore32 used to check if the pte is not NULL before
freeing its memory in pte_free_kernel(). It's dropped during the
conversion as it seems superfluous.
* x86 used to BUG_ON() if the pte was not page aligned during
pte_free_kernel(); the generic version simply frees the memory without any
checks.

This set only performs the straightforward conversion, the architectures
with different logic in pte_alloc_one() and pte_alloc_one_kernel() are not
touched, as well as architectures that have custom page table allocators.

[1] 
https://lore.kernel.org/lkml/1547619692-7946-1-git-send-email-anshuman.khand...@arm.com
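
For orientation, the generic helpers introduced in patch 01/15 look roughly
like this (a sketch reconstructed from the per-arch conversions below, not
the verbatim patch):

	static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
	{
		return (pte_t *)__get_free_page(GFP_PGTABLE_KERNEL);
	}

	static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
	{
		struct page *pte;

		pte = alloc_page(gfp);
		if (!pte)
			return NULL;
		if (!pgtable_page_ctor(pte)) {
			__free_page(pte);
			return NULL;
		}
		return pte;
	}

The pte_alloc_one() wrappers then pass GFP_PGTABLE_USER, which is
GFP_PGTABLE_KERNEL plus __GFP_ACCOUNT.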


Mike Rapoport (15):
  asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel]
  alpha: switch to generic version of pte allocation
  arm: switch to generic version of pte allocation
  arm64: switch to generic version of pte allocation
  csky: switch to generic version of pte allocation
  hexagon: switch to generic version of pte allocation
  m68k: sun3: switch to generic version of pte allocation
  mips: switch to generic version of pte allocation
  nds32: switch to generic version of pte allocation
  nios2: switch to generic version of pte allocation
  parisc: switch to generic version of pte allocation
  powerpc/nohash/64: switch to generic version of pte allocation
  riscv: switch to generic version of pte allocation
  um: switch to generic version of pte allocation
  unicore32: switch to generic version of pte allocation

 arch/alpha/include/asm/pgalloc.h |  40 +-
 arch/arm/include/asm/pgalloc.h   |  41 --
 arch/arm/mm/mmu.c|   2 +-
 arch/arm64/include/asm/pgalloc.h |  43 +--
 arch/arm64/mm/mmu.c  |   2 +-
 arch/arm64/mm/pgd.c  |   4 +-
 arch/csky/include/asm/pgalloc.h  |  30 +---
 arch/hexagon/include/asm/pgalloc.h   |  34 +
 arch/m68k/include/asm/sun3_pgalloc.h |  41 +-
 arch/mips/include/asm/pgalloc.h  |  33 +
 arch/nds32/include/asm/pgalloc.h |  31 +---
 arch/nios2/include/asm/pgalloc.h |  37 +
 arch/parisc/include/asm/pgalloc.h|  33 +
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  35 +
 arch/riscv/include/asm/pgalloc.h |  29 +---
 arch/um/include/asm/pgalloc.h|  16 +---
 arch/um/kernel/mem.c |  22 --
 arch/unicore32/include/asm/pgalloc.h |  36 ++---
 arch/x86/include/asm/pgalloc.h   |  19 +
 arch/x86/mm/pgtable.c|  33 +++--
 include/asm-generic/pgalloc.h| 107 ++-
 virt/kvm/arm/mmu.c   |   2 +-
 22 files changed, 171 insertions(+), 499 deletions(-)

-- 
2.7.4



Re: [PATCH v2 10/21] memblock: refactor internal allocation functions

2019-02-03 Thread Mike Rapoport
On Sun, Feb 03, 2019 at 08:39:20PM +1100, Michael Ellerman wrote:
> Mike Rapoport  writes:
> 
> > Currently, memblock has several internal functions with overlapping
> > functionality. They all call memblock_find_in_range_node() to find free
> > memory and then reserve the allocated range and mark it with kmemleak.
> > However, there is difference in the allocation constraints and in fallback
> > strategies.
> >
> > The allocations returning physical address first attempt to find free
> > memory on the specified node within mirrored memory regions, then retry on
> > the same node without the requirement for memory mirroring and finally fall
> > back to all available memory.
> >
> > The allocations returning virtual address start with clamping the allowed
> > range to memblock.current_limit, attempt to allocate from the specified
> > node from regions with mirroring and with user defined minimal address. If
> > such allocation fails, next attempt is done with node restriction lifted.
> > Next, the allocation is retried with minimal address reset to zero and at
> > last without the requirement for mirrored regions.
> >
> > Let's consolidate various fallbacks handling and make them more consistent
> > for physical and virtual variants. Most of the fallback handling is moved
> > to memblock_alloc_range_nid() and it now handles node and mirror fallbacks.
> >
> > The memblock_alloc_internal() uses memblock_alloc_range_nid() to get a
> > physical address of the allocated range and converts it to virtual address.
> >
> > The fallback for allocation below the specified minimal address remains in
> > memblock_alloc_internal() because memblock_alloc_range_nid() is used by CMA
> > with exact requirement for lower bounds.
> 
> This is causing problems on some of my machines.
> 
> I see NODE_DATA allocations falling back to node 0 when they shouldn't,
> or didn't previously.
> 
> eg, before:
> 
> 57990190: (116011251): numa:   NODE_DATA [mem 0xfffe4980-0xfffebfff]
> 58152042: (116373087): numa:   NODE_DATA [mem 0x8fff90980-0x8fff97fff]
> 
> after:
> 
> 16356872061562: (6296877055): numa:   NODE_DATA [mem 0xfffe4980-0xfffebfff]
> 16356872079279: (6296894772): numa:   NODE_DATA [mem 0xfffcd300-0xfffd497f]
> 16356872096376: (6296911869): numa: NODE_DATA(1) on node 0
> 
> 
> On some of my other systems it does that, and then panics because it
> can't allocate anything at all:
> 
> [0.00] numa:   NODE_DATA [mem 0x7ffcaee80-0x7ffcb3fff]
> [0.00] numa:   NODE_DATA [mem 0x7ffc99d00-0x7ffc9ee7f]
> [0.00] numa: NODE_DATA(1) on node 0
> [0.00] Kernel panic - not syncing: Cannot allocate 20864 bytes for 
> node 16 data
> [0.00] CPU: 0 PID: 0 Comm: swapper Not tainted 
> 5.0.0-rc4-gccN-next-20190201-gdc4c899 #1
> [0.00] Call Trace:
> [0.00] [c11cfca0] [c0c11044] dump_stack+0xe8/0x164 
> (unreliable)
> [0.00] [c11cfcf0] [c00fdd6c] panic+0x17c/0x3e0
> [0.00] [c11cfd90] [c0f61bc8] initmem_init+0x128/0x260
> [0.00] [c11cfe60] [c0f57940] setup_arch+0x398/0x418
> [0.00] [c11cfee0] [c0f50a94] start_kernel+0xa0/0x684
> [0.00] [c11cff90] [c000af70] 
> start_here_common+0x1c/0x52c
> [0.00] Rebooting in 180 seconds..
> 
> 
> So there's something going wrong there, I haven't had time to dig into
> it though (Sunday night here).

I'll try to see if I can reproduce it with qemu.
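
A minimal sketch of such a reproducer, assuming a pseries guest; the exact
topology flags are an assumption:

	qemu-system-ppc64 -machine pseries -smp 4 -m 8G \
		-numa node,cpus=0-1,mem=4G \
		-numa node,cpus=2-3,mem=4G \
		-kernel vmlinux -nographic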
 
> cheers
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH v2 19/21] treewide: add checks for the return value of memblock_alloc*()

2019-01-30 Thread Mike Rapoport
On Thu, Jan 31, 2019 at 08:07:29AM +0100, Christophe Leroy wrote:
> 
> 
> Le 31/01/2019 à 07:44, Christophe Leroy a écrit :
> >
> >
> >Le 31/01/2019 à 07:41, Mike Rapoport a écrit :
> >>On Thu, Jan 31, 2019 at 07:07:46AM +0100, Christophe Leroy wrote:
> >>>
> >>>
> >>>Le 21/01/2019 à 09:04, Mike Rapoport a écrit :
> >>>>Add check for the return value of memblock_alloc*() functions and call
> >>>>panic() in case of error.
> >>>>The panic message repeats the one used by panicing memblock
> >>>>allocators with
> >>>>adjustment of parameters to include only relevant ones.
> >>>>
> >>>>The replacement was mostly automated with semantic patches like the one
> >>>>below with manual massaging of format strings.
> >>>>
> >>>>@@
> >>>>expression ptr, size, align;
> >>>>@@
> >>>>ptr = memblock_alloc(size, align);
> >>>>+ if (!ptr)
> >>>>+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__,
> >>>>size, align);
> >>>>
> >>>>Signed-off-by: Mike Rapoport 
> >>>>Reviewed-by: Guo Ren  # c-sky
> >>>>Acked-by: Paul Burton  # MIPS
> >>>>Acked-by: Heiko Carstens  # s390
> >>>>Reviewed-by: Juergen Gross  # Xen
> >>>>---
> >>>
> >>>[...]
> >>>
> >>>>diff --git a/mm/sparse.c b/mm/sparse.c
> >>>>index 7ea5dc6..ad94242 100644
> >>>>--- a/mm/sparse.c
> >>>>+++ b/mm/sparse.c
> >>>
> >>>[...]
> >>>
> >>>>@@ -425,6 +436,10 @@ static void __init sparse_buffer_init(unsigned
> >>>>long size, int nid)
> >>>>  memblock_alloc_try_nid_raw(size, PAGE_SIZE,
> >>>>  __pa(MAX_DMA_ADDRESS),
> >>>>  MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> >>>>+    if (!sparsemap_buf)
> >>>>+    panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d
> >>>>from=%lx\n",
> >>>>+  __func__, size, PAGE_SIZE, nid, __pa(MAX_DMA_ADDRESS));
> >>>>+
> >>>
> >>>memblock_alloc_try_nid_raw() does not panic (help explicitly says:
> >>>Does not
> >>>zero allocated memory, does not panic if request cannot be satisfied.).
> >>
> >>"Does not panic" does not mean it always succeeds.
> >
> >I agree, but at least here you are changing the behaviour by making it
> >panic explicitly. Are we sure there are not cases where the system could
> >just continue functionning ? Maybe a WARN_ON() would be enough there ?
> 
> Looking more in details, it looks like everything is done to live with
> sparsemap_buf NULL, all functions using it check it so having it NULL
> shouldn't imply a panic I believe, see code below.

You are right, I'm preparing the fix right now.
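
Presumably the fix just drops the added panic() and relies on
sparse_buffer_alloc() (quoted below) coping with a NULL buffer, along these
lines (a sketch, not the actual follow-up patch):

	sparsemap_buf =
		memblock_alloc_try_nid_raw(size, PAGE_SIZE,
						__pa(MAX_DMA_ADDRESS),
						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	/* no panic: callers of sparse_buffer_alloc() handle a NULL buffer */
	sparsemap_buf_end = sparsemap_buf + size;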
 
> static void *sparsemap_buf __meminitdata;
> static void *sparsemap_buf_end __meminitdata;
> 
> static void __init sparse_buffer_init(unsigned long size, int nid)
> {
>   WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
>   sparsemap_buf =
>   memblock_alloc_try_nid_raw(size, PAGE_SIZE,
>   __pa(MAX_DMA_ADDRESS),
>   MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>   sparsemap_buf_end = sparsemap_buf + size;
> }
> 
> static void __init sparse_buffer_fini(void)
> {
>   unsigned long size = sparsemap_buf_end - sparsemap_buf;
> 
>   if (sparsemap_buf && size > 0)
>   memblock_free_early(__pa(sparsemap_buf), size);
>   sparsemap_buf = NULL;
> }
> 
> void * __meminit sparse_buffer_alloc(unsigned long size)
> {
>   void *ptr = NULL;
> 
>   if (sparsemap_buf) {
>   ptr = PTR_ALIGN(sparsemap_buf, size);
>   if (ptr + size > sparsemap_buf_end)
>   ptr = NULL;
>   else
>   sparsemap_buf = ptr + size;
>   }
>   return ptr;
> }
> 
> 
> Christophe
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH v2 19/21] treewide: add checks for the return value of memblock_alloc*()

2019-01-30 Thread Mike Rapoport
On Thu, Jan 31, 2019 at 07:07:46AM +0100, Christophe Leroy wrote:
> 
> 
> Le 21/01/2019 à 09:04, Mike Rapoport a écrit :
> >Add check for the return value of memblock_alloc*() functions and call
> >panic() in case of error.
> >The panic message repeats the one used by panicing memblock allocators with
> >adjustment of parameters to include only relevant ones.
> >
> >The replacement was mostly automated with semantic patches like the one
> >below with manual massaging of format strings.
> >
> >@@
> >expression ptr, size, align;
> >@@
> >ptr = memblock_alloc(size, align);
> >+ if (!ptr)
> >+panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__,
> >size, align);
> >
> >Signed-off-by: Mike Rapoport 
> >Reviewed-by: Guo Ren  # c-sky
> >Acked-by: Paul Burton   # MIPS
> >Acked-by: Heiko Carstens  # s390
> >Reviewed-by: Juergen Gross  # Xen
> >---
> 
> [...]
> 
> >diff --git a/mm/sparse.c b/mm/sparse.c
> >index 7ea5dc6..ad94242 100644
> >--- a/mm/sparse.c
> >+++ b/mm/sparse.c
> 
> [...]
> 
> >@@ -425,6 +436,10 @@ static void __init sparse_buffer_init(unsigned long 
> >size, int nid)
> > memblock_alloc_try_nid_raw(size, PAGE_SIZE,
> > __pa(MAX_DMA_ADDRESS),
> > MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> >+if (!sparsemap_buf)
> >+panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d 
> >from=%lx\n",
> >+  __func__, size, PAGE_SIZE, nid, __pa(MAX_DMA_ADDRESS));
> >+
> 
> memblock_alloc_try_nid_raw() does not panic (help explicitly says: Does not
> zero allocated memory, does not panic if request cannot be satisfied.).

"Does not panic" does not mean it always succeeds.
 
> Stephen Rothwell reports a boot failure due to this change.

Please see my reply on that thread.

> Christophe
> 
> > sparsemap_buf_end = sparsemap_buf + size;
> >  }
> >
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH v2 06/21] memblock: memblock_phys_alloc_try_nid(): don't panic

2019-01-25 Thread Mike Rapoport
On Fri, Jan 25, 2019 at 05:45:02PM +, Catalin Marinas wrote:
> On Mon, Jan 21, 2019 at 10:03:53AM +0200, Mike Rapoport wrote:
> > diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> > index ae34e3a..2c61ea4 100644
> > --- a/arch/arm64/mm/numa.c
> > +++ b/arch/arm64/mm/numa.c
> > @@ -237,6 +237,10 @@ static void __init setup_node_data(int nid, u64 
> > start_pfn, u64 end_pfn)
> > pr_info("Initmem setup node %d []\n", nid);
> >  
> > nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
> > +   if (!nd_pa)
> > +   panic("Cannot allocate %zu bytes for node %d data\n",
> > + nd_size, nid);
> > +
> > nd = __va(nd_pa);
> >  
> > /* report and initialize */
> 
> Does it mean that memblock_phys_alloc_try_nid() never returns valid
> physical memory starting at 0?

Yes, it does.
memblock_find_in_range_node(), which is used by all allocation methods,
skips the first page [1].
 
[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/memblock.c#n257
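
The check behind that statement is essentially the following (quoting
memblock_find_in_range_node() from memory, so treat it as a sketch):

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);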

> -- 
> Catalin
> 

-- 
Sincerely yours,
Mike.



[PATCH v2 00/21] Refine memblock API

2019-01-21 Thread Mike Rapoport
Hi,

The current memblock API is quite extensive and, which is more annoying,
duplicated. Apart from the low-level functions that allow searching for a
free memory region and marking it as reserved, memblock provides three
(well, two and a half) sets of functions to allocate memory. There are
several overlapping functions that return a physical address and there are
functions that return a virtual address. Those that return the virtual
address may also clear the allocated memory. And, on top of all that, some
allocators panic and some return NULL in case of error.

This set tries to reduce the mess and trim down the number of memblock
allocation methods.

Patches 1-10 consolidate the functions that return the physical address of
the allocated memory

Patches 11-13 are some trivial cleanups

Patches 14-19 add checks for the return value of memblock_alloc*() along
with panic() in case of errors. Patches 14-18 include some minor
refactoring for better readability of the resulting code, and patch 19 is a
mechanical addition of

if (!ptr)
panic();

after memblock_alloc*() calls.

And, finally, patches 20 and 21 remove the panic() calls from memblock and
drop the _nopanic variants.

v2 changes:
* replace some more %lu with %zu
* remove panics where they are not needed in s390 and in printk
* collect Acked-by and Reviewed-by.


Christophe Leroy (1):
  powerpc: use memblock functions returning virtual address

Mike Rapoport (20):
  openrisc: prefer memblock APIs returning virtual address
  memblock: replace memblock_alloc_base(ANYWHERE) with memblock_phys_alloc
  memblock: drop memblock_alloc_base_nid()
  memblock: emphasize that memblock_alloc_range() returns a physical address
  memblock: memblock_phys_alloc_try_nid(): don't panic
  memblock: memblock_phys_alloc(): don't panic
  memblock: drop __memblock_alloc_base()
  memblock: drop memblock_alloc_base()
  memblock: refactor internal allocation functions
  memblock: make memblock_find_in_range_node() and choose_memblock_flags() 
static
  arch: use memblock_alloc() instead of memblock_alloc_from(size, align, 0)
  arch: don't memset(0) memory returned by memblock_alloc()
  ia64: add checks for the return value of memblock_alloc*()
  sparc: add checks for the return value of memblock_alloc*()
  mm/percpu: add checks for the return value of memblock_alloc*()
  init/main: add checks for the return value of memblock_alloc*()
  swiotlb: add checks for the return value of memblock_alloc*()
  treewide: add checks for the return value of memblock_alloc*()
  memblock: memblock_alloc_try_nid: don't panic
  memblock: drop memblock_alloc_*_nopanic() variants

 arch/alpha/kernel/core_cia.c  |   5 +-
 arch/alpha/kernel/core_marvel.c   |   6 +
 arch/alpha/kernel/pci-noop.c  |  13 +-
 arch/alpha/kernel/pci.c   |  11 +-
 arch/alpha/kernel/pci_iommu.c |  16 +-
 arch/alpha/kernel/setup.c |   2 +-
 arch/arc/kernel/unwind.c  |   3 +-
 arch/arc/mm/highmem.c |   4 +
 arch/arm/kernel/setup.c   |   6 +
 arch/arm/mm/init.c|   6 +-
 arch/arm/mm/mmu.c |  14 +-
 arch/arm64/kernel/setup.c |   8 +-
 arch/arm64/mm/kasan_init.c|  10 ++
 arch/arm64/mm/mmu.c   |   2 +
 arch/arm64/mm/numa.c  |   4 +
 arch/c6x/mm/dma-coherent.c|   4 +
 arch/c6x/mm/init.c|   4 +-
 arch/csky/mm/highmem.c|   5 +
 arch/h8300/mm/init.c  |   4 +-
 arch/ia64/kernel/mca.c|  25 +--
 arch/ia64/mm/contig.c |   8 +-
 arch/ia64/mm/discontig.c  |   4 +
 arch/ia64/mm/init.c   |  38 -
 arch/ia64/mm/tlb.c|   6 +
 arch/ia64/sn/kernel/io_common.c   |   3 +
 arch/ia64/sn/kernel/setup.c   |  12 +-
 arch/m68k/atari/stram.c   |   4 +
 arch/m68k/mm/init.c   |   3 +
 arch/m68k/mm/mcfmmu.c |   7 +-
 arch/m68k/mm/motorola.c   |   9 ++
 arch/m68k/mm/sun3mmu.c|   6 +
 arch/m68k/sun3/sun3dvma.c |   3 +
 arch/microblaze/mm/init.c |  10 +-
 arch/mips/cavium-octeon/dma-octeon.c  |   3 +
 arch/mips/kernel/setup.c  |   3 +
 arch/mips/kernel/traps.c  |   5 +-
 arch/mips/mm/init.c   |   5 +
 arch/nds32/mm/init.c  |  12 ++
 arch/openrisc/mm/init.c   |   5 +-
 arch/openrisc/mm/ioremap.c|   8 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c |   8 +-
 arch/powerpc/kernel/irq.c |   5 -
 arch/powerpc/kernel/paca.c|   6 +-
 arch/powerpc/kernel/pci_32.c  |   3 +
 arch/powerpc/kernel/prom.c|   5 +-
 arch/powerpc/kernel/rtas.c

[PATCH v2 11/21] memblock: make memblock_find_in_range_node() and choose_memblock_flags() static

2019-01-21 Thread Mike Rapoport
These functions are not used outside memblock. Make them static.

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h | 4 
 mm/memblock.c| 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index cf4cd9c..f5a83a1 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -111,9 +111,6 @@ void memblock_discard(void);
 #define memblock_dbg(fmt, ...) \
if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
-   phys_addr_t start, phys_addr_t end,
-   int nid, enum memblock_flags flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -130,7 +127,6 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t 
size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
-enum memblock_flags choose_memblock_flags(void);
 
 unsigned long memblock_free_all(void);
 void reset_node_managed_pages(pg_data_t *pgdat);
diff --git a/mm/memblock.c b/mm/memblock.c
index 739f769..03b3929 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -125,7 +125,7 @@ static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 static int memblock_reserved_in_slab __initdata_memblock = 0;
 
-enum memblock_flags __init_memblock choose_memblock_flags(void)
+static enum memblock_flags __init_memblock choose_memblock_flags(void)
 {
return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
 }
@@ -254,7 +254,7 @@ __memblock_find_range_top_down(phys_addr_t start, 
phys_addr_t end,
  * Return:
  * Found address on success, 0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
+static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
phys_addr_t end, int nid,
enum memblock_flags flags)
-- 
2.7.4



[PATCH v2 07/21] memblock: memblock_phys_alloc(): don't panic

2019-01-21 Thread Mike Rapoport
Make the memblock_phys_alloc() function an inline wrapper for
memblock_phys_alloc_range() and update the memblock_phys_alloc() callers to
check the returned value and panic in case of error.
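
The resulting wrapper is essentially (a sketch of the shape, not
necessarily the exact hunk):

	static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
						      phys_addr_t align)
	{
		return memblock_phys_alloc_range(size, align, 0,
						 MEMBLOCK_ALLOC_ACCESSIBLE);
	}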

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c   | 4 
 arch/arm64/mm/mmu.c  | 2 ++
 arch/powerpc/sysdev/dart_iommu.c | 3 +++
 arch/s390/kernel/crash_dump.c| 3 +++
 arch/s390/kernel/setup.c | 3 +++
 arch/sh/boards/mach-ap325rxa/setup.c | 3 +++
 arch/sh/boards/mach-ecovec24/setup.c | 6 ++
 arch/sh/boards/mach-kfr2r09/setup.c  | 3 +++
 arch/sh/boards/mach-migor/setup.c| 3 +++
 arch/sh/boards/mach-se/7724/setup.c  | 6 ++
 arch/xtensa/mm/kasan_init.c  | 3 +++
 include/linux/memblock.h | 7 ++-
 mm/memblock.c| 5 -
 13 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b76b90e..15dddfe 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -206,6 +206,10 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, 
phys_addr_t align)
BUG_ON(!arm_memblock_steal_permitted);
 
phys = memblock_phys_alloc(size, align);
+   if (!phys)
+   panic("Failed to steal %pa bytes at %pS\n",
+ &size, (void *)_RET_IP_);
+
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b6f5aa5..a74e4be 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -104,6 +104,8 @@ static phys_addr_t __init early_pgtable_alloc(void)
void *ptr;
 
phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate page table page\n");
 
/*
 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 25bc25f..b82c9ff 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -265,6 +265,9 @@ static void allocate_dart(void)
 * prefetching into invalid pages and corrupting data
 */
tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
+   if (!tmp)
+   panic("DART: table allocation failed\n");
+
dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
 DARTMAP_RPNMASK);
 
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 97eae38..f96a585 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -61,6 +61,9 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
struct save_area *sa;
 
sa = (void *) memblock_phys_alloc(sizeof(*sa), 8);
+   if (!sa)
+   panic("Failed to allocate save area\n");
+
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
else
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 72dd23e..da48397 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -968,6 +968,9 @@ static void __init setup_randomness(void)
 
vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
PAGE_SIZE);
+   if (!vmms)
+   panic("Failed to allocate memory for sysinfo structure\n");
+
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free((unsigned long) vmms, PAGE_SIZE);
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c 
b/arch/sh/boards/mach-ap325rxa/setup.c
index d7ceab6..08a0cc9 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -558,6 +558,9 @@ static void __init ap325rxa_mv_mem_reserve(void)
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
phys = memblock_phys_alloc(size, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate CEU memory\n");
+
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-ecovec24/setup.c 
b/arch/sh/boards/mach-ecovec24/setup.c
index a3901806..fd264a6 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1481,11 +1481,17 @@ static void __init ecovec_mv_mem_reserve(void)
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
phys = memblock_phys_alloc(size, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate CEU0 memory\n");
+
memblock_free(phys, size);
memblock_remove(phys, size);
ceu0_dma_membase = phys;
 
phys = memblock_phys_alloc(size, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate CEU1 memory\n");
+
memblock_free(ph

[PATCH v2 08/21] memblock: drop __memblock_alloc_base()

2019-01-21 Thread Mike Rapoport
The __memblock_alloc_base() function tries to allocate memory up to the
limit specified by its max_addr parameter. Depending on the value of this
parameter, __memblock_alloc_base() can be replaced with the appropriate
memblock_phys_alloc*() variant.
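
The replacement rule applied throughout the diff below is roughly:

	/* max_addr == MEMBLOCK_ALLOC_ACCESSIBLE, i.e. no real limit: */
	addr = memblock_phys_alloc(size, align);

	/* explicit upper bound: */
	addr = memblock_phys_alloc_range(size, align, 0, max_addr);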

Signed-off-by: Mike Rapoport 
Acked-by: Rob Herring 
---
 arch/sh/kernel/machine_kexec.c |  3 ++-
 arch/x86/kernel/e820.c |  2 +-
 arch/x86/mm/numa.c | 12 
 drivers/of/of_reserved_mem.c   |  7 ++-
 include/linux/memblock.h   |  2 --
 mm/memblock.c  |  9 ++---
 6 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index b9f9f1a..63d63a3 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -168,7 +168,8 @@ void __init reserve_crashkernel(void)
crash_size = PAGE_ALIGN(resource_size(&crashk_res));
if (!crashk_res.start) {
unsigned long max = memblock_end_of_DRAM() - memory_limit;
-   crashk_res.start = __memblock_alloc_base(crash_size, PAGE_SIZE, 
max);
+   crashk_res.start = memblock_phys_alloc_range(crash_size,
+PAGE_SIZE, 0, max);
if (!crashk_res.start) {
pr_err("crashkernel allocation failed\n");
goto disable;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 50895c2..9c0eb54 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -778,7 +778,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 
align)
 {
u64 addr;
 
-   addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+   addr = memblock_phys_alloc(size, align);
if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, 
E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for 
e820__memblock_alloc_reserved()\n");
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1308f54..f85ae42 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -195,15 +195,11 @@ static void __init alloc_node_data(int nid)
 * Allocate node data.  Try node-local memory and then any node.
 * Never allocate in DMA zone.
 */
-   nd_pa = memblock_phys_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+   nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
if (!nd_pa) {
-   nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
- MEMBLOCK_ALLOC_ACCESSIBLE);
-   if (!nd_pa) {
-   pr_err("Cannot find %zu bytes in any node (initial 
node: %d)\n",
-  nd_size, nid);
-   return;
-   }
+   pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
+  nd_size, nid);
+   return;
}
nd = __va(nd_pa);
 
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 1977ee0..499f16d 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -31,13 +31,10 @@ int __init __weak 
early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
phys_addr_t *res_base)
 {
phys_addr_t base;
-   /*
-* We use __memblock_alloc_base() because memblock_alloc_base()
-* panic()s on allocation failure.
-*/
+
end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end;
align = !align ? SMP_CACHE_BYTES : align;
-   base = __memblock_alloc_base(size, align, end);
+   base = memblock_phys_alloc_range(size, align, 0, end);
if (!base)
return -ENOMEM;
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 7883c74..768e2b4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -496,8 +496,6 @@ static inline bool memblock_bottom_up(void)
 
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t max_addr);
-phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
- phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_mem_size(unsigned long limit_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index 461e40a3..e5ffdcd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1363,17 +1363,12 @@ phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t 
size, phys_addr_t align,
return ret;
 }
 
-phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, 
phys_addr_t max_addr)
-{
-   return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
-   MEMBLOCK_NONE);
-}
-
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, 

[PATCH v2 06/21] memblock: memblock_phys_alloc_try_nid(): don't panic

2019-01-21 Thread Mike Rapoport
The memblock_phys_alloc_try_nid() function tries to allocate memory from
the requested node and then falls back to allocation from any node in the
system. The memblock_alloc_base() fallback used by this function panics if
the allocation fails.

Replace the memblock_alloc_base() fallback with the direct call to
memblock_alloc_range_nid() and update the memblock_phys_alloc_try_nid()
callers to check the returned value and panic in case of error.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/mm/numa.c   | 4 
 arch/powerpc/mm/numa.c | 4 
 mm/memblock.c  | 4 +++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index ae34e3a..2c61ea4 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -237,6 +237,10 @@ static void __init setup_node_data(int nid, u64 start_pfn, 
u64 end_pfn)
pr_info("Initmem setup node %d []\n", nid);
 
nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+   if (!nd_pa)
+   panic("Cannot allocate %zu bytes for node %d data\n",
+ nd_size, nid);
+
nd = __va(nd_pa);
 
/* report and initialize */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 270cefb..8f2bbe1 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -788,6 +788,10 @@ static void __init setup_node_data(int nid, u64 start_pfn, 
u64 end_pfn)
int tnid;
 
nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+   if (!nd_pa)
+   panic("Cannot allocate %zu bytes for node %d data\n",
+ nd_size, nid);
+
nd = __va(nd_pa);
 
/* report and initialize */
diff --git a/mm/memblock.c b/mm/memblock.c
index f019aee..8aabb1b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1393,7 +1393,9 @@ phys_addr_t __init 
memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
 
if (res)
return res;
-   return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+   return memblock_alloc_range_nid(size, align, 0,
+   MEMBLOCK_ALLOC_ACCESSIBLE,
+   NUMA_NO_NODE, MEMBLOCK_NONE);
 }
 
 /**
-- 
2.7.4



[PATCH v2 04/21] memblock: drop memblock_alloc_base_nid()

2019-01-21 Thread Mike Rapoport
The memblock_alloc_base_nid() function is a one-line wrapper for
memblock_alloc_range_nid() without any side effects.
Replace its usage with direct calls to memblock_alloc_range_nid().
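
For reference, the dropped wrapper was exactly equivalent to the direct call
(see the removed hunk below):

	memblock_alloc_base_nid(size, align, max_addr, nid, flags);
	/* ... is replaced throughout by ... */
	memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);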

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h |  3 ---
 mm/memblock.c| 15 ---
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 60e100f..f7ef313 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -490,9 +490,6 @@ static inline bool memblock_bottom_up(void)
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
phys_addr_t start, phys_addr_t end,
enum memblock_flags flags);
-phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
-   phys_addr_t align, phys_addr_t max_addr,
-   int nid, enum memblock_flags flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/memblock.c b/mm/memblock.c
index a32db30..c80029e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1346,21 +1346,14 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t 
size, phys_addr_t align,
flags);
 }
 
-phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
-   phys_addr_t align, phys_addr_t max_addr,
-   int nid, enum memblock_flags flags)
-{
-   return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
-}
-
 phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t 
align, int nid)
 {
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t ret;
 
 again:
-   ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE,
- nid, flags);
+   ret = memblock_alloc_range_nid(size, align, 0,
+  MEMBLOCK_ALLOC_ACCESSIBLE, nid, flags);
 
if (!ret && (flags & MEMBLOCK_MIRROR)) {
flags &= ~MEMBLOCK_MIRROR;
@@ -1371,8 +1364,8 @@ phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t 
size, phys_addr_t align,
 
 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, 
phys_addr_t max_addr)
 {
-   return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE,
-  MEMBLOCK_NONE);
+   return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
+   MEMBLOCK_NONE);
 }
 
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, 
phys_addr_t max_addr)
-- 
2.7.4



[PATCH v2 10/21] memblock: refactor internal allocation functions

2019-01-21 Thread Mike Rapoport
Currently, memblock has several internal functions with overlapping
functionality. They all call memblock_find_in_range_node() to find free
memory and then reserve the allocated range and mark it with kmemleak.
However, there are differences in the allocation constraints and in the
fallback strategies.

The allocations returning a physical address first attempt to find free
memory on the specified node within mirrored memory regions, then retry on
the same node without the requirement for memory mirroring and finally fall
back to all available memory.

The allocations returning a virtual address start by clamping the allowed
range to memblock.current_limit and then attempt to allocate from the
specified node from the regions with mirroring enabled and above the user
defined minimal address. If such an allocation fails, the next attempt is
made with the node restriction lifted. Next, the allocation is retried with
the minimal address reset to zero and, finally, without the requirement for
mirrored regions.

Let's consolidate the various fallback handling and make it more consistent
for the physical and virtual variants. Most of the fallback handling is moved
to memblock_alloc_range_nid(), which now handles the node and mirror
fallbacks.

The memblock_alloc_internal() function uses memblock_alloc_range_nid() to get
the physical address of the allocated range and converts it to a virtual
address.

The fallback for allocation below the specified minimal address remains in
memblock_alloc_internal() because memblock_alloc_range_nid() is used by CMA
with an exact requirement for the lower bound.

The memblock_phys_alloc_nid() function is completely dropped as it is not
used anywhere outside memblock and its only usage can be replaced by a call
to memblock_alloc_range_nid().
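
Schematically, the consolidated fallback logic in memblock_alloc_range_nid()
becomes (a simplified sketch; reservation, kmemleak and the exact-node
details are omitted, see the diff below):

	enum memblock_flags flags = choose_memblock_flags();
again:
	found = memblock_find_in_range_node(size, align, start, end,
					    nid, flags);
	if (!found && nid != NUMA_NO_NODE)	/* node fallback */
		found = memblock_find_in_range_node(size, align, start, end,
						    NUMA_NO_NODE, flags);
	if (!found && (flags & MEMBLOCK_MIRROR)) {
		flags &= ~MEMBLOCK_MIRROR;	/* mirror fallback */
		goto again;
	}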

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h |   1 -
 mm/memblock.c| 173 +--
 2 files changed, 78 insertions(+), 96 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6874fdc..cf4cd9c 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -371,7 +371,6 @@ static inline int memblock_get_region_node(const struct 
memblock_region *r)
 
 phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
  phys_addr_t start, phys_addr_t end);
-phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int 
nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, 
int nid);
 
 static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
diff --git a/mm/memblock.c b/mm/memblock.c
index 531fa77..739f769 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1312,30 +1312,84 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone 
*zone,
 
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
+/**
+ * memblock_alloc_range_nid - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @start: the lower bound of the memory region to allocate (phys address)
+ * @end: the upper bound of the memory region to allocate (phys address)
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * The allocation is performed from memory region limited by
+ * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
+ *
+ * If the specified node can not hold the requested memory the
+ * allocation falls back to any node in the system
+ *
+ * For systems with memory mirroring, the allocation is attempted first
+ * from the regions with mirroring enabled and then retried from any
+ * memory region.
+ *
+ * In addition, function sets the min_count to 0 using kmemleak_alloc_phys for
+ * allocated boot memory block, so that it is never reported as leaks.
+ *
+ * Return:
+ * Physical address of allocated memory block on success, %0 on failure.
+ */
 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
-   phys_addr_t end, int nid,
-   enum memblock_flags flags)
+   phys_addr_t end, int nid)
 {
+   enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
 
+   if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is 
deprecated. Use NUMA_NO_NODE instead\n"))
+   nid = NUMA_NO_NODE;
+
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
align = SMP_CACHE_BYTES;
}
 
+   if (end > memblock.current_limit)
+   end = memblock.current_limit;
+
+again:
found = memblock_find_in_range_node(size, align, start, end, nid,
flags);
-   if (found && !memblock_reserve(found, size)) {
+   if (found && !memblock_reserve(found, size)) {

[PATCH v2 12/21] arch: use memblock_alloc() instead of memblock_alloc_from(size, align, 0)

2019-01-21 Thread Mike Rapoport
The last parameter of memblock_alloc_from() is the lower limit for the
memory allocation. When it is 0, the call is equivalent to
memblock_alloc().
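
In other words (illustrative only), the two calls below are interchangeable,
since a lower limit of 0 places no restriction on the allocation:

	ptr = memblock_alloc_from(size, align, 0);
	ptr = memblock_alloc(size, align);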

Signed-off-by: Mike Rapoport 
Acked-by: Paul Burton  # MIPS part
---
 arch/alpha/kernel/core_cia.c  |  2 +-
 arch/alpha/kernel/pci_iommu.c |  4 ++--
 arch/alpha/kernel/setup.c |  2 +-
 arch/ia64/kernel/mca.c|  3 +--
 arch/mips/kernel/traps.c  |  2 +-
 arch/sparc/kernel/prom_32.c   |  2 +-
 arch/sparc/mm/init_32.c   |  2 +-
 arch/sparc/mm/srmmu.c | 10 +-
 8 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index 867e873..466cd44 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -331,7 +331,7 @@ cia_prepare_tbia_workaround(int window)
long i;
 
/* Use minimal 1K map. */
-   ppte = memblock_alloc_from(CIA_BROKEN_TBIA_SIZE, 32768, 0);
+   ppte = memblock_alloc(CIA_BROKEN_TBIA_SIZE, 32768);
pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1;
 
for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i)
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index aa0f50d..e4cf77b 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -87,13 +87,13 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, 
dma_addr_t base,
printk("%s: couldn't allocate arena ptes from node %d\n"
   "falling back to system-wide allocation\n",
   __func__, nid);
-   arena->ptes = memblock_alloc_from(mem_size, align, 0);
+   arena->ptes = memblock_alloc(mem_size, align);
}
 
 #else /* CONFIG_DISCONTIGMEM */
 
arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
-   arena->ptes = memblock_alloc_from(mem_size, align, 0);
+   arena->ptes = memblock_alloc(mem_size, align);
 
 #endif /* CONFIG_DISCONTIGMEM */
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 4b5b1b2..5d4c76a 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -293,7 +293,7 @@ move_initrd(unsigned long mem_limit)
unsigned long size;
 
size = initrd_end - initrd_start;
-   start = memblock_alloc_from(PAGE_ALIGN(size), PAGE_SIZE, 0);
+   start = memblock_alloc(PAGE_ALIGN(size), PAGE_SIZE);
if (!start || __pa(start) + size > mem_limit) {
initrd_start = initrd_end = 0;
return NULL;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 91bd1e1..74d148b 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1835,8 +1835,7 @@ format_mca_init_stack(void *mca_data, unsigned long 
offset,
 /* Caller prevents this from being called after init */
 static void * __ref mca_bootmem(void)
 {
-   return memblock_alloc_from(sizeof(struct ia64_mca_cpu),
-  KERNEL_STACK_SIZE, 0);
+   return memblock_alloc(sizeof(struct ia64_mca_cpu), KERNEL_STACK_SIZE);
 }
 
 /* Do per-CPU MCA-related initialization.  */
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c91097f..2bbdee5 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2291,7 +2291,7 @@ void __init trap_init(void)
phys_addr_t ebase_pa;
 
ebase = (unsigned long)
-   memblock_alloc_from(size, 1 << fls(size), 0);
+   memblock_alloc(size, 1 << fls(size));
 
/*
 * Try to ensure ebase resides in KSeg0 if possible.
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 42d7f2a..38940af 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -32,7 +32,7 @@ void * __init prom_early_alloc(unsigned long size)
 {
void *ret;
 
-   ret = memblock_alloc_from(size, SMP_CACHE_BYTES, 0UL);
+   ret = memblock_alloc(size, SMP_CACHE_BYTES);
if (ret != NULL)
memset(ret, 0, size);
 
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index d900952..a8ff298 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -264,7 +264,7 @@ void __init mem_init(void)
i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
i += 1;
sparc_valid_addr_bitmap = (unsigned long *)
-   memblock_alloc_from(i << 2, SMP_CACHE_BYTES, 0UL);
+   memblock_alloc(i << 2, SMP_CACHE_BYTES);
 
if (sparc_valid_addr_bitmap == NULL) {
prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index b609362..a400ec3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -303,13 +303,13 @@ static void __init srmmu_nocache_init(void)
 
bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT;

[PATCH v2 13/21] arch: don't memset(0) memory returned by memblock_alloc()

2019-01-21 Thread Mike Rapoport
memblock_alloc() already clears the allocated memory, no point in doing it
twice.
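
A typical instance of the removed pattern (sketch; the actual call sites are
in the diffs below):

	ptr = memblock_alloc(size, SMP_CACHE_BYTES);	/* already zeroed */
	memset(ptr, 0, size);				/* redundant, dropped */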

Signed-off-by: Mike Rapoport 
Acked-by: Geert Uytterhoeven  # m68k
---
 arch/c6x/mm/init.c  | 1 -
 arch/h8300/mm/init.c| 1 -
 arch/ia64/kernel/mca.c  | 2 --
 arch/m68k/mm/mcfmmu.c   | 1 -
 arch/microblaze/mm/init.c   | 6 ++
 arch/sparc/kernel/prom_32.c | 2 --
 6 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index af5ada0..e83c046 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -40,7 +40,6 @@ void __init paging_init(void)
 
empty_zero_page  = (unsigned long) memblock_alloc(PAGE_SIZE,
  PAGE_SIZE);
-   memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
/*
 * Set up user data space
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 6519252..a157890 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -68,7 +68,6 @@ void __init paging_init(void)
 * to a couple of allocated pages.
 */
empty_zero_page = (unsigned long)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-   memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
/*
 * Set up SFC/DFC registers (user data space).
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 74d148b..370bc34 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -400,8 +400,6 @@ ia64_log_init(int sal_info_type)
 
// set up OS data structures to hold error info
IA64_LOG_ALLOCATE(sal_info_type, max_size);
-   memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size);
-   memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size);
 }
 
 /*
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 0de4999..492f953 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -44,7 +44,6 @@ void __init paging_init(void)
int i;
 
empty_zero_page = (void *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-   memset((void *) empty_zero_page, 0, PAGE_SIZE);
 
pg_dir = swapper_pg_dir;
memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 44f4b89..bd1cd4b 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -376,10 +376,8 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 
if (mem_init_done)
p = kzalloc(size, mask);
-   else {
+   else
p = memblock_alloc(size, SMP_CACHE_BYTES);
-   if (p)
-   memset(p, 0, size);
-   }
+
return p;
 }
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 38940af..e7126ca 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -33,8 +33,6 @@ void * __init prom_early_alloc(unsigned long size)
void *ret;
 
ret = memblock_alloc(size, SMP_CACHE_BYTES);
-   if (ret != NULL)
-   memset(ret, 0, size);
 
prom_early_allocated += size;
 
-- 
2.7.4



[PATCH v2 14/21] ia64: add checks for the return value of memblock_alloc*()

2019-01-21 Thread Mike Rapoport
Add panic() calls if memblock_alloc*() returns NULL.

Most of the changes are simply addition of

if (!ptr)
	panic();

statements after the calls to memblock_alloc*() variants.

Exceptions are create_mem_map_page_table() and ia64_log_init() that were
slightly refactored to accommodate the change.

Signed-off-by: Mike Rapoport 
---
 arch/ia64/kernel/mca.c  | 20 ++--
 arch/ia64/mm/contig.c   |  8 ++--
 arch/ia64/mm/discontig.c|  4 
 arch/ia64/mm/init.c | 38 ++
 arch/ia64/mm/tlb.c  |  6 ++
 arch/ia64/sn/kernel/io_common.c |  3 +++
 arch/ia64/sn/kernel/setup.c | 12 +++-
 7 files changed, 74 insertions(+), 17 deletions(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 370bc34..5cabb3f 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -359,11 +359,6 @@ typedef struct ia64_state_log_s
 
 static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
 
-#define IA64_LOG_ALLOCATE(it, size) \
-   {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
-   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); \
-   ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
-   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);}
#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
#define IA64_LOG_LOCK(it)  spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
#define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock, s)
@@ -378,6 +373,19 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
 #define IA64_LOG_CURR_BUFFER(it)   (void 
*)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
 #define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
 
+static inline void ia64_log_allocate(int it, u64 size)
+{
+   ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] =
+   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);
+   if (!ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])
+   panic("%s: Failed to allocate %llu bytes\n", __func__, size);
+
+   ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] =
+   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);
+   if (!ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])
+   panic("%s: Failed to allocate %llu bytes\n", __func__, size);
+}
+
 /*
  * ia64_log_init
  * Reset the OS ia64 log buffer
@@ -399,7 +407,7 @@ ia64_log_init(int sal_info_type)
return;
 
// set up OS data structures to hold error info
-   IA64_LOG_ALLOCATE(sal_info_type, max_size);
+   ia64_log_allocate(sal_info_type, max_size);
 }
 
 /*
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 6e44723..d29fb6b 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -84,9 +84,13 @@ void *per_cpu_init(void)
 static inline void
 alloc_per_cpu_data(void)
 {
-   cpu_data = memblock_alloc_from(PERCPU_PAGE_SIZE * num_possible_cpus(),
-  PERCPU_PAGE_SIZE,
+   size_t size = PERCPU_PAGE_SIZE * num_possible_cpus();
+
+   cpu_data = memblock_alloc_from(size, PERCPU_PAGE_SIZE,
   __pa(MAX_DMA_ADDRESS));
+   if (!cpu_data)
+   panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, size, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 }
 
 /**
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index f9c3675..05490dd 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -454,6 +454,10 @@ static void __init *memory_less_node_alloc(int nid, 
unsigned long pernodesize)
 __pa(MAX_DMA_ADDRESS),
 MEMBLOCK_ALLOC_ACCESSIBLE,
 bestnode);
+   if (!ptr)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d 
from=%lx\n",
+ __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode,
+ __pa(MAX_DMA_ADDRESS));
 
return ptr;
 }
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 29d8415..e49200e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -444,23 +444,45 @@ int __init create_mem_map_page_table(u64 start, u64 end, 
void *arg)
 
for (address = start_page; address < end_page; address += PAGE_SIZE) {
pgd = pgd_offset_k(address);
-   if (pgd_none(*pgd))
-   pgd_populate(&init_mm, pgd, memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node));
+   if (pgd_none(*pgd)) {
+   pud = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
+   if (!pud)
+   goto err_alloc;
+   pgd_populate(&init_mm, pgd, pud);
+  

[PATCH v2 03/21] memblock: replace memblock_alloc_base(ANYWHERE) with memblock_phys_alloc

2019-01-21 Thread Mike Rapoport
The calls to memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE) and
memblock_phys_alloc(size, align) are equivalent, as both try to allocate
'size' bytes with 'align' alignment anywhere in memory and panic if the
allocation fails.

The conversion is done using the following semantic patch:

@@
expression size, align;
@@
- memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE)
+ memblock_phys_alloc(size, align)

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c   | 2 +-
 arch/sh/boards/mach-ap325rxa/setup.c | 2 +-
 arch/sh/boards/mach-ecovec24/setup.c | 4 ++--
 arch/sh/boards/mach-kfr2r09/setup.c  | 2 +-
 arch/sh/boards/mach-migor/setup.c| 2 +-
 arch/sh/boards/mach-se/7724/setup.c  | 4 ++--
 arch/xtensa/mm/kasan_init.c  | 3 +--
 7 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 478ea8b..b76b90e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -205,7 +205,7 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, 
phys_addr_t align)
 
BUG_ON(!arm_memblock_steal_permitted);
 
-   phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, align);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c 
b/arch/sh/boards/mach-ap325rxa/setup.c
index 8f234d04..d7ceab6 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -557,7 +557,7 @@ static void __init ap325rxa_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-ecovec24/setup.c 
b/arch/sh/boards/mach-ecovec24/setup.c
index 22b4106..a3901806 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1480,12 +1480,12 @@ static void __init ecovec_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu0_dma_membase = phys;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu1_dma_membase = phys;
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c 
b/arch/sh/boards/mach-kfr2r09/setup.c
index 203d249..55bdf4a 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -631,7 +631,7 @@ static void __init kfr2r09_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-migor/setup.c 
b/arch/sh/boards/mach-migor/setup.c
index f4ad33c..ba7eee6 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -630,7 +630,7 @@ static void __init migor_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-se/7724/setup.c 
b/arch/sh/boards/mach-se/7724/setup.c
index fdbec22a..4696e10 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -965,12 +965,12 @@ static void __init ms7724se_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu0_dma_membase = phys;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu1_dma_membase = phys;
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index 1734cda..48dbb03 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -52,8 +52,7 @@ static void __init populate(void *start, void *end)
 
for (k = 0; k < PTRS_PER_PTE; ++k, ++j) {
phys_addr_t p

[PATCH v2 20/21] memblock: memblock_alloc_try_nid: don't panic

2019-01-21 Thread Mike Rapoport
As all the memblock_alloc*() users now check the return value and panic() in
case of error, the panic() call can be removed from the core memblock
allocator, namely memblock_alloc_try_nid().
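
After this change the calling convention is uniform; every caller follows the
pattern established earlier in the series (sketch):

	ptr = memblock_alloc_try_nid(size, align, min_addr, max_addr, nid);
	if (!ptr)
		panic("%s: Failed to allocate %llu bytes\n",
		      __func__, (u64)size);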

Signed-off-by: Mike Rapoport 
---
 mm/memblock.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index 03b3929..7164275 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1526,7 +1526,7 @@ void * __init memblock_alloc_try_nid_nopanic(
 }
 
 /**
- * memblock_alloc_try_nid - allocate boot memory block with panicking
+ * memblock_alloc_try_nid - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
  * @align: alignment of the region and block's size
  * @min_addr: the lower bound of the memory region from where the allocation
@@ -1536,9 +1536,8 @@ void * __init memblock_alloc_try_nid_nopanic(
  *   allocate only from memory limited by memblock.current_limit value
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
- * Public panicking version of memblock_alloc_try_nid_nopanic()
- * which provides debug information (including caller info), if enabled,
- * and panics if the request can not be satisfied.
+ * Public function, provides additional debug information (including caller
+ * info), if enabled. This function zeroes the allocated memory.
  *
  * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
@@ -1555,14 +1554,10 @@ void * __init memblock_alloc_try_nid(
&max_addr, (void *)_RET_IP_);
ptr = memblock_alloc_internal(size, align,
   min_addr, max_addr, nid);
-   if (ptr) {
+   if (ptr)
memset(ptr, 0, size);
-   return ptr;
-   }
 
-   panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=%pa 
max_addr=%pa\n",
- __func__, (u64)size, (u64)align, nid, _addr, _addr);
-   return NULL;
+   return ptr;
 }
 
 /**
-- 
2.7.4



[PATCH v2 15/21] sparc: add checks for the return value of memblock_alloc*()

2019-01-21 Thread Mike Rapoport
Add panic() calls if memblock_alloc*() returns NULL.

Most of the changes are simply addition of

if (!ptr)
	panic();

statements after the calls to memblock_alloc*() variants.

Exceptions are pcpu_populate_pte() and kernel_map_range() that were
slightly refactored to accommodate the change.

Signed-off-by: Mike Rapoport 
Acked-by: David S. Miller 
---
 arch/sparc/kernel/prom_32.c  |  2 ++
 arch/sparc/kernel/setup_64.c |  6 ++
 arch/sparc/kernel/smp_64.c   | 12 
 arch/sparc/mm/init_64.c  | 11 +++
 arch/sparc/mm/srmmu.c|  8 
 5 files changed, 39 insertions(+)

diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index e7126ca..869b16c 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -33,6 +33,8 @@ void * __init prom_early_alloc(unsigned long size)
void *ret;
 
ret = memblock_alloc(size, SMP_CACHE_BYTES);
+   if (!ret)
+   panic("%s: Failed to allocate %lu bytes\n", __func__, size);
 
prom_early_allocated += size;
 
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 51c4d12..fd2182a 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -624,8 +624,14 @@ void __init alloc_irqstack_bootmem(void)
 
softirq_stack[i] = memblock_alloc_node(THREAD_SIZE,
   THREAD_SIZE, node);
+   if (!softirq_stack[i])
+   panic("%s: Failed to allocate %lu bytes align=%lx 
nid=%d\n",
+ __func__, THREAD_SIZE, THREAD_SIZE, node);
hardirq_stack[i] = memblock_alloc_node(THREAD_SIZE,
   THREAD_SIZE, node);
+   if (!hardirq_stack[i])
+   panic("%s: Failed to allocate %lu bytes align=%lx 
nid=%d\n",
+ __func__, THREAD_SIZE, THREAD_SIZE, node);
}
 }
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index f45d876..a8275fe 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1628,6 +1628,8 @@ static void __init pcpu_populate_pte(unsigned long addr)
pud_t *new;
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
pgd_populate(&init_mm, pgd, new);
}
 
@@ -1636,6 +1638,8 @@ static void __init pcpu_populate_pte(unsigned long addr)
pmd_t *new;
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
pud_populate(&init_mm, pud, new);
}
 
@@ -1644,8 +1648,16 @@ static void __init pcpu_populate_pte(unsigned long addr)
pte_t *new;
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
pmd_populate_kernel(&init_mm, pmd, new);
}
+
+   return;
+
+err_alloc:
+   panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index ef340e8..f2d70ff 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1809,6 +1809,8 @@ static unsigned long __ref kernel_map_range(unsigned long 
pstart,
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
  PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
alloc_bytes += PAGE_SIZE;
pgd_populate(&init_mm, pgd, new);
}
@@ -1822,6 +1824,8 @@ static unsigned long __ref kernel_map_range(unsigned long 
pstart,
}
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
  PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
alloc_bytes += PAGE_SIZE;
pud_populate(&init_mm, pud, new);
}
@@ -1836,6 +1840,8 @@ static unsigned long __ref kernel_map_range(unsigned long 
pstart,
}
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
  PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
alloc_bytes += PAGE_SIZE;
pmd_populate_kernel(&init_mm, pmd, new);
}
@@ -1855,6 +1861,11 @@ static unsigned long __ref kernel_map_range(unsigned 
long pstart,
}
 
return alloc_bytes;
+
+err_alloc:
+  

[PATCH v2 18/21] swiotlb: add checks for the return value of memblock_alloc*()

2019-01-21 Thread Mike Rapoport
Add panic() calls if memblock_alloc() returns NULL.

The panic() format duplicates the one used by memblock itself, and in order
to avoid an explosion of long parameter lists, the open coded allocation size
calculations are replaced with a local variable.

Signed-off-by: Mike Rapoport 
---
 kernel/dma/swiotlb.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d636177..e78835c8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -191,6 +191,7 @@ void __init swiotlb_update_mem_attributes(void)
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
unsigned long i, bytes;
+   size_t alloc_size;
 
bytes = nslabs << IO_TLB_SHIFT;
 
@@ -203,12 +204,18 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
nslabs, int verbose)
 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 * between io_tlb_start and io_tlb_end.
 */
-   io_tlb_list = memblock_alloc(
-   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
-   PAGE_SIZE);
-   io_tlb_orig_addr = memblock_alloc(
-   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
-   PAGE_SIZE);
+   alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
+   io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
+   if (!io_tlb_list)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+
+   alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
+   io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
+   if (!io_tlb_orig_addr)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+
for (i = 0; i < io_tlb_nslabs; i++) {
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-- 
2.7.4



[PATCH v2 09/21] memblock: drop memblock_alloc_base()

2019-01-21 Thread Mike Rapoport
The memblock_alloc_base() function tries to allocate memory up to the
limit specified by its max_addr parameter and panics if the allocation
fails. Replace its usage with memblock_phys_alloc_range() and make the
callers check the return value and panic in case of error.
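
The conversion pattern, sketched (the explicit panic() mirrors the message
that the dropped function printed internally):

	/* before: panics internally on failure */
	addr = memblock_alloc_base(size, align, max_addr);

	/* after: the caller checks the result and panics explicitly */
	addr = memblock_phys_alloc_range(size, align, 0, max_addr);
	if (!addr)
		panic("ERROR: Failed to allocate %pa bytes below %pa\n",
		      &size, &max_addr);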

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kernel/rtas.c  |  6 +-
 arch/powerpc/mm/hash_utils_64.c |  8 ++--
 arch/s390/kernel/smp.c  |  6 +-
 drivers/macintosh/smu.c |  2 +-
 include/linux/memblock.h|  2 --
 mm/memblock.c   | 14 --
 6 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index de35bd8f..fbc6761 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1187,7 +1187,11 @@ void __init rtas_initialize(void)
ibm_suspend_me_token = rtas_token("ibm,suspend-me");
}
 #endif
-   rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, 
rtas_region);
+   rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+0, rtas_region);
+   if (!rtas_rmo_buf)
+   panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
+ PAGE_SIZE, &rtas_region);
 
 #ifdef CONFIG_RTAS_ERROR_LOGGING
rtas_last_error_token = rtas_token("rtas-last-error");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index bc6be44..c7d5f48 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -882,8 +882,12 @@ static void __init htab_initialize(void)
}
 #endif /* CONFIG_PPC_CELL */
 
-   table = memblock_alloc_base(htab_size_bytes, htab_size_bytes,
-   limit);
+   table = memblock_phys_alloc_range(htab_size_bytes,
+ htab_size_bytes,
+ 0, limit);
+   if (!table)
+   panic("ERROR: Failed to allocate %pa bytes below %pa\n",
+ &htab_size_bytes, &limit);
 
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index f82b3d3..9061597 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -651,7 +651,11 @@ void __init smp_save_dump_cpus(void)
/* No previous system present, normal boot. */
return;
/* Allocate a page as dumping area for the store status sigps */
-   page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
+   page = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, 1UL << 31);
+   if (!page)
+   panic("ERROR: Failed to allocate %x bytes below %lx\n",
+ PAGE_SIZE, 1UL << 31);
+
/* Set multi-threading state to the previous system. */
pcpu_set_smt(sclp.mtid_prev);
boot_cpu_addr = stap();
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 0a0b8e1..42cf68d 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -485,7 +485,7 @@ int __init smu_init (void)
 * SMU based G5s need some memory below 2Gb. Thankfully this is
 * called at a time where memblock is still available.
 */
-   smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL);
+   smu_cmdbuf_abs = memblock_phys_alloc_range(4096, 4096, 0, 0x80000000UL);
if (smu_cmdbuf_abs == 0) {
printk(KERN_ERR "SMU: Command buffer allocation failed !\n");
ret = -EINVAL;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 768e2b4..6874fdc 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -494,8 +494,6 @@ static inline bool memblock_bottom_up(void)
return memblock.bottom_up;
 }
 
-phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
-   phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_mem_size(unsigned long limit_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index e5ffdcd..531fa77 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1363,20 +1363,6 @@ phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t 
size, phys_addr_t align,
return ret;
 }
 
-phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, 
phys_addr_t max_addr)
-{
-   phys_addr_t alloc;
-
-   alloc = memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
-   MEMBLOCK_NONE);
-
-   if (alloc == 0)
-   panic("ERROR: Failed to allocate %pa bytes below %pa.\n",
- 

[PATCH v2 21/21] memblock: drop memblock_alloc_*_nopanic() variants

2019-01-21 Thread Mike Rapoport
As all the memblock allocation functions return NULL in case of error rather
than panicking, the duplicates with the _nopanic suffix can be removed.
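
The replacement is purely mechanical, e.g. (sketch; both forms now return
NULL on failure):

	ptr = memblock_alloc_nopanic(size, align);
	ptr = memblock_alloc(size, align);			/* new form */

	ptr = memblock_alloc_from_nopanic(size, align, min_addr);
	ptr = memblock_alloc_from(size, align, min_addr);	/* new form */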

Signed-off-by: Mike Rapoport 
Acked-by: Greg Kroah-Hartman 
---
 arch/arc/kernel/unwind.c   |  3 +--
 arch/sh/mm/init.c  |  2 +-
 arch/x86/kernel/setup_percpu.c | 10 +-
 arch/x86/mm/kasan_init_64.c| 14 --
 drivers/firmware/memmap.c  |  2 +-
 drivers/usb/early/xhci-dbc.c   |  2 +-
 include/linux/memblock.h   | 35 ---
 kernel/dma/swiotlb.c   |  2 +-
 kernel/printk/printk.c |  9 +
 mm/memblock.c  | 35 ---
 mm/page_alloc.c| 10 +-
 mm/page_ext.c  |  2 +-
 mm/percpu.c| 11 ---
 mm/sparse.c|  6 ++
 14 files changed, 31 insertions(+), 112 deletions(-)

diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index d34f69e..271e9fa 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -181,8 +181,7 @@ static void init_unwind_hdr(struct unwind_table *table,
  */
 static void *__init unw_hdr_alloc_early(unsigned long sz)
 {
-   return memblock_alloc_from_nopanic(sz, sizeof(unsigned int),
-  MAX_DMA_ADDRESS);
+   return memblock_alloc_from(sz, sizeof(unsigned int), MAX_DMA_ADDRESS);
 }
 
 static void *unw_hdr_alloc(unsigned long sz)
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index fceefd9..7062132 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -202,7 +202,7 @@ void __init allocate_pgdat(unsigned int nid)
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-   NODE_DATA(nid) = memblock_alloc_try_nid_nopanic(
+   NODE_DATA(nid) = memblock_alloc_try_nid(
sizeof(struct pglist_data),
SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
MEMBLOCK_ALLOC_ACCESSIBLE, nid);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e8796fc..0c5e9bf 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
unsigned long size,
void *ptr;
 
if (!node_online(node) || !NODE_DATA(node)) {
-   ptr = memblock_alloc_from_nopanic(size, align, goal);
+   ptr = memblock_alloc_from(size, align, goal);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
 cpu, size, __pa(ptr));
} else {
-   ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
-MEMBLOCK_ALLOC_ACCESSIBLE,
-node);
+   ptr = memblock_alloc_try_nid(size, align, goal,
+MEMBLOCK_ALLOC_ACCESSIBLE,
+node);
 
pr_debug("per cpu data for cpu%d %lu bytes on node%d at 
%016lx\n",
 cpu, size, node, __pa(ptr));
}
return ptr;
 #else
-   return memblock_alloc_from_nopanic(size, align, goal);
+   return memblock_alloc_from(size, align, goal);
 #endif
 }
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 462fde8..8dc0fc0 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -24,14 +24,16 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
-static __init void *early_alloc(size_t size, int nid, bool panic)
+static __init void *early_alloc(size_t size, int nid, bool should_panic)
 {
-   if (panic)
-   return memblock_alloc_try_nid(size, size,
-   __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-   else
-   return memblock_alloc_try_nid_nopanic(size, size,
+   void *ptr = memblock_alloc_try_nid(size, size,
__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+
+   if (!ptr && should_panic)
+   panic("%pS: Failed to allocate page, nid=%d from=%lx\n",
+ (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS));
+
+   return ptr;
 }
 
 static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index ec4fd25..d168c87 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -333,7 +333,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const 
char *type)
 {
struct firmware_map_entry *entry;
 
-   

[PATCH v2 05/21] memblock: emphasize that memblock_alloc_range() returns a physical address

2019-01-21 Thread Mike Rapoport
Rename memblock_alloc_range() to memblock_phys_alloc_range() to emphasize
that it returns a physical address.
While on it, remove the 'enum memblock_flags' parameter from this function
as its only user anyway sets it to MEMBLOCK_NONE, which is the default for
the most of memblock allocations.
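
So a former call (sketch, mirroring the cma.c hunk below):

	addr = memblock_alloc_range(size, align, start, end, MEMBLOCK_NONE);

becomes:

	addr = memblock_phys_alloc_range(size, align, start, end);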

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h |  5 ++---
 mm/cma.c | 10 --
 mm/memblock.c|  9 +
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f7ef313..66dfdb3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -369,6 +369,8 @@ static inline int memblock_get_region_node(const struct 
memblock_region *r)
 #define ARCH_LOW_ADDRESS_LIMIT  0xUL
 #endif
 
+phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
+ phys_addr_t start, phys_addr_t end);
 phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int 
nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, 
int nid);
 
@@ -487,9 +489,6 @@ static inline bool memblock_bottom_up(void)
return memblock.bottom_up;
 }
 
-phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
-   phys_addr_t start, phys_addr_t end,
-   enum memblock_flags flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/cma.c b/mm/cma.c
index c7b39dd..e4530ae 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -327,16 +327,14 @@ int __init cma_declare_contiguous(phys_addr_t base,
 * memory in case of failure.
 */
if (base < highmem_start && limit > highmem_start) {
-   addr = memblock_alloc_range(size, alignment,
-   highmem_start, limit,
-   MEMBLOCK_NONE);
+   addr = memblock_phys_alloc_range(size, alignment,
+highmem_start, limit);
limit = highmem_start;
}
 
if (!addr) {
-   addr = memblock_alloc_range(size, alignment, base,
-   limit,
-   MEMBLOCK_NONE);
+   addr = memblock_phys_alloc_range(size, alignment, base,
+limit);
if (!addr) {
ret = -ENOMEM;
goto err;
diff --git a/mm/memblock.c b/mm/memblock.c
index c80029e..f019aee 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1338,12 +1338,13 @@ static phys_addr_t __init 
memblock_alloc_range_nid(phys_addr_t size,
return 0;
 }
 
-phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
-   phys_addr_t start, phys_addr_t end,
-   enum memblock_flags flags)
+phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
+phys_addr_t align,
+phys_addr_t start,
+phys_addr_t end)
 {
return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
-   flags);
+   MEMBLOCK_NONE);
 }
 
 phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t 
align, int nid)
-- 
2.7.4



[PATCH v2 16/21] mm/percpu: add checks for the return value of memblock_alloc*()

2019-01-21 Thread Mike Rapoport
Add panic() calls if memblock_alloc() returns NULL.

The panic() format duplicates the one used by memblock itself, and in order
to avoid an explosion of long parameter lists, the open coded allocation size
calculations are replaced with a local variable.

Signed-off-by: Mike Rapoport 
---
 mm/percpu.c | 73 +++--
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index db86282..5998b03 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1086,6 +1086,7 @@ static struct pcpu_chunk * __init 
pcpu_alloc_first_chunk(unsigned long tmp_addr,
struct pcpu_chunk *chunk;
unsigned long aligned_addr, lcm_align;
int start_offset, offset_bits, region_size, region_bits;
+   size_t alloc_size;
 
/* region calculations */
aligned_addr = tmp_addr & PAGE_MASK;
@@ -1101,9 +1102,12 @@ static struct pcpu_chunk * __init 
pcpu_alloc_first_chunk(unsigned long tmp_addr,
region_size = ALIGN(start_offset + map_size, lcm_align);
 
/* allocate chunk */
-   chunk = memblock_alloc(sizeof(struct pcpu_chunk) +
-  BITS_TO_LONGS(region_size >> PAGE_SHIFT),
-  SMP_CACHE_BYTES);
+   alloc_size = sizeof(struct pcpu_chunk) +
+   BITS_TO_LONGS(region_size >> PAGE_SHIFT);
+   chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
 
INIT_LIST_HEAD(&chunk->list);
 
@@ -1114,12 +1118,25 @@ static struct pcpu_chunk * __init 
pcpu_alloc_first_chunk(unsigned long tmp_addr,
chunk->nr_pages = region_size >> PAGE_SHIFT;
region_bits = pcpu_chunk_map_bits(chunk);
 
-   chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * 
sizeof(chunk->alloc_map[0]),
- SMP_CACHE_BYTES);
-   chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * 
sizeof(chunk->bound_map[0]),
- SMP_CACHE_BYTES);
-   chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * 
sizeof(chunk->md_blocks[0]),
- SMP_CACHE_BYTES);
+   alloc_size = BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]);
+   chunk->alloc_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk->alloc_map)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
+   alloc_size =
+   BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]);
+   chunk->bound_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk->bound_map)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
+   alloc_size = pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]);
+   chunk->md_blocks = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk->md_blocks)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
pcpu_init_md_blocks(chunk);
 
/* manage populated page bitmap */
@@ -2044,6 +2061,7 @@ int __init pcpu_setup_first_chunk(const struct 
pcpu_alloc_info *ai,
int group, unit, i;
int map_size;
unsigned long tmp_addr;
+   size_t alloc_size;
 
 #define PCPU_SETUP_BUG_ON(cond)do {
\
if (unlikely(cond)) {   \
@@ -2075,14 +2093,29 @@ int __init pcpu_setup_first_chunk(const struct 
pcpu_alloc_info *ai,
PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
/* process group information and build config tables accordingly */
-   group_offsets = memblock_alloc(ai->nr_groups * sizeof(group_offsets[0]),
-  SMP_CACHE_BYTES);
-   group_sizes = memblock_alloc(ai->nr_groups * sizeof(group_sizes[0]),
-SMP_CACHE_BYTES);
-   unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]),
- SMP_CACHE_BYTES);
-   unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]),
- SMP_CACHE_BYTES);
+   alloc_size = ai->nr_groups * sizeof(group_offsets[0]);
+   group_offsets = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!group_offsets)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
+   alloc_size = ai->nr_groups * sizeof(group_sizes[0]);
+   group_sizes = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!group_sizes)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+

[PATCH v2 17/21] init/main: add checks for the return value of memblock_alloc*()

2019-01-21 Thread Mike Rapoport
Add panic() calls if memblock_alloc() returns NULL.

The panic() format duplicates the one used by memblock itself, and in order
to avoid an explosion of long parameter lists, the open coded allocation size
calculations are replaced with a local variable.

Signed-off-by: Mike Rapoport 
---
 init/main.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/init/main.c b/init/main.c
index a56f65a..d58a365 100644
--- a/init/main.c
+++ b/init/main.c
@@ -373,12 +373,20 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) 
{ }
  */
 static void __init setup_command_line(char *command_line)
 {
-   saved_command_line =
-   memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
-   initcall_command_line =
-   memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
-   static_command_line = memblock_alloc(strlen(command_line) + 1,
-SMP_CACHE_BYTES);
+   size_t len = strlen(boot_command_line) + 1;
+
+   saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+   if (!saved_command_line)
+   panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+   initcall_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+   if (!initcall_command_line)
+   panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+   static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+   if (!static_command_line)
+   panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
strcpy(saved_command_line, boot_command_line);
strcpy(static_command_line, command_line);
 }
@@ -773,8 +781,14 @@ static int __init initcall_blacklist(char *str)
pr_debug("blacklisting initcall %s\n", str_entry);
entry = memblock_alloc(sizeof(*entry),
   SMP_CACHE_BYTES);
+   if (!entry)
+   panic("%s: Failed to allocate %zu bytes\n",
+ __func__, sizeof(*entry));
entry->buf = memblock_alloc(strlen(str_entry) + 1,
SMP_CACHE_BYTES);
+   if (!entry->buf)
+   panic("%s: Failed to allocate %zu bytes\n",
+ __func__, strlen(str_entry) + 1);
strcpy(entry->buf, str_entry);
list_add(&entry->next, &blacklisted_initcalls);
}
-- 
2.7.4



[PATCH v2 19/21] treewide: add checks for the return value of memblock_alloc*()

2019-01-21 Thread Mike Rapoport
Add checks for the return value of the memblock_alloc*() functions and call
panic() in case of error.
The panic message repeats the one used by the panicking memblock allocators,
with the parameters adjusted to include only the relevant ones.

The replacement was mostly automated with semantic patches like the one
below, with manual massaging of the format strings.

@@
expression ptr, size, align;
@@
ptr = memblock_alloc(size, align);
+ if (!ptr)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__,
size, align);

Signed-off-by: Mike Rapoport 
Reviewed-by: Guo Ren  # c-sky
Acked-by: Paul Burton  # MIPS
Acked-by: Heiko Carstens  # s390
Reviewed-by: Juergen Gross  # Xen
---
 arch/alpha/kernel/core_cia.c  |  3 +++
 arch/alpha/kernel/core_marvel.c   |  6 ++
 arch/alpha/kernel/pci-noop.c  | 13 +++--
 arch/alpha/kernel/pci.c   | 11 ++-
 arch/alpha/kernel/pci_iommu.c | 12 
 arch/arc/mm/highmem.c |  4 
 arch/arm/kernel/setup.c   |  6 ++
 arch/arm/mm/mmu.c | 14 +-
 arch/arm64/kernel/setup.c |  8 +---
 arch/arm64/mm/kasan_init.c| 10 ++
 arch/c6x/mm/dma-coherent.c|  4 
 arch/c6x/mm/init.c|  3 +++
 arch/csky/mm/highmem.c|  5 +
 arch/h8300/mm/init.c  |  3 +++
 arch/m68k/atari/stram.c   |  4 
 arch/m68k/mm/init.c   |  3 +++
 arch/m68k/mm/mcfmmu.c |  6 ++
 arch/m68k/mm/motorola.c   |  9 +
 arch/m68k/mm/sun3mmu.c|  6 ++
 arch/m68k/sun3/sun3dvma.c |  3 +++
 arch/microblaze/mm/init.c |  8 ++--
 arch/mips/cavium-octeon/dma-octeon.c  |  3 +++
 arch/mips/kernel/setup.c  |  3 +++
 arch/mips/kernel/traps.c  |  3 +++
 arch/mips/mm/init.c   |  5 +
 arch/nds32/mm/init.c  | 12 
 arch/openrisc/mm/ioremap.c|  8 ++--
 arch/powerpc/kernel/dt_cpu_ftrs.c |  5 +
 arch/powerpc/kernel/pci_32.c  |  3 +++
 arch/powerpc/kernel/setup-common.c|  3 +++
 arch/powerpc/kernel/setup_64.c|  4 
 arch/powerpc/lib/alloc.c  |  3 +++
 arch/powerpc/mm/hash_utils_64.c   |  3 +++
 arch/powerpc/mm/mmu_context_nohash.c  |  9 +
 arch/powerpc/mm/pgtable-book3e.c  | 12 ++--
 arch/powerpc/mm/pgtable-book3s64.c|  3 +++
 arch/powerpc/mm/pgtable-radix.c   |  9 -
 arch/powerpc/mm/ppc_mmu_32.c  |  3 +++
 arch/powerpc/platforms/pasemi/iommu.c |  3 +++
 arch/powerpc/platforms/powermac/nvram.c   |  3 +++
 arch/powerpc/platforms/powernv/opal.c |  3 +++
 arch/powerpc/platforms/powernv/pci-ioda.c |  8 
 arch/powerpc/platforms/ps3/setup.c|  3 +++
 arch/powerpc/sysdev/msi_bitmap.c  |  3 +++
 arch/s390/kernel/setup.c  | 13 +
 arch/s390/kernel/smp.c|  5 -
 arch/s390/kernel/topology.c   |  6 ++
 arch/s390/numa/mode_emu.c |  3 +++
 arch/s390/numa/numa.c |  6 +-
 arch/sh/mm/init.c |  6 ++
 arch/sh/mm/numa.c |  4 
 arch/um/drivers/net_kern.c|  3 +++
 arch/um/drivers/vector_kern.c |  3 +++
 arch/um/kernel/initrd.c   |  2 ++
 arch/um/kernel/mem.c  | 16 
 arch/unicore32/kernel/setup.c |  4 
 arch/unicore32/mm/mmu.c   | 15 +--
 arch/x86/kernel/acpi/boot.c   |  3 +++
 arch/x86/kernel/apic/io_apic.c|  5 +
 arch/x86/kernel/e820.c|  3 +++
 arch/x86/platform/olpc/olpc_dt.c  |  3 +++
 arch/x86/xen/p2m.c| 11 +--
 arch/xtensa/mm/kasan_init.c   |  4 
 arch/xtensa/mm/mmu.c  |  3 +++
 drivers/clk/ti/clk.c  |  3 +++
 drivers/macintosh/smu.c   |  3 +++
 drivers/of/fdt.c  |  8 +++-
 drivers/of/unittest.c |  8 +++-
 drivers/xen/swiotlb-xen.c |  7 +--
 kernel/power/snapshot.c   |  3 +++
 lib/cpumask.c |  3 +++
 mm/kasan/init.c   | 10 --
 mm/sparse.c   | 19 +--
 73 files changed, 409 insertions(+), 28 deletions(-)

diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index 466cd44..f489170 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -33

[PATCH v2 01/21] openrisc: prefer memblock APIs returning virtual address

2019-01-21 Thread Mike Rapoport
The allocation of the page table memory in openrisc uses
memblock_phys_alloc() and then converts the returned physical address to a
virtual one. Use memblock_alloc_raw() instead and add a panic() if the
allocation fails.

Signed-off-by: Mike Rapoport 
---
 arch/openrisc/mm/init.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index d157310..caeb418 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -105,7 +105,10 @@ static void __init map_ram(void)
}
 
/* Alloc one page for holding PTE's... */
-   pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, 
PAGE_SIZE));
+   pte = memblock_alloc_raw(PAGE_SIZE, PAGE_SIZE);
+   if (!pte)
+   panic("%s: Failed to allocate page for PTEs\n",
+ __func__);
set_pmd(pme, __pmd(_KERNPG_TABLE + __pa(pte)));
 
/* Fill the newly allocated page with PTE'S */
-- 
2.7.4



[PATCH 00/21] Refine memblock API

2019-01-16 Thread Mike Rapoport
Hi,

The current memblock API is quite extensive and, more annoyingly,
duplicated. Apart from the low-level functions that allow searching for a
free memory region and marking it as reserved, memblock provides three
(well, two and a half) sets of functions to allocate memory. There are
several overlapping functions that return a physical address and there are
functions that return a virtual address. Those that return the virtual
address may also clear the allocated memory. And, on top of all that, some
allocators panic and some return NULL in case of error.

This set tries to reduce the mess and trim down the number of memblock
allocation methods.

Patches 1-10 consolidate the functions that return physical address of
the allocated memory

Patches 11-13 are some trivial cleanups

Patches 14-19 add checks for the return value of memblock_alloc*() and
panic in case of errors. Patches 14-18 include some minor refactoring for
better readability of the resulting code, and patch 19 is a mechanical
addition of

if (!ptr)
panic();

after memblock_alloc*() calls.

And, finally, patches 20 and 21 remove the panic() calls from memblock and
drop the _nopanic variants.
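
The end state the series drives toward is a single calling convention:
every allocator returns NULL (or 0 for the physical variants) on failure
and the caller decides how to react. A minimal sketch of the resulting
pattern (the function names are the real memblock API, the surrounding
code is illustrative only):

        /* virtual address, zeroed memory */
        ptr = memblock_alloc(size, SMP_CACHE_BYTES);
        if (!ptr)
                panic("%s: Failed to allocate %zu bytes\n", __func__, size);

        /* physical address with an explicit upper bound, e.g. below 4G */
        phys = memblock_phys_alloc_range(size, align, 0, SZ_4G);
        if (!phys)
                pr_warn("allocation of %zu bytes failed\n", size);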

Christophe Leroy (1):
  powerpc: use memblock functions returning virtual address

Mike Rapoport (20):
  openrisc: prefer memblock APIs returning virtual address
  memblock: replace memblock_alloc_base(ANYWHERE) with memblock_phys_alloc
  memblock: drop memblock_alloc_base_nid()
  memblock: emphasize that memblock_alloc_range() returns a physical address
  memblock: memblock_phys_alloc_try_nid(): don't panic
  memblock: memblock_phys_alloc(): don't panic
  memblock: drop __memblock_alloc_base()
  memblock: drop memblock_alloc_base()
  memblock: refactor internal allocation functions
  memblock: make memblock_find_in_range_node() and choose_memblock_flags() 
static
  arch: use memblock_alloc() instead of memblock_alloc_from(size, align, 0)
  arch: don't memset(0) memory returned by memblock_alloc()
  ia64: add checks for the return value of memblock_alloc*()
  sparc: add checks for the return value of memblock_alloc*()
  mm/percpu: add checks for the return value of memblock_alloc*()
  init/main: add checks for the return value of memblock_alloc*()
  swiotlb: add checks for the return value of memblock_alloc*()
  treewide: add checks for the return value of memblock_alloc*()
  memblock: memblock_alloc_try_nid: don't panic
  memblock: drop memblock_alloc_*_nopanic() variants

 arch/alpha/kernel/core_cia.c  |   5 +-
 arch/alpha/kernel/core_marvel.c   |   6 +
 arch/alpha/kernel/pci-noop.c  |  11 +-
 arch/alpha/kernel/pci.c   |  11 +-
 arch/alpha/kernel/pci_iommu.c |  16 +-
 arch/alpha/kernel/setup.c |   2 +-
 arch/arc/kernel/unwind.c  |   3 +-
 arch/arc/mm/highmem.c |   4 +
 arch/arm/kernel/setup.c   |   6 +
 arch/arm/mm/init.c|   6 +-
 arch/arm/mm/mmu.c |  14 +-
 arch/arm64/kernel/setup.c |   9 +-
 arch/arm64/mm/kasan_init.c|  10 ++
 arch/arm64/mm/mmu.c   |   2 +
 arch/arm64/mm/numa.c  |   4 +
 arch/c6x/mm/dma-coherent.c|   4 +
 arch/c6x/mm/init.c|   4 +-
 arch/csky/mm/highmem.c|   5 +
 arch/h8300/mm/init.c  |   4 +-
 arch/ia64/kernel/mca.c|  25 +--
 arch/ia64/mm/contig.c |   8 +-
 arch/ia64/mm/discontig.c  |   4 +
 arch/ia64/mm/init.c   |  38 -
 arch/ia64/mm/tlb.c|   6 +
 arch/ia64/sn/kernel/io_common.c   |   3 +
 arch/ia64/sn/kernel/setup.c   |  12 +-
 arch/m68k/atari/stram.c   |   4 +
 arch/m68k/mm/init.c   |   3 +
 arch/m68k/mm/mcfmmu.c |   7 +-
 arch/m68k/mm/motorola.c   |   9 ++
 arch/m68k/mm/sun3mmu.c|   6 +
 arch/m68k/sun3/sun3dvma.c |   3 +
 arch/microblaze/mm/init.c |  10 +-
 arch/mips/cavium-octeon/dma-octeon.c  |   3 +
 arch/mips/kernel/setup.c  |   3 +
 arch/mips/kernel/traps.c  |   5 +-
 arch/mips/mm/init.c   |   5 +
 arch/nds32/mm/init.c  |  12 ++
 arch/openrisc/mm/init.c   |   5 +-
 arch/openrisc/mm/ioremap.c|   8 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c |   8 +-
 arch/powerpc/kernel/irq.c |   5 -
 arch/powerpc/kernel/paca.c|   6 +-
 arch/powerpc/kernel/pci_32.c  |   3 +
 arch/powerpc/kernel/prom.c|   5 +-
 arch/powerpc/kernel/rtas.c|   6 +-
 arch/powerpc/kernel/setup-common.c|   3 +
 arch/powerpc/kernel/setup_32.c|  26 ++--
 arch/powerpc

[PATCH 06/21] memblock: memblock_phys_alloc_try_nid(): don't panic

2019-01-16 Thread Mike Rapoport
The memblock_phys_alloc_try_nid() function tries to allocate memory from
the requested node and then falls back to allocation from any node in the
system. The memblock_alloc_base() fallback used by this function panics if
the allocation fails.

Replace the memblock_alloc_base() fallback with the direct call to
memblock_alloc_range_nid() and update the memblock_phys_alloc_try_nid()
callers to check the returned value and panic in case of error.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/mm/numa.c   | 4 
 arch/powerpc/mm/numa.c | 4 
 mm/memblock.c  | 4 +++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index ae34e3a..2c61ea4 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -237,6 +237,10 @@ static void __init setup_node_data(int nid, u64 start_pfn, 
u64 end_pfn)
pr_info("Initmem setup node %d []\n", nid);
 
nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+   if (!nd_pa)
+   panic("Cannot allocate %zu bytes for node %d data\n",
+ nd_size, nid);
+
nd = __va(nd_pa);
 
/* report and initialize */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 270cefb..8f2bbe1 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -788,6 +788,10 @@ static void __init setup_node_data(int nid, u64 start_pfn, 
u64 end_pfn)
int tnid;
 
nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+   if (!nd_pa)
+   panic("Cannot allocate %zu bytes for node %d data\n",
+ nd_size, nid);
+
nd = __va(nd_pa);
 
/* report and initialize */
diff --git a/mm/memblock.c b/mm/memblock.c
index f019aee..8aabb1b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1393,7 +1393,9 @@ phys_addr_t __init 
memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
 
if (res)
return res;
-   return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+   return memblock_alloc_range_nid(size, align, 0,
+   MEMBLOCK_ALLOC_ACCESSIBLE,
+   NUMA_NO_NODE, MEMBLOCK_NONE);
 }
 
 /**
-- 
2.7.4



[PATCH 01/21] openrisc: prefer memblock APIs returning virtual address

2019-01-16 Thread Mike Rapoport
The allocation of the page tables memory in openrisc uses
memblock_phys_alloc() and then converts the returned physical address to a
virtual one. Use memblock_alloc_raw() and add a panic() if the allocation
fails.

Signed-off-by: Mike Rapoport 
---
 arch/openrisc/mm/init.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index d157310..caeb418 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -105,7 +105,10 @@ static void __init map_ram(void)
}
 
/* Alloc one page for holding PTE's... */
-   pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, 
PAGE_SIZE));
+   pte = memblock_alloc_raw(PAGE_SIZE, PAGE_SIZE);
+   if (!pte)
+   panic("%s: Failed to allocate page for PTEs\n",
+ __func__);
set_pmd(pme, __pmd(_KERNPG_TABLE + __pa(pte)));
 
/* Fill the newly allocated page with PTE'S */
-- 
2.7.4



[PATCH 12/21] arch: use memblock_alloc() instead of memblock_alloc_from(size, align, 0)

2019-01-16 Thread Mike Rapoport
The last parameter of memblock_alloc_from() is the lower limit for the
memory allocation. When it is 0, the call is equivalent to
memblock_alloc().
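
For reference, memblock_alloc_from() is a thin convenience wrapper in this
kernel version; assuming it forwards to memblock_alloc_try_nid() as the
memblock headers of this period do, the equivalence is:

        /* these two requests are identical */
        ptr = memblock_alloc_from(size, align, 0);
        ptr = memblock_alloc(size, align);

        /* both expand to roughly */
        ptr = memblock_alloc_try_nid(size, align, 0 /* min_addr */,
                                     MEMBLOCK_ALLOC_ACCESSIBLE,
                                     NUMA_NO_NODE);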

Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/core_cia.c  |  2 +-
 arch/alpha/kernel/pci_iommu.c |  4 ++--
 arch/alpha/kernel/setup.c |  2 +-
 arch/ia64/kernel/mca.c|  3 +--
 arch/mips/kernel/traps.c  |  2 +-
 arch/sparc/kernel/prom_32.c   |  2 +-
 arch/sparc/mm/init_32.c   |  2 +-
 arch/sparc/mm/srmmu.c | 10 +-
 8 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index 867e873..466cd44 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -331,7 +331,7 @@ cia_prepare_tbia_workaround(int window)
long i;
 
/* Use minimal 1K map. */
-   ppte = memblock_alloc_from(CIA_BROKEN_TBIA_SIZE, 32768, 0);
+   ppte = memblock_alloc(CIA_BROKEN_TBIA_SIZE, 32768);
pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1;
 
for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i)
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index aa0f50d..e4cf77b 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -87,13 +87,13 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, 
dma_addr_t base,
printk("%s: couldn't allocate arena ptes from node %d\n"
   "falling back to system-wide allocation\n",
   __func__, nid);
-   arena->ptes = memblock_alloc_from(mem_size, align, 0);
+   arena->ptes = memblock_alloc(mem_size, align);
}
 
 #else /* CONFIG_DISCONTIGMEM */
 
arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
-   arena->ptes = memblock_alloc_from(mem_size, align, 0);
+   arena->ptes = memblock_alloc(mem_size, align);
 
 #endif /* CONFIG_DISCONTIGMEM */
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 4b5b1b2..5d4c76a 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -293,7 +293,7 @@ move_initrd(unsigned long mem_limit)
unsigned long size;
 
size = initrd_end - initrd_start;
-   start = memblock_alloc_from(PAGE_ALIGN(size), PAGE_SIZE, 0);
+   start = memblock_alloc(PAGE_ALIGN(size), PAGE_SIZE);
if (!start || __pa(start) + size > mem_limit) {
initrd_start = initrd_end = 0;
return NULL;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 91bd1e1..74d148b 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1835,8 +1835,7 @@ format_mca_init_stack(void *mca_data, unsigned long 
offset,
 /* Caller prevents this from being called after init */
 static void * __ref mca_bootmem(void)
 {
-   return memblock_alloc_from(sizeof(struct ia64_mca_cpu),
-  KERNEL_STACK_SIZE, 0);
+   return memblock_alloc(sizeof(struct ia64_mca_cpu), KERNEL_STACK_SIZE);
 }
 
 /* Do per-CPU MCA-related initialization.  */
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c91097f..2bbdee5 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2291,7 +2291,7 @@ void __init trap_init(void)
phys_addr_t ebase_pa;
 
ebase = (unsigned long)
-   memblock_alloc_from(size, 1 << fls(size), 0);
+   memblock_alloc(size, 1 << fls(size));
 
/*
 * Try to ensure ebase resides in KSeg0 if possible.
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 42d7f2a..38940af 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -32,7 +32,7 @@ void * __init prom_early_alloc(unsigned long size)
 {
void *ret;
 
-   ret = memblock_alloc_from(size, SMP_CACHE_BYTES, 0UL);
+   ret = memblock_alloc(size, SMP_CACHE_BYTES);
if (ret != NULL)
memset(ret, 0, size);
 
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index d900952..a8ff298 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -264,7 +264,7 @@ void __init mem_init(void)
i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
i += 1;
sparc_valid_addr_bitmap = (unsigned long *)
-   memblock_alloc_from(i << 2, SMP_CACHE_BYTES, 0UL);
+   memblock_alloc(i << 2, SMP_CACHE_BYTES);
 
if (sparc_valid_addr_bitmap == NULL) {
prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index b609362..a400ec3 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -303,13 +303,13 @@ static void __init srmmu_nocache_init(void)
 
bitmap_bits = srmmu_nocache_size >> SRMMU_N

[PATCH 08/21] memblock: drop __memblock_alloc_base()

2019-01-16 Thread Mike Rapoport
The __memblock_alloc_base() function tries to allocate memory up to the
limit specified by its max_addr parameter. Depending on the value of this
parameter, __memblock_alloc_base() is replaced with the appropriate
memblock_phys_alloc*() variant.
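
The replacement rule, sketched from the conversions in the diff below
(illustrative pseudocode, not a hunk from the patch):

        /* no real limit: max_addr == MEMBLOCK_ALLOC_ACCESSIBLE */
        addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
        /* becomes */
        addr = memblock_phys_alloc(size, align);

        /* an explicit upper limit */
        addr = __memblock_alloc_base(size, align, max);
        /* becomes */
        addr = memblock_phys_alloc_range(size, align, 0, max);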

Signed-off-by: Mike Rapoport 
---
 arch/sh/kernel/machine_kexec.c |  3 ++-
 arch/x86/kernel/e820.c |  2 +-
 arch/x86/mm/numa.c | 12 
 drivers/of/of_reserved_mem.c   |  7 ++-
 include/linux/memblock.h   |  2 --
 mm/memblock.c  |  9 ++---
 6 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index b9f9f1a..63d63a3 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -168,7 +168,8 @@ void __init reserve_crashkernel(void)
crash_size = PAGE_ALIGN(resource_size(&crashk_res));
if (!crashk_res.start) {
unsigned long max = memblock_end_of_DRAM() - memory_limit;
-   crashk_res.start = __memblock_alloc_base(crash_size, PAGE_SIZE, max);
+   crashk_res.start = memblock_phys_alloc_range(crash_size,
+PAGE_SIZE, 0, max);
if (!crashk_res.start) {
pr_err("crashkernel allocation failed\n");
goto disable;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 50895c2..9c0eb54 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -778,7 +778,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 
align)
 {
u64 addr;
 
-   addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+   addr = memblock_phys_alloc(size, align);
if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, 
E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for 
e820__memblock_alloc_reserved()\n");
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1308f54..f85ae42 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -195,15 +195,11 @@ static void __init alloc_node_data(int nid)
 * Allocate node data.  Try node-local memory and then any node.
 * Never allocate in DMA zone.
 */
-   nd_pa = memblock_phys_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+   nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
if (!nd_pa) {
-   nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
- MEMBLOCK_ALLOC_ACCESSIBLE);
-   if (!nd_pa) {
-   pr_err("Cannot find %zu bytes in any node (initial 
node: %d)\n",
-  nd_size, nid);
-   return;
-   }
+   pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
+  nd_size, nid);
+   return;
}
nd = __va(nd_pa);
 
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 1977ee0..499f16d 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -31,13 +31,10 @@ int __init __weak 
early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
phys_addr_t *res_base)
 {
phys_addr_t base;
-   /*
-* We use __memblock_alloc_base() because memblock_alloc_base()
-* panic()s on allocation failure.
-*/
+
end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end;
align = !align ? SMP_CACHE_BYTES : align;
-   base = __memblock_alloc_base(size, align, end);
+   base = memblock_phys_alloc_range(size, align, 0, end);
if (!base)
return -ENOMEM;
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 7883c74..768e2b4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -496,8 +496,6 @@ static inline bool memblock_bottom_up(void)
 
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t max_addr);
-phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
- phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_mem_size(unsigned long limit_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index 461e40a3..e5ffdcd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1363,17 +1363,12 @@ phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t 
size, phys_addr_t align,
return ret;
 }
 
-phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
-{
-   return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
-   MEMBLOCK_NONE);
-}
-
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, 
phys_addr_

[PATCH 09/21] memblock: drop memblock_alloc_base()

2019-01-16 Thread Mike Rapoport
The memblock_alloc_base() function tries to allocate memory up to the
limit specified by its max_addr parameter and panics if the allocation
fails. Replace its usage with memblock_phys_alloc_range() and make the
callers check the return value and panic in case of error.

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kernel/rtas.c  |  6 +-
 arch/powerpc/mm/hash_utils_64.c |  8 ++--
 arch/s390/kernel/smp.c  |  6 +-
 drivers/macintosh/smu.c |  2 +-
 include/linux/memblock.h|  2 --
 mm/memblock.c   | 14 --
 6 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index de35bd8f..fbc6761 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1187,7 +1187,11 @@ void __init rtas_initialize(void)
ibm_suspend_me_token = rtas_token("ibm,suspend-me");
}
 #endif
-   rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, 
rtas_region);
+   rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+0, rtas_region);
+   if (!rtas_rmo_buf)
+   panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
+ PAGE_SIZE, &rtas_region);
 
 #ifdef CONFIG_RTAS_ERROR_LOGGING
rtas_last_error_token = rtas_token("rtas-last-error");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index bc6be44..c7d5f48 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -882,8 +882,12 @@ static void __init htab_initialize(void)
}
 #endif /* CONFIG_PPC_CELL */
 
-   table = memblock_alloc_base(htab_size_bytes, htab_size_bytes,
-   limit);
+   table = memblock_phys_alloc_range(htab_size_bytes,
+ htab_size_bytes,
+ 0, limit);
+   if (!table)
+   panic("ERROR: Failed to allocate %pa bytes below %pa\n",
+ &htab_size_bytes, &limit);
 
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index f82b3d3..9061597 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -651,7 +651,11 @@ void __init smp_save_dump_cpus(void)
/* No previous system present, normal boot. */
return;
/* Allocate a page as dumping area for the store status sigps */
-   page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
+   page = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, 1UL << 31);
+   if (!page)
+   panic("ERROR: Failed to allocate %x bytes below %lx\n",
+ PAGE_SIZE, 1UL << 31);
+
/* Set multi-threading state to the previous system. */
pcpu_set_smt(sclp.mtid_prev);
boot_cpu_addr = stap();
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 0a0b8e1..42cf68d 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -485,7 +485,7 @@ int __init smu_init (void)
 * SMU based G5s need some memory below 2Gb. Thankfully this is
 * called at a time where memblock is still available.
 */
-   smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x8000UL);
+   smu_cmdbuf_abs = memblock_phys_alloc_range(4096, 4096, 0, 0x8000UL);
if (smu_cmdbuf_abs == 0) {
printk(KERN_ERR "SMU: Command buffer allocation failed !\n");
ret = -EINVAL;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 768e2b4..6874fdc 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -494,8 +494,6 @@ static inline bool memblock_bottom_up(void)
return memblock.bottom_up;
 }
 
-phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
-   phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_mem_size(unsigned long limit_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index e5ffdcd..531fa77 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1363,20 +1363,6 @@ phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t 
size, phys_addr_t align,
return ret;
 }
 
-phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
-{
-   phys_addr_t alloc;
-
-   alloc = memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
-   MEMBLOCK_NONE);
-
-   if (alloc == 0)
-   panic("ERROR: Failed to allocate %pa bytes below %pa.\n",
- 

[PATCH 10/21] memblock: refactor internal allocation functions

2019-01-16 Thread Mike Rapoport
Currently, memblock has several internal functions with overlapping
functionality. They all call memblock_find_in_range_node() to find free
memory and then reserve the allocated range and mark it with kmemleak.
However, there are differences in the allocation constraints and in the
fallback strategies.

The allocations returning a physical address first attempt to find free
memory on the specified node within mirrored memory regions, then retry on
the same node without the requirement for memory mirroring, and finally
fall back to all available memory.

The allocations returning a virtual address start by clamping the allowed
range to memblock.current_limit, then attempt to allocate from the
specified node, from regions with mirroring and above the user-defined
minimal address. If such an allocation fails, the next attempt is made
with the node restriction lifted. Next, the allocation is retried with the
minimal address reset to zero, and finally without the requirement for
mirrored regions.

Let's consolidate various fallbacks handling and make them more consistent
for physical and virtual variants. Most of the fallback handling is moved
to memblock_alloc_range_nid() and it now handles node and mirror fallbacks.

memblock_alloc_internal() uses memblock_alloc_range_nid() to get the
physical address of the allocated range and converts it to a virtual
address.

The fallback for allocation below the specified minimal address remains in
memblock_alloc_internal() because memblock_alloc_range_nid() is used by CMA
with exact requirement for lower bounds.

The memblock_phys_alloc_nid() function is completely dropped as it is not
used anywhere outside memblock and its only usage can be replaced by a call
to memblock_alloc_range_nid().
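
A sketch of the consolidated control flow in memblock_alloc_range_nid()
after the refactoring (condensed pseudocode, not the literal function
body):

        flags = choose_memblock_flags();   /* MEMBLOCK_MIRROR if available */
again:
        found = memblock_find_in_range_node(size, align, start, end,
                                            nid, flags);
        if (found && !memblock_reserve(found, size))
                goto done;

        if (nid != NUMA_NO_NODE) {         /* retry on any node */
                found = memblock_find_in_range_node(size, align, start, end,
                                                    NUMA_NO_NODE, flags);
                if (found && !memblock_reserve(found, size))
                        goto done;
        }

        if (flags & MEMBLOCK_MIRROR) {     /* finally, drop mirroring */
                flags &= ~MEMBLOCK_MIRROR;
                goto again;
        }
        return 0;
done:
        kmemleak_alloc_phys(found, size, 0, 0);
        return found;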

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h |   1 -
 mm/memblock.c| 173 +--
 2 files changed, 78 insertions(+), 96 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6874fdc..cf4cd9c 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -371,7 +371,6 @@ static inline int memblock_get_region_node(const struct 
memblock_region *r)
 
 phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
  phys_addr_t start, phys_addr_t end);
-phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int 
nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, 
int nid);
 
 static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
diff --git a/mm/memblock.c b/mm/memblock.c
index 531fa77..739f769 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1312,30 +1312,84 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone 
*zone,
 
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
+/**
+ * memblock_alloc_range_nid - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @start: the lower bound of the memory region to allocate (phys address)
+ * @end: the upper bound of the memory region to allocate (phys address)
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * The allocation is performed from memory region limited by
+ * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
+ *
+ * If the specified node can not hold the requested memory the
+ * allocation falls back to any node in the system.
+ *
+ * For systems with memory mirroring, the allocation is attempted first
+ * from the regions with mirroring enabled and then retried from any
+ * memory region.
+ *
+ * In addition, the function sets min_count to 0 using kmemleak_alloc_phys
+ * for the allocated boot memory block, so that it is never reported as a
+ * leak.
+ *
+ * Return:
+ * Physical address of allocated memory block on success, %0 on failure.
+ */
 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
-   phys_addr_t end, int nid,
-   enum memblock_flags flags)
+   phys_addr_t end, int nid)
 {
+   enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
 
+   if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+   nid = NUMA_NO_NODE;
+
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
align = SMP_CACHE_BYTES;
}
 
+   if (end > memblock.current_limit)
+   end = memblock.current_limit;
+
+again:
found = memblock_find_in_range_node(size, align, start, end, nid,
flags);
-   if (found && !memblock_reserve(found, size)) {
+   if (found && !memblock_reserve(found

[PATCH 13/21] arch: don't memset(0) memory returned by memblock_alloc()

2019-01-16 Thread Mike Rapoport
memblock_alloc() already clears the allocated memory, no point in doing it
twice.
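
memblock_alloc() bottoms out in memblock_alloc_try_nid(), whose tail (also
visible in patch 20 below) already zeroes the block, so the memset() calls
removed here were redundant; schematically:

        ptr = memblock_alloc_internal(size, align, min_addr, max_addr, nid);
        if (ptr)
                memset(ptr, 0, size);   /* done once, inside the allocator */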

Signed-off-by: Mike Rapoport 
---
 arch/c6x/mm/init.c  | 1 -
 arch/h8300/mm/init.c| 1 -
 arch/ia64/kernel/mca.c  | 2 --
 arch/m68k/mm/mcfmmu.c   | 1 -
 arch/microblaze/mm/init.c   | 6 ++
 arch/sparc/kernel/prom_32.c | 2 --
 6 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index af5ada0..e83c046 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -40,7 +40,6 @@ void __init paging_init(void)
 
empty_zero_page  = (unsigned long) memblock_alloc(PAGE_SIZE,
  PAGE_SIZE);
-   memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
/*
 * Set up user data space
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 6519252..a157890 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -68,7 +68,6 @@ void __init paging_init(void)
 * to a couple of allocated pages.
 */
empty_zero_page = (unsigned long)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-   memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
/*
 * Set up SFC/DFC registers (user data space).
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 74d148b..370bc34 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -400,8 +400,6 @@ ia64_log_init(int sal_info_type)
 
// set up OS data structures to hold error info
IA64_LOG_ALLOCATE(sal_info_type, max_size);
-   memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size);
-   memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size);
 }
 
 /*
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 0de4999..492f953 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -44,7 +44,6 @@ void __init paging_init(void)
int i;
 
empty_zero_page = (void *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-   memset((void *) empty_zero_page, 0, PAGE_SIZE);
 
pg_dir = swapper_pg_dir;
memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 44f4b89..bd1cd4b 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -376,10 +376,8 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 
if (mem_init_done)
p = kzalloc(size, mask);
-   else {
+   else
p = memblock_alloc(size, SMP_CACHE_BYTES);
-   if (p)
-   memset(p, 0, size);
-   }
+
return p;
 }
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 38940af..e7126ca 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -33,8 +33,6 @@ void * __init prom_early_alloc(unsigned long size)
void *ret;
 
ret = memblock_alloc(size, SMP_CACHE_BYTES);
-   if (ret != NULL)
-   memset(ret, 0, size);
 
prom_early_allocated += size;
 
-- 
2.7.4



[PATCH 15/21] sparc: add checks for the return value of memblock_alloc*()

2019-01-16 Thread Mike Rapoport
Add panic() calls if memblock_alloc*() returns NULL.

Most of the changes are simply the addition of

if (!ptr)
        panic();

statements after the calls to memblock_alloc*() variants.

Exceptions are pcpu_populate_pte() and kernel_map_range() that were
slightly refactored to accommodate the change.

Signed-off-by: Mike Rapoport 
---
 arch/sparc/kernel/prom_32.c  |  2 ++
 arch/sparc/kernel/setup_64.c |  6 ++
 arch/sparc/kernel/smp_64.c   | 12 
 arch/sparc/mm/init_64.c  | 11 +++
 arch/sparc/mm/srmmu.c|  8 
 5 files changed, 39 insertions(+)

diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index e7126ca..869b16c 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -33,6 +33,8 @@ void * __init prom_early_alloc(unsigned long size)
void *ret;
 
ret = memblock_alloc(size, SMP_CACHE_BYTES);
+   if (!ret)
+   panic("%s: Failed to allocate %lu bytes\n", __func__, size);
 
prom_early_allocated += size;
 
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 51c4d12..fd2182a 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -624,8 +624,14 @@ void __init alloc_irqstack_bootmem(void)
 
softirq_stack[i] = memblock_alloc_node(THREAD_SIZE,
   THREAD_SIZE, node);
+   if (!softirq_stack[i])
+   panic("%s: Failed to allocate %lu bytes align=%lx 
nid=%d\n",
+ __func__, THREAD_SIZE, THREAD_SIZE, node);
hardirq_stack[i] = memblock_alloc_node(THREAD_SIZE,
   THREAD_SIZE, node);
+   if (!hardirq_stack[i])
+   panic("%s: Failed to allocate %lu bytes align=%lx 
nid=%d\n",
+ __func__, THREAD_SIZE, THREAD_SIZE, node);
}
 }
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index f45d876..a8275fe 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1628,6 +1628,8 @@ static void __init pcpu_populate_pte(unsigned long addr)
pud_t *new;
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
pgd_populate(&init_mm, pgd, new);
}
 
@@ -1636,6 +1638,8 @@ static void __init pcpu_populate_pte(unsigned long addr)
pmd_t *new;
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
pud_populate(&init_mm, pud, new);
}
 
@@ -1644,8 +1648,16 @@ static void __init pcpu_populate_pte(unsigned long addr)
pte_t *new;
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
pmd_populate_kernel(&init_mm, pmd, new);
}
+
+   return;
+
+err_alloc:
+   panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index ef340e8..f2d70ff 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1809,6 +1809,8 @@ static unsigned long __ref kernel_map_range(unsigned long 
pstart,
 
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
  PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
alloc_bytes += PAGE_SIZE;
pgd_populate(&init_mm, pgd, new);
}
@@ -1822,6 +1824,8 @@ static unsigned long __ref kernel_map_range(unsigned long 
pstart,
}
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
  PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
alloc_bytes += PAGE_SIZE;
pud_populate(&init_mm, pud, new);
}
@@ -1836,6 +1840,8 @@ static unsigned long __ref kernel_map_range(unsigned long 
pstart,
}
new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
  PAGE_SIZE);
+   if (!new)
+   goto err_alloc;
alloc_bytes += PAGE_SIZE;
pmd_populate_kernel(&init_mm, pmd, new);
}
@@ -1855,6 +1861,11 @@ static unsigned long __ref kernel_map_range(unsigned 
long pstart,
}
 
return alloc_bytes;
+
+err_alloc:
+   panic("%s: Fail

[PATCH 19/21] treewide: add checks for the return value of memblock_alloc*()

2019-01-16 Thread Mike Rapoport
Add checks for the return value of the memblock_alloc*() functions and
call panic() in case of error.
The panic message repeats the one used by the panicking memblock allocators,
with the parameters adjusted to include only the relevant ones.

The replacement was mostly automated with semantic patches like the one
below, with manual massaging of the format strings.

@@
expression ptr, size, align;
@@
ptr = memblock_alloc(size, align);
+ if (!ptr)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__,
+ size, align);

Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/core_cia.c  |  3 +++
 arch/alpha/kernel/core_marvel.c   |  6 ++
 arch/alpha/kernel/pci-noop.c  | 11 ++-
 arch/alpha/kernel/pci.c   | 11 ++-
 arch/alpha/kernel/pci_iommu.c | 12 
 arch/arc/mm/highmem.c |  4 
 arch/arm/kernel/setup.c   |  6 ++
 arch/arm/mm/mmu.c | 14 +-
 arch/arm64/kernel/setup.c |  9 ++---
 arch/arm64/mm/kasan_init.c| 10 ++
 arch/c6x/mm/dma-coherent.c|  4 
 arch/c6x/mm/init.c|  3 +++
 arch/csky/mm/highmem.c|  5 +
 arch/h8300/mm/init.c  |  3 +++
 arch/m68k/atari/stram.c   |  4 
 arch/m68k/mm/init.c   |  3 +++
 arch/m68k/mm/mcfmmu.c |  6 ++
 arch/m68k/mm/motorola.c   |  9 +
 arch/m68k/mm/sun3mmu.c|  6 ++
 arch/m68k/sun3/sun3dvma.c |  3 +++
 arch/microblaze/mm/init.c |  8 ++--
 arch/mips/cavium-octeon/dma-octeon.c  |  3 +++
 arch/mips/kernel/setup.c  |  3 +++
 arch/mips/kernel/traps.c  |  3 +++
 arch/mips/mm/init.c   |  5 +
 arch/nds32/mm/init.c  | 12 
 arch/openrisc/mm/ioremap.c|  8 ++--
 arch/powerpc/kernel/dt_cpu_ftrs.c |  5 +
 arch/powerpc/kernel/pci_32.c  |  3 +++
 arch/powerpc/kernel/setup-common.c|  3 +++
 arch/powerpc/kernel/setup_64.c|  4 
 arch/powerpc/lib/alloc.c  |  3 +++
 arch/powerpc/mm/hash_utils_64.c   |  3 +++
 arch/powerpc/mm/mmu_context_nohash.c  |  9 +
 arch/powerpc/mm/pgtable-book3e.c  | 12 ++--
 arch/powerpc/mm/pgtable-book3s64.c|  3 +++
 arch/powerpc/mm/pgtable-radix.c   |  9 -
 arch/powerpc/mm/ppc_mmu_32.c  |  3 +++
 arch/powerpc/platforms/pasemi/iommu.c |  3 +++
 arch/powerpc/platforms/powermac/nvram.c   |  3 +++
 arch/powerpc/platforms/powernv/opal.c |  3 +++
 arch/powerpc/platforms/powernv/pci-ioda.c |  8 
 arch/powerpc/platforms/ps3/setup.c|  3 +++
 arch/powerpc/sysdev/msi_bitmap.c  |  3 +++
 arch/s390/kernel/setup.c  | 13 +
 arch/s390/kernel/smp.c|  5 -
 arch/s390/kernel/topology.c   |  6 ++
 arch/s390/numa/mode_emu.c |  3 +++
 arch/s390/numa/numa.c |  6 +-
 arch/s390/numa/toptree.c  |  8 ++--
 arch/sh/mm/init.c |  6 ++
 arch/sh/mm/numa.c |  4 
 arch/um/drivers/net_kern.c|  3 +++
 arch/um/drivers/vector_kern.c |  3 +++
 arch/um/kernel/initrd.c   |  2 ++
 arch/um/kernel/mem.c  | 16 
 arch/unicore32/kernel/setup.c |  4 
 arch/unicore32/mm/mmu.c   | 15 +--
 arch/x86/kernel/acpi/boot.c   |  3 +++
 arch/x86/kernel/apic/io_apic.c|  5 +
 arch/x86/kernel/e820.c|  3 +++
 arch/x86/platform/olpc/olpc_dt.c  |  3 +++
 arch/x86/xen/p2m.c| 11 +--
 arch/xtensa/mm/kasan_init.c   |  4 
 arch/xtensa/mm/mmu.c  |  3 +++
 drivers/clk/ti/clk.c  |  3 +++
 drivers/macintosh/smu.c   |  3 +++
 drivers/of/fdt.c  |  8 +++-
 drivers/of/unittest.c |  8 +++-
 drivers/xen/swiotlb-xen.c |  7 +--
 kernel/power/snapshot.c   |  3 +++
 lib/cpumask.c |  3 +++
 mm/kasan/init.c   | 10 --
 mm/sparse.c   | 19 +--
 74 files changed, 415 insertions(+), 29 deletions(-)

diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index 466cd44..f489170 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -332,6 +332,9 @@ cia_prepare_tbia_workaround(int window)
 
/* Use minimal 1K map. */
ppte = memb

[PATCH 21/21] memblock: drop memblock_alloc_*_nopanic() variants

2019-01-16 Thread Mike Rapoport
As all the memblock allocation functions return NULL in case of error
rather than panic(), the duplicates with _nopanic suffix can be removed.
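
With the panic() gone from the core allocator, each _nopanic variant is
behaviourally identical to its plain counterpart, so the conversion is
mechanical, e.g.:

        -       ptr = memblock_alloc_nopanic(size, align);
        +       ptr = memblock_alloc(size, align);

The diff below applies the same substitution to the _from and _try_nid
variants.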

Signed-off-by: Mike Rapoport 
---
 arch/arc/kernel/unwind.c   |  3 +--
 arch/sh/mm/init.c  |  2 +-
 arch/x86/kernel/setup_percpu.c | 10 +-
 arch/x86/mm/kasan_init_64.c| 14 --
 drivers/firmware/memmap.c  |  2 +-
 drivers/usb/early/xhci-dbc.c   |  2 +-
 include/linux/memblock.h   | 35 ---
 kernel/dma/swiotlb.c   |  2 +-
 kernel/printk/printk.c | 17 +++--
 mm/memblock.c  | 35 ---
 mm/page_alloc.c| 10 +-
 mm/page_ext.c  |  2 +-
 mm/percpu.c| 11 ---
 mm/sparse.c|  6 ++
 14 files changed, 37 insertions(+), 114 deletions(-)

diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index d34f69e..271e9fa 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -181,8 +181,7 @@ static void init_unwind_hdr(struct unwind_table *table,
  */
 static void *__init unw_hdr_alloc_early(unsigned long sz)
 {
-   return memblock_alloc_from_nopanic(sz, sizeof(unsigned int),
-  MAX_DMA_ADDRESS);
+   return memblock_alloc_from(sz, sizeof(unsigned int), MAX_DMA_ADDRESS);
 }
 
 static void *unw_hdr_alloc(unsigned long sz)
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index fceefd9..7062132 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -202,7 +202,7 @@ void __init allocate_pgdat(unsigned int nid)
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-   NODE_DATA(nid) = memblock_alloc_try_nid_nopanic(
+   NODE_DATA(nid) = memblock_alloc_try_nid(
sizeof(struct pglist_data),
SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
MEMBLOCK_ALLOC_ACCESSIBLE, nid);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e8796fc..0c5e9bf 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
unsigned long size,
void *ptr;
 
if (!node_online(node) || !NODE_DATA(node)) {
-   ptr = memblock_alloc_from_nopanic(size, align, goal);
+   ptr = memblock_alloc_from(size, align, goal);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
 cpu, size, __pa(ptr));
} else {
-   ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
-MEMBLOCK_ALLOC_ACCESSIBLE,
-node);
+   ptr = memblock_alloc_try_nid(size, align, goal,
+MEMBLOCK_ALLOC_ACCESSIBLE,
+node);
 
pr_debug("per cpu data for cpu%d %lu bytes on node%d at 
%016lx\n",
 cpu, size, node, __pa(ptr));
}
return ptr;
 #else
-   return memblock_alloc_from_nopanic(size, align, goal);
+   return memblock_alloc_from(size, align, goal);
 #endif
 }
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 462fde8..8dc0fc0 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -24,14 +24,16 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
-static __init void *early_alloc(size_t size, int nid, bool panic)
+static __init void *early_alloc(size_t size, int nid, bool should_panic)
 {
-   if (panic)
-   return memblock_alloc_try_nid(size, size,
-   __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-   else
-   return memblock_alloc_try_nid_nopanic(size, size,
+   void *ptr = memblock_alloc_try_nid(size, size,
__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+
+   if (!ptr && should_panic)
+   panic("%pS: Failed to allocate page, nid=%d from=%lx\n",
+ (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS));
+
+   return ptr;
 }
 
 static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index ec4fd25..d168c87 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -333,7 +333,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const 
char *type)
 {
struct firmware_map_entry *entry;
 
-   entry = membloc

[PATCH 17/21] init/main: add checks for the return value of memblock_alloc*()

2019-01-16 Thread Mike Rapoport
Add panic() calls if memblock_alloc() returns NULL.

The panic() format duplicates the one used by memblock itself, and to
avoid an explosion of long parameter lists, open-coded allocation size
calculations are replaced with a local variable.

Signed-off-by: Mike Rapoport 
---
 init/main.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/init/main.c b/init/main.c
index a56f65a..d58a365 100644
--- a/init/main.c
+++ b/init/main.c
@@ -373,12 +373,20 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) 
{ }
  */
 static void __init setup_command_line(char *command_line)
 {
-   saved_command_line =
-   memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
-   initcall_command_line =
-   memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
-   static_command_line = memblock_alloc(strlen(command_line) + 1,
-SMP_CACHE_BYTES);
+   size_t len = strlen(boot_command_line) + 1;
+
+   saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+   if (!saved_command_line)
+   panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+   initcall_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+   if (!initcall_command_line)
+   panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+   static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+   if (!static_command_line)
+   panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
strcpy(saved_command_line, boot_command_line);
strcpy(static_command_line, command_line);
 }
@@ -773,8 +781,14 @@ static int __init initcall_blacklist(char *str)
pr_debug("blacklisting initcall %s\n", str_entry);
entry = memblock_alloc(sizeof(*entry),
   SMP_CACHE_BYTES);
+   if (!entry)
+   panic("%s: Failed to allocate %zu bytes\n",
+ __func__, sizeof(*entry));
entry->buf = memblock_alloc(strlen(str_entry) + 1,
SMP_CACHE_BYTES);
+   if (!entry->buf)
+   panic("%s: Failed to allocate %zu bytes\n",
+ __func__, strlen(str_entry) + 1);
strcpy(entry->buf, str_entry);
list_add(&entry->next, &blacklisted_initcalls);
}
-- 
2.7.4



[PATCH 18/21] swiotlb: add checks for the return value of memblock_alloc*()

2019-01-16 Thread Mike Rapoport
Add panic() calls if memblock_alloc() returns NULL.

The panic() format duplicates the one used by memblock itself, and to
avoid an explosion of long parameter lists, open-coded allocation size
calculations are replaced with a local variable.

Signed-off-by: Mike Rapoport 
---
 kernel/dma/swiotlb.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d636177..e78835c8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -191,6 +191,7 @@ void __init swiotlb_update_mem_attributes(void)
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
unsigned long i, bytes;
+   size_t alloc_size;
 
bytes = nslabs << IO_TLB_SHIFT;
 
@@ -203,12 +204,18 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
nslabs, int verbose)
 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 * between io_tlb_start and io_tlb_end.
 */
-   io_tlb_list = memblock_alloc(
-   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
-   PAGE_SIZE);
-   io_tlb_orig_addr = memblock_alloc(
-   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
-   PAGE_SIZE);
+   alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
+   io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
+   if (!io_tlb_list)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+
+   alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
+   io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
+   if (!io_tlb_orig_addr)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+
for (i = 0; i < io_tlb_nslabs; i++) {
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-- 
2.7.4



[PATCH 16/21] mm/percpu: add checks for the return value of memblock_alloc*()

2019-01-16 Thread Mike Rapoport
Add panic() calls if memblock_alloc() returns NULL.

The panic() format duplicates the one used by memblock itself, and to
avoid an explosion of long parameter lists, open-coded allocation size
calculations are replaced with a local variable.

Signed-off-by: Mike Rapoport 
---
 mm/percpu.c | 73 +++--
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index db86282..5998b03 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1086,6 +1086,7 @@ static struct pcpu_chunk * __init 
pcpu_alloc_first_chunk(unsigned long tmp_addr,
struct pcpu_chunk *chunk;
unsigned long aligned_addr, lcm_align;
int start_offset, offset_bits, region_size, region_bits;
+   size_t alloc_size;
 
/* region calculations */
aligned_addr = tmp_addr & PAGE_MASK;
@@ -1101,9 +1102,12 @@ static struct pcpu_chunk * __init 
pcpu_alloc_first_chunk(unsigned long tmp_addr,
region_size = ALIGN(start_offset + map_size, lcm_align);
 
/* allocate chunk */
-   chunk = memblock_alloc(sizeof(struct pcpu_chunk) +
-  BITS_TO_LONGS(region_size >> PAGE_SHIFT),
-  SMP_CACHE_BYTES);
+   alloc_size = sizeof(struct pcpu_chunk) +
+   BITS_TO_LONGS(region_size >> PAGE_SHIFT);
+   chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
 
INIT_LIST_HEAD(&chunk->list);
 
@@ -1114,12 +1118,25 @@ static struct pcpu_chunk * __init 
pcpu_alloc_first_chunk(unsigned long tmp_addr,
chunk->nr_pages = region_size >> PAGE_SHIFT;
region_bits = pcpu_chunk_map_bits(chunk);
 
-   chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]),
- SMP_CACHE_BYTES);
-   chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]),
- SMP_CACHE_BYTES);
-   chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]),
- SMP_CACHE_BYTES);
+   alloc_size = BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]);
+   chunk->alloc_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk->alloc_map)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
+   alloc_size =
+   BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]);
+   chunk->bound_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk->bound_map)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
+   alloc_size = pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]);
+   chunk->md_blocks = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!chunk->md_blocks)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
pcpu_init_md_blocks(chunk);
 
/* manage populated page bitmap */
@@ -2044,6 +2061,7 @@ int __init pcpu_setup_first_chunk(const struct 
pcpu_alloc_info *ai,
int group, unit, i;
int map_size;
unsigned long tmp_addr;
+   size_t alloc_size;
 
#define PCPU_SETUP_BUG_ON(cond)    do {    \
if (unlikely(cond)) {   \
@@ -2075,14 +2093,29 @@ int __init pcpu_setup_first_chunk(const struct 
pcpu_alloc_info *ai,
PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
/* process group information and build config tables accordingly */
-   group_offsets = memblock_alloc(ai->nr_groups * sizeof(group_offsets[0]),
-  SMP_CACHE_BYTES);
-   group_sizes = memblock_alloc(ai->nr_groups * sizeof(group_sizes[0]),
-SMP_CACHE_BYTES);
-   unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]),
- SMP_CACHE_BYTES);
-   unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]),
- SMP_CACHE_BYTES);
+   alloc_size = ai->nr_groups * sizeof(group_offsets[0]);
+   group_offsets = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!group_offsets)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+ alloc_size);
+
+   alloc_size = ai->nr_groups * sizeof(group_sizes[0]);
+   group_sizes = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
+   if (!group_sizes)
+   panic("%s: Failed to allocate %zu bytes\n", __func__,
+

[PATCH 20/21] memblock: memblock_alloc_try_nid: don't panic

2019-01-16 Thread Mike Rapoport
As all the memblock_alloc*() users now check the return value and panic()
in case of error, the panic() call can be removed from the core memblock
allocator, namely memblock_alloc_try_nid().

Signed-off-by: Mike Rapoport 
---
 mm/memblock.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index 03b3929..7164275 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1526,7 +1526,7 @@ void * __init memblock_alloc_try_nid_nopanic(
 }
 
 /**
- * memblock_alloc_try_nid - allocate boot memory block with panicking
+ * memblock_alloc_try_nid - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
  * @align: alignment of the region and block's size
  * @min_addr: the lower bound of the memory region from where the allocation
@@ -1536,9 +1536,8 @@ void * __init memblock_alloc_try_nid_nopanic(
  *   allocate only from memory limited by memblock.current_limit value
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
- * Public panicking version of memblock_alloc_try_nid_nopanic()
- * which provides debug information (including caller info), if enabled,
- * and panics if the request can not be satisfied.
+ * Public function, provides additional debug information (including caller
+ * info), if enabled. This function zeroes the allocated memory.
  *
  * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
@@ -1555,14 +1554,10 @@ void * __init memblock_alloc_try_nid(
&max_addr, (void *)_RET_IP_);
ptr = memblock_alloc_internal(size, align,
   min_addr, max_addr, nid);
-   if (ptr) {
+   if (ptr)
memset(ptr, 0, size);
-   return ptr;
-   }
 
-   panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=%pa 
max_addr=%pa\n",
- __func__, (u64)size, (u64)align, nid, _addr, _addr);
-   return NULL;
+   return ptr;
 }
 
 /**
-- 
2.7.4



[PATCH 14/21] ia64: add checks for the return value of memblock_alloc*()

2019-01-16 Thread Mike Rapoport
Add panic() calls if memblock_alloc*() returns NULL.

Most of the changes are simply the addition of

if (!ptr)
        panic();

statements after the calls to memblock_alloc*() variants.

Exceptions are create_mem_map_page_table() and ia64_log_init() that were
slightly refactored to accommodate the change.
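
Both refactorings share the same shape: the allocation is hoisted out of
the consuming call so its result can be tested, with a single err_alloc
label for the failure path. Schematically (a condensed sketch of the
create_mem_map_page_table() hunk below, not the complete function):

        if (pgd_none(*pgd)) {
                pud = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
                if (!pud)
                        goto err_alloc;
                pgd_populate(&init_mm, pgd, pud);
        }
        /* ... likewise for the pud, pmd and pte levels ... */
        return 0;

err_alloc:
        panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d\n",
              __func__, PAGE_SIZE, PAGE_SIZE, node);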

Signed-off-by: Mike Rapoport 
---
 arch/ia64/kernel/mca.c  | 20 ++--
 arch/ia64/mm/contig.c   |  8 ++--
 arch/ia64/mm/discontig.c|  4 
 arch/ia64/mm/init.c | 38 ++
 arch/ia64/mm/tlb.c  |  6 ++
 arch/ia64/sn/kernel/io_common.c |  3 +++
 arch/ia64/sn/kernel/setup.c | 12 +++-
 7 files changed, 74 insertions(+), 17 deletions(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 370bc34..5cabb3f 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -359,11 +359,6 @@ typedef struct ia64_state_log_s
 
 static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
 
-#define IA64_LOG_ALLOCATE(it, size) \
-   {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
-   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); \
-   ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
-   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);}
 #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
 #define IA64_LOG_LOCK(it)  spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
 #define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock, s)
@@ -378,6 +373,19 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
 #define IA64_LOG_CURR_BUFFER(it)   (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
 #define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
 
+static inline void ia64_log_allocate(int it, u64 size)
+{
+   ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] =
+   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);
+   if (!ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])
+   panic("%s: Failed to allocate %llu bytes\n", __func__, size);
+
+   ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] =
+   (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);
+   if (!ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])
+   panic("%s: Failed to allocate %llu bytes\n", __func__, size);
+}
+
 /*
  * ia64_log_init
  * Reset the OS ia64 log buffer
@@ -399,7 +407,7 @@ ia64_log_init(int sal_info_type)
return;
 
// set up OS data structures to hold error info
-   IA64_LOG_ALLOCATE(sal_info_type, max_size);
+   ia64_log_allocate(sal_info_type, max_size);
 }
 
 /*
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 6e44723..d29fb6b 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -84,9 +84,13 @@ void *per_cpu_init(void)
 static inline void
 alloc_per_cpu_data(void)
 {
-   cpu_data = memblock_alloc_from(PERCPU_PAGE_SIZE * num_possible_cpus(),
-  PERCPU_PAGE_SIZE,
+   size_t size = PERCPU_PAGE_SIZE * num_possible_cpus();
+
+   cpu_data = memblock_alloc_from(size, PERCPU_PAGE_SIZE,
   __pa(MAX_DMA_ADDRESS));
+   if (!cpu_data)
+   panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, size, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 }
 
 /**
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index f9c3675..05490dd 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -454,6 +454,10 @@ static void __init *memory_less_node_alloc(int nid, 
unsigned long pernodesize)
 __pa(MAX_DMA_ADDRESS),
 MEMBLOCK_ALLOC_ACCESSIBLE,
 bestnode);
+   if (!ptr)
+   panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d 
from=%lx\n",
+ __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode,
+ __pa(MAX_DMA_ADDRESS));
 
return ptr;
 }
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 29d8415..e49200e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -444,23 +444,45 @@ int __init create_mem_map_page_table(u64 start, u64 end, 
void *arg)
 
for (address = start_page; address < end_page; address += PAGE_SIZE) {
pgd = pgd_offset_k(address);
-   if (pgd_none(*pgd))
-   pgd_populate(&init_mm, pgd, memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node));
+   if (pgd_none(*pgd)) {
+   pud = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
+   if (!pud)
+   goto err_alloc;
+   pgd_populate(&init_mm, pgd, pud);
+  

[PATCH 11/21] memblock: make memblock_find_in_range_node() and choose_memblock_flags() static

2019-01-16 Thread Mike Rapoport
These functions are not used outside memblock. Make them static.

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h | 4 
 mm/memblock.c| 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index cf4cd9c..f5a83a1 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -111,9 +111,6 @@ void memblock_discard(void);
 #define memblock_dbg(fmt, ...) \
if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
-   phys_addr_t start, phys_addr_t end,
-   int nid, enum memblock_flags flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -130,7 +127,6 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t 
size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
-enum memblock_flags choose_memblock_flags(void);
 
 unsigned long memblock_free_all(void);
 void reset_node_managed_pages(pg_data_t *pgdat);
diff --git a/mm/memblock.c b/mm/memblock.c
index 739f769..03b3929 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -125,7 +125,7 @@ static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 static int memblock_reserved_in_slab __initdata_memblock = 0;
 
-enum memblock_flags __init_memblock choose_memblock_flags(void)
+static enum memblock_flags __init_memblock choose_memblock_flags(void)
 {
return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
 }
@@ -254,7 +254,7 @@ __memblock_find_range_top_down(phys_addr_t start, 
phys_addr_t end,
  * Return:
  * Found address on success, 0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
+static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
phys_addr_t end, int nid,
enum memblock_flags flags)
-- 
2.7.4



[PATCH 07/21] memblock: memblock_phys_alloc(): don't panic

2019-01-16 Thread Mike Rapoport
Make the memblock_phys_alloc() function an inline wrapper for
memblock_phys_alloc_range() and update the memblock_phys_alloc() callers to
check the returned value and panic in case of error.
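
For reference, the resulting wrapper would look roughly like the sketch
below; the include/linux/memblock.h hunk is cut off from the diff as
archived, so treat this as an approximation rather than the applied hunk:

	/* Non-panicking variant: callers now check for a 0 return and
	 * panic themselves, as in the per-arch hunks below. */
	static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
						      phys_addr_t align)
	{
		return memblock_phys_alloc_range(size, align, 0,
						 MEMBLOCK_ALLOC_ACCESSIBLE);
	}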

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c   | 4 ++++
 arch/arm64/mm/mmu.c  | 2 ++
 arch/powerpc/sysdev/dart_iommu.c | 3 +++
 arch/s390/kernel/crash_dump.c| 3 +++
 arch/s390/kernel/setup.c | 3 +++
 arch/sh/boards/mach-ap325rxa/setup.c | 3 +++
 arch/sh/boards/mach-ecovec24/setup.c | 6 ++
 arch/sh/boards/mach-kfr2r09/setup.c  | 3 +++
 arch/sh/boards/mach-migor/setup.c| 3 +++
 arch/sh/boards/mach-se/7724/setup.c  | 6 ++
 arch/xtensa/mm/kasan_init.c  | 3 +++
 include/linux/memblock.h | 7 ++++++-
 mm/memblock.c| 5 -----
 13 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b76b90e..15dddfe 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -206,6 +206,10 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, 
phys_addr_t align)
BUG_ON(!arm_memblock_steal_permitted);
 
phys = memblock_phys_alloc(size, align);
+   if (!phys)
+   panic("Failed to steal %pa bytes at %pS\n",
+ &size, (void *)_RET_IP_);
+
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b6f5aa5..a74e4be 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -104,6 +104,8 @@ static phys_addr_t __init early_pgtable_alloc(void)
void *ptr;
 
phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate page table page\n");
 
/*
 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 25bc25f..b82c9ff 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -265,6 +265,9 @@ static void allocate_dart(void)
 * prefetching into invalid pages and corrupting data
 */
tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
+   if (!tmp)
+   panic("DART: table allocation failed\n");
+
dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
 DARTMAP_RPNMASK);
 
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 97eae38..f96a585 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -61,6 +61,9 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
struct save_area *sa;
 
sa = (void *) memblock_phys_alloc(sizeof(*sa), 8);
+   if (!sa)
+   panic("Failed to allocate save area\n");
+
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
else
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 72dd23e..da48397 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -968,6 +968,9 @@ static void __init setup_randomness(void)
 
vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
PAGE_SIZE);
+   if (!vmms)
+   panic("Failed to allocate memory for sysinfo structure\n");
+
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * 
vmms->count);
memblock_free((unsigned long) vmms, PAGE_SIZE);
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c 
b/arch/sh/boards/mach-ap325rxa/setup.c
index d7ceab6..08a0cc9 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -558,6 +558,9 @@ static void __init ap325rxa_mv_mem_reserve(void)
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
phys = memblock_phys_alloc(size, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate CEU memory\n");
+
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-ecovec24/setup.c 
b/arch/sh/boards/mach-ecovec24/setup.c
index a3901806..fd264a6 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1481,11 +1481,17 @@ static void __init ecovec_mv_mem_reserve(void)
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
phys = memblock_phys_alloc(size, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate CEU0 memory\n");
+
memblock_free(phys, size);
memblock_remove(phys, size);
ceu0_dma_membase = phys;
 
phys = memblock_phys_alloc(size, PAGE_SIZE);
+   if (!phys)
+   panic("Failed to allocate CEU1 memory\n");
+
memblock_free(ph

[PATCH 04/21] memblock: drop memblock_alloc_base_nid()

2019-01-16 Thread Mike Rapoport
memblock_alloc_base_nid() is a one-line wrapper for
memblock_alloc_range_nid() without any side effects.
Replace its usage with direct calls to memblock_alloc_range_nid().

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h |  3 ---
 mm/memblock.c| 15 ++++-----------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 60e100f..f7ef313 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -490,9 +490,6 @@ static inline bool memblock_bottom_up(void)
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
phys_addr_t start, phys_addr_t end,
enum memblock_flags flags);
-phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
-   phys_addr_t align, phys_addr_t max_addr,
-   int nid, enum memblock_flags flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/memblock.c b/mm/memblock.c
index a32db30..c80029e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1346,21 +1346,14 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t 
size, phys_addr_t align,
flags);
 }
 
-phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
-   phys_addr_t align, phys_addr_t max_addr,
-   int nid, enum memblock_flags flags)
-{
-   return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
-}
-
 phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t 
align, int nid)
 {
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t ret;
 
 again:
-   ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE,
- nid, flags);
+   ret = memblock_alloc_range_nid(size, align, 0,
+  MEMBLOCK_ALLOC_ACCESSIBLE, nid, flags);
 
if (!ret && (flags & MEMBLOCK_MIRROR)) {
flags &= ~MEMBLOCK_MIRROR;
@@ -1371,8 +1364,8 @@ phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t 
size, phys_addr_t align,
 
 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, 
phys_addr_t max_addr)
 {
-   return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE,
-  MEMBLOCK_NONE);
+   return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
+   MEMBLOCK_NONE);
 }
 
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, 
phys_addr_t max_addr)
-- 
2.7.4



[PATCH 03/21] memblock: replace memblock_alloc_base(ANYWHERE) with memblock_phys_alloc

2019-01-16 Thread Mike Rapoport
The calls to memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE) and
memblock_phys_alloc(size, align) are equivalent, as both try to allocate
'size' bytes with 'align' alignment anywhere in memory and panic if the
allocation fails.

The conversion is done using the following semantic patch:

@@
expression size, align;
@@
- memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE)
+ memblock_phys_alloc(size, align)

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c   | 2 +-
 arch/sh/boards/mach-ap325rxa/setup.c | 2 +-
 arch/sh/boards/mach-ecovec24/setup.c | 4 ++--
 arch/sh/boards/mach-kfr2r09/setup.c  | 2 +-
 arch/sh/boards/mach-migor/setup.c| 2 +-
 arch/sh/boards/mach-se/7724/setup.c  | 4 ++--
 arch/xtensa/mm/kasan_init.c  | 3 +--
 7 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 478ea8b..b76b90e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -205,7 +205,7 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, 
phys_addr_t align)
 
BUG_ON(!arm_memblock_steal_permitted);
 
-   phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, align);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c 
b/arch/sh/boards/mach-ap325rxa/setup.c
index 8f234d04..d7ceab6 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -557,7 +557,7 @@ static void __init ap325rxa_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-ecovec24/setup.c 
b/arch/sh/boards/mach-ecovec24/setup.c
index 22b4106..a3901806 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1480,12 +1480,12 @@ static void __init ecovec_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu0_dma_membase = phys;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu1_dma_membase = phys;
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c 
b/arch/sh/boards/mach-kfr2r09/setup.c
index 203d249..55bdf4a 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -631,7 +631,7 @@ static void __init kfr2r09_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-migor/setup.c 
b/arch/sh/boards/mach-migor/setup.c
index f4ad33c..ba7eee6 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -630,7 +630,7 @@ static void __init migor_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-se/7724/setup.c 
b/arch/sh/boards/mach-se/7724/setup.c
index fdbec22a..4696e10 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -965,12 +965,12 @@ static void __init ms7724se_mv_mem_reserve(void)
phys_addr_t phys;
phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu0_dma_membase = phys;
 
-   phys = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+   phys = memblock_phys_alloc(size, PAGE_SIZE);
memblock_free(phys, size);
memblock_remove(phys, size);
ceu1_dma_membase = phys;
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index 1734cda..48dbb03 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -52,8 +52,7 @@ static void __init populate(void *start, void *end)
 
for (k = 0; k < PTRS_PER_PTE; ++k, ++j) {
phys_addr_t p

[PATCH] alpha: fix hang caused by the bootmem removal

2018-11-26 Thread Mike Rapoport
The conversion of alpha to memblock as the early memory manager caused boot
to hang as described at [1].

The issue is that in the CONFIG_DISCONTIGMEM=y case, memblock_add() is
called with a memory start PFN that had been rounded down to the nearest
8Mb, which caused memblock to see more memory than is actually present in
the system.

Besides, memblock allocates memory from high addresses while bootmem used
low memory, which broke the assumption that early allocations are always
accessible by the hardware.

This patch ensures that memblock_add() uses the correct PFN for the memory
start and forces memblock to use bottom-up allocations.

[1] https://lkml.org/lkml/2018/11/22/1032
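
For illustration only (not part of the patch), the rounding problem can be
reproduced with a few lines of userspace C. The values mirror the PC164 boot
log in this thread (first usable PFN is 192) and assume alpha's 8KB pages
(PAGE_SHIFT = 13) with the default MAX_ORDER of 11, so the zone alignment
is 1024 pages, i.e. 8Mb:

	#include <stdio.h>

	#define MAX_ORDER	11
	#define PAGE_SHIFT	13

	int main(void)
	{
		/* first usable PFN reported by the firmware (memcluster 1) */
		unsigned long node_min_pfn = 192;
		unsigned long rounded =
			node_min_pfn & ~((1UL << (MAX_ORDER - 1)) - 1);

		/* rounded == 0: calling memblock_add() from here registers
		 * PFNs 0..191 as ordinary RAM although the firmware reserved
		 * them, so a later allocation may land in that hole */
		printf("firmware start pfn %lu, rounded start pfn %lu\n",
		       node_min_pfn, rounded);
		return 0;
	}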

Reported-by: Meelis Roos 
Signed-off-by: Mike Rapoport 
Tested-by: Meelis Roos 
---
 arch/alpha/kernel/setup.c | 1 +
 arch/alpha/mm/numa.c  | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index a37fd99..4b5b1b2 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -634,6 +634,7 @@ setup_arch(char **cmdline_p)
 
/* Find our memory.  */
setup_memory(kernel_end);
+   memblock_set_bottom_up(true);
 
/* First guess at cpu cache sizes.  Do this before init_arch.  */
determine_cpu_caches(cpu->type);
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 74846553..d0b7337 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -144,14 +144,14 @@ setup_memory_node(int nid, void *kernel_end)
if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > 
start_kernel_pfn))
panic("kernel loaded out of ram");
 
+   memblock_add(PFN_PHYS(node_min_pfn),
+(node_max_pfn - node_min_pfn) << PAGE_SHIFT);
+
/* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
   Note that we round this down, not up - node memory
   has much larger alignment than 8Mb, so it's safe. */
node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
 
-   memblock_add(PFN_PHYS(node_min_pfn),
-(node_max_pfn - node_min_pfn) << PAGE_SHIFT);
-
NODE_DATA(nid)->node_start_pfn = node_min_pfn;
NODE_DATA(nid)->node_present_pages = node_max_pfn - node_min_pfn;
 
-- 
2.7.4



Re: NO_BOOTMEM breaks alpha pc164

2018-11-24 Thread Mike Rapoport
(adding linux-mm, the beginning of the thread is at
https://lkml.org/lkml/2018/11/22/1032)

On Fri, Nov 23, 2018 at 06:11:09PM +0200, Meelis Roos wrote:
> >>The bad commit is the switch to NO_BOOTMEM.
> >
> >[ ... ]
> >>How do I debug it?
> >
> >Apparently, some of the early memory registration is not properly converted
> >from bootmem to memblock + nobootmem for your system.
> >
> >You can try applying the below patch to enable debug printouts from
> >memblock, maybe it'll shed some more light.
> 
> Here is the serial console output from a boot with the debug patch applied:
> 
> (boot dka0.0.0.5.0 -flags 0)
> block 0 of dka0.0.0.5.0 is a valid boot block
> reading 161 blocks from dka0.0.0.5.0
> bootstrap code read in
> base = 18, image_start = 0, image_bytes = 14200
> initializing HWRPB at 2000
> initializing page table at 172000
> initializing machine state
> setting affinity to the primary CPU
> jumping to bootstrap code
> aboot: Linux/Alpha SRM bootloader version 1.0_pre20040408
> aboot: switching to OSF/1 PALcode version 1.23
> aboot: booting from device 'SCSI 0 5 0 0 0 0 0'
> aboot: valid disklabel found: 4 partitions.
> aboot: loading uncompressed test...
> aboot: loading compressed test...
> aboot: PHDR 0 vaddr 0xfc31 offset 0x2000 size 0x79925c
> aboot: bss at 0xfcaa925c, size 0x16469c
> aboot: zero-filling 1459868 bytes at 0xfcaa925c
> aboot: starting kernel test with arguments root=/dev/sda2 console=ttyS0
> [0.00] Linux version 4.20.0-rc2-00068-gda5322e65940-dirty 
> (mroos@pc164) (gcc version 7.3.0 (Gentoo 7.3.0-r3 p1.4)) #115 Fri Nov 23 
> 17:38:17 EET 2018
> [0.00] Booting on EB164 variation PC164 using machine vector PC164 
> from SRM
> [0.00] Major Options: EV56 LEGACY_START VERBOSE_MCHECK DISCONTIGMEM 
> MAGIC_SYSRQ
> [0.00] Command line: root=/dev/sda2 console=ttyS0
> [0.00] Raw memory layout:
> [0.00]  memcluster  0, usage 1, start0, end  192
> [0.00]  memcluster  1, usage 0, start  192, end32651
> [0.00]  memcluster  2, usage 1, start32651, end32768
> [0.00] Initializing bootmem allocator on Node ID 0
> [0.00]  memcluster  1, usage 0, start  192, end32651
> [0.00]  Detected node memory:   start  192, end32651
> [0.00] memblock_add: [0x-0x0ff15fff] 
> setup_memory+0x39c/0x478
> [0.00] memblock_reserve: [0x0030-0x00c11fff] 
> setup_memory+0x444/0x478
> [0.00] 1024K Bcache detected; load hit latency 30 cycles, load miss 
> latency 212 cycles
> [0.00] pci: cia revision 2
> [0.00] memblock_alloc_try_nid: 104 bytes align=0x20 nid=-1 
> from=0x max_addr=0x 
> alloc_pci_controller+0x2c/0x50
> [0.00] memblock_reserve: [0x0ff15f80-0x0ff15fe7] 
> memblock_alloc_internal+0x170/0x278
> [0.00] memblock_alloc_try_nid: 64 bytes align=0x20 nid=-1 
> from=0x max_addr=0x alloc_resource+0x2c/0x40
> [0.00] memblock_reserve: [0x0ff15f40-0x0ff15f7f] 
> memblock_alloc_internal+0x170/0x278

...
 
> halted CPU 0
> 
> halt code = 7
> machine check while in PAL mode
> PC = 1814c
> boot failure
> >>>

Two things might cause the hang. 
First, memblock_add() is called after node_min_pfn has been rounded down to
the nearest 8Mb, and in your case this causes memblock to see more memory
than is actually present in the system.
I'm not sure why the 8Mb alignment is required; I've just made sure that
memblock_add() will use the exact available memory (the first patch below).

Another thing is that memblock allocates memory from high addresses while
bootmem was using low memory. It may happen that an allocation from high
memory is not accessible by the hardware, although it should be. The second
patch below addresses this issue.

It would be really great if you could test with each patch separately and
with both patches applied :)


Patch 1

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 74846553..7db1cb5 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -144,14 +144,14 @@ setup_memory_node(int nid, void *kernel_end)
if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > 
start_kernel_pfn))
panic("kernel loaded out of ram");
 
+   memblock_add(PFN_PHYS(node_min_pfn),
+(node_max_pfn - node_min_pfn) << PAGE_SHIFT);
+
/* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
   Note that we round this down, not up - node memory
   has much larger alignment than 8Mb, so it's safe. */
node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
 
-   memblock_add(PFN_PHYS(node_min_pfn),
-(node_max_pfn - node_min_pfn) << PAGE_SHIFT);
-
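
(Patch 2 is cut off in the archived mail. Judging by the fix that was
eventually merged, shown earlier in the archive, it presumably amounted to
forcing bottom-up allocations right after the memory setup in setup_arch():)

	/* arch/alpha/kernel/setup.c: make memblock hand out low addresses
	 * first, matching the old bootmem behaviour */
	setup_memory(kernel_end);
	memblock_set_bottom_up(true);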

Re: NO_BOOTMEM breaks alpha pc164

2018-11-22 Thread Mike Rapoport
Hi,

On Thu, Nov 22, 2018 at 03:27:44PM +0200, Meelis Roos wrote:
> I have bisected 4.20-rc2 boot hang on a PC164 subarch alpha.
> Other alphas in my collection are working fine.
> 
> When it hangs, it hangs right after starting init. "random: crng init done" 
> is printed
> after starting init and nothing more appears on serial console.

Can you please send the kernel log for the broken boot case?
What happens if you try to boot with "init=/bin/sh" added to the kernel
command line? Is the system able to start shell?
 
> The bad commit is the switch to NO_BOOTMEM.

[ ... ] 
 
> How do I debug it?

Apparently, some of the early memory registration is not properly converted
from bootmem to memblock + nobootmem for your system.

You can try applying the below patch to enable debug printouts from
memblock, maybe it'll shed some more light.

diff --git a/mm/memblock.c b/mm/memblock.c
index 7df468c..a2c289e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -119,7 +119,7 @@ struct memblock memblock __initdata_memblock = {
.current_limit  = MEMBLOCK_ALLOC_ANYWHERE,
 };
 
-int memblock_debug __initdata_memblock;
+int memblock_debug __initdata_memblock = 1;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 
> dmesg from working boot before the commit:
> 
> [0.00] Linux version 4.19.0-06941-ge92d39cdb120 (mroos@pc164) (gcc 
> version 7.3.0 (Gentoo 7.3.0-r3 p1.4)) #113 Wed Nov 21 23:11:58 EET 2018
> [0.00] Booting on EB164 variation PC164 using machine vector PC164 
> from SRM
> [0.00] Major Options: EV56 LEGACY_START VERBOSE_MCHECK DISCONTIGMEM 
> MAGIC_SYSRQ
> [0.00] Command line: root=/dev/sda2 console=ttyS0
> [0.00] Raw memory layout:
> [0.00]  memcluster  0, usage 1, start0, end  192
> [0.00]  memcluster  1, usage 0, start  192, end32651
> [0.00]  memcluster  2, usage 1, start32651, end32768
> [0.00] Initializing bootmem allocator on Node ID 0
> [0.00]  memcluster  1, usage 0, start  192, end32651
> [0.00]  Detected node memory:   start  194, end32651
> [0.00]  freeing pages 192:384
> [0.00]  freeing pages 1540:32651
> [0.00]  reserving pages 1540:1541
> [0.00] 1024K Bcache detected; load hit latency 30 cycles, load miss 
> latency 212 cycles
> [0.00] pci: cia revision 2
> [0.00] On node 0 totalpages: 32651
> [0.00]   DMA zone: 224 pages used for memmap
> [0.00]   DMA zone: 0 pages reserved
> [0.00]   DMA zone: 32651 pages, LIFO batch:7
> [0.00] pcpu-alloc: s0 r0 d32768 u32768 alloc=1*32768
> [0.00] pcpu-alloc: [0] 0
> [0.00] Built 1 zonelists, mobility grouping on.  Total pages: 32427
> [0.00] Kernel command line: root=/dev/sda2 console=ttyS0
> [0.00] Dentry cache hash table entries: 32768 (order: 5, 262144 bytes)
> [0.00] Inode-cache hash table entries: 16384 (order: 4, 131072 bytes)
> [0.00] Sorting __ex_table...
> [0.00] Memory: 248176K/261208K available (5425K kernel code, 372K 
> rwdata, 1732K rodata, 208K init, 1416K bss, 13032K reserved, 0K cma-reserved)
> [0.00] SLUB: HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=128
> [0.00] NR_IRQS: 35
> [0.00] clocksource: rpcc: mask: 0x max_cycles: 0x, 
> max_idle_ns: 3822520893 ns
> [0.001953] Console: colour VGA+ 80x25
> [0.028320] printk: console [ttyS0] enabled
> [0.029296] Calibrating delay loop... 979.52 BogoMIPS (lpj=478208)
> [0.038085] pid_max: default: 32768 minimum: 301
> [0.040039] Mount-cache hash table entries: 1024 (order: 0, 8192 bytes)
> [0.041015] Mountpoint-cache hash table entries: 1024 (order: 0, 8192 
> bytes)
> [0.051757] devtmpfs: initialized
> [0.054687] random: get_random_u32 called from 
> bucket_table_alloc.isra.18+0xbc/0x270 with crng_init=0
> [0.056640] clocksource: jiffies: mask: 0x max_cycles: 0x, 
> max_idle_ns: 1866466235866741 ns
> [0.057617] futex hash table entries: 256 (order: -1, 6144 bytes)
> [0.059570] NET: Registered protocol family 16
> [0.062499] pci: passed tb register update test
> [0.063476] pci: passed sg loopback i/o read test
> [0.064453] pci: passed tbia test
> [0.065429] pci: passed pte write cache snoop test
> [0.066406] pci: failed valid tag invalid pte reload test (mcheck; 
> workaround available)
> [0.067382] pci: passed pci machine check test
> [0.068359] PCI host bridge to bus :00
> [0.069335] pci_bus :00: root bus resource [io  0x-0x]
> [0.070312] pci_bus :00: root bus resource [mem 0x-0x1fff]
> [0.071289] pci_bus :00: No busn resource found for root bus, will use 
> [bus 00-ff]
> [0.072265] pci :00:05.0: [1000:000f] type 

Re: [PATCH v2 0/2] arm64: Cut rebuild time when changing CONFIG_BLK_DEV_INITRD

2018-10-25 Thread Mike Rapoport
On Thu, Oct 25, 2018 at 08:15:15AM -0500, Rob Herring wrote:
> +Ard
> 
> On Thu, Oct 25, 2018 at 4:38 AM Mike Rapoport  wrote:
> >
> > On Wed, Oct 24, 2018 at 02:55:17PM -0500, Rob Herring wrote:
> > > On Wed, Oct 24, 2018 at 2:33 PM Florian Fainelli  
> > > wrote:
> > > >
> > > > Hi all,
> > > >
> > > > While investigating why ARM64 required a ton of objects to be rebuilt
> > > > when toggling CONFIG_BLK_DEV_INITRD, it became clear that this was
> > > > because we define __early_init_dt_declare_initrd() differently and we do
> > > > that in arch/arm64/include/asm/memory.h which gets included by a fair
> > > > amount of other header files, and translation units as well.
> > >
> > > I scratch my head sometimes as to why some config options rebuild so
> > > much stuff. One down, ? to go. :)
> > >
> > > > Changing the value of CONFIG_BLK_DEV_INITRD is a common thing with build
> > > > systems that generate two kernels: one with the initramfs and one
> > > > without. buildroot is one of these build systems, OpenWrt is also
> > > > another one that does this.
> > > >
> > > > This patch series proposes adding an empty initrd.h to satisfy the need
> > > > for drivers/of/fdt.c to unconditionally include that file, and moves the
> > > > custom __early_init_dt_declare_initrd() definition away from
> > > > asm/memory.h
> > > >
> > > > This cuts the number of objects rebuilds from 1920 down to 26, so a
> > > > factor 73 approximately.
> > > >
> > > > Apologies for the long CC list, please let me know how you would go
> > > > about merging that and if another approach would be preferable, e.g:
> > > > introducing a CONFIG_ARCH_INITRD_BELOW_START_OK Kconfig option or
> > > > something like that.
> > >
> > > There may be a better way as of 4.20 because bootmem is now gone and
> > > only memblock is used. This should unify what each arch needs to do
> > > with initrd early. We need the physical address early for memblock
> > > reserving. Then later on we need the virtual address to access the
> > > initrd. Perhaps we should just change initrd_start and initrd_end to
> > > physical addresses (or add 2 new variables would be less invasive and
> > > allow for different translation than __va()). The sanity checks and
> > > memblock reserve could also perhaps be moved to a common location.
> > >
> > > Alternatively, given arm64 is the only oddball, I'd be fine with an
> > > "if (IS_ENABLED(CONFIG_ARM64))" condition in the default
> > > __early_init_dt_declare_initrd as long as we have a path to removing
> > > it like the above option.
> >
> > I think arm64 does not have to redefine __early_init_dt_declare_initrd().
> > Something like this might be all we need (completely untested,
> > probably it won't even compile):
> >
> > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> > index 9d9582c..e9ca238 100644
> > --- a/arch/arm64/mm/init.c
> > +++ b/arch/arm64/mm/init.c
> > @@ -62,6 +62,9 @@ s64 memstart_addr __ro_after_init = -1;
> >  phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >
> >  #ifdef CONFIG_BLK_DEV_INITRD
> > +
> > +static phys_addr_t initrd_start_phys, initrd_end_phys;
> > +
> >  static int __init early_initrd(char *p)
> >  {
> > unsigned long start, size;
> > @@ -71,8 +74,8 @@ static int __init early_initrd(char *p)
> > if (*endp == ',') {
> > size = memparse(endp + 1, NULL);
> >
> > -   initrd_start = start;
> > -   initrd_end = start + size;
> > +   initrd_start_phys = start;
> > +   initrd_end_phys = start + size;
> > }
> > return 0;
> >  }
> > @@ -407,14 +410,27 @@ void __init arm64_memblock_init(void)
> > memblock_add(__pa_symbol(_text), (u64)(_end - _text));
> > }
> >
> > -   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
> > +   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
> > +   (initrd_start || initrd_start_phys)) {
> > +   /*
> > +* FIXME: ensure proper precedence between
> > +* early_initrd and DT when both are present
> 
> Command line takes precedence, so just reverse the order.
> 
> > +*/
> > + 

Re: [PATCH v2 0/2] arm64: Cut rebuild time when changing CONFIG_BLK_DEV_INITRD

2018-10-25 Thread Mike Rapoport
On Wed, Oct 24, 2018 at 02:55:17PM -0500, Rob Herring wrote:
> On Wed, Oct 24, 2018 at 2:33 PM Florian Fainelli  wrote:
> >
> > Hi all,
> >
> > While investigating why ARM64 required a ton of objects to be rebuilt
> > when toggling CONFIG_BLK_DEV_INITRD, it became clear that this was
> > because we define __early_init_dt_declare_initrd() differently and we do
> > that in arch/arm64/include/asm/memory.h which gets included by a fair
> > amount of other header files, and translation units as well.
> 
> I scratch my head sometimes as to why some config options rebuild so
> much stuff. One down, ? to go. :)
> 
> > Changing the value of CONFIG_BLK_DEV_INITRD is a common thing with build
> > systems that generate two kernels: one with the initramfs and one
> > without. buildroot is one of these build systems, OpenWrt is also
> > another one that does this.
> >
> > This patch series proposes adding an empty initrd.h to satisfy the need
> > for drivers/of/fdt.c to unconditionally include that file, and moves the
> > custom __early_init_dt_declare_initrd() definition away from
> > asm/memory.h
> >
> > This cuts the number of objects rebuilds from 1920 down to 26, so a
> > factor 73 approximately.
> >
> > Apologies for the long CC list, please let me know how you would go
> > about merging that and if another approach would be preferable, e.g:
> > introducing a CONFIG_ARCH_INITRD_BELOW_START_OK Kconfig option or
> > something like that.
> 
> There may be a better way as of 4.20 because bootmem is now gone and
> only memblock is used. This should unify what each arch needs to do
> with initrd early. We need the physical address early for memblock
> reserving. Then later on we need the virtual address to access the
> initrd. Perhaps we should just change initrd_start and initrd_end to
> physical addresses (or add 2 new variables would be less invasive and
> allow for different translation than __va()). The sanity checks and
> memblock reserve could also perhaps be moved to a common location.
>
> Alternatively, given arm64 is the only oddball, I'd be fine with an
> "if (IS_ENABLED(CONFIG_ARM64))" condition in the default
> __early_init_dt_declare_initrd as long as we have a path to removing
> it like the above option.

I think arm64 does not have to redefine __early_init_dt_declare_initrd().
Something like this might be all we need (completely untested,
probably it won't even compile):

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9d9582c..e9ca238 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -62,6 +62,9 @@ s64 memstart_addr __ro_after_init = -1;
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
 
 #ifdef CONFIG_BLK_DEV_INITRD
+
+static phys_addr_t initrd_start_phys, initrd_end_phys;
+
 static int __init early_initrd(char *p)
 {
unsigned long start, size;
@@ -71,8 +74,8 @@ static int __init early_initrd(char *p)
if (*endp == ',') {
size = memparse(endp + 1, NULL);
 
-   initrd_start = start;
-   initrd_end = start + size;
+   initrd_start_phys = start;
+   initrd_end_phys = start + size;
}
return 0;
 }
@@ -407,14 +410,27 @@ void __init arm64_memblock_init(void)
memblock_add(__pa_symbol(_text), (u64)(_end - _text));
}
 
-   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
+   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
+   (initrd_start || initrd_start_phys)) {
+   /*
+* FIXME: ensure proper precedence between
+* early_initrd and DT when both are present
+*/
+   if (initrd_start) {
+   initrd_start_phys = __virt_to_phys(initrd_start);
+   initrd_end_phys = __virt_to_phys(initrd_end);
+   } else if (initrd_start_phys) {
+   initrd_start = (unsigned long)__va(initrd_start_phys);
+   initrd_end = (unsigned long)__va(initrd_end_phys);
+   }
+
/*
 * Add back the memory we just removed if it results in the
 * initrd to become inaccessible via the linear mapping.
 * Otherwise, this is a no-op
 */
-   u64 base = initrd_start & PAGE_MASK;
-   u64 size = PAGE_ALIGN(initrd_end) - base;
+   u64 base = initrd_start_phys & PAGE_MASK;
+   u64 size = PAGE_ALIGN(initrd_end_phys) - base;
 
/*
 * We can only add back the initrd memory if we don't end up
@@ -458,7 +474,7 @@ void __init arm64_memblock_init(void)
 * pagetables with memblock.
 */
memblock_reserve(__pa_symbol(_text), _end - _text);
-#ifdef CONFIG_BLK_DEV_INITRD
+#if 0
if (initrd_start) {
memblock_reserve(initrd_start, initrd_end - initrd_start);
 
 
> Rob
> 

-- 
Sincerely yours,
Mike.



Re: [RESEND PATCH v2] alpha: switch to NO_BOOTMEM

2018-10-20 Thread Mike Rapoport
On Thu, Oct 18, 2018 at 06:58:34PM -0700, Andrew Morton wrote:
> No reviews or acks for this one yet?

Nope :(
 
> From: Mike Rapoport 
> Subject: alpha: switch to NO_BOOTMEM
> 
> Replace bootmem allocator with memblock and enable use of NO_BOOTMEM like
> on most other architectures.
> 
> Alpha gets the description of the physical memory from the firmware as an
> array of memory clusters.  Each cluster that is not reserved by the
> firmware is added to memblock.memory.
> 
> Once memblock.memory is set up, we reserve the kernel and initrd pages
> with memblock_reserve().
> 
> Since we don't need the bootmem bitmap anymore, the code that finds an
> appropriate place for it is removed.
> 
> The conversion does not take care of NUMA support which is marked broken
> for more than 10 years now.
> 
> Link: 
> http://lkml.kernel.org/r/1535952894-10967-1-git-send-email-r...@linux.vnet.ibm.com
> Signed-off-by: Mike Rapoport 
> Cc: Richard Henderson 
> Cc: Ivan Kokshaysky 
> Cc: Michal Hocko 
> Signed-off-by: Andrew Morton 
> ---
> 
> 
> --- a/arch/alpha/Kconfig~alpha-switch-to-no_bootmem
> +++ a/arch/alpha/Kconfig
> @@ -31,6 +31,8 @@ config ALPHA
>   select ODD_RT_SIGACTION
>   select OLD_SIGSUSPEND
>   select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
> + select HAVE_MEMBLOCK
> + select NO_BOOTMEM
>   help
> The Alpha is a 64-bit general-purpose processor designed and
> marketed by the Digital Equipment Corporation of blessed memory,
> --- a/arch/alpha/kernel/core_irongate.c~alpha-switch-to-no_bootmem
> +++ a/arch/alpha/kernel/core_irongate.c
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include 
>  #include 
> @@ -241,8 +242,7 @@ albacore_init_arch(void)
>  size / 1024);
>   }
>  #endif
> - reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
> - pci_mem, BOOTMEM_DEFAULT);
> + memblock_reserve(pci_mem, memtop - pci_mem);
>   printk("irongate_init_arch: temporarily reserving "
>   "region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
>   }
> --- a/arch/alpha/kernel/setup.c~alpha-switch-to-no_bootmem
> +++ a/arch/alpha/kernel/setup.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -312,9 +313,7 @@ setup_memory(void *kernel_end)
>  {
>   struct memclust_struct * cluster;
>   struct memdesc_struct * memdesc;
> - unsigned long start_kernel_pfn, end_kernel_pfn;
> - unsigned long bootmap_size, bootmap_pages, bootmap_start;
> - unsigned long start, end;
> + unsigned long kernel_size;
>   unsigned long i;
> 
>   /* Find free clusters, and init and free the bootmem accordingly.  */
> @@ -322,6 +321,8 @@ setup_memory(void *kernel_end)
> (hwrpb->mddt_offset + (unsigned long) hwrpb);
> 
>   for_each_mem_cluster(memdesc, cluster, i) {
> + unsigned long end;
> +
>   printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
>  i, cluster->usage, cluster->start_pfn,
>  cluster->start_pfn + cluster->numpages);
> @@ -335,6 +336,9 @@ setup_memory(void *kernel_end)
>   end = cluster->start_pfn + cluster->numpages;
>   if (end > max_low_pfn)
>   max_low_pfn = end;
> +
> + memblock_add(PFN_PHYS(cluster->start_pfn),
> +  cluster->numpages << PAGE_SHIFT);
>   }
> 
>   /*
> @@ -363,87 +367,9 @@ setup_memory(void *kernel_end)
>   max_low_pfn = mem_size_limit;
>   }
> 
> - /* Find the bounds of kernel memory.  */
> - start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
> - end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
> - bootmap_start = -1;
> -
> - try_again:
> - if (max_low_pfn <= end_kernel_pfn)
> - panic("not enough memory to boot");
> -
> - /* We need to know how many physically contiguous pages
> -we'll need for the bootmap.  */
> - bootmap_pages = bootmem_bootmap_pages(max_low_pfn);
> -
> - /* Now find a good region where to allocate the bootmap.  */
> - for_each_mem_cluster(memdesc, cluster, i) {
> - if (cluster->usage & 3)
> - continue;
> -
> - start = cluster->start_pfn;
> - end = start + cluster->numpages;
> - if (start >= max_low_pfn)
> - c

Re: [PATCH] memblock: stop using implicit alignement to SMP_CACHE_BYTES

2018-10-11 Thread Mike Rapoport
On Fri, Oct 05, 2018 at 03:19:34PM -0700, Andrew Morton wrote:
> On Fri,  5 Oct 2018 00:07:04 +0300 Mike Rapoport  
> wrote:
> 
> > When memblock allocation APIs are called with align = 0, the alignment is
> > implicitly set to SMP_CACHE_BYTES.
> > 
> > Replace all such uses of memblock APIs with the 'align' parameter explicitly
> > set to SMP_CACHE_BYTES and stop implicit alignment assignment in the
> > memblock internal allocation functions.
> > 
> > For cases where memblock APIs are used via helper functions, e.g.
> > iommu_arena_new_node() on Alpha, the helper functions were detected with
> > Coccinelle's help and then manually examined and updated where appropriate.
> > 
> > ...
> >
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -1298,9 +1298,6 @@ static phys_addr_t __init 
> > memblock_alloc_range_nid(phys_addr_t size,
> >  {
> > phys_addr_t found;
> >  
> > -   if (!align)
> > -   align = SMP_CACHE_BYTES;
> > -
> 
> Can we add a WARN_ON_ONCE(!align) here?  To catch unconverted code
> which sneaks in later on.

Here it goes:

From baec825c58e8bc11371433d3a4b20b2216877a50 Mon Sep 17 00:00:00 2001
From: Mike Rapoport 
Date: Mon, 8 Oct 2018 11:22:10 +0300
Subject: [PATCH] memblock: warn if zero alignment was requested

After updating all memblock users to explicitly specify SMP_CACHE_BYTES
alignment rather than 0, it is still possible that uncovered users may
sneak in. Add a WARN_ON_ONCE for such cases.

Signed-off-by: Mike Rapoport 
---
 mm/memblock.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/mm/memblock.c b/mm/memblock.c
index 0bbae56..5fefc70 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1298,6 +1298,9 @@ static phys_addr_t __init 
memblock_alloc_range_nid(phys_addr_t size,
 {
phys_addr_t found;
 
+   if (WARN_ON_ONCE(!align))
+   align = SMP_CACHE_BYTES;
+
found = memblock_find_in_range_node(size, align, start, end, nid,
flags);
if (found && !memblock_reserve(found, size)) {
@@ -1420,6 +1423,9 @@ static void * __init memblock_alloc_internal(
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, nid);
 
+   if (WARN_ON_ONCE(!align))
+   align = SMP_CACHE_BYTES;
+
if (max_addr > memblock.current_limit)
max_addr = memblock.current_limit;
 again:
-- 
2.7.4


-- 
Sincerely yours,
Mike.



[PATCH] memblock: stop using implicit alignement to SMP_CACHE_BYTES

2018-10-04 Thread Mike Rapoport
When memblock allocation APIs are called with align = 0, the alignment is
implicitly set to SMP_CACHE_BYTES.

Replace all such uses of memblock APIs with the 'align' parameter explicitly
set to SMP_CACHE_BYTES and stop implicit alignment assignment in the
memblock internal allocation functions.

For cases where memblock APIs are used via helper functions, e.g.
iommu_arena_new_node() on Alpha, the helper functions were detected with
Coccinelle's help and then manually examined and updated where appropriate.

The direct memblock APIs users were updated using the semantic patch below:

@@
expression size, min_addr, max_addr, nid;
@@
(
|
- memblock_alloc_try_nid_raw(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, min_addr, max_addr,
nid)
|
- memblock_alloc_try_nid_nopanic(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, min_addr, max_addr,
nid)
|
- memblock_alloc_try_nid(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, nid)
|
- memblock_alloc(size, 0)
+ memblock_alloc(size, SMP_CACHE_BYTES)
|
- memblock_alloc_raw(size, 0)
+ memblock_alloc_raw(size, SMP_CACHE_BYTES)
|
- memblock_alloc_from(size, 0, min_addr)
+ memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr)
|
- memblock_alloc_nopanic(size, 0)
+ memblock_alloc_nopanic(size, SMP_CACHE_BYTES)
|
- memblock_alloc_low(size, 0)
+ memblock_alloc_low(size, SMP_CACHE_BYTES)
|
- memblock_alloc_low_nopanic(size, 0)
+ memblock_alloc_low_nopanic(size, SMP_CACHE_BYTES)
|
- memblock_alloc_from_nopanic(size, 0, min_addr)
+ memblock_alloc_from_nopanic(size, SMP_CACHE_BYTES, min_addr)
|
- memblock_alloc_node(size, 0, nid)
+ memblock_alloc_node(size, SMP_CACHE_BYTES, nid)
)

Suggested-by: Michal Hocko 
Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/core_apecs.c|  3 ++-
 arch/alpha/kernel/core_lca.c  |  3 ++-
 arch/alpha/kernel/core_marvel.c   |  4 ++--
 arch/alpha/kernel/core_mcpcia.c   |  6 +++--
 arch/alpha/kernel/core_t2.c   |  2 +-
 arch/alpha/kernel/core_titan.c|  6 +++--
 arch/alpha/kernel/core_tsunami.c  |  6 +++--
 arch/alpha/kernel/core_wildfire.c |  6 +++--
 arch/alpha/kernel/pci-noop.c  |  4 ++--
 arch/alpha/kernel/pci.c   |  4 ++--
 arch/alpha/kernel/pci_iommu.c |  4 ++--
 arch/arm/kernel/setup.c   |  4 ++--
 arch/arm/mach-omap2/omap_hwmod.c  |  8 ---
 arch/arm64/kernel/setup.c |  2 +-
 arch/ia64/kernel/mca.c|  4 ++--
 arch/ia64/mm/tlb.c|  6 +++--
 arch/ia64/sn/kernel/io_common.c   |  4 +++-
 arch/ia64/sn/kernel/setup.c   |  5 ++--
 arch/m68k/sun3/sun3dvma.c |  2 +-
 arch/microblaze/mm/init.c |  2 +-
 arch/mips/kernel/setup.c  |  2 +-
 arch/powerpc/kernel/pci_32.c  |  3 ++-
 arch/powerpc/lib/alloc.c  |  2 +-
 arch/powerpc/mm/mmu_context_nohash.c  |  7 +++---
 arch/powerpc/platforms/powermac/nvram.c   |  2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  6 ++---
 arch/powerpc/sysdev/msi_bitmap.c  |  2 +-
 arch/um/drivers/net_kern.c|  2 +-
 arch/um/drivers/vector_kern.c |  2 +-
 arch/um/kernel/initrd.c   |  2 +-
 arch/unicore32/kernel/setup.c |  2 +-
 arch/x86/kernel/acpi/boot.c   |  2 +-
 arch/x86/kernel/apic/io_apic.c|  2 +-
 arch/x86/kernel/e820.c|  3 ++-
 arch/x86/platform/olpc/olpc_dt.c  |  2 +-
 arch/xtensa/platforms/iss/network.c   |  2 +-
 drivers/clk/ti/clk.c  |  2 +-
 drivers/firmware/memmap.c |  3 ++-
 drivers/macintosh/smu.c   |  2 +-
 drivers/of/of_reserved_mem.c  |  1 +
 include/linux/memblock.h  |  3 ++-
 init/main.c   | 13 +++
 kernel/power/snapshot.c   |  3 ++-
 lib/cpumask.c |  2 +-
 mm/memblock.c |  8 ---
 mm/page_alloc.c   |  6 +++--
 mm/percpu.c   | 39 ---
 mm/sparse.c   |  3 ++-
 48 files changed, 118 insertions(+), 95 deletions(-)

diff --git a/arch/alpha/kernel/core_apecs.c b/arch/alpha/kernel/core_apecs.c
index 1bf3eef..6df765f 100644
--- a/arch/alpha/kernel/core_apecs.c
+++ b/arch/alpha/kernel/core_apecs.c
@@ -346,7 +346,8 @@ apecs_init_arch(void)
 * Window 1 is direct access 1GB at 1GB
 * Window 2 is scatter-gather 8MB at 8MB (for isa)
 */
-   hose->sg_isa = iommu_arena_new(hose, 0x0080, 0x0080, 0);
+   hose->sg_isa = iommu_arena_new(hose, 0x0080, 0x0080,
+  SMP_CACHE_BYTES);
hose-&

Re: [PATCH 03/30] mm: remove CONFIG_HAVE_MEMBLOCK

2018-09-26 Thread Mike Rapoport
On Wed, Sep 26, 2018 at 05:34:32PM -0700, Alexander Duyck wrote:
> On Wed, Sep 26, 2018 at 11:32 AM Mike Rapoport  
> wrote:
> >
> > On Wed, Sep 26, 2018 at 09:58:41AM -0700, Alexander Duyck wrote:
> > > On Fri, Sep 14, 2018 at 5:11 AM Mike Rapoport  
> > > wrote:
> > > >
> > > > All architectures use memblock for early memory management. There is no 
> > > > need
> > > > for the CONFIG_HAVE_MEMBLOCK configuration option.
> > > >
> > > > Signed-off-by: Mike Rapoport 
> > >
> > > 
> > >
> > > > diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> > > > index 5169205..4ae91fc 100644
> > > > --- a/include/linux/memblock.h
> > > > +++ b/include/linux/memblock.h
> > > > @@ -2,7 +2,6 @@
> > > >  #define _LINUX_MEMBLOCK_H
> > > >  #ifdef __KERNEL__
> > > >
> > > > -#ifdef CONFIG_HAVE_MEMBLOCK
> > > >  /*
> > > >   * Logical memory blocks.
> > > >   *
> > > > @@ -460,7 +459,6 @@ static inline phys_addr_t 
> > > > memblock_alloc(phys_addr_t size, phys_addr_t align)
> > > >  {
> > > > return 0;
> > > >  }
> > > > -#endif /* CONFIG_HAVE_MEMBLOCK */
> > > >
> > > >  #endif /* __KERNEL__ */
> > >
> > > There was an #else above this section and I believe it and the code
> > > after it need to be stripped as well.
> >
> > Right, I've already sent the fix [1] and it's in mmots.
> >
> > [1] https://lkml.org/lkml/2018/9/19/416
> >
> 
> Are you sure? The patch you reference appears to be for
> drivers/of/fdt.c, and the bit I pointed out here is in
> include/linux/memblock.h.

Ah, sorry. You are right, will fix. Thanks for spotting it!
 
> - Alex
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH 14/30] memblock: add align parameter to memblock_alloc_node()

2018-09-26 Thread Mike Rapoport
On Wed, Sep 26, 2018 at 11:36:48AM +0200, Michal Hocko wrote:
> On Wed 26-09-18 11:31:27, Michal Hocko wrote:
> > On Fri 14-09-18 15:10:29, Mike Rapoport wrote:
> > > With the align parameter, memblock_alloc_node() can be used as a drop-in
> > > replacement for alloc_bootmem_pages_node() and __alloc_bootmem_node(),
> > > which is done in the following patches.
> > 
> > /me confused. Why do we need this patch at all? Maybe it should be
> > folded into the later patch you are referring to here?
> 
> OK, I can see 1536927045-23536-17-git-send-email-r...@linux.vnet.ibm.com
> now. If you are going to repost for whatever reason please merge those
> two. Also I would get rid of the implicit "0 implies SMP_CACHE_BYTES"
> behavior. It is subtle and you have to dig deep to find that out. Why
> not make it explicit?

Agree. I'd just prefer to make it a separate patch rather than resend the
whole series. 

> -- 
> Michal Hocko
> SUSE Labs
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH 03/30] mm: remove CONFIG_HAVE_MEMBLOCK

2018-09-26 Thread Mike Rapoport
On Wed, Sep 26, 2018 at 11:24:04AM +0200, Michal Hocko wrote:
> On Fri 14-09-18 15:10:18, Mike Rapoport wrote:
> > All architectures use memblock for early memory management. There is no need
> > for the CONFIG_HAVE_MEMBLOCK configuration option.
> 
> git grep says
> arch/csky/Kconfig:  select HAVE_MEMBLOCK
 
Not only that, there are other bootmem leftovers in csky.
I've sent the patch with the necessary fixups [1]

[1] https://lkml.kernel.org/lkml/20180926112744.GC4628@rapoport-lnx/

> > Signed-off-by: Mike Rapoport 
> 
> Other than that
> Acked-by: Michal Hocko 
> -- 
> Michal Hocko
> SUSE Labs
> 

-- 
Sincerely yours,
Mike.



[RESEND PATCH v2] alpha: switch to NO_BOOTMEM

2018-09-02 Thread Mike Rapoport
Replace bootmem allocator with memblock and enable use of NO_BOOTMEM like
on most other architectures.

Alpha gets the description of the physical memory from the firmware as an
array of memory clusters. Each cluster that is not reserved by the firmware
is added to memblock.memory.

Once memblock.memory is set up, we reserve the kernel and initrd pages
with memblock_reserve().

Since we don't need the bootmem bitmap anymore, the code that finds an
appropriate place for it is removed.

The conversion does not take care of NUMA support which is marked broken
for more than 10 years now.
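
Since the setup.c hunk that replaces the bootmem bitmap search is truncated
below, here is a sketch of what the memblock-based reservation boils down
to (an approximation; reserve_kernel_memory() is a hypothetical helper,
the patch itself open-codes this in setup_memory()):

	static void __init reserve_kernel_memory(void *kernel_end)
	{
		unsigned long kernel_size;

		/* no bootmem bitmap to place anymore: reserve the kernel
		 * image directly in memblock */
		kernel_size = virt_to_phys(kernel_end) - KERNEL_START_PHYS;
		memblock_reserve(KERNEL_START_PHYS, kernel_size);
	}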

Signed-off-by: Mike Rapoport 
---
v2: describe the conversion as per Michal's request

Tested with qemu-system-alpha. I've added some tweaks to sys_dp264 to force
memory split for testing with CONFIG_DISCONTIGMEM=y

 arch/alpha/Kconfig|   2 +
 arch/alpha/kernel/core_irongate.c |   4 +-
 arch/alpha/kernel/setup.c |  98 -
 arch/alpha/mm/numa.c  | 113 +-
 4 files changed, 29 insertions(+), 188 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5b4f883..620b0a7 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -31,6 +31,8 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+   select HAVE_MEMBLOCK
+   select NO_BOOTMEM
help
  The Alpha is a 64-bit general-purpose processor designed and
  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/kernel/core_irongate.c 
b/arch/alpha/kernel/core_irongate.c
index aec7572..f709866 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -241,8 +242,7 @@ albacore_init_arch(void)
   size / 1024);
}
 #endif
-   reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
-   pci_mem, BOOTMEM_DEFAULT);
+   memblock_reserve(pci_mem, memtop - pci_mem);
printk("irongate_init_arch: temporarily reserving "
"region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
}
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 5576f76..4f0d944 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -312,9 +313,7 @@ setup_memory(void *kernel_end)
 {
struct memclust_struct * cluster;
struct memdesc_struct * memdesc;
-   unsigned long start_kernel_pfn, end_kernel_pfn;
-   unsigned long bootmap_size, bootmap_pages, bootmap_start;
-   unsigned long start, end;
+   unsigned long kernel_size;
unsigned long i;
 
/* Find free clusters, and init and free the bootmem accordingly.  */
@@ -322,6 +321,8 @@ setup_memory(void *kernel_end)
  (hwrpb->mddt_offset + (unsigned long) hwrpb);
 
for_each_mem_cluster(memdesc, cluster, i) {
+   unsigned long end;
+
printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
   i, cluster->usage, cluster->start_pfn,
   cluster->start_pfn + cluster->numpages);
@@ -335,6 +336,9 @@ setup_memory(void *kernel_end)
end = cluster->start_pfn + cluster->numpages;
if (end > max_low_pfn)
max_low_pfn = end;
+
+   memblock_add(PFN_PHYS(cluster->start_pfn),
+cluster->numpages << PAGE_SHIFT);
}
 
/*
@@ -363,87 +367,9 @@ setup_memory(void *kernel_end)
max_low_pfn = mem_size_limit;
}
 
-   /* Find the bounds of kernel memory.  */
-   start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
-   end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
-   bootmap_start = -1;
-
- try_again:
-   if (max_low_pfn <= end_kernel_pfn)
-   panic("not enough memory to boot");
-
-   /* We need to know how many physically contiguous pages
-  we'll need for the bootmap.  */
-   bootmap_pages = bootmem_bootmap_pages(max_low_pfn);
-
-   /* Now find a good region where to allocate the bootmap.  */
-   for_each_mem_cluster(memdesc, cluster, i) {
-   if (cluster->usage & 3)
-   continue;
-
-   start = cluster->start_pfn;
-   end = start + cluster->numpages;
-   if (start >= max_low_pfn)
-   continue;
-   if (end > max_low_pfn)
-   end = max_low_pfn;
-   if (start < start_kernel_pfn) {
-   

Re: [PATCH v2] alpha: switch to NO_BOOTMEM

2018-07-12 Thread Mike Rapoport
(added Matt Turner, sorry, should have done it from the beginning)

Any comments on this?
 
> On Sat, Jun 30, 2018 at 06:13:30PM +0300, Mike Rapoport wrote:
> Replace bootmem allocator with memblock and enable use of NO_BOOTMEM like
> on most other architectures.
> 
> Alpha gets the description of the physical memory from the firmware as an
> array of memory clusters. Each cluster that is not reserved by the firmware
> is added to memblock.memory.
> 
> Once memblock.memory is set up, we reserve the kernel and initrd pages
> with memblock_reserve().
> 
> Since we don't need the bootmem bitmap anymore, the code that finds an
> appropriate place for it is removed.
> 
> The conversion does not take care of NUMA support which is marked broken
> for more than 10 years now.
> 
> Signed-off-by: Mike Rapoport 
> ---
> v2: describe the conversion as per Michal's request
> 
> Tested with qemu-system-alpha. I've added some tweaks to sys_dp264 to force
> memory split for testing with CONFIG_DISCONTIGMEM=y
> 
> The allyesconfig build requires update to DEFERRED_STRUCT_PAGE_INIT
> dependencies [1] which is already in -mm tree.
> 
> [1] https://lkml.org/lkml/2018/6/29/353
> 
>  arch/alpha/Kconfig|   2 +
>  arch/alpha/kernel/core_irongate.c |   4 +-
>  arch/alpha/kernel/setup.c |  98 -
>  arch/alpha/mm/numa.c  | 113 
> +-
>  4 files changed, 29 insertions(+), 188 deletions(-)
> 
> diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
> index 04a4a138ed13..040692a8d433 100644
> --- a/arch/alpha/Kconfig
> +++ b/arch/alpha/Kconfig
> @@ -30,6 +30,8 @@ config ALPHA
>   select ODD_RT_SIGACTION
>   select OLD_SIGSUSPEND
>   select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
> + select HAVE_MEMBLOCK
> + select NO_BOOTMEM
>   help
> The Alpha is a 64-bit general-purpose processor designed and
> marketed by the Digital Equipment Corporation of blessed memory,
> diff --git a/arch/alpha/kernel/core_irongate.c 
> b/arch/alpha/kernel/core_irongate.c
> index aec757250e07..f70986683fc6 100644
> --- a/arch/alpha/kernel/core_irongate.c
> +++ b/arch/alpha/kernel/core_irongate.c
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include 
>  #include 
> @@ -241,8 +242,7 @@ albacore_init_arch(void)
>  size / 1024);
>   }
>  #endif
> - reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
> - pci_mem, BOOTMEM_DEFAULT);
> + memblock_reserve(pci_mem, memtop - pci_mem);
>   printk("irongate_init_arch: temporarily reserving "
>   "region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
>   }
> diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
> index 5576f7646fb6..4f0d94471bc9 100644
> --- a/arch/alpha/kernel/setup.c
> +++ b/arch/alpha/kernel/setup.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -312,9 +313,7 @@ setup_memory(void *kernel_end)
>  {
>   struct memclust_struct * cluster;
>   struct memdesc_struct * memdesc;
> - unsigned long start_kernel_pfn, end_kernel_pfn;
> - unsigned long bootmap_size, bootmap_pages, bootmap_start;
> - unsigned long start, end;
> + unsigned long kernel_size;
>   unsigned long i;
> 
>   /* Find free clusters, and init and free the bootmem accordingly.  */
> @@ -322,6 +321,8 @@ setup_memory(void *kernel_end)
> (hwrpb->mddt_offset + (unsigned long) hwrpb);
> 
>   for_each_mem_cluster(memdesc, cluster, i) {
> + unsigned long end;
> +
>   printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
>  i, cluster->usage, cluster->start_pfn,
>  cluster->start_pfn + cluster->numpages);
> @@ -335,6 +336,9 @@ setup_memory(void *kernel_end)
>   end = cluster->start_pfn + cluster->numpages;
>   if (end > max_low_pfn)
>   max_low_pfn = end;
> +
> + memblock_add(PFN_PHYS(cluster->start_pfn),
> +  cluster->numpages << PAGE_SHIFT);
>   }
> 
>   /*
> @@ -363,87 +367,9 @@ setup_memory(void *kernel_end)
>   max_low_pfn = mem_size_limit;
>   }
> 
> - /* Find the bounds of kernel memory.  */
> - start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
> - end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
> 

Re: [PATCH v2] alpha: switch to NO_BOOTMEM

2018-07-04 Thread Mike Rapoport
Any comments on this?

On Sat, Jun 30, 2018 at 06:13:30PM +0300, Mike Rapoport wrote:
> Replace bootmem allocator with memblock and enable use of NO_BOOTMEM like
> on most other architectures.
> 
> Alpha gets the description of the physical memory from the firmware as an
> array of memory clusters. Each cluster that is not reserved by the firmware
> is added to memblock.memory.
> 
> Once memblock.memory is set up, we reserve the kernel and initrd pages
> with memblock_reserve().
> 
> Since we don't need the bootmem bitmap anymore, the code that finds an
> appropriate place for it is removed.
> 
> The conversion does not take care of NUMA support which is marked broken
> for more than 10 years now.
> 
> Signed-off-by: Mike Rapoport 
> ---
> v2: describe the conversion as per Michal's request
> 
> Tested with qemu-system-alpha. I've added some tweaks to sys_dp264 to force
> memory split for testing with CONFIG_DISCONTIGMEM=y
> 
> The allyesconfig build requires update to DEFERRED_STRUCT_PAGE_INIT
> dependencies [1] which is already in -mm tree.
> 
> [1] https://lkml.org/lkml/2018/6/29/353
> 
>  arch/alpha/Kconfig|   2 +
>  arch/alpha/kernel/core_irongate.c |   4 +-
>  arch/alpha/kernel/setup.c |  98 -
>  arch/alpha/mm/numa.c  | 113 
> +-
>  4 files changed, 29 insertions(+), 188 deletions(-)
> 
> diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
> index 04a4a138ed13..040692a8d433 100644
> --- a/arch/alpha/Kconfig
> +++ b/arch/alpha/Kconfig
> @@ -30,6 +30,8 @@ config ALPHA
>   select ODD_RT_SIGACTION
>   select OLD_SIGSUSPEND
>   select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
> + select HAVE_MEMBLOCK
> + select NO_BOOTMEM
>   help
> The Alpha is a 64-bit general-purpose processor designed and
> marketed by the Digital Equipment Corporation of blessed memory,
> diff --git a/arch/alpha/kernel/core_irongate.c 
> b/arch/alpha/kernel/core_irongate.c
> index aec757250e07..f70986683fc6 100644
> --- a/arch/alpha/kernel/core_irongate.c
> +++ b/arch/alpha/kernel/core_irongate.c
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include 
>  #include 
> @@ -241,8 +242,7 @@ albacore_init_arch(void)
>  size / 1024);
>   }
>  #endif
> - reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
> - pci_mem, BOOTMEM_DEFAULT);
> + memblock_reserve(pci_mem, memtop - pci_mem);
>   printk("irongate_init_arch: temporarily reserving "
>   "region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
>   }
> diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
> index 5576f7646fb6..4f0d94471bc9 100644
> --- a/arch/alpha/kernel/setup.c
> +++ b/arch/alpha/kernel/setup.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -312,9 +313,7 @@ setup_memory(void *kernel_end)
>  {
>   struct memclust_struct * cluster;
>   struct memdesc_struct * memdesc;
> - unsigned long start_kernel_pfn, end_kernel_pfn;
> - unsigned long bootmap_size, bootmap_pages, bootmap_start;
> - unsigned long start, end;
> + unsigned long kernel_size;
>   unsigned long i;
> 
>   /* Find free clusters, and init and free the bootmem accordingly.  */
> @@ -322,6 +321,8 @@ setup_memory(void *kernel_end)
> (hwrpb->mddt_offset + (unsigned long) hwrpb);
> 
>   for_each_mem_cluster(memdesc, cluster, i) {
> + unsigned long end;
> +
>   printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
>  i, cluster->usage, cluster->start_pfn,
>  cluster->start_pfn + cluster->numpages);
> @@ -335,6 +336,9 @@ setup_memory(void *kernel_end)
>   end = cluster->start_pfn + cluster->numpages;
>   if (end > max_low_pfn)
>   max_low_pfn = end;
> +
> + memblock_add(PFN_PHYS(cluster->start_pfn),
> +  cluster->numpages << PAGE_SHIFT);
>   }
> 
>   /*
> @@ -363,87 +367,9 @@ setup_memory(void *kernel_end)
>   max_low_pfn = mem_size_limit;
>   }
> 
> - /* Find the bounds of kernel memory.  */
> - start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
> - end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
> - bootmap_start = -1;
> -
> - try_again:
> - if (max_low_pfn <= end_kernel_pfn)

[PATCH v2] alpha: switch to NO_BOOTMEM

2018-06-30 Thread Mike Rapoport
Replace bootmem allocator with memblock and enable use of NO_BOOTMEM like
on most other architectures.

Alpha gets the description of the physical memory from the firmware as an
array of memory clusters. Each cluster that is not reserved by the firmware
is added to memblock.memory.

Once the memblock.memory is set up, we reserve the kernel and initrd pages
with memblock reserve.

Since we don't need the bootmem bitmap anymore, the code that finds an
appropriate place is removed.

The conversion does not take care of NUMA support which is marked broken
for more than 10 years now.

Signed-off-by: Mike Rapoport 
---
v2: describe the conversion as per Michal's request

Tested with qemu-system-alpha. I've added some tweaks to sys_dp264 to force
memory split for testing with CONFIG_DISCONTIGMEM=y

The allyesconfig build requires update to DEFERRED_STRUCT_PAGE_INIT
dependencies [1] which is already in -mm tree.

[1] https://lkml.org/lkml/2018/6/29/353
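
For readers who prefer the shape of the conversion up front, the pattern
the diff below implements boils down to the following sketch (hypothetical
function name; the alpha internals -- hwrpb, memclust_struct,
for_each_mem_cluster(), KERNEL_START_PHYS -- are taken on faith from
arch/alpha/kernel/setup.c, and the initrd handling is left out):

#include <linux/memblock.h>
#include <linux/pfn.h>

/* Editor's sketch only: condensed from the diff below, not the
 * literal patch. */
static void __init setup_memory_sketch(void *kernel_end)
{
	struct memclust_struct *cluster;
	struct memdesc_struct *memdesc = (struct memdesc_struct *)
		(hwrpb->mddt_offset + (unsigned long)hwrpb);
	unsigned long i;

	for_each_mem_cluster(memdesc, cluster, i) {
		/* Usage bits 0/1 mark console/PALcode-reserved and
		 * non-volatile clusters; skip those. */
		if (cluster->usage & 3)
			continue;
		/* Everything else is usable RAM as far as memblock
		 * is concerned. */
		memblock_add(PFN_PHYS(cluster->start_pfn),
			     cluster->numpages << PAGE_SHIFT);
	}

	/* Carve the kernel image back out so the page allocator
	 * never hands it to anyone. */
	memblock_reserve(KERNEL_START_PHYS,
			 virt_to_phys(kernel_end) - KERNEL_START_PHYS);
}

Once memblock.memory and memblock.reserved are populated this way, the
bootmem bitmap, and all the code that hunted for a contiguous region to
hold it, has nothing left to do.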

 arch/alpha/Kconfig|   2 +
 arch/alpha/kernel/core_irongate.c |   4 +-
 arch/alpha/kernel/setup.c |  98 -
 arch/alpha/mm/numa.c  | 113 +-
 4 files changed, 29 insertions(+), 188 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 04a4a138ed13..040692a8d433 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -30,6 +30,8 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+   select HAVE_MEMBLOCK
+   select NO_BOOTMEM
help
  The Alpha is a 64-bit general-purpose processor designed and
  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index aec757250e07..f70986683fc6 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include <linux/memblock.h>
 
 #include 
 #include 
@@ -241,8 +242,7 @@ albacore_init_arch(void)
   size / 1024);
}
 #endif
-   reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
-   pci_mem, BOOTMEM_DEFAULT);
+   memblock_reserve(pci_mem, memtop - pci_mem);
printk("irongate_init_arch: temporarily reserving "
"region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
}
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 5576f7646fb6..4f0d94471bc9 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include <linux/memblock.h>
 #include 
 #include 
 #include 
@@ -312,9 +313,7 @@ setup_memory(void *kernel_end)
 {
struct memclust_struct * cluster;
struct memdesc_struct * memdesc;
-   unsigned long start_kernel_pfn, end_kernel_pfn;
-   unsigned long bootmap_size, bootmap_pages, bootmap_start;
-   unsigned long start, end;
+   unsigned long kernel_size;
unsigned long i;
 
/* Find free clusters, and init and free the bootmem accordingly.  */
@@ -322,6 +321,8 @@ setup_memory(void *kernel_end)
  (hwrpb->mddt_offset + (unsigned long) hwrpb);
 
for_each_mem_cluster(memdesc, cluster, i) {
+   unsigned long end;
+
printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
   i, cluster->usage, cluster->start_pfn,
   cluster->start_pfn + cluster->numpages);
@@ -335,6 +336,9 @@ setup_memory(void *kernel_end)
end = cluster->start_pfn + cluster->numpages;
if (end > max_low_pfn)
max_low_pfn = end;
+
+   memblock_add(PFN_PHYS(cluster->start_pfn),
+cluster->numpages << PAGE_SHIFT);
}
 
/*
@@ -363,87 +367,9 @@ setup_memory(void *kernel_end)
max_low_pfn = mem_size_limit;
}
 
-   /* Find the bounds of kernel memory.  */
-   start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
-   end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
-   bootmap_start = -1;
-
- try_again:
-   if (max_low_pfn <= end_kernel_pfn)
-   panic("not enough memory to boot");
-
-   /* We need to know how many physically contiguous pages
-  we'll need for the bootmap.  */
-   bootmap_pages = bootmem_bootmap_pages(max_low_pfn);
-
-   /* Now find a good region where to allocate the bootmap.  */
-   for_each_mem_cluster(memdesc, cluster, i) {
-   if (cluster->usage & 3)
-   continue;
-
-   start = cluster->start_pfn;
-   end = start + cluster->numpages;
-   if (start >= max_low_pfn)
-   continue;

Re: [PATCH] alpha: switch to NO_BOOTMEM

2018-06-29 Thread Mike Rapoport
On Thu, Jun 28, 2018 at 05:38:29AM +0800, kbuild test robot wrote:
> Hi Mike,
> 
> I love your patch! Yet something to improve:
> 
> [auto build test ERROR on linus/master]
> [also build test ERROR on v4.18-rc2 next-20180627]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Mike-Rapoport/alpha-switch-to-NO_BOOTMEM/20180627-194800
> config: alpha-allyesconfig (attached as .config)
> compiler: alpha-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> reproduce:
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> GCC_VERSION=7.2.0 make.cross ARCH=alpha 
> 
> All error/warnings (new ones prefixed by >>):
> 
>mm/page_alloc.c: In function 'update_defer_init':
> >> mm/page_alloc.c:321:14: error: 'PAGES_PER_SECTION' undeclared (first use 
> >> in this function); did you mean 'USEC_PER_SEC'?
>  (pfn & (PAGES_PER_SECTION - 1)) == 0) {
>  ^
>  USEC_PER_SEC

PAGES_PER_SECTION is defined only for SPARSEMEM, with the exception of
x86-32, which defines it for DISCONTIGMEM as well. That said, any
architecture that can have DISCONTIGMEM=y && NO_BOOTMEM=y will fail the
build with DEFERRED_STRUCT_PAGE_INIT enabled.

The simplest solution seems to be making DEFERRED_STRUCT_PAGE_INIT
explicitly depend on SPARSEMEM rather than on !FLATMEM. The downside is
that deferred struct page initialization won't be available for x86-32
NUMA setups.

Thoughts?
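
Concretely, the proposed dependency change would look roughly like this in
mm/Kconfig (a sketch of the idea, not the final patch; the surviving
"depends on" lines are quoted from memory of 4.18-era mm/Kconfig and may
differ):

config DEFERRED_STRUCT_PAGE_INIT
	bool "Defer initialisation of struct pages to kthreads"
	default n
	# was: depends on NO_BOOTMEM and depends on !FLATMEM
	depends on SPARSEMEM
	depends on !NEED_PER_CPU_KM

That restricts the option to configurations where PAGES_PER_SECTION is
guaranteed to exist, at the cost of the x86-32 NUMA case mentioned above.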

>mm/page_alloc.c:321:14: note: each undeclared identifier is reported only 
> once for each function it appears in
>In file included from include/linux/cache.h:5:0,
> from include/linux/printk.h:9,
> from include/linux/kernel.h:14,
> from include/asm-generic/bug.h:18,
> from arch/alpha/include/asm/bug.h:23,
> from include/linux/bug.h:5,
> from include/linux/mmdebug.h:5,
> from include/linux/mm.h:9,
> from mm/page_alloc.c:18:
>mm/page_alloc.c: In function 'deferred_grow_zone':
>mm/page_alloc.c:1624:52: error: 'PAGES_PER_SECTION' undeclared (first use 
> in this function); did you mean 'USEC_PER_SEC'?
>  unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
>^
>include/uapi/linux/kernel.h:11:47: note: in definition of macro 
> '__ALIGN_KERNEL_MASK'
> #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
>   ^~~~
> >> include/linux/kernel.h:58:22: note: in expansion of macro '__ALIGN_KERNEL'
> #define ALIGN(x, a)  __ALIGN_KERNEL((x), (a))
>  ^~
> >> mm/page_alloc.c:1624:34: note: in expansion of macro 'ALIGN'
>  unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
>  ^
>In file included from include/asm-generic/bug.h:18:0,
> from arch/alpha/include/asm/bug.h:23,
> from include/linux/bug.h:5,
> from include/linux/mmdebug.h:5,
> from include/linux/mm.h:9,
> from mm/page_alloc.c:18:
>mm/page_alloc.c: In function 'free_area_init_node':
>mm/page_alloc.c:6379:50: error: 'PAGES_PER_SECTION' undeclared (first use 
> in this function); did you mean 'USEC_PER_SEC'?
>  pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
>  ^
>include/linux/kernel.h:812:22: note: in definition of macro '__typecheck'
>   (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
>  ^
>include/linux/kernel.h:836:24: note: in expansion of macro '__safe_cmp'
>  __builtin_choose_expr(__safe_cmp(x, y), \
>^~
>include/linux/kernel.h:904:27: note: in expansion of macro '__careful_cmp'
> #define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
>   ^
> >> mm/page_alloc.c:6379:29: note: in expansion of macro 'min_t'
>  pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
> ^
>include/linux/kernel.h:836:2: error: first argument to 
> '__builtin_choose_expr' not a constant
>  __builtin_choose_expr(__safe_cmp(x, y), \
> 

Re: [PATCH] alpha: switch to NO_BOOTMEM

2018-06-27 Thread Mike Rapoport
On Wed, Jun 27, 2018 at 01:38:51PM +0200, Michal Hocko wrote:
> On Wed 27-06-18 14:32:48, Mike Rapoport wrote:
> > Replace bootmem allocator with memblock and enable use of NO_BOOTMEM like
> > on most other architectures.
> > 
> > The conversion does not take care of NUMA support which is marked broken
> > for more than 10 years now.
> 
> It would be great to describe how the conversion is done, at least at a
> high level.

It's straightforward, isn't it? :)

Sure, no problem. I'll just wait for other feedback before sending v2.

> -- 
> Michal Hocko
> SUSE Labs
> 

-- 
Sincerely yours,
Mike.



[PATCH] alpha: switch to NO_BOOTMEM

2018-06-27 Thread Mike Rapoport
Replace bootmem allocator with memblock and enable use of NO_BOOTMEM like
on most other architectures.

The conversion does not take care of NUMA support which is marked broken
for more than 10 years now.

Signed-off-by: Mike Rapoport 
---

Tested with qemu-system-alpha. I've added some tweaks to sys_dp264 to force
memory split for testing with CONFIG_DISCONTIGMEM=y

 arch/alpha/Kconfig|   2 +
 arch/alpha/kernel/core_irongate.c |   4 +-
 arch/alpha/kernel/setup.c |  98 -
 arch/alpha/mm/numa.c  | 113 +-
 4 files changed, 29 insertions(+), 188 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 04a4a138ed13..040692a8d433 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -30,6 +30,8 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+   select HAVE_MEMBLOCK
+   select NO_BOOTMEM
help
  The Alpha is a 64-bit general-purpose processor designed and
  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index aec757250e07..f70986683fc6 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include <linux/memblock.h>
 
 #include 
 #include 
@@ -241,8 +242,7 @@ albacore_init_arch(void)
   size / 1024);
}
 #endif
-   reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
-   pci_mem, BOOTMEM_DEFAULT);
+   memblock_reserve(pci_mem, memtop - pci_mem);
printk("irongate_init_arch: temporarily reserving "
"region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
}
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 5576f7646fb6..4f0d94471bc9 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include <linux/memblock.h>
 #include 
 #include 
 #include 
@@ -312,9 +313,7 @@ setup_memory(void *kernel_end)
 {
struct memclust_struct * cluster;
struct memdesc_struct * memdesc;
-   unsigned long start_kernel_pfn, end_kernel_pfn;
-   unsigned long bootmap_size, bootmap_pages, bootmap_start;
-   unsigned long start, end;
+   unsigned long kernel_size;
unsigned long i;
 
/* Find free clusters, and init and free the bootmem accordingly.  */
@@ -322,6 +321,8 @@ setup_memory(void *kernel_end)
  (hwrpb->mddt_offset + (unsigned long) hwrpb);
 
for_each_mem_cluster(memdesc, cluster, i) {
+   unsigned long end;
+
printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
   i, cluster->usage, cluster->start_pfn,
   cluster->start_pfn + cluster->numpages);
@@ -335,6 +336,9 @@ setup_memory(void *kernel_end)
end = cluster->start_pfn + cluster->numpages;
if (end > max_low_pfn)
max_low_pfn = end;
+
+   memblock_add(PFN_PHYS(cluster->start_pfn),
+cluster->numpages << PAGE_SHIFT);
}
 
/*
@@ -363,87 +367,9 @@ setup_memory(void *kernel_end)
max_low_pfn = mem_size_limit;
}
 
-   /* Find the bounds of kernel memory.  */
-   start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
-   end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
-   bootmap_start = -1;
-
- try_again:
-   if (max_low_pfn <= end_kernel_pfn)
-   panic("not enough memory to boot");
-
-   /* We need to know how many physically contiguous pages
-  we'll need for the bootmap.  */
-   bootmap_pages = bootmem_bootmap_pages(max_low_pfn);
-
-   /* Now find a good region where to allocate the bootmap.  */
-   for_each_mem_cluster(memdesc, cluster, i) {
-   if (cluster->usage & 3)
-   continue;
-
-   start = cluster->start_pfn;
-   end = start + cluster->numpages;
-   if (start >= max_low_pfn)
-   continue;
-   if (end > max_low_pfn)
-   end = max_low_pfn;
-   if (start < start_kernel_pfn) {
-   if (end > end_kernel_pfn
-   && end - end_kernel_pfn >= bootmap_pages) {
-   bootmap_start = end_kernel_pfn;
-   break;
-   } else if (end > start_kernel_pfn)
-   end = start_kernel_pfn;
-   } else if (start < end_kernel_pfn)
-   

Re: [PATCH 00/32] docs/vm: convert to ReST format

2018-04-15 Thread Mike Rapoport
On Fri, Apr 13, 2018 at 01:55:51PM -0600, Jonathan Corbet wrote:
> Sorry for the silence, I'm pedaling as fast as I can, honest...
> 
> On Sun, 1 Apr 2018 09:38:58 +0300
> Mike Rapoport <r...@linux.vnet.ibm.com> wrote:
> 
> > My thinking was to start with mechanical RST conversion and then to start
> > working on the contents and ordering of the documentation. Some of the
> > existing files, e.g. ksm.txt, can be moved as is into the appropriate
> > places, others, like transhuge.txt should be at least split into admin/user
> > and developer guides.
> > 
> > Another problem with many of the existing mm docs is that they are rather
> > developer notes and it wouldn't be really straightforward to assign them
> > to a particular topic.
> 
> All this sounds good.
> 
> > I believe that keeping the mm docs together will give better visibility of
> > what (little) mm documentation we have and will make the updates easier.
> > The documents that fit well into a certain topic could be linked there. For
> > instance:
> 
> ...but this sounds like just the opposite...?  
> 
> I've had this conversation with folks in a number of subsystems.
> Everybody wants to keep their documentation together in one place - it's
> easier for the developers after all.  But for the readers I think it's
> objectively worse.  It perpetuates the mess that Documentation/ is, and
> forces readers to go digging through all kinds of inappropriate material
> in the hope of finding something that tells them what they need to know.
> 
> So I would *really* like to split the documentation by audience, as has
> been done for a number of other kernel subsystems (and eventually all, I
> hope).
> 
> I can go ahead and apply the RST conversion, that seems like a step in
> the right direction regardless.  But I sure hope we don't really have to
> keep it as an unorganized jumble of stuff...

I didn't mean we should keep it as an unorganized jumble of stuff, and I
agree that splitting the documentation by audience is better, because
developers already know how to find it :)

I just thought that moving the docs into their proper places should not be
done immediately after the mechanical ReST conversion, but rather after
improving the contents. Although I'd agree that part of the documentation
in Documentation/vm is in pretty good shape already.

 
> Thanks,
> 
> jon
> 

-- 
Sincerely yours,
Mike.



Re: [PATCH 00/32] docs/vm: convert to ReST format

2018-04-15 Thread Mike Rapoport
On Fri, Apr 13, 2018 at 01:21:08PM -0700, Matthew Wilcox wrote:
> On Fri, Apr 13, 2018 at 01:55:51PM -0600, Jonathan Corbet wrote:
> > > I believe that keeping the mm docs together will give better visibility of
> > > what (little) mm documentation we have and will make the updates easier.
> > > The documents that fit well into a certain topic could be linked there. 
> > > For
> > > instance:
> > 
> > ...but this sounds like just the opposite...?  
> > 
> > I've had this conversation with folks in a number of subsystems.
> > Everybody wants to keep their documentation together in one place - it's
> > easier for the developers after all.  But for the readers I think it's
> > objectively worse.  It perpetuates the mess that Documentation/ is, and
> > forces readers to go digging through all kinds of inappropriate material
> > in the hope of finding something that tells them what they need to know.
> > 
> > So I would *really* like to split the documentation by audience, as has
> > been done for a number of other kernel subsystems (and eventually all, I
> > hope).
> > 
> > I can go ahead and apply the RST conversion, that seems like a step in
> > the right direction regardless.  But I sure hope we don't really have to
> > keep it as an unorganized jumble of stuff...
> 
> I've started on Documentation/core-api/memory.rst which covers just
> memory allocation.  So far it has the Overview and GFP flags sections
> written and an outline for 'The slab allocator', 'The page allocator',
> 'The vmalloc allocator' and 'The page_frag allocator'.  And typing this
> up, I realise we need a 'The percpu allocator'.  I'm thinking that this
> is *not* the right document for the DMA memory allocators (although it
> should link to that documentation).
> 
> I suspect the existing Documentation/vm/ should probably stay as an
> unorganised jumble of stuff.  Developers mostly talking to other MM
> developers.  Stuff that people outside the MM fraternity should know
> about needs to be centrally documented.  By all means convert it to
> ReST ... I don't much care, and it may make it easier to steal bits
> or link to it from the organised documentation.
 
The existing Documentation/vm contains different types of documents. Some
are indeed "Developers mostly talking to other MM developers". Some are
really user/administrator guides. Others are somewhat in between.

I took another look at what's there and I think we can actually move part
of Documentation/vm to Documentation/admin-guide. We can add
Documentation/admin-guide/vm/ and title it "Memory Management Tuning" or
something like that. And several files, e.g. hugetlbpage, ksm, and
soft-dirty, can be moved there.

-- 
Sincerely yours,
Mike.


