[RFC PATCH V2 4/4] powerpc/mm/iommu: Allow migration of cma allocated pages during mm_iommu_get

2018-09-05 Thread Aneesh Kumar K.V
The current code doesn't do page migration if the allocated page is a compound page.
With HugeTLB migration support, we can end up allocating hugetlb pages from the
CMA region. THP pages can also be allocated from the CMA region. This patch updates
the code to handle compound pages correctly.

This uses the new helper get_user_pages_cma_migrate. It does a single get_user_pages
with the right count, instead of one get_user_pages per page. That avoids
reading the page table multiple times.

The patch also converts the hpas member of mm_iommu_table_group_mem_t to a union.
We use the same storage location to store pointers to struct page. We cannot
update all the code paths to use struct page *, because we access hpas in real mode
and we can't do the struct page * to pfn conversion in real mode.
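
For illustration, the in-place conversion is essentially the following (a
sketch only, not a hunk from this patch, using the names introduced above):

	for (i = 0; i < entries; ++i) {
		struct page *page = mem->hpages[i];

		/* overwrite the struct page pointer with the physical address */
		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
	}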

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/mmu_context_iommu.c | 124 +---
 1 file changed, 37 insertions(+), 87 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_iommu.c 
b/arch/powerpc/mm/mmu_context_iommu.c
index f472965f7638..607acd03ab06 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DEFINE_MUTEX(mem_list_mutex);
 
@@ -30,8 +31,18 @@ struct mm_iommu_table_group_mem_t {
atomic64_t mapped;
unsigned int pageshift;
u64 ua; /* userspace address */
-   u64 entries;/* number of entries in hpas[] */
-   u64 *hpas;  /* vmalloc'ed */
+   u64 entries;/* number of entries in hpages[] */
+   /*
+* in mm_iommu_get we temporarily use this to store
+* the struct page addresses.
+*
+* We need to convert ua to hpa in real mode. Make it
+* simpler by storing the physical address.
+*/
+   union {
+   struct page **hpages;   /* vmalloc'ed */
+   phys_addr_t *hpas;
+   };
 };
 
 static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -74,63 +85,12 @@ bool mm_iommu_preregistered(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
 
-/*
- * Taken from alloc_migrate_target with changes to remove CMA allocations
- */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
-{
-   gfp_t gfp_mask = GFP_USER;
-   struct page *new_page;
-
-   if (PageCompound(page))
-   return NULL;
-
-   if (PageHighMem(page))
-   gfp_mask |= __GFP_HIGHMEM;
-
-   /*
-* We don't want the allocation to force an OOM if possibe
-*/
-   new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
-   return new_page;
-}
-
-static int mm_iommu_move_page_from_cma(struct page *page)
-{
-   int ret = 0;
-   LIST_HEAD(cma_migrate_pages);
-
-   /* Ignore huge pages for now */
-   if (PageCompound(page))
-   return -EBUSY;
-
-   lru_add_drain();
-   ret = isolate_lru_page(page);
-   if (ret)
-   return ret;
-
-   list_add(&page->lru, &cma_migrate_pages);
-   put_page(page); /* Drop the gup reference */
-
-   ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
-   NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
-   if (ret) {
-   if (!list_empty(&cma_migrate_pages))
-   putback_movable_pages(&cma_migrate_pages);
-   }
-
-   return 0;
-}
-
 long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long 
entries,
struct mm_iommu_table_group_mem_t **pmem)
 {
struct mm_iommu_table_group_mem_t *mem;
-   long i, j, ret = 0, locked_entries = 0;
+   long i, ret = 0, locked_entries = 0;
unsigned int pageshift;
-   unsigned long flags;
-   unsigned long cur_ua;
-   struct page *page = NULL;
 
mutex_lock(&mem_list_mutex);
 
@@ -177,47 +137,37 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, 
unsigned long entries,
goto unlock_exit;
}
 
+   ret = get_user_pages_cma_migrate(ua, entries, 1, mem->hpages);
+   if (ret != entries) {
+   /* free the reference taken */
+   for (i = 0; i < ret; i++)
+   put_page(mem->hpages[i]);
+
+   vfree(mem->hpas);
+   kfree(mem);
+   ret = -EFAULT;
+   goto unlock_exit;
+   } else
+   ret = 0;
+
+   pageshift = PAGE_SHIFT;
for (i = 0; i < entries; ++i) {
-   cur_ua = ua + (i << PAGE_SHIFT);
-   if (1 != get_user_pages_fast(cur_ua,
-   1/* pages */, 1/* iswrite */, &page)) {
-   ret = -EFAULT;
-   for (j = 0; j < i; ++j)
-   put_page(pfn_to_page(mem->hpas[j] >>
-   PAGE_SHIFT));
-   vfree(mem->hpas);
-   

[RFC PATCH V2 3/4] powerpc/mm/iommu: Allow large IOMMU page size only for hugetlb backing

2018-09-05 Thread Aneesh Kumar K.V
THP pages can get split along different code paths. An incremented reference
count does imply we will not split the compound page. But the pmd entry can
still be converted to level 4 pte entries. Keep the code simpler by allowing a
large IOMMU page size only if the guest RAM is backed by hugetlb pages.
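
For reference, the resulting check condenses to the following (illustrative;
see the hunk below for the exact change):

	pageshift = PAGE_SHIFT;
	if (mem->pageshift > PAGE_SHIFT && PageHuge(page)) {
		/* hugetlb pages don't get split, so the head page order is stable */
		pageshift = compound_order(compound_head(page)) + PAGE_SHIFT;
	}
	mem->pageshift = min(mem->pageshift, pageshift);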

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/mmu_context_iommu.c | 16 ++--
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_iommu.c 
b/arch/powerpc/mm/mmu_context_iommu.c
index c9ee9e23845f..f472965f7638 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -212,21 +212,9 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, 
unsigned long entries,
}
 populate:
pageshift = PAGE_SHIFT;
-   if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
-   pte_t *pte;
+   if (mem->pageshift > PAGE_SHIFT && PageHuge(page)) {
struct page *head = compound_head(page);
-   unsigned int compshift = compound_order(head);
-   unsigned int pteshift;
-
-   local_irq_save(flags); /* disables as well */
-   pte = find_linux_pte(mm->pgd, cur_ua, NULL, );
-
-   /* Double check it is still the same pinned page */
-   if (pte && pte_page(*pte) == head &&
-   pteshift == compshift + PAGE_SHIFT)
-   pageshift = max_t(unsigned int, pteshift,
-   PAGE_SHIFT);
-   local_irq_restore(flags);
+   pageshift = compound_order(head) + PAGE_SHIFT;
}
mem->pageshift = min(mem->pageshift, pageshift);
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
-- 
2.17.1



[RFC PATCH V2 2/4] mm: Add get_user_pages_cma_migrate

2018-09-05 Thread Aneesh Kumar K.V
This helper does a get_user_pages_fast and, if it finds pages in the CMA area,
it will try to migrate them before taking a page reference. This makes sure that
we don't keep non-movable pages (due to an elevated page reference count) in the
CMA area. Not being able to move pages out of the CMA area results in CMA
allocation failures.
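
A minimal caller sketch (mirroring how patch 4/4 uses the helper; the pages
array allocation here is only illustrative):

	struct page **pages = vzalloc(entries * sizeof(struct page *));
	int i, ret;

	ret = get_user_pages_cma_migrate(ua, entries, 1 /* write */, pages);
	if (ret != entries) {
		/* drop the references taken on the pages we did get */
		for (i = 0; i < ret; i++)
			put_page(pages[i]);
	}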

Signed-off-by: Aneesh Kumar K.V 
---
 include/linux/migrate.h |   3 ++
 mm/migrate.c| 108 
 2 files changed, 111 insertions(+)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f2b4abbca55e..d82b35afd2eb 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -286,6 +286,9 @@ static inline int migrate_vma(const struct migrate_vma_ops 
*ops,
 }
 #endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
 
+extern int get_user_pages_cma_migrate(unsigned long start, int nr_pages, int 
write,
+ struct page **pages);
+
 #endif /* CONFIG_MIGRATION */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index c27e97b5b69d..c26288d407ae 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3008,3 +3008,111 @@ int migrate_vma(const struct migrate_vma_ops *ops,
 }
 EXPORT_SYMBOL(migrate_vma);
 #endif /* defined(MIGRATE_VMA_HELPER) */
+
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+   /*
+* We want to make sure we allocate the new page from the same node
+* as the source page.
+*/
+   int nid = page_to_nid(page);
+   gfp_t gfp_mask = GFP_USER | __GFP_THISNODE;
+
+   if (PageHighMem(page))
+   gfp_mask |= __GFP_HIGHMEM;
+
+   if (PageHuge(page)) {
+
+   struct hstate *h = page_hstate(page);
+   /*
+* We don't want to dequeue from the pool because pool pages 
will
+* mostly be from the CMA region.
+*/
+   return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+
+   } else if (PageTransHuge(page)) {
+   struct page *thp;
+   gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_THISNODE;
+
+   /*
+* Remove the movable mask so that we don't allocate from
+* CMA area again.
+*/
+   thp_gfpmask &= ~__GFP_MOVABLE;
+   thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
+   }
+
+   return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+int get_user_pages_cma_migrate(unsigned long start, int nr_pages, int write,
+  struct page **pages)
+{
+   int i, ret;
+   bool drain_allow = true;
+   bool migrate_allow = true;
+   LIST_HEAD(cma_page_list);
+
+get_user_again:
+   ret = get_user_pages_fast(start, nr_pages, write, pages);
+   if (ret <= 0)
+   return ret;
+
+   for (i = 0; i < ret; ++i) {
+   /*
+* If we get a page from the CMA zone, since we are going to
+* be pinning these entries, we might as well move them out
+* of the CMA zone if possible.
+*/
+   if (is_migrate_cma_page(pages[i]) && migrate_allow) {
+   if (PageHuge(pages[i]))
+   isolate_huge_page(pages[i], &cma_page_list);
+   else {
+   struct page *head = compound_head(pages[i]);
+
+   if (!PageLRU(head) && drain_allow) {
+   lru_add_drain_all();
+   drain_allow = false;
+   }
+
+   if (!isolate_lru_page(head)) {
+   list_add_tail(&head->lru, &cma_page_list);
+   mod_node_page_state(page_pgdat(head),
+   NR_ISOLATED_ANON +
+   
page_is_file_cache(head),
+   
hpage_nr_pages(head));
+   }
+   }
+   }
+   }
+   if (!list_empty(&cma_page_list)) {
+   /*
+* drop the above get_user_pages reference.
+*/
+   for (i = 0; i < ret; ++i)
+   put_page(pages[i]);
+
+   if (migrate_pages(&cma_page_list, new_non_cma_page,
+ NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+   /*
+* some of the pages failed migration. Do get_user_pages
+* without migration.
+*/
+   migrate_allow = false;
+
+   if 

[RFC PATCH V2 1/4] mm: Export alloc_migrate_huge_page

2018-09-05 Thread Aneesh Kumar K.V
We want to use this to support customized huge page migration.

Signed-off-by: Aneesh Kumar K.V 
---
 include/linux/hugetlb.h | 2 ++
 mm/hugetlb.c| 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c39d9170a8a0..98c9c6dc308c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -357,6 +357,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int 
preferred_nid,
nodemask_t *nmask);
 struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+int nid, nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 47566bb0b4b1..1b3f8628 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1586,8 +1586,8 @@ static struct page *alloc_surplus_huge_page(struct hstate 
*h, gfp_t gfp_mask,
return page;
 }
 
-static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
-   int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+int nid, nodemask_t *nmask)
 {
struct page *page;
 
-- 
2.17.1



Re: [RFC PATCH 00/29] mm: remove bootmem allocator

2018-09-05 Thread Greentime Hu
Mike Rapoport wrote on Thursday, 6 September 2018 at 12:04 AM:
>
> Hi,
>
> These patches switch early memory managment to use memblock directly
> without any bootmem compatibility wrappers. As the result both bootmem and
> nobootmem are removed.
>
> There are still a couple of things to sort out, the most important is the
> removal of bootmem usage in MIPS.
>
> Still, IMHO, the series is in sufficient state to post and get the early
> feedback.
>
> The patches are build-tested with defconfig for most architectures (I
> couldn't find a compiler for nds32 and unicore32) and boot-tested on x86
> VM.
>
Hi Mike,

There are nds32 toolchains.
https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/x86_64/8.1.0/x86_64-gcc-8.1.0-nolibc-nds32le-linux.tar.gz
https://github.com/vincentzwc/prebuilt-nds32-toolchain/releases/download/20180521/nds32le-linux-glibc-v3-upstream.tar.gz

Sorry, we have no qemu yet.


Re: [PATCH 3/3] mm: optimise pte dirty/accessed bit setting by demand based pte insertion

2018-09-05 Thread Guenter Roeck

On 09/05/2018 03:18 PM, Nicholas Piggin wrote:

On Wed, 5 Sep 2018 07:29:51 -0700
Guenter Roeck  wrote:


Hi,

On Tue, Aug 28, 2018 at 09:20:34PM +1000, Nicholas Piggin wrote:

Similarly to the previous patch, this tries to optimise dirty/accessed
bits in ptes to avoid access costs of hardware setting them.
   


This patch results in silent nios2 boot failures, silent meaning that
the boot stalls.

...
Unpacking initramfs...
Freeing initrd memory: 2168K
workingset: timestamp_bits=30 max_order=15 bucket_order=0
jffs2: version 2.2. (NAND) © 2001-2006 Red Hat, Inc.
random: fast init done
random: crng init done

[no further activity until the qemu session is aborted]

Reverting the patch fixes the problem. Bisect log is attached.


Thanks for bisecting it, I'll try to reproduce. Just qemu with no
obscure options? Interesting that it's hit nios2 but apparently not
other archs (yet).



Nothing special. See 
https://github.com/groeck/linux-build-test/tree/master/rootfs/nios2/.

Guenter


[PATCH v2 7/9] powerpc: enable building all dtbs

2018-09-05 Thread Rob Herring
Enable the 'dtbs' target for powerpc. This allows building all the dts
files in arch/powerpc/boot/dts/ when COMPILE_TEST and OF_ALL_DTBS are
enabled.

Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring 
---
Please ack so I can take the whole series via the DT tree.

 arch/powerpc/boot/dts/Makefile | 5 +
 arch/powerpc/boot/dts/fsl/Makefile | 4 
 2 files changed, 9 insertions(+)
 create mode 100644 arch/powerpc/boot/dts/fsl/Makefile

diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile
index f66554cd5c45..fb335d05aae8 100644
--- a/arch/powerpc/boot/dts/Makefile
+++ b/arch/powerpc/boot/dts/Makefile
@@ -1 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+
+subdir-y += fsl
+
+dtstree:= $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard 
$(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/dts/fsl/Makefile 
b/arch/powerpc/boot/dts/fsl/Makefile
new file mode 100644
index ..3bae982641e9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+dtstree:= $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard 
$(dtstree)/*.dts))
--
2.17.1


[PATCH v2 6/9] kbuild: consolidate Devicetree dtb build rules

2018-09-05 Thread Rob Herring
There is nothing arch specific about building dtb files other than their
location under /arch/*/boot/dts/. Keeping each arch aligned is a pain.
The dependencies and supported targets are all slightly different.
Also, a cross-compiler for each arch is needed, but really the host
compiler preprocessor is perfectly fine for building dtbs. Move the
build rules to a common location and remove the arch specific ones. This
is done in a single step to avoid warnings about overriding rules.

The build dependencies had been a mixture of 'scripts' and/or 'prepare'.
These pull in several dependencies some of which need a target compiler
(specifically devicetable-offsets.h) and aren't needed to build dtbs.
All that is really needed is dtc, so adjust the dependencies to only be
dtc.

This change enables support for 'dtbs_install' on some arches which were
missing the target.

Cc: Masahiro Yamada 
Cc: Michal Marek 
Cc: Vineet Gupta 
Cc: Russell King 
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Yoshinori Sato 
Cc: Michal Simek 
Cc: Ralf Baechle 
Cc: Paul Burton 
Cc: James Hogan 
Cc: Ley Foon Tan 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Chris Zankel 
Cc: Max Filippov 
Cc: linux-kbu...@vger.kernel.org
Cc: linux-snps-...@lists.infradead.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: uclinux-h8-de...@lists.sourceforge.jp
Cc: linux-m...@linux-mips.org
Cc: nios2-...@lists.rocketboards.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-xte...@linux-xtensa.org
Signed-off-by: Rob Herring 
---
Please ack so I can take the whole series via the DT tree.

v2:
 - Fix $arch/boot/dts path check for out of tree builds
 - Fix dtc dependency for building built-in dtbs
 - Fix microblaze built-in dtb building

 Makefile  | 32 +++
 arch/arc/Makefile |  6 --
 arch/arm/Makefile | 20 +--
 arch/arm64/Makefile   | 17 +---
 arch/c6x/Makefile |  2 --
 arch/h8300/Makefile   | 11 +--
 arch/microblaze/Makefile  |  4 +---
 arch/microblaze/boot/dts/Makefile |  2 ++
 arch/mips/Makefile| 15 +--
 arch/nds32/Makefile   |  2 +-
 arch/nios2/Makefile   |  7 ---
 arch/nios2/boot/Makefile  |  4 
 arch/powerpc/Makefile |  3 ---
 arch/xtensa/Makefile  | 12 +---
 scripts/Makefile.lib  |  2 +-
 15 files changed, 42 insertions(+), 97 deletions(-)

diff --git a/Makefile b/Makefile
index 2b458801ba74..bc18dbbc16c5 100644
--- a/Makefile
+++ b/Makefile
@@ -1212,6 +1212,32 @@ kselftest-merge:
$(srctree)/tools/testing/selftests/*/config
+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig

+# ---
+# Devicetree files
+
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/boot/dts/),)
+dtstree := arch/$(SRCARCH)/boot/dts
+endif
+
+ifdef CONFIG_OF_EARLY_FLATTREE
+
+%.dtb %.dtb.S %.dtb.o: | dtc
+   $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
+
+PHONY += dtbs
+dtbs: | dtc
+   $(Q)$(MAKE) $(build)=$(dtstree)
+
+dtbs_install: dtbs
+   $(Q)$(MAKE) $(dtbinst)=$(dtstree)
+
+all: dtbs
+
+dtc:
+   $(Q)$(MAKE) $(build)=scripts/dtc
+
+endif
+
 # ---
 # Modules

@@ -1421,6 +1447,12 @@ help:
@echo  '  kselftest-merge - Merge all the config dependencies of 
kselftest to existing'
@echo  '.config.'
@echo  ''
+   @$(if $(dtstree), \
+   echo 'Devicetree:'; \
+   echo '* dtbs- Build device tree blobs for enabled 
boards'; \
+   echo '  dtbs_install- Install dtbs to 
$(INSTALL_DTBS_PATH)'; \
+   echo '')
+
@echo 'Userspace tools targets:'
@echo '  use "make tools/help"'
@echo '  or  "cd tools; make help"'
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index fb026196aaab..5c7bc6d62f43 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -132,11 +132,5 @@ boot_targets += uImage uImage.bin uImage.gz
 $(boot_targets): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@

-%.dtb %.dtb.S %.dtb.o: scripts
-   $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-dtbs: scripts
-   $(Q)$(MAKE) $(build)=$(boot)/dts
-
 archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index d1516f85f25d..161c2df6567e 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -307,12 +307,7 @@ else
 KBUILD_IMAGE := $(boot)/zImage
 endif

-# Build the DT binary blobs if we have OF configured
-ifeq ($(CONFIG_USE_OF),y)
-KBUILD_DTBS := dtbs
-endif
-
-all:   $(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
+all:   $(notdir $(KBUILD_IMAGE))


 archheaders:
@@ -339,17 +334,6 @@ $(BOOT_TARGETS): vmlinux
 $(INSTALL_TARGETS):
$(Q)$(MAKE) $(build)=$(boot) 

[PATCH v2 1/9] powerpc: build .dtb files in dts directory

2018-09-05 Thread Rob Herring
Align powerpc with other architectures which build the dtb files in the
same directory as the dts files. This is also in line with most other
build targets which are located in the same directory as the source.
This move will help enable the 'dtbs' target which builds all the dtbs
regardless of kernel config.

This transition could break some scripts if they expect dtb files in the
old location.

Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring 
---
Please ack so I can take the whole series via the DT tree.


 arch/powerpc/Makefile  |  2 +-
 arch/powerpc/boot/Makefile | 49 --
 arch/powerpc/boot/dts/Makefile |  1 +
 3 files changed, 25 insertions(+), 27 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/Makefile

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 11a1acba164a..53ea887eb34e 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -294,7 +294,7 @@ bootwrapper_install:
$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)

 %.dtb: scripts
-   $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+   $(Q)$(MAKE) $(build)=$(boot)/dts $(patsubst %,$(boot)/dts/%,$@)

 # Used to create 'merged defconfigs'
 # To use it $(call) it with the first argument as the base defconfig
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 0fb96c26136f..b201d93e1725 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -381,11 +381,11 @@ $(addprefix $(obj)/, $(sort $(filter zImage.%, 
$(image-y: vmlinux $(wrapperb
$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))

 # dtbImage% - a dtbImage is a zImage with an embedded device tree blob
-$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-   $(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+   $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-   $(call if_changed,wrap,$*,,$(obj)/$*.dtb)
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+   $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb)

 # This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
 # prefix
@@ -395,36 +395,33 @@ $(obj)/vmlinux.strip: vmlinux
 $(obj)/uImage: vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,uboot)

-$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call 
if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb)
+$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb)

-$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call 
if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
+$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb)

-$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call 
if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call 
if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb)
+$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb)

-$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call 
if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call 
if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-   $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb)
+$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+   $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb)

-# Rule to build device tree blobs
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
-   $(call if_changed_dep,dtc)
-
-$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE
-   $(call if_changed_dep,dtc)
+# Needed for the 

[PATCH v2 0/9] Devicetree build consolidation

2018-09-05 Thread Rob Herring
This series addresses a couple of issues I have with building dts files.

First, the ability to build all the dts files in the tree. This has been
supported on most arches for some time with powerpc being the main
exception. The reason powerpc wasn't supported was that it needed a change
in the location where built dtb files are put.

Secondly, it's a pain to acquire all the cross-compilers needed to build
dtbs for each arch. There's no reason to build with the cross compiler and
the host compiler is perfectly fine as we only need the pre-processor.

I started addressing just those 2 problems, but kept finding small
differences such as target dependencies and dtbs_install support across
architectures. Instead of trying to align all these, I've consolidated the
build targets moving them out of the arch makefiles.

I'd like to take the series via the DT tree.

Rob

v2:
 - Fix $arch/boot/dts path check for out of tree builds
 - Fix dtc dependency for building built-in dtbs
 - Fix microblaze built-in dtb building
 - Add dtbs target for microblaze

Rob Herring (9):
  powerpc: build .dtb files in dts directory
  nios2: build .dtb files in dts directory
  nios2: use common rules to build built-in dtb
  nios2: fix building all dtbs
  c6x: use common built-in dtb support
  kbuild: consolidate Devicetree dtb build rules
  powerpc: enable building all dtbs
  c6x: enable building all dtbs
  microblaze: enable building all dtbs

 Makefile   | 32 +++
 arch/arc/Makefile  |  6 
 arch/arm/Makefile  | 20 +---
 arch/arm64/Makefile| 17 +--
 arch/c6x/Makefile  |  2 --
 arch/c6x/boot/dts/Makefile | 17 +--
 arch/c6x/boot/dts/linked_dtb.S |  2 --
 arch/c6x/include/asm/sections.h|  1 -
 arch/c6x/kernel/setup.c|  4 +--
 arch/c6x/kernel/vmlinux.lds.S  | 10 --
 arch/h8300/Makefile| 11 +--
 arch/microblaze/Makefile   |  4 +--
 arch/microblaze/boot/dts/Makefile  |  4 +++
 arch/mips/Makefile | 15 +
 arch/nds32/Makefile|  2 +-
 arch/nios2/Makefile| 11 +--
 arch/nios2/boot/Makefile   | 22 --
 arch/nios2/boot/dts/Makefile   |  6 
 arch/nios2/boot/linked_dtb.S   | 19 
 arch/powerpc/Makefile  |  3 --
 arch/powerpc/boot/Makefile | 49 ++
 arch/powerpc/boot/dts/Makefile |  6 
 arch/powerpc/boot/dts/fsl/Makefile |  4 +++
 arch/xtensa/Makefile   | 12 +---
 scripts/Makefile.lib   |  2 +-
 25 files changed, 93 insertions(+), 188 deletions(-)
 delete mode 100644 arch/c6x/boot/dts/linked_dtb.S
 create mode 100644 arch/nios2/boot/dts/Makefile
 delete mode 100644 arch/nios2/boot/linked_dtb.S
 create mode 100644 arch/powerpc/boot/dts/Makefile
 create mode 100644 arch/powerpc/boot/dts/fsl/Makefile

--
2.17.1


Re: [PATCH 3/3] mm: optimise pte dirty/accessed bit setting by demand based pte insertion

2018-09-05 Thread Nicholas Piggin
On Wed, 5 Sep 2018 07:29:51 -0700
Guenter Roeck  wrote:

> Hi,
> 
> On Tue, Aug 28, 2018 at 09:20:34PM +1000, Nicholas Piggin wrote:
> > Similarly to the previous patch, this tries to optimise dirty/accessed
> > bits in ptes to avoid access costs of hardware setting them.
> >   
> 
> This patch results in silent nios2 boot failures, silent meaning that
> the boot stalls.
> 
> ...
> Unpacking initramfs...
> Freeing initrd memory: 2168K
> workingset: timestamp_bits=30 max_order=15 bucket_order=0
> jffs2: version 2.2. (NAND) © 2001-2006 Red Hat, Inc.
> random: fast init done
> random: crng init done
> 
> [no further activity until the qemu session is aborted]
> 
> Reverting the patch fixes the problem. Bisect log is attached.

Thanks for bisecting it, I'll try to reproduce. Just qemu with no
obscure options? Interesting that it's hit nios2 but apparently not
other archs (yet).

Thanks,
Nick


[PATCH 13/21] powerpc: 8xx: get cpu node with of_get_cpu_node

2018-09-05 Thread Rob Herring
"device_type" use is deprecated for FDT though it has continued to be used
for nodes like cpu nodes. Use of_get_cpu_node() instead which works using
node names by default. This will allow the eventually removal of cpu
device_type properties.

Also, fix a leaked reference and add a missing of_node_put.
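
The pattern this converts to looks roughly like the following (an illustrative
sketch matching the diff below):

	struct device_node *cpu;

	cpu = of_get_cpu_node(0, NULL);	/* takes a reference on the node */
	if (cpu) {
		/* use the node: read properties, map the interrupt */
		of_node_put(cpu);	/* drop the reference when done */
	}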

Cc: Vitaly Bordug 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring 
---
Please ack and I will take via the DT tree. This is dependent on the
first 2 patches.

 arch/powerpc/platforms/8xx/m8xx_setup.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c 
b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 027c42d8966c..f1c805c8adbc 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -66,7 +66,7 @@ static int __init get_freq(char *name, unsigned long *val)
int found = 0;

/* The cpu node should have timebase and clock frequency properties */
-   cpu = of_find_node_by_type(NULL, "cpu");
+   cpu = of_get_cpu_node(0, NULL);

if (cpu) {
fp = of_get_property(cpu, name, NULL);
@@ -147,8 +147,9 @@ void __init mpc8xx_calibrate_decr(void)
 * we have to enable the timebase).  The decrementer interrupt
 * is wired into the vector table, nothing to do here for that.
 */
-   cpu = of_find_node_by_type(NULL, "cpu");
+   cpu = of_get_cpu_node(0, NULL);
virq= irq_of_parse_and_map(cpu, 0);
+   of_node_put(cpu);
irq = virq_to_hw(virq);

sys_tmr2 = immr_map(im_sit);
--
2.17.1


[PATCH 12/21] powerpc: 4xx: get cpu node with of_get_cpu_node

2018-09-05 Thread Rob Herring
"device_type" use is deprecated for FDT though it has continued to be used
for nodes like cpu nodes. Use of_get_cpu_node() instead which works using
node names by default. This will allow the eventually removal of cpu
device_type properties.

Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring 
---
Please ack and I will take via the DT tree. This is dependent on the
first 2 patches.

 arch/powerpc/platforms/4xx/soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c
index 5e36508b2a70..1844bf502fcf 100644
--- a/arch/powerpc/platforms/4xx/soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -200,7 +200,7 @@ void ppc4xx_reset_system(char *cmd)
u32 reset_type = DBCR0_RST_SYSTEM;
const u32 *prop;

-   np = of_find_node_by_type(NULL, "cpu");
+   np = of_get_cpu_node(0, NULL);
if (np) {
prop = of_get_property(np, "reset-type", NULL);

--
2.17.1


[PATCH 11/21] powerpc: use for_each_of_cpu_node iterator

2018-09-05 Thread Rob Herring
Use the for_each_of_cpu_node iterator to iterate over cpu nodes. This
has the side effect of defaulting to iterating using "cpu" node names in
preference to the deprecated (for FDT) device_type == "cpu".
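
Typical usage of the iterator (illustrative sketch; 'target_cpu' is just a
placeholder, and of_node_put() is only needed when breaking out of the loop
early, as in the hunks below):

	struct device_node *np;

	for_each_of_cpu_node(np) {
		const u32 *reg = of_get_property(np, "reg", NULL);

		if (reg && *reg == target_cpu) {
			of_node_put(np);	/* done with the node, drop its reference */
			break;
		}
	}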

Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring 
---
Please ack and I will take via the DT tree. This is dependent on the
first 2 patches.

 arch/powerpc/platforms/powermac/feature.c | 51 ---
 arch/powerpc/platforms/powermac/setup.c   | 15 +++
 2 files changed, 26 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/feature.c 
b/arch/powerpc/platforms/powermac/feature.c
index 4eb8cb38fc69..ed2f54b3f173 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1049,7 +1049,6 @@ core99_reset_cpu(struct device_node *node, long param, 
long value)
unsigned long flags;
struct macio_chip *macio;
struct device_node *np;
-   struct device_node *cpus;
const int dflt_reset_lines[] = {KL_GPIO_RESET_CPU0,
KL_GPIO_RESET_CPU1,
KL_GPIO_RESET_CPU2,
@@ -1059,10 +1058,7 @@ core99_reset_cpu(struct device_node *node, long param, 
long value)
if (macio->type != macio_keylargo)
return -ENODEV;

-   cpus = of_find_node_by_path("/cpus");
-   if (cpus == NULL)
-   return -ENODEV;
-   for (np = cpus->child; np != NULL; np = np->sibling) {
+   for_each_of_cpu_node(np) {
const u32 *num = of_get_property(np, "reg", NULL);
const u32 *rst = of_get_property(np, "soft-reset", NULL);
if (num == NULL || rst == NULL)
@@ -1072,7 +1068,6 @@ core99_reset_cpu(struct device_node *node, long param, 
long value)
break;
}
}
-   of_node_put(cpus);
if (np == NULL || reset_io == 0)
reset_io = dflt_reset_lines[param];

@@ -1504,16 +1499,12 @@ static long g5_reset_cpu(struct device_node *node, long 
param, long value)
unsigned long flags;
struct macio_chip *macio;
struct device_node *np;
-   struct device_node *cpus;

macio = &macio_chips[0];
if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
return -ENODEV;

-   cpus = of_find_node_by_path("/cpus");
-   if (cpus == NULL)
-   return -ENODEV;
-   for (np = cpus->child; np != NULL; np = np->sibling) {
+   for_each_of_cpu_node(np) {
const u32 *num = of_get_property(np, "reg", NULL);
const u32 *rst = of_get_property(np, "soft-reset", NULL);
if (num == NULL || rst == NULL)
@@ -1523,7 +1514,6 @@ static long g5_reset_cpu(struct device_node *node, long 
param, long value)
break;
}
}
-   of_node_put(cpus);
if (np == NULL || reset_io == 0)
return -ENODEV;

@@ -2515,31 +2505,26 @@ static int __init probe_motherboard(void)
 * supposed to be set when not supported, but I'm not very confident
 * that all Apple OF revs did it properly, I do it the paranoid way.
 */
-   while (uninorth_base && uninorth_rev > 3) {
-   struct device_node *cpus = of_find_node_by_path("/cpus");
+   if (uninorth_base && uninorth_rev > 3) {
struct device_node *np;

-   if (!cpus || !cpus->child) {
-   printk(KERN_WARNING "Can't find CPU(s) in device tree 
!\n");
-   of_node_put(cpus);
-   break;
-   }
-   np = cpus->child;
-   /* Nap mode not supported on SMP */
-   if (np->sibling) {
-   of_node_put(cpus);
-   break;
-   }
-   /* Nap mode not supported if flush-on-lock property is present 
*/
-   if (of_get_property(np, "flush-on-lock", NULL)) {
-   of_node_put(cpus);
-   break;
+   for_each_of_cpu_node(np) {
+   int cpu_count = 1;
+
+   /* Nap mode not supported on SMP */
+   if (of_get_property(np, "flush-on-lock", NULL) ||
+   (cpu_count > 1)) {
+   powersave_nap = 0;
+   of_node_put(np);
+   break;
+   }
+
+   cpu_count++;
+   powersave_nap = 1;
}
-   of_node_put(cpus);
-   powersave_nap = 1;
-   printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
-   break;
}
+   if (powersave_nap)
+   printk(KERN_DEBUG 

[PATCH 00/21] DT cpu node iterator

2018-09-05 Thread Rob Herring
This series adds an iterator for cpu nodes and converts users over to use
it or of_get_cpu_node in some cases. This allows us to remove the
dependency on the device_type property for cpu nodes, though removing that
from DTS files will have to wait for some time. In some cases, this makes
the DT search more strict by only looking in /cpus child nodes rather
than any node with the device_type == cpu. The iterator also honors the
status property which is often forgotten.

I've only tested on ARM under QEMU and compiled powerpc.

Rob

Rob Herring (21):
  of: Add cpu node iterator for_each_of_cpu_node()
  of: Support matching cpu nodes with no 'reg' property
  ARM: use for_each_of_cpu_node iterator
  ARM: topology: remove unneeded check for /cpus node
  ARM: shmobile: use for_each_of_cpu_node iterator
  arm64: use for_each_of_cpu_node iterator
  c6x: use for_each_of_cpu_node iterator
  microblaze: get cpu node with of_get_cpu_node
  nios2: get cpu node with of_get_cpu_node
  openrisc: use for_each_of_cpu_node iterator
  powerpc: use for_each_of_cpu_node iterator
  powerpc: 4xx: get cpu node with of_get_cpu_node
  powerpc: 8xx: get cpu node with of_get_cpu_node
  riscv: use for_each_of_cpu_node iterator
  SH: use for_each_of_cpu_node iterator
  x86: DT: use for_each_of_cpu_node iterator
  clk: mvebu: use for_each_of_cpu_node iterator
  edac: cpc925: use for_each_of_cpu_node iterator
  iommu: fsl_pamu: use for_each_of_cpu_node iterator
  of: use for_each_of_cpu_node iterator
  fbdev: fsl-diu: get cpu node with of_get_cpu_node

 arch/arm/kernel/devtree.c |  5 +--
 arch/arm/kernel/topology.c|  6 ---
 arch/arm/mach-shmobile/pm-rcar-gen2.c |  8 +---
 arch/arm/mach-shmobile/pm-rmobile.c   |  2 +-
 arch/arm/mach-shmobile/timer.c| 10 +
 arch/arm64/kernel/smp.c   |  2 +-
 arch/c6x/kernel/setup.c   | 11 ++---
 arch/microblaze/kernel/cpu/cpuinfo.c  |  4 +-
 arch/nios2/kernel/cpuinfo.c   |  4 +-
 arch/openrisc/kernel/setup.c  |  3 +-
 arch/powerpc/platforms/4xx/soc.c  |  2 +-
 arch/powerpc/platforms/8xx/m8xx_setup.c   |  5 ++-
 arch/powerpc/platforms/powermac/feature.c | 51 ---
 arch/powerpc/platforms/powermac/setup.c   | 15 +++
 arch/riscv/kernel/smpboot.c   |  2 +-
 arch/sh/boards/of-generic.c   |  2 +-
 arch/x86/kernel/devicetree.c  |  2 +-
 drivers/clk/mvebu/clk-cpu.c   |  4 +-
 drivers/edac/cpc925_edac.c| 20 +
 drivers/iommu/fsl_pamu.c  |  2 +-
 drivers/of/base.c | 43 ++-
 drivers/of/of_numa.c  | 15 +--
 drivers/video/fbdev/fsl-diu-fb.c  |  2 +-
 include/linux/of.h| 11 +
 24 files changed, 111 insertions(+), 120 deletions(-)

--
2.17.1


Re: [PATCH 0/3] tty: hvc: latency break regression fixes

2018-09-05 Thread Jason Gunthorpe
On Wed, Sep 05, 2018 at 10:14:36PM +1000, Nicholas Piggin wrote:
> Hi Greg,
> 
> Here are fixes for a few regressions that came in with my
> carelessness with the irq latency work for the hvc subsystem.
> These were independently reported in 2 configurations, and I
> confirmed with another.
> 
> I think those went upstream via Michael's tree, but he's away
> at the moment so if you would be able to consider them for
> the tty tree that would be appreciated.

Series works for me too, thanks.

Tested-by: Jason Gunthorpe 

Jason


Re: FSL/NXP P5020: USB problems with the latest Git kernels

2018-09-05 Thread Scott Wood
On Wed, 2018-09-05 at 14:08 +, Laurentiu Tudor wrote:
> Hi Scott,
> 
> > -Original Message- 
> > On Mon, 2018-08-27 at 20:15 +0200, Christian Zigotzky wrote:
> > > Hello,
> > > 
> > > Our users tested the RC1 of kernel 4.19 on their P5020 boards today.
> > > Unfortunately the USB bug still exists. With mem values bigger than
> > 
> > 4096M,
> > > the USB mouse and keyboard doesn’t work. With the bootarg mem=4096M, the
> > 
> > USB
> > > devices work without any problems. Please compile the RC1 and test it on
> > > your P5020 board. There is a problem with the memory management since
> > > 22/08/18.
> > 
> > I just tested 4.19-rc1 on a T4240 and got a similar problem with MMC.  MMC
> > and
> > USB on these chips both have a 32-bit DMA limitation.  I'll look into it.
> 
> I encountered similar issues on LS104xA chips. This is the workaround for
> MMC:
> https://patchwork.kernel.org/patch/10506627/

That's not a workaround; it's the proper way to handle DMA addressing limits.

> On the USB side I didn't find a proper place in the usb subsystem code for a
> workaround but instead
> found an undocumented kernel arg (*) that limits the dma mask to 32 bits:
> xhci-hcd.quirks=0x80.
> 
> (*) https://patchwork.kernel.org/patch/10509159/

The mask should already be getting set to 32 bits.  The problem is a recent
patch that changed initcall order, which led to swiotlb not being used.  We
need to rework the swiotlb notifier so it doesn't depend on initcall ordering.

-Scott



Re: [RFC PATCH 07/29] memblock: remove _virt from APIs returning virtual address

2018-09-05 Thread Mike Rapoport
On Wed, Sep 05, 2018 at 12:04:36PM -0500, Rob Herring wrote:
> On Wed, Sep 5, 2018 at 11:00 AM Mike Rapoport  wrote:
> >
> > The conversion is done using
> >
> > sed -i 's@memblock_virt_alloc@memblock_alloc@g' \
> > $(git grep -l memblock_virt_alloc)
> 
> What's the reason to do this? It seems like a lot of churn even if a
> mechanical change.

I felt that memblock_virt_alloc_ is too long for a prefix, e.g:
memblock_virt_alloc_node_nopanic, memblock_virt_alloc_low_nopanic.

And for consistency I've changed the memblock_virt_alloc as well.


> Rob
> 

-- 
Sincerely yours,
Mike.



Re: [RFC PATCH 07/29] memblock: remove _virt from APIs returning virtual address

2018-09-05 Thread Rob Herring
On Wed, Sep 5, 2018 at 11:00 AM Mike Rapoport  wrote:
>
> The conversion is done using
>
> sed -i 's@memblock_virt_alloc@memblock_alloc@g' \
> $(git grep -l memblock_virt_alloc)

What's the reason to do this? It seems like a lot of churn even if a
mechanical change.

Rob


[RFC PATCH 08/29] memblock: replace alloc_bootmem_align with memblock_alloc

2018-09-05 Thread Mike Rapoport
The functions are equivalent, just the latter does not require the nobootmem
translation layer.

Signed-off-by: Mike Rapoport 
---
 arch/x86/xen/p2m.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 159a897..68c0f14 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -180,7 +180,7 @@ static void p2m_init_identity(unsigned long *p2m, unsigned 
long pfn)
 static void * __ref alloc_p2m_page(void)
 {
if (unlikely(!slab_is_available()))
-   return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+   return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
return (void *)__get_free_page(GFP_KERNEL);
 }
-- 
2.7.4



[RFC PATCH 10/29] memblock: replace __alloc_bootmem_node_nopanic with memblock_alloc_try_nid_nopanic

2018-09-05 Thread Mike Rapoport
__alloc_bootmem_node_nopanic() is used only once; there is no reason to add a
wrapper around memblock_alloc_try_nid_nopanic() for it.

Signed-off-by: Mike Rapoport 
---
 arch/x86/kernel/setup_percpu.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ea554f8..67d48e26 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -112,8 +112,10 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
unsigned long size,
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
 cpu, size, __pa(ptr));
} else {
-   ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
-  size, align, goal);
+   ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
+BOOTMEM_ALLOC_ACCESSIBLE,
+node);
+
pr_debug("per cpu data for cpu%d %lu bytes on node%d at 
%016lx\n",
 cpu, size, node, __pa(ptr));
}
-- 
2.7.4



[RFC PATCH 19/29] memblock: replace alloc_bootmem_pages with memblock_alloc

2018-09-05 Thread Mike Rapoport
The conversion is done using the following semantic patch:

@@
expression e;
@@
- alloc_bootmem_pages(e)
+ memblock_alloc(e, PAGE_SIZE)

Signed-off-by: Mike Rapoport 
---
 arch/c6x/mm/init.c | 3 ++-
 arch/h8300/mm/init.c   | 2 +-
 arch/m68k/mm/init.c| 2 +-
 arch/m68k/mm/mcfmmu.c  | 4 ++--
 arch/m68k/mm/motorola.c| 2 +-
 arch/m68k/mm/sun3mmu.c | 4 ++--
 arch/sh/mm/init.c  | 4 ++--
 arch/x86/kernel/apic/io_apic.c | 3 ++-
 arch/x86/mm/init_64.c  | 2 +-
 drivers/xen/swiotlb-xen.c  | 3 ++-
 10 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index 4cc72b0..dc369ad 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -38,7 +38,8 @@ void __init paging_init(void)
struct pglist_data *pgdat = NODE_DATA(0);
unsigned long zones_size[MAX_NR_ZONES] = {0, };
 
-   empty_zero_page  = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+   empty_zero_page  = (unsigned long) memblock_alloc(PAGE_SIZE,
+ PAGE_SIZE);
memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
/*
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 015287a..5d31ac9 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -67,7 +67,7 @@ void __init paging_init(void)
 * Initialize the bad page table and bad page to point
 * to a couple of allocated pages.
 */
-   empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
+   empty_zero_page = (unsigned long)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
/*
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 38e2b27..977363e 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -93,7 +93,7 @@ void __init paging_init(void)
 
high_memory = (void *) end_mem;
 
-   empty_zero_page = alloc_bootmem_pages(PAGE_SIZE);
+   empty_zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
/*
 * Set up SFC/DFC registers (user data space).
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index f5453d9..38a1d92 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -44,7 +44,7 @@ void __init paging_init(void)
enum zone_type zone;
int i;
 
-   empty_zero_page = (void *) alloc_bootmem_pages(PAGE_SIZE);
+   empty_zero_page = (void *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
memset((void *) empty_zero_page, 0, PAGE_SIZE);
 
pg_dir = swapper_pg_dir;
@@ -52,7 +52,7 @@ void __init paging_init(void)
 
size = num_pages * sizeof(pte_t);
size = (size + PAGE_SIZE) & ~(PAGE_SIZE-1);
-   next_pgtable = (unsigned long) alloc_bootmem_pages(size);
+   next_pgtable = (unsigned long) memblock_alloc(size, PAGE_SIZE);
 
bootmem_end = (next_pgtable + size + PAGE_SIZE) & PAGE_MASK;
pg_dir += PAGE_OFFSET >> PGDIR_SHIFT;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 8bcf57e..2113eec 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -276,7 +276,7 @@ void __init paging_init(void)
 * initialize the bad page table and bad page to point
 * to a couple of allocated pages
 */
-   empty_zero_page = alloc_bootmem_pages(PAGE_SIZE);
+   empty_zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
/*
 * Set up SFC/DFC registers
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index 4a99799..19c05ab 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -45,7 +45,7 @@ void __init paging_init(void)
unsigned long zones_size[MAX_NR_ZONES] = { 0, };
unsigned long size;
 
-   empty_zero_page = alloc_bootmem_pages(PAGE_SIZE);
+   empty_zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
address = PAGE_OFFSET;
pg_dir = swapper_pg_dir;
@@ -55,7 +55,7 @@ void __init paging_init(void)
size = num_pages * sizeof(pte_t);
size = (size + PAGE_SIZE) & ~(PAGE_SIZE-1);
 
-   next_pgtable = (unsigned long)alloc_bootmem_pages(size);
+   next_pgtable = (unsigned long)memblock_alloc(size, PAGE_SIZE);
bootmem_end = (next_pgtable + size + PAGE_SIZE) & PAGE_MASK;
 
/* Map whole memory from PAGE_OFFSET (0x0E00) */
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 7713c08..c884b76 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -128,7 +128,7 @@ static pmd_t * __init one_md_table_init(pud_t *pud)
if (pud_none(*pud)) {
pmd_t *pmd;
 
-   pmd = alloc_bootmem_pages(PAGE_SIZE);
+   pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pud_populate(_mm, pud, pmd);
BUG_ON(pmd != pmd_offset(pud, 0));
}
@@ -141,7 +141,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
if (pmd_none(*pmd)) {

[RFC PATCH 29/29] mm: remove include/linux/bootmem.h

2018-09-05 Thread Mike Rapoport
Move remaining definitions and declarations from include/linux/bootmem.h
into include/linux/memblock.h and remove the redundant header.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/core_cia.c|   2 +-
 arch/alpha/kernel/core_irongate.c   |   1 -
 arch/alpha/kernel/core_marvel.c |   2 +-
 arch/alpha/kernel/core_titan.c  |   2 +-
 arch/alpha/kernel/core_tsunami.c|   2 +-
 arch/alpha/kernel/pci-noop.c|   2 +-
 arch/alpha/kernel/pci.c |   2 +-
 arch/alpha/kernel/pci_iommu.c   |   2 +-
 arch/alpha/kernel/setup.c   |   1 -
 arch/alpha/kernel/sys_nautilus.c|   2 +-
 arch/alpha/mm/init.c|   2 +-
 arch/alpha/mm/numa.c|   1 -
 arch/arc/kernel/unwind.c|   2 +-
 arch/arc/mm/highmem.c   |   2 +-
 arch/arc/mm/init.c  |   1 -
 arch/arm/kernel/devtree.c   |   1 -
 arch/arm/kernel/setup.c |   1 -
 arch/arm/mach-omap2/omap_hwmod.c|   2 +-
 arch/arm/mm/dma-mapping.c   |   1 -
 arch/arm/mm/init.c  |   1 -
 arch/arm/xen/mm.c   |   1 -
 arch/arm/xen/p2m.c  |   2 +-
 arch/arm64/kernel/acpi.c|   1 -
 arch/arm64/kernel/acpi_numa.c   |   1 -
 arch/arm64/kernel/setup.c   |   1 -
 arch/arm64/mm/dma-mapping.c |   2 +-
 arch/arm64/mm/init.c|   1 -
 arch/arm64/mm/kasan_init.c  |   1 -
 arch/arm64/mm/numa.c|   1 -
 arch/c6x/kernel/setup.c |   1 -
 arch/c6x/mm/init.c  |   2 +-
 arch/h8300/kernel/setup.c   |   1 -
 arch/h8300/mm/init.c|   2 +-
 arch/hexagon/kernel/dma.c   |   2 +-
 arch/hexagon/kernel/setup.c |   2 +-
 arch/hexagon/mm/init.c  |   1 -
 arch/ia64/kernel/crash.c|   2 +-
 arch/ia64/kernel/efi.c  |   2 +-
 arch/ia64/kernel/ia64_ksyms.c   |   2 +-
 arch/ia64/kernel/iosapic.c  |   2 +-
 arch/ia64/kernel/mca.c  |   2 +-
 arch/ia64/kernel/mca_drv.c  |   2 +-
 arch/ia64/kernel/setup.c|   1 -
 arch/ia64/kernel/smpboot.c  |   2 +-
 arch/ia64/kernel/topology.c |   2 +-
 arch/ia64/kernel/unwind.c   |   2 +-
 arch/ia64/mm/contig.c   |   1 -
 arch/ia64/mm/discontig.c|   1 -
 arch/ia64/mm/init.c |   1 -
 arch/ia64/mm/numa.c |   2 +-
 arch/ia64/mm/tlb.c  |   2 +-
 arch/ia64/pci/pci.c |   2 +-
 arch/ia64/sn/kernel/bte.c   |   2 +-
 arch/ia64/sn/kernel/io_common.c |   2 +-
 arch/ia64/sn/kernel/setup.c |   2 +-
 arch/m68k/atari/stram.c |   2 +-
 arch/m68k/coldfire/m54xx.c  |   2 +-
 arch/m68k/kernel/setup_mm.c |   1 -
 arch/m68k/kernel/setup_no.c |   1 -
 arch/m68k/kernel/uboot.c|   2 +-
 arch/m68k/mm/init.c |   2 +-
 arch/m68k/mm/mcfmmu.c   |   1 -
 arch/m68k/mm/motorola.c |   1 -
 arch/m68k/mm/sun3mmu.c  |   2 +-
 arch/m68k/sun3/config.c |   2 +-
 arch/m68k/sun3/dvma.c   |   2 +-
 arch/m68k/sun3/mmu_emu.c|   2 +-
 arch/m68k/sun3/sun3dvma.c   |   2 +-
 arch/m68k/sun3x/dvma.c  |   2 +-
 arch/microblaze/mm/consistent.c |   2 +-
 arch/microblaze/mm/init.c   |   3 +-
 arch/microblaze/pci/pci-common.c|   2 +-
 arch/mips/ar7/memory.c  |   2 +-
 arch/mips/ath79/setup.c |   2 +-
 arch/mips/bcm63xx/prom.c|   2 +-
 arch/mips/bcm63xx/setup.c   |   2 +-
 arch/mips/bmips/setup.c |   2 +-
 arch/mips/cavium-octeon/dma-octeon.c|   2 +-
 arch/mips/dec/prom/memory.c |   2 +-
 arch/mips/emma/common/prom.c|   2 +-
 arch/mips/fw/arc/memory.c   |   2 +-
 arch/mips/jazz/jazzdma.c|   2 +-
 arch/mips/kernel/crash.c|   2 +-
 arch/mips/kernel/crash_dump.c   |   2 +-
 arch/mips/kernel/prom.c |   2 +-
 arch/mips/kernel/setup.c|   1 -
 arch/mips/kernel/traps.c|   2 +-
 arch/mips/kernel/vpe.c  |   2 +-
 arch/mips/kvm/commpage.c|   2 +-
 arch/mips/kvm/dyntrans.c|   

[RFC PATCH 21/29] memblock: replace alloc_bootmem with memblock_alloc

2018-09-05 Thread Mike Rapoport
The conversion is done using the following semantic patch:

@@
expression e;
@@
- __alloc_bootmem(e)
+ memblock_alloc_from(e, 0)

Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/core_marvel.c | 4 ++--
 arch/alpha/kernel/pci-noop.c| 4 ++--
 arch/alpha/kernel/pci.c | 4 ++--
 arch/alpha/kernel/pci_iommu.c   | 4 ++--
 arch/ia64/kernel/mca.c  | 4 ++--
 arch/ia64/mm/tlb.c  | 4 ++--
 arch/m68k/sun3/sun3dvma.c   | 3 ++-
 arch/microblaze/mm/init.c   | 2 +-
 arch/mips/kernel/setup.c| 2 +-
 arch/um/drivers/net_kern.c  | 2 +-
 arch/um/drivers/vector_kern.c   | 2 +-
 arch/um/kernel/initrd.c | 2 +-
 arch/x86/kernel/acpi/boot.c | 3 ++-
 arch/x86/kernel/apic/io_apic.c  | 2 +-
 arch/x86/kernel/e820.c  | 2 +-
 arch/x86/platform/olpc/olpc_dt.c| 2 +-
 arch/xtensa/platforms/iss/network.c | 2 +-
 arch/xtensa/platforms/iss/setup.c   | 4 ++--
 drivers/macintosh/smu.c | 2 +-
 init/main.c | 4 ++--
 20 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index bdebb8c2..1f00c94 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -82,7 +82,7 @@ mk_resource_name(int pe, int port, char *str)
char *name;

sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port);
-   name = alloc_bootmem(strlen(tmp) + 1);
+   name = memblock_alloc(strlen(tmp) + 1, 0);
strcpy(name, tmp);
 
return name;
@@ -117,7 +117,7 @@ alloc_io7(unsigned int pe)
return NULL;
}
 
-   io7 = alloc_bootmem(sizeof(*io7));
+   io7 = memblock_alloc(sizeof(*io7), 0);
io7->pe = pe;
raw_spin_lock_init(&io7->irq_lock);
 
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index c7c5879..59cbfc2 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -33,7 +33,7 @@ alloc_pci_controller(void)
 {
struct pci_controller *hose;
 
-   hose = alloc_bootmem(sizeof(*hose));
+   hose = memblock_alloc(sizeof(*hose), 0);
 
*hose_tail = hose;
hose_tail = &hose->next;
@@ -44,7 +44,7 @@ alloc_pci_controller(void)
 struct resource * __init
 alloc_resource(void)
 {
-   return alloc_bootmem(sizeof(struct resource));
+   return memblock_alloc(sizeof(struct resource), 0);
 }
 
 SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus,
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index c668c3b..4cc3eb9 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -392,7 +392,7 @@ alloc_pci_controller(void)
 {
struct pci_controller *hose;
 
-   hose = alloc_bootmem(sizeof(*hose));
+   hose = memblock_alloc(sizeof(*hose), 0);
 
*hose_tail = hose;
hose_tail = &hose->next;
@@ -403,7 +403,7 @@ alloc_pci_controller(void)
 struct resource * __init
 alloc_resource(void)
 {
-   return alloc_bootmem(sizeof(struct resource));
+   return memblock_alloc(sizeof(struct resource), 0);
 }
 
 
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 0c05493..5d178c7 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -79,7 +79,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, 
dma_addr_t base,
printk("%s: couldn't allocate arena from node %d\n"
   "falling back to system-wide allocation\n",
   __func__, nid);
-   arena = alloc_bootmem(sizeof(*arena));
+   arena = memblock_alloc(sizeof(*arena), 0);
}
 
arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid);
@@ -92,7 +92,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, 
dma_addr_t base,
 
 #else /* CONFIG_DISCONTIGMEM */
 
-   arena = alloc_bootmem(sizeof(*arena));
+   arena = memblock_alloc(sizeof(*arena), 0);
arena->ptes = memblock_alloc_from(mem_size, align, 0);
 
 #endif /* CONFIG_DISCONTIGMEM */
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 5586926..7120976 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -361,9 +361,9 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
 
 #define IA64_LOG_ALLOCATE(it, size) \
{ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
-   (ia64_err_rec_t *)alloc_bootmem(size); \
+   (ia64_err_rec_t *)memblock_alloc(size, 0); \
ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
-   (ia64_err_rec_t *)alloc_bootmem(size);}
+   (ia64_err_rec_t *)memblock_alloc(size, 0);}
 #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
 #define IA64_LOG_LOCK(it)  spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
 #define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
diff 

[RFC PATCH 28/29] memblock: replace BOOTMEM_ALLOC_* with MEMBLOCK variants

2018-09-05 Thread Mike Rapoport
Drop BOOTMEM_ALLOC_ACCESSIBLE and BOOTMEM_ALLOC_ANYWHERE in favor of
identical MEMBLOCK definitions.
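
For reference, the memblock counterparts carry the same values (a sketch of
the definitions in include/linux/memblock.h):

#define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
#define MEMBLOCK_ALLOC_ACCESSIBLE	0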

Signed-off-by: Mike Rapoport 
---
 arch/ia64/mm/discontig.c   | 2 +-
 arch/powerpc/kernel/setup_64.c | 2 +-
 arch/sparc/kernel/smp_64.c | 2 +-
 arch/x86/kernel/setup_percpu.c | 2 +-
 arch/x86/mm/kasan_init_64.c| 4 ++--
 mm/hugetlb.c   | 3 ++-
 mm/kasan/kasan_init.c  | 2 +-
 mm/memblock.c  | 8 
 mm/page_ext.c  | 2 +-
 mm/sparse-vmemmap.c| 3 ++-
 mm/sparse.c| 5 +++--
 11 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 918dda9..70609f8 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -453,7 +453,7 @@ static void __init *memory_less_node_alloc(int nid, 
unsigned long pernodesize)
 
ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE,
 __pa(MAX_DMA_ADDRESS),
-BOOTMEM_ALLOC_ACCESSIBLE,
+MEMBLOCK_ALLOC_ACCESSIBLE,
 bestnode);
 
return ptr;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e564b27..b3e70cc 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -758,7 +758,7 @@ void __init emergency_stack_init(void)
 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 {
return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
- BOOTMEM_ALLOC_ACCESSIBLE,
+ MEMBLOCK_ALLOC_ACCESSIBLE,
  early_cpu_to_node(cpu));
 
 }
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index a087a6a..6cc80d0 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1595,7 +1595,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
size_t size,
 cpu, size, __pa(ptr));
} else {
ptr = memblock_alloc_try_nid(size, align, goal,
-BOOTMEM_ALLOC_ACCESSIBLE, node);
+MEMBLOCK_ALLOC_ACCESSIBLE, node);
pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
 "%016lx\n", cpu, size, node, __pa(ptr));
}
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index a006f1b..483412f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -114,7 +114,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
unsigned long size,
 cpu, size, __pa(ptr));
} else {
ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
-BOOTMEM_ALLOC_ACCESSIBLE,
+MEMBLOCK_ALLOC_ACCESSIBLE,
 node);
 
pr_debug("per cpu data for cpu%d %lu bytes on node%d at 
%016lx\n",
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 77b857c..8f87499 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -29,10 +29,10 @@ static __init void *early_alloc(size_t size, int nid, bool 
panic)
 {
if (panic)
return memblock_alloc_try_nid(size, size,
-   __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+   __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
else
return memblock_alloc_try_nid_nopanic(size, size,
-   __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+   __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
 static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3f5419c..ee0b140 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2102,7 +2103,7 @@ int __alloc_bootmem_huge_page(struct hstate *h)
 
addr = memblock_alloc_try_nid_raw(
huge_page_size(h), huge_page_size(h),
-   0, BOOTMEM_ALLOC_ACCESSIBLE, node);
+   0, MEMBLOCK_ALLOC_ACCESSIBLE, node);
if (addr) {
/*
 * Use the beginning of the huge page to store the
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 24d734b..785a970 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -84,7 +84,7 @@ static inline bool kasan_zero_page_entry(pte_t pte)
 static __init void *early_alloc(size_t size, int node)
 {
return 

[RFC PATCH 27/29] mm: remove nobootmem

2018-09-05 Thread Mike Rapoport
Move the few remaining functions from nobootmem.c to memblock.c and remove
nobootmem.c altogether.

Signed-off-by: Mike Rapoport 
---
 mm/Makefile|   1 -
 mm/memblock.c  | 104 ++
 mm/nobootmem.c | 128 -
 3 files changed, 104 insertions(+), 129 deletions(-)
 delete mode 100644 mm/nobootmem.c

diff --git a/mm/Makefile b/mm/Makefile
index 0a3e72e..fb96c45 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -42,7 +42,6 @@ obj-y := filemap.o mempool.o oom_kill.o 
fadvise.o \
   debug.o $(mmu-y)
 
 obj-y += init-mm.o
-obj-y += nobootmem.o
 obj-y += memblock.o
 
 ifdef CONFIG_MMU
diff --git a/mm/memblock.c b/mm/memblock.c
index 55d7d50..3f76d40 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -82,6 +82,16 @@
  * initialization completes.
  */
 
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data __refdata contig_page_data;
+EXPORT_SYMBOL(contig_page_data);
+#endif
+
+unsigned long max_low_pfn;
+unsigned long min_low_pfn;
+unsigned long max_pfn;
+unsigned long long max_possible_pfn;
+
 static struct memblock_region 
memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region 
memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
@@ -1959,6 +1969,100 @@ static int __init early_memblock(char *p)
 }
 early_param("memblock", early_memblock);
 
+static void __init __free_pages_memory(unsigned long start, unsigned long end)
+{
+   int order;
+
+   while (start < end) {
+   order = min(MAX_ORDER - 1UL, __ffs(start));
+
+   while (start + (1UL << order) > end)
+   order--;
+
+   memblock_free_pages(pfn_to_page(start), start, order);
+
+   start += (1UL << order);
+   }
+}
+
+static unsigned long __init __free_memory_core(phys_addr_t start,
+phys_addr_t end)
+{
+   unsigned long start_pfn = PFN_UP(start);
+   unsigned long end_pfn = min_t(unsigned long,
+ PFN_DOWN(end), max_low_pfn);
+
+   if (start_pfn >= end_pfn)
+   return 0;
+
+   __free_pages_memory(start_pfn, end_pfn);
+
+   return end_pfn - start_pfn;
+}
+
+static unsigned long __init free_low_memory_core_early(void)
+{
+   unsigned long count = 0;
+   phys_addr_t start, end;
+   u64 i;
+
+   memblock_clear_hotplug(0, -1);
+
+   for_each_reserved_mem_region(i, &start, &end)
+   reserve_bootmem_region(start, end);
+
+   /*
+* We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
+*  because in some case like Node0 doesn't have RAM installed
+*  low ram will be on Node1
+*/
+   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
+   NULL)
+   count += __free_memory_core(start, end);
+
+   return count;
+}
+
+static int reset_managed_pages_done __initdata;
+
+void reset_node_managed_pages(pg_data_t *pgdat)
+{
+   struct zone *z;
+
+   for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+   z->managed_pages = 0;
+}
+
+void __init reset_all_zones_managed_pages(void)
+{
+   struct pglist_data *pgdat;
+
+   if (reset_managed_pages_done)
+   return;
+
+   for_each_online_pgdat(pgdat)
+   reset_node_managed_pages(pgdat);
+
+   reset_managed_pages_done = 1;
+}
+
+/**
+ * memblock_free_all - release free pages to the buddy allocator
+ *
+ * Return: the number of pages actually released.
+ */
+unsigned long __init memblock_free_all(void)
+{
+   unsigned long pages;
+
+   reset_all_zones_managed_pages();
+
+   pages = free_low_memory_core_early();
+   totalram_pages += pages;
+
+   return pages;
+}
+
 #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)
 
 static int memblock_debug_show(struct seq_file *m, void *private)
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
deleted file mode 100644
index 9608bc5..000
--- a/mm/nobootmem.c
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  bootmem - A boot-time physical memory allocator and configurator
- *
- *  Copyright (C) 1999 Ingo Molnar
- *1999 Kanoj Sarcar, SGI
- *2008 Johannes Weiner
- *
- * Access to this subsystem has to be serialized externally (which is true
- * for the boot process anyway).
- */
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-
-#include "internal.h"
-
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data;
-EXPORT_SYMBOL(contig_page_data);
-#endif
-
-unsigned long max_low_pfn;
-unsigned long min_low_pfn;
-unsigned long max_pfn;
-unsigned long long max_possible_pfn;
-
-static void __init __free_pages_memory(unsigned long 

[RFC PATCH 26/29] memblock: rename __free_pages_bootmem to memblock_free_pages

2018-09-05 Thread Mike Rapoport
The conversion is done using

sed -i 's@__free_pages_bootmem@memblock_free_pages@' \
$(git grep -l __free_pages_bootmem)

Signed-off-by: Mike Rapoport 
---
 mm/internal.h   | 2 +-
 mm/memblock.c   | 2 +-
 mm/nobootmem.c  | 2 +-
 mm/page_alloc.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 87256ae..291eb2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -161,7 +161,7 @@ static inline struct page *pageblock_pfn_to_page(unsigned 
long start_pfn,
 }
 
 extern int __isolate_free_page(struct page *page, unsigned int order);
-extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
+extern void memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
diff --git a/mm/memblock.c b/mm/memblock.c
index 63df68b..55d7d50 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1639,7 +1639,7 @@ void __init __memblock_free_late(phys_addr_t base, 
phys_addr_t size)
end = PFN_DOWN(base + size);
 
for (; cursor < end; cursor++) {
-   __free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
+   memblock_free_pages(pfn_to_page(cursor), cursor, 0);
totalram_pages++;
}
 }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index bb64b09..9608bc5 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -43,7 +43,7 @@ static void __init __free_pages_memory(unsigned long start, 
unsigned long end)
while (start + (1UL << order) > end)
order--;
 
-   __free_pages_bootmem(pfn_to_page(start), start, order);
+   memblock_free_pages(pfn_to_page(start), start, order);
 
start += (1UL << order);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 33c9e27..e143fae 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1333,7 +1333,7 @@ meminit_pfn_in_nid(unsigned long pfn, int node,
 #endif
 
 
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
+void __init memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order)
 {
if (early_page_uninitialised(pfn))
-- 
2.7.4



[RFC PATCH 25/29] memblock: rename free_all_bootmem to memblock_free_all

2018-09-05 Thread Mike Rapoport
The conversion is done using

sed -i 's@free_all_bootmem@memblock_free_all@' \
$(git grep -l free_all_bootmem)

Signed-off-by: Mike Rapoport 
---
 arch/alpha/mm/init.c   | 2 +-
 arch/arc/mm/init.c | 2 +-
 arch/arm/mm/init.c | 2 +-
 arch/arm64/mm/init.c   | 2 +-
 arch/c6x/mm/init.c | 2 +-
 arch/h8300/mm/init.c   | 2 +-
 arch/hexagon/mm/init.c | 2 +-
 arch/ia64/mm/init.c| 2 +-
 arch/m68k/mm/init.c| 2 +-
 arch/microblaze/mm/init.c  | 2 +-
 arch/mips/loongson64/loongson-3/numa.c | 2 +-
 arch/mips/mm/init.c| 2 +-
 arch/mips/sgi-ip27/ip27-memory.c   | 2 +-
 arch/nds32/mm/init.c   | 2 +-
 arch/nios2/mm/init.c   | 2 +-
 arch/openrisc/mm/init.c| 2 +-
 arch/parisc/mm/init.c  | 2 +-
 arch/powerpc/mm/mem.c  | 2 +-
 arch/riscv/mm/init.c   | 2 +-
 arch/s390/mm/init.c| 2 +-
 arch/sh/mm/init.c  | 2 +-
 arch/sparc/mm/init_32.c| 2 +-
 arch/sparc/mm/init_64.c| 4 ++--
 arch/um/kernel/mem.c   | 2 +-
 arch/unicore32/mm/init.c   | 2 +-
 arch/x86/mm/highmem_32.c   | 2 +-
 arch/x86/mm/init_32.c  | 4 ++--
 arch/x86/mm/init_64.c  | 4 ++--
 arch/x86/xen/mmu_pv.c  | 2 +-
 arch/xtensa/mm/init.c  | 2 +-
 include/linux/bootmem.h| 2 +-
 mm/memblock.c  | 2 +-
 mm/nobootmem.c | 4 ++--
 mm/page_alloc.c| 2 +-
 mm/page_poison.c   | 2 +-
 35 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 9d74520..853d153 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -282,7 +282,7 @@ mem_init(void)
 {
set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-   free_all_bootmem();
+   memblock_free_all();
mem_init_print_info(NULL);
 }
 
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index ba14506..0f29c65 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -218,7 +218,7 @@ void __init mem_init(void)
free_highmem_page(pfn_to_page(tmp));
 #endif
 
-   free_all_bootmem();
+   memblock_free_all();
mem_init_print_info(NULL);
 }
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 0cc8e04..d421a10 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -508,7 +508,7 @@ void __init mem_init(void)
 
/* this will put all unused low memory onto the freelists */
free_unused_memmap();
-   free_all_bootmem();
+   memblock_free_all();
 
 #ifdef CONFIG_SA1111
/* now that our DMA memory is actually so designated, we can free it */
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index e335452..ae21849 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -601,7 +601,7 @@ void __init mem_init(void)
free_unused_memmap();
 #endif
/* this will put all unused low memory onto the freelists */
-   free_all_bootmem();
+   memblock_free_all();
 
kexec_reserve_crashkres_pages();
 
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index dc369ad..3383df8 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -62,7 +62,7 @@ void __init mem_init(void)
high_memory = (void *)(memory_end & PAGE_MASK);
 
/* this will put all memory onto the freelists */
-   free_all_bootmem();
+   memblock_free_all();
 
mem_init_print_info(NULL);
 }
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 5d31ac9..f2bf448 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -96,7 +96,7 @@ void __init mem_init(void)
max_mapnr = MAP_NR(high_memory);
 
/* this will put all low memory onto the freelists */
-   free_all_bootmem();
+   memblock_free_all();
 
mem_init_print_info(NULL);
 }
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index d789b9c..88643fa 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -68,7 +68,7 @@ unsigned long long kmap_generation;
 void __init mem_init(void)
 {
/*  No idea where this is actually declared.  Seems to evade LXR.  */
-   free_all_bootmem();
+   memblock_free_all();
mem_init_print_info(NULL);
 
/*
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 2169ca5..43ea4a4 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -627,7 +627,7 @@ mem_init (void)
 
set_max_mapnr(max_low_pfn);
high_memory = __va(max_low_pfn * PAGE_SIZE);
-   free_all_bootmem();
+   memblock_free_all();
mem_init_print_info(NULL);
 
/*
diff --git 

[RFC PATCH 24/29] memblock: replace free_bootmem_late with memblock_free_late

2018-09-05 Thread Mike Rapoport
free_bootmem_late() and memblock_free_late() do exactly the same thing:
they iterate over a range and give the pages to the page allocator.

Replace calls to free_bootmem_late with calls to memblock_free_late and
remove the bootmem variant.
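
Both boil down to the same loop that hands every page in the range back to
the page allocator; a sketch of the shared logic (kmemleak bookkeeping
elided, addr and size being the start and length of the range):

	for (cursor = PFN_UP(addr); cursor < PFN_DOWN(addr + size); cursor++) {
		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
		totalram_pages++;
	}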

Signed-off-by: Mike Rapoport 
---
 arch/sparc/kernel/mdesc.c   |  3 ++-
 arch/x86/platform/efi/quirks.c  |  6 +++---
 drivers/firmware/efi/apple-properties.c |  2 +-
 include/linux/bootmem.h |  2 --
 mm/nobootmem.c  | 24 
 5 files changed, 6 insertions(+), 31 deletions(-)

diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 59131e7..a41526b 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -190,7 +191,7 @@ static void __init mdesc_memblock_free(struct mdesc_handle 
*hp)
 
alloc_size = PAGE_ALIGN(hp->handle_size);
start = __pa(hp);
-   free_bootmem_late(start, alloc_size);
+   memblock_free_late(start, alloc_size);
 }
 
 static struct mdesc_mem_ops memblock_mdesc_ops = {
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 844d31c..7b4854c 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -332,7 +332,7 @@ void __init efi_reserve_boot_services(void)
 
/*
 * Because the following memblock_reserve() is paired
-* with free_bootmem_late() for this region in
+* with memblock_free_late() for this region in
 * efi_free_boot_services(), we must be extremely
 * careful not to reserve, and subsequently free,
 * critical regions of memory (like the kernel image) or
@@ -363,7 +363,7 @@ void __init efi_reserve_boot_services(void)
 * doesn't make sense as far as the firmware is
 * concerned, but it does provide us with a way to tag
 * those regions that must not be paired with
-* free_bootmem_late().
+* memblock_free_late().
 */
md->attribute |= EFI_MEMORY_RUNTIME;
}
@@ -413,7 +413,7 @@ void __init efi_free_boot_services(void)
size -= rm_size;
}
 
-   free_bootmem_late(start, size);
+   memblock_free_late(start, size);
}
 
if (!num_entries)
diff --git a/drivers/firmware/efi/apple-properties.c 
b/drivers/firmware/efi/apple-properties.c
index 60a9571..2b675f7 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -235,7 +235,7 @@ static int __init map_properties(void)
 */
data->len = 0;
memunmap(data);
-   free_bootmem_late(pa_data + sizeof(*data), data_len);
+   memblock_free_late(pa_data + sizeof(*data), data_len);
 
return ret;
}
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 706cf8e..bcc7e2f 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -30,8 +30,6 @@ extern unsigned long free_all_bootmem(void);
 extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
 
-extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
-
 /* We are using top down, so it is safe to use 0 here */
 #define BOOTMEM_LOW_LIMIT 0
 
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 85e1822..ee0f7fc 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -33,30 +33,6 @@ unsigned long min_low_pfn;
 unsigned long max_pfn;
 unsigned long long max_possible_pfn;
 
-/**
- * free_bootmem_late - free bootmem pages directly to page allocator
- * @addr: starting address of the range
- * @size: size of the range in bytes
- *
- * This is only useful when the bootmem allocator has already been torn
- * down, but we are still initializing the system.  Pages are given directly
- * to the page allocator, no bootmem metadata is updated because it is gone.
- */
-void __init free_bootmem_late(unsigned long addr, unsigned long size)
-{
-   unsigned long cursor, end;
-
-   kmemleak_free_part_phys(addr, size);
-
-   cursor = PFN_UP(addr);
-   end = PFN_DOWN(addr + size);
-
-   for (; cursor < end; cursor++) {
-   __free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
-   totalram_pages++;
-   }
-}
-
 static void __init __free_pages_memory(unsigned long start, unsigned long end)
 {
int order;
-- 
2.7.4



[RFC PATCH 23/29] memblock: replace free_bootmem{_node} with memblock_free

2018-09-05 Thread Mike Rapoport
The free_bootmem and free_bootmem_node are merely wrappers for
memblock_free. Replace their usage with a call to memblock_free using the
following semantic patch:

@@
expression e1, e2, e3;
@@
(
- free_bootmem(e1, e2)
+ memblock_free(e1, e2)
|
- free_bootmem_node(e1, e2, e3)
+ memblock_free(e2, e3)
)
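
Since memblock_free() operates on a plain physical range, the pgdat argument
of free_bootmem_node() is simply dropped, i.e. (physaddr, size and nid being
placeholders):

-	free_bootmem_node(NODE_DATA(nid), physaddr, size);
+	memblock_free(physaddr, size);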

Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/core_irongate.c |  3 +--
 arch/arm64/mm/init.c  |  2 +-
 arch/mips/kernel/setup.c  |  2 +-
 arch/powerpc/kernel/setup_64.c|  2 +-
 arch/sparc/kernel/smp_64.c|  2 +-
 arch/um/kernel/mem.c  |  3 ++-
 arch/unicore32/mm/init.c  |  2 +-
 arch/x86/kernel/setup_percpu.c|  3 ++-
 arch/x86/kernel/tce_64.c  |  3 ++-
 arch/x86/xen/p2m.c|  3 ++-
 drivers/macintosh/smu.c   |  2 +-
 drivers/usb/early/xhci-dbc.c  | 11 ++-
 drivers/xen/swiotlb-xen.c |  4 +++-
 include/linux/bootmem.h   |  4 
 mm/nobootmem.c| 30 --
 15 files changed, 24 insertions(+), 52 deletions(-)

diff --git a/arch/alpha/kernel/core_irongate.c 
b/arch/alpha/kernel/core_irongate.c
index f709866..35572be 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -234,8 +234,7 @@ albacore_init_arch(void)
unsigned long size;
 
size = initrd_end - initrd_start;
-   free_bootmem_node(NODE_DATA(0), __pa(initrd_start),
- PAGE_ALIGN(size));
+   memblock_free(__pa(initrd_start), PAGE_ALIGN(size));
if (!move_initrd(pci_mem))
printk("irongate_init_arch: initrd too big "
   "(%ldK)\ndisabling initrd\n",
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 787e279..e335452 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -538,7 +538,7 @@ static inline void free_memmap(unsigned long start_pfn, 
unsigned long end_pfn)
 * memmap array.
 */
if (pg < pgend)
-   free_bootmem(pg, pgend - pg);
+   memblock_free(pg, pgend - pg);
 }
 
 /*
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 419dfc42..6d8d0c7 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -561,7 +561,7 @@ static void __init bootmem_init(void)
extern void show_kernel_relocation(const char *level);
 
offset = __pa_symbol(_text) - __pa_symbol(VMLINUX_LOAD_ADDRESS);
-   free_bootmem(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset);
+   memblock_free(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset);
 
 #if defined(CONFIG_DEBUG_KERNEL) && defined(CONFIG_DEBUG_INFO)
/*
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6add560..e564b27 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -765,7 +765,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t 
size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-   free_bootmem(__pa(ptr), size);
+   memblock_free(__pa(ptr), size);
 }
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 337febd..a087a6a 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1607,7 +1607,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
size_t size,
 
 static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-   free_bootmem(__pa(ptr), size);
+   memblock_free(__pa(ptr), size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 185f6bb..3555c13 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -46,7 +47,7 @@ void __init mem_init(void)
 */
brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
-   free_bootmem(__pa(brk_end), uml_reserved - brk_end);
+   memblock_free(__pa(brk_end), uml_reserved - brk_end);
uml_reserved = brk_end;
 
/* this will put all low memory onto the freelists */
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 44ccc15..4c572ab 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -241,7 +241,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 * free the section of the memmap array.
 */
if (pg < pgend)
-   free_bootmem(pg, pgend - pg);
+   memblock_free(pg, pgend - pg);
 }
 
 /*
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 041663a..a006f1b 100644
--- 

[RFC PATCH 22/29] mm: nobootmem: remove bootmem allocation APIs

2018-09-05 Thread Mike Rapoport
The bootmem compatibility APIs are not used and can be removed.

Signed-off-by: Mike Rapoport 
---
 include/linux/bootmem.h |  47 --
 mm/nobootmem.c  | 224 
 2 files changed, 271 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index c97c105..73f1272 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -36,33 +36,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
 
-extern void *__alloc_bootmem(unsigned long size,
-unsigned long align,
-unsigned long goal);
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-unsigned long align,
-unsigned long goal) __malloc;
-extern void *__alloc_bootmem_node(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal) __malloc;
-void *__alloc_bootmem_node_high(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal) __malloc;
-extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal) __malloc;
-void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal,
- unsigned long limit) __malloc;
-extern void *__alloc_bootmem_low(unsigned long size,
-unsigned long align,
-unsigned long goal) __malloc;
-
 /* We are using top down, so it is safe to use 0 here */
 #define BOOTMEM_LOW_LIMIT 0
 
@@ -70,26 +43,6 @@ extern void *__alloc_bootmem_low(unsigned long size,
 #define ARCH_LOW_ADDRESS_LIMIT  0xUL
 #endif
 
-#define alloc_bootmem(x) \
-   __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_align(x, align) \
-   __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages(x) \
-   __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages_nopanic(x) \
-   __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_node(pgdat, x) \
-   __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_node_nopanic(pgdat, x) \
-   __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, 
BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages_node(pgdat, x) \
-   __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-
-#define alloc_bootmem_low(x) \
-   __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
-#define alloc_bootmem_low_pages(x) \
-   __alloc_bootmem_low(x, PAGE_SIZE, 0)
-
 /* FIXME: use MEMBLOCK_ALLOC_* variants here */
 #define BOOTMEM_ALLOC_ACCESSIBLE   0
 #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0)
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 44ce7de..bc38e56 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -33,41 +33,6 @@ unsigned long min_low_pfn;
 unsigned long max_pfn;
 unsigned long long max_possible_pfn;
 
-static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
-   u64 goal, u64 limit)
-{
-   void *ptr;
-   u64 addr;
-   enum memblock_flags flags = choose_memblock_flags();
-
-   if (limit > memblock.current_limit)
-   limit = memblock.current_limit;
-
-again:
-   addr = memblock_find_in_range_node(size, align, goal, limit, nid,
-  flags);
-   if (!addr && (flags & MEMBLOCK_MIRROR)) {
-   flags &= ~MEMBLOCK_MIRROR;
-   pr_warn("Could not allocate %pap bytes of mirrored memory\n",
-   &size);
-   goto again;
-   }
-   if (!addr)
-   return NULL;
-
-   if (memblock_reserve(addr, size))
-   return NULL;
-
-   ptr = phys_to_virt(addr);
-   memset(ptr, 0, size);
-   /*
-* The min_count is set to 0 so that bootmem allocated blocks
-* are never reported as leaks.
-*/
-   kmemleak_alloc(ptr, size, 0, 0);
-   return ptr;
-}
-
 /**
  * free_bootmem_late - free bootmem pages directly to page allocator
  * @addr: starting address of the range
@@ -215,192 +180,3 @@ void __init free_bootmem(unsigned long addr, unsigned 
long size)
 {
memblock_free(addr, size);
 }
-
-static void * __init 

[RFC PATCH 20/29] memblock: replace __alloc_bootmem with memblock_alloc_from

2018-09-05 Thread Mike Rapoport
The conversion is done using the following semantic patch:

@@
expression e1, e2, e3;
@@
- __alloc_bootmem(e1, e2, e3)
+ memblock_alloc_from(e1, e2, e3)
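
memblock_alloc_from() keeps the same size/align/goal ordering, with the goal
passed on as the minimal physical address of the allocation. Roughly (a
sketch, mirroring the other bootmem.h wrappers in this series; the exact
limit constant may differ):

static inline void * __init memblock_alloc_from(phys_addr_t size,
						phys_addr_t align,
						phys_addr_t min_addr)
{
	return memblock_alloc_try_nid(size, align, min_addr,
				      BOOTMEM_ALLOC_ACCESSIBLE,
				      NUMA_NO_NODE);
}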

Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/core_cia.c  |  2 +-
 arch/alpha/kernel/pci_iommu.c |  4 ++--
 arch/alpha/kernel/setup.c |  2 +-
 arch/ia64/kernel/mca.c|  4 ++--
 arch/ia64/mm/contig.c |  5 +++--
 arch/mips/kernel/traps.c  |  2 +-
 arch/sparc/kernel/prom_32.c   |  2 +-
 arch/sparc/kernel/smp_64.c| 10 +-
 arch/sparc/mm/init_32.c   |  2 +-
 arch/sparc/mm/init_64.c   |  9 ++---
 arch/sparc/mm/srmmu.c | 10 +-
 include/linux/bootmem.h   |  8 
 12 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index 4b38386..026ee95 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -331,7 +331,7 @@ cia_prepare_tbia_workaround(int window)
long i;
 
/* Use minimal 1K map. */
-   ppte = __alloc_bootmem(CIA_BROKEN_TBIA_SIZE, 32768, 0);
+   ppte = memblock_alloc_from(CIA_BROKEN_TBIA_SIZE, 32768, 0);
pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1;
 
for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i)
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index b52d76f..0c05493 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -87,13 +87,13 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, 
dma_addr_t base,
printk("%s: couldn't allocate arena ptes from node %d\n"
   "falling back to system-wide allocation\n",
   __func__, nid);
-   arena->ptes = __alloc_bootmem(mem_size, align, 0);
+   arena->ptes = memblock_alloc_from(mem_size, align, 0);
}
 
 #else /* CONFIG_DISCONTIGMEM */
 
arena = alloc_bootmem(sizeof(*arena));
-   arena->ptes = __alloc_bootmem(mem_size, align, 0);
+   arena->ptes = memblock_alloc_from(mem_size, align, 0);
 
 #endif /* CONFIG_DISCONTIGMEM */
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 4f0d944..64c06a0 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -294,7 +294,7 @@ move_initrd(unsigned long mem_limit)
unsigned long size;
 
size = initrd_end - initrd_start;
-   start = __alloc_bootmem(PAGE_ALIGN(size), PAGE_SIZE, 0);
+   start = memblock_alloc_from(PAGE_ALIGN(size), PAGE_SIZE, 0);
if (!start || __pa(start) + size > mem_limit) {
initrd_start = initrd_end = 0;
return NULL;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6115464..5586926 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1835,8 +1835,8 @@ format_mca_init_stack(void *mca_data, unsigned long 
offset,
 /* Caller prevents this from being called after init */
 static void * __ref mca_bootmem(void)
 {
-   return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
-   KERNEL_STACK_SIZE, 0);
+   return memblock_alloc_from(sizeof(struct ia64_mca_cpu),
+  KERNEL_STACK_SIZE, 0);
 }
 
 /* Do per-CPU MCA-related initialization.  */
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index e2e40bb..9e5c23a 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -85,8 +85,9 @@ void *per_cpu_init(void)
 static inline void
 alloc_per_cpu_data(void)
 {
-   cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(),
-  PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+   cpu_data = memblock_alloc_from(PERCPU_PAGE_SIZE * num_possible_cpus(),
+  PERCPU_PAGE_SIZE,
+  __pa(MAX_DMA_ADDRESS));
 }
 
 /**
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 576aeef..31566d5 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2261,7 +2261,7 @@ void __init trap_init(void)
phys_addr_t ebase_pa;
 
ebase = (unsigned long)
-   __alloc_bootmem(size, 1 << fls(size), 0);
+   memblock_alloc_from(size, 1 << fls(size), 0);
 
/*
 * Try to ensure ebase resides in KSeg0 if possible.
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index b51cbb9..4389944 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -32,7 +32,7 @@ void * __init prom_early_alloc(unsigned long size)
 {
void *ret;
 
-   ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
+   ret = memblock_alloc_from(size, SMP_CACHE_BYTES, 0UL);
if (ret != NULL)
memset(ret, 0, size);
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 83ff88d..337febd 100644
--- 

[RFC PATCH 18/29] memblock: replace alloc_bootmem_low_pages with memblock_alloc_low

2018-09-05 Thread Mike Rapoport
The conversion is done using the following semantic patch:

@@
expression e;
@@
- alloc_bootmem_low_pages(e)
+ memblock_alloc_low(e, PAGE_SIZE)
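
The explicit PAGE_SIZE alignment matches the definition of the macro being
removed later in the series (see the bootmem.h cleanup in patch 22):

	#define alloc_bootmem_low_pages(x) \
		__alloc_bootmem_low(x, PAGE_SIZE, 0)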

Signed-off-by: Mike Rapoport 
---
 arch/arc/mm/highmem.c|  2 +-
 arch/m68k/atari/stram.c  |  3 ++-
 arch/m68k/mm/motorola.c  |  5 +++--
 arch/mips/cavium-octeon/dma-octeon.c |  2 +-
 arch/mips/mm/init.c  |  3 ++-
 arch/um/kernel/mem.c | 10 ++
 arch/xtensa/mm/mmu.c |  2 +-
 7 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index 77ff64a..f582dc8 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -123,7 +123,7 @@ static noinline pte_t * __init alloc_kmap_pgtable(unsigned 
long kvaddr)
pud_k = pud_offset(pgd_k, kvaddr);
pmd_k = pmd_offset(pud_k, kvaddr);
 
-   pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+   pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
pmd_populate_kernel(&init_mm, pmd_k, pte_k);
return pte_k;
 }
diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index c83d664..1089d67 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -95,7 +95,8 @@ void __init atari_stram_reserve_pages(void *start_mem)
 {
if (kernel_in_stram) {
pr_debug("atari_stram pool: kernel in ST-RAM, using 
alloc_bootmem!\n");
-   stram_pool.start = 
(resource_size_t)alloc_bootmem_low_pages(pool_size);
+   stram_pool.start = 
(resource_size_t)memblock_alloc_low(pool_size,
+  
PAGE_SIZE);
stram_pool.end = stram_pool.start + pool_size - 1;
request_resource(&iomem_resource, &stram_pool);
stram_virt_offset = 0;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 4e17ecb..8bcf57e 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -55,7 +55,7 @@ static pte_t * __init kernel_page_table(void)
 {
pte_t *ptablep;
 
-   ptablep = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+   ptablep = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 
clear_page(ptablep);
__flush_page_to_ram(ptablep);
@@ -95,7 +95,8 @@ static pmd_t * __init kernel_ptr_table(void)
 
last_pgtable += PTRS_PER_PMD;
if (((unsigned long)last_pgtable & ~PAGE_MASK) == 0) {
-   last_pgtable = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+   last_pgtable = (pmd_t *)memblock_alloc_low(PAGE_SIZE,
+  PAGE_SIZE);
 
clear_page(last_pgtable);
__flush_page_to_ram(last_pgtable);
diff --git a/arch/mips/cavium-octeon/dma-octeon.c 
b/arch/mips/cavium-octeon/dma-octeon.c
index 236833b..c44c1a6 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -244,7 +244,7 @@ void __init plat_swiotlb_setup(void)
swiotlb_nslabs = ALIGN(swiotlb_nslabs, IO_TLB_SEGSIZE);
swiotlbsize = swiotlb_nslabs << IO_TLB_SHIFT;
 
-   octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
+   octeon_swiotlb = memblock_alloc_low(swiotlbsize, PAGE_SIZE);
 
if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
panic("Cannot allocate SWIOTLB buffer");
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 400676c..a010fba7 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -244,7 +244,8 @@ void __init fixrange_init(unsigned long start, unsigned 
long end,
pmd = (pmd_t *)pud;
for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) 
{
if (pmd_none(*pmd)) {
-   pte = (pte_t *) 
alloc_bootmem_low_pages(PAGE_SIZE);
+   pte = (pte_t *) 
memblock_alloc_low(PAGE_SIZE,
+  
PAGE_SIZE);
set_pmd(pmd, __pmd((unsigned long)pte));
BUG_ON(pte != pte_offset_kernel(pmd, 
0));
}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3c0e470..185f6bb 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -64,7 +64,8 @@ void __init mem_init(void)
 static void __init one_page_table_init(pmd_t *pmd)
 {
if (pmd_none(*pmd)) {
-   pte_t *pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+   pte_t *pte = (pte_t *) memblock_alloc_low(PAGE_SIZE,
+ PAGE_SIZE);
set_pmd(pmd, __pmd(_KERNPG_TABLE +
   (unsigned long) __pa(pte)));
if (pte != pte_offset_kernel(pmd, 0))
@@ -75,7 +76,7 @@ static 

[RFC PATCH 17/29] memblock: replace alloc_bootmem_node with memblock_alloc_node

2018-09-05 Thread Mike Rapoport
Signed-off-by: Mike Rapoport 
---
 arch/alpha/kernel/pci_iommu.c   | 4 ++--
 arch/ia64/sn/kernel/io_common.c | 7 ++-
 arch/ia64/sn/kernel/setup.c | 4 ++--
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 6923b0d..b52d76f 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -74,7 +74,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, 
dma_addr_t base,
 
 #ifdef CONFIG_DISCONTIGMEM
 
-   arena = alloc_bootmem_node(NODE_DATA(nid), sizeof(*arena));
+   arena = memblock_alloc_node(sizeof(*arena), align, nid);
if (!NODE_DATA(nid) || !arena) {
printk("%s: couldn't allocate arena from node %d\n"
   "falling back to system-wide allocation\n",
@@ -82,7 +82,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, 
dma_addr_t base,
arena = alloc_bootmem(sizeof(*arena));
}
 
-   arena->ptes = __alloc_bootmem_node(NODE_DATA(nid), mem_size, align, 0);
+   arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid);
if (!NODE_DATA(nid) || !arena->ptes) {
printk("%s: couldn't allocate arena ptes from node %d\n"
   "falling back to system-wide allocation\n",
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 102aaba..8b05d55 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -385,16 +385,13 @@ void __init hubdev_init_node(nodepda_t * npda, cnodeid_t 
node)
 {
struct hubdev_info *hubdev_info;
int size;
-   pg_data_t *pg;
 
size = sizeof(struct hubdev_info);
 
if (node >= num_online_nodes()) /* Headless/memless IO nodes */
-   pg = NODE_DATA(0);
-   else
-   pg = NODE_DATA(node);
+   node = 0;
 
-   hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+   hubdev_info = (struct hubdev_info *)memblock_alloc_node(size, 0, node);
 
npda->pdinfo = (void *)hubdev_info;
 }
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 5f6b6b4..ab2564f 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -511,7 +511,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 */
for_each_online_node(cnode) {
nodepdaindr[cnode] =
-   alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
+   memblock_alloc_node(sizeof(nodepda_t), 0, cnode);
memset(nodepdaindr[cnode]->phys_cpuid, -1,
sizeof(nodepdaindr[cnode]->phys_cpuid));
spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
@@ -522,7 +522,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 */
for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++)
nodepdaindr[cnode] =
-   alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
+   memblock_alloc_node(sizeof(nodepda_t), 0, 0);
 
/*
 * Now copy the array of nodepda pointers to each nodepda.
-- 
2.7.4



[RFC PATCH 15/29] memblock: replace alloc_bootmem_pages_node with memblock_alloc_node

2018-09-05 Thread Mike Rapoport
Signed-off-by: Mike Rapoport 
---
 arch/ia64/mm/init.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 3b85c3e..ffcc358 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -447,19 +447,19 @@ int __init create_mem_map_page_table(u64 start, u64 end, 
void *arg)
for (address = start_page; address < end_page; address += PAGE_SIZE) {
pgd = pgd_offset_k(address);
if (pgd_none(*pgd))
-   pgd_populate(&init_mm, pgd, 
alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+   pgd_populate(&init_mm, pgd, 
memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node));
pud = pud_offset(pgd, address);

if (pud_none(*pud))
-   pud_populate(&init_mm, pud, 
alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+   pud_populate(&init_mm, pud, 
memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node));
pmd = pmd_offset(pud, address);

if (pmd_none(*pmd))
-   pmd_populate_kernel(&init_mm, pmd, 
alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+   pmd_populate_kernel(&init_mm, pmd, 
memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node));
pte = pte_offset_kernel(pmd, address);
 
if (pte_none(*pte))
-   set_pte(pte, 
pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> 
PAGE_SHIFT,
+   set_pte(pte, 
pfn_pte(__pa(memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node))) >> PAGE_SHIFT,
 PAGE_KERNEL));
}
return 0;
-- 
2.7.4



[RFC PATCH 16/29] memblock: replace __alloc_bootmem_node with appropriate memblock_ API

2018-09-05 Thread Mike Rapoport
Use memblock_alloc_try_nid() whenever a goal (i.e. a minimal address) is
specified, and memblock_alloc_node() otherwise.
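
I.e., with p, size, align, goal and nid as placeholders:

-	p = __alloc_bootmem_node(NODE_DATA(nid), size, align, goal);
+	p = memblock_alloc_try_nid(size, align, goal,
+				   BOOTMEM_ALLOC_ACCESSIBLE, nid);

and, when no goal is given:

-	p = __alloc_bootmem_node(NODE_DATA(nid), size, align, 0);
+	p = memblock_alloc_node(size, align, nid);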

Signed-off-by: Mike Rapoport 
---
 arch/ia64/mm/discontig.c   |  6 --
 arch/ia64/mm/init.c|  2 +-
 arch/powerpc/kernel/setup_64.c |  6 --
 arch/sparc/kernel/setup_64.c   | 10 --
 arch/sparc/kernel/smp_64.c |  4 ++--
 5 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 1928d57..918dda9 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -451,8 +451,10 @@ static void __init *memory_less_node_alloc(int nid, 
unsigned long pernodesize)
if (bestnode == -1)
bestnode = anynode;
 
-   ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
-   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+   ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE,
+__pa(MAX_DMA_ADDRESS),
+BOOTMEM_ALLOC_ACCESSIBLE,
+bestnode);
 
return ptr;
 }
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ffcc358..2169ca5 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -459,7 +459,7 @@ int __init create_mem_map_page_table(u64 start, u64 end, 
void *arg)
pte = pte_offset_kernel(pmd, address);
 
if (pte_none(*pte))
-   set_pte(pte, 
pfn_pte(__pa(memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node))) >> PAGE_SHIFT,
+   set_pte(pte, 
pfn_pte(__pa(memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node)) >> PAGE_SHIFT,
 PAGE_KERNEL));
}
return 0;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6a501b2..6add560 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -757,8 +757,10 @@ void __init emergency_stack_init(void)
 
 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 {
-   return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, 
align,
-   __pa(MAX_DMA_ADDRESS));
+   return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
+ BOOTMEM_ALLOC_ACCESSIBLE,
+ early_cpu_to_node(cpu));
+
 }
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 206bf81..5fb11ea 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -622,12 +622,10 @@ void __init alloc_irqstack_bootmem(void)
for_each_possible_cpu(i) {
node = cpu_to_node(i);
 
-   softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-   THREAD_SIZE,
-   THREAD_SIZE, 0);
-   hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-   THREAD_SIZE,
-   THREAD_SIZE, 0);
+   softirq_stack[i] = memblock_alloc_node(THREAD_SIZE,
+  THREAD_SIZE, node);
+   hardirq_stack[i] = memblock_alloc_node(THREAD_SIZE,
+  THREAD_SIZE, node);
}
 }
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3ea1f3..83ff88d 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1594,8 +1594,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
size_t size,
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
 cpu, size, __pa(ptr));
} else {
-   ptr = __alloc_bootmem_node(NODE_DATA(node),
-  size, align, goal);
+   ptr = memblock_alloc_try_nid(size, align, goal,
+BOOTMEM_ALLOC_ACCESSIBLE, node);
pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
 "%016lx\n", cpu, size, node, __pa(ptr));
}
-- 
2.7.4



[RFC PATCH 12/29] memblock: replace alloc_bootmem_low with memblock_alloc_low

2018-09-05 Thread Mike Rapoport
The alloc_bootmem_low(size) call allocates low memory with the default
alignment and can be replaced by memblock_alloc_low(size, 0).

Signed-off-by: Mike Rapoport 
---
 arch/arm64/kernel/setup.c | 2 +-
 arch/unicore32/kernel/setup.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 5b4fac4..cf7a7b7 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -213,7 +213,7 @@ static void __init request_standard_resources(void)
kernel_data.end = __pa_symbol(_end - 1);
 
for_each_memblock(memory, region) {
-   res = alloc_bootmem_low(sizeof(*res));
+   res = memblock_alloc_low(sizeof(*res), 0);
if (memblock_is_nomap(region)) {
res->name  = "reserved";
res->flags = IORESOURCE_MEM;
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index c2bffa5..9f163f9 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -207,7 +207,7 @@ request_standard_resources(struct meminfo *mi)
if (mi->bank[i].size == 0)
continue;
 
-   res = alloc_bootmem_low(sizeof(*res));
+   res = memblock_alloc_low(sizeof(*res), 0);
res->name  = "System RAM";
res->start = mi->bank[i].start;
res->end   = mi->bank[i].start + mi->bank[i].size - 1;
-- 
2.7.4



[RFC PATCH 14/29] memblock: add align parameter to memblock_alloc_node()

2018-09-05 Thread Mike Rapoport
With the align parameter, memblock_alloc_node() can be used as a drop-in
replacement for alloc_bootmem_pages_node().
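
E.g., with nid standing in for the target node:

-	p = alloc_bootmem_pages_node(NODE_DATA(nid), PAGE_SIZE);
+	p = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, nid);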

Signed-off-by: Mike Rapoport 
---
 include/linux/bootmem.h | 4 ++--
 mm/sparse.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7d91f0f..3896af2 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -157,9 +157,9 @@ static inline void * __init memblock_alloc_from_nopanic(
 }
 
 static inline void * __init memblock_alloc_node(
-   phys_addr_t size, int nid)
+   phys_addr_t size, phys_addr_t align, int nid)
 {
-   return memblock_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+   return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
BOOTMEM_ALLOC_ACCESSIBLE, nid);
 }
 
diff --git a/mm/sparse.c b/mm/sparse.c
index 04e97af..509828f 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -68,7 +68,7 @@ static noinline struct mem_section __ref 
*sparse_index_alloc(int nid)
if (slab_is_available())
section = kzalloc_node(array_size, GFP_KERNEL, nid);
else
-   section = memblock_alloc_node(array_size, nid);
+   section = memblock_alloc_node(array_size, 0, nid);
 
return section;
 }
-- 
2.7.4



[RFC PATCH 13/29] memblock: replace __alloc_bootmem_nopanic with memblock_alloc_from_nopanic

2018-09-05 Thread Mike Rapoport
Signed-off-by: Mike Rapoport 
---
 arch/arc/kernel/unwind.c   | 4 ++--
 arch/x86/kernel/setup_percpu.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 183391d..2a01dd1 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -181,8 +181,8 @@ static void init_unwind_hdr(struct unwind_table *table,
  */
 static void *__init unw_hdr_alloc_early(unsigned long sz)
 {
-   return __alloc_bootmem_nopanic(sz, sizeof(unsigned int),
-  MAX_DMA_ADDRESS);
+   return memblock_alloc_from_nopanic(sz, sizeof(unsigned int),
+  MAX_DMA_ADDRESS);
 }
 
 static void *unw_hdr_alloc(unsigned long sz)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 67d48e26..041663a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -106,7 +106,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
unsigned long size,
void *ptr;
 
if (!node_online(node) || !NODE_DATA(node)) {
-   ptr = __alloc_bootmem_nopanic(size, align, goal);
+   ptr = memblock_alloc_from_nopanic(size, align, goal);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
@@ -121,7 +121,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, 
unsigned long size,
}
return ptr;
 #else
-   return __alloc_bootmem_nopanic(size, align, goal);
+   return memblock_alloc_from_nopanic(size, align, goal);
 #endif
 }
 
-- 
2.7.4



[RFC PATCH 09/29] memblock: replace alloc_bootmem_low with memblock_alloc_low

2018-09-05 Thread Mike Rapoport
The functions are equivalent; the latter just does not require the nobootmem
translation layer.

Signed-off-by: Mike Rapoport 
---
 arch/x86/kernel/tce_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index f386bad..54c9b5a 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -173,7 +173,7 @@ void * __init alloc_tce_table(void)
size = table_size_to_number_of_entries(specified_table_size);
size *= TCE_ENTRY_SIZE;
 
-   return __alloc_bootmem_low(size, size, 0);
+   return memblock_alloc_low(size, size);
 }
 
 void __init free_tce_table(void *tbl)
-- 
2.7.4



[RFC PATCH 11/29] memblock: replace alloc_bootmem_pages_nopanic with memblock_alloc_nopanic

2018-09-05 Thread Mike Rapoport
The alloc_bootmem_pages_nopanic(size) macro is a shortcut for
__alloc_bootmem_nopanic(size, PAGE_SIZE, BOOTMEM_LOW_LIMIT) and can be
replaced by memblock_alloc_nopanic(size, PAGE_SIZE).

Signed-off-by: Mike Rapoport 
---
 drivers/usb/early/xhci-dbc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index e15e896..16df968 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -94,7 +94,7 @@ static void * __init xdbc_get_page(dma_addr_t *dma_addr)
 {
void *virt;
 
-   virt = alloc_bootmem_pages_nopanic(PAGE_SIZE);
+   virt = memblock_alloc_nopanic(PAGE_SIZE, PAGE_SIZE);
if (!virt)
return NULL;
 
-- 
2.7.4



[RFC PATCH 07/29] memblock: remove _virt from APIs returning virtual address

2018-09-05 Thread Mike Rapoport
The conversion is done using

sed -i 's@memblock_virt_alloc@memblock_alloc@g' \
$(git grep -l memblock_virt_alloc)

Signed-off-by: Mike Rapoport 
---
 arch/arm/kernel/setup.c   |  4 ++--
 arch/arm/mach-omap2/omap_hwmod.c  |  2 +-
 arch/arm64/mm/kasan_init.c|  2 +-
 arch/arm64/mm/numa.c  |  2 +-
 arch/powerpc/kernel/pci_32.c  |  2 +-
 arch/powerpc/lib/alloc.c  |  2 +-
 arch/powerpc/mm/mmu_context_nohash.c  |  6 ++---
 arch/powerpc/platforms/powermac/nvram.c   |  2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  6 ++---
 arch/powerpc/platforms/ps3/setup.c|  2 +-
 arch/powerpc/sysdev/msi_bitmap.c  |  2 +-
 arch/s390/kernel/setup.c  | 12 +-
 arch/s390/kernel/smp.c|  2 +-
 arch/s390/kernel/topology.c   |  4 ++--
 arch/s390/numa/mode_emu.c |  2 +-
 arch/s390/numa/toptree.c  |  2 +-
 arch/x86/mm/kasan_init_64.c   |  4 ++--
 arch/xtensa/mm/kasan_init.c   |  2 +-
 drivers/clk/ti/clk.c  |  2 +-
 drivers/firmware/memmap.c |  2 +-
 drivers/of/fdt.c  |  2 +-
 drivers/of/unittest.c |  2 +-
 include/linux/bootmem.h   | 38 +++
 init/main.c   |  6 ++---
 kernel/dma/swiotlb.c  |  8 +++
 kernel/power/snapshot.c   |  2 +-
 kernel/printk/printk.c|  4 ++--
 lib/cpumask.c |  2 +-
 mm/hugetlb.c  |  2 +-
 mm/kasan/kasan_init.c |  2 +-
 mm/memblock.c | 26 ++---
 mm/page_alloc.c   |  8 +++
 mm/page_ext.c |  2 +-
 mm/percpu.c   | 28 +++
 mm/sparse-vmemmap.c   |  2 +-
 mm/sparse.c   | 12 +-
 36 files changed, 105 insertions(+), 105 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 4c249cb..39e6090 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -857,7 +857,7 @@ static void __init request_standard_resources(const struct 
machine_desc *mdesc)
 */
boot_alias_start = phys_to_idmap(start);
if (arm_has_idmap_alias() && boot_alias_start != 
IDMAP_INVALID_ADDR) {
-   res = memblock_virt_alloc(sizeof(*res), 0);
+   res = memblock_alloc(sizeof(*res), 0);
res->name = "System RAM (boot alias)";
res->start = boot_alias_start;
res->end = phys_to_idmap(end);
@@ -865,7 +865,7 @@ static void __init request_standard_resources(const struct 
machine_desc *mdesc)
request_resource(_resource, res);
}
 
-   res = memblock_virt_alloc(sizeof(*res), 0);
+   res = memblock_alloc(sizeof(*res), 0);
res->name  = "System RAM";
res->start = start;
res->end = end;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index cd65ea4..314284e 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -725,7 +725,7 @@ static int __init _setup_clkctrl_provider(struct 
device_node *np)
struct clkctrl_provider *provider;
u64 size;
 
-   provider = memblock_virt_alloc(sizeof(*provider), 0);
+   provider = memblock_alloc(sizeof(*provider), 0);
if (!provider)
return -ENOMEM;
 
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 1214587..2391560 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -38,7 +38,7 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata 
__aligned(PGD_SIZE);
 
 static phys_addr_t __init kasan_alloc_zeroed_page(int node)
 {
-   void *p = memblock_virt_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+   void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
  __pa(MAX_DMA_ADDRESS),
  MEMBLOCK_ALLOC_ACCESSIBLE, node);
return __pa(p);
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index e5aacd6..8f2e0e8 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -168,7 +168,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t 
size,
 {
int nid = early_cpu_to_node(cpu);
 
-   return  memblock_virt_alloc_try_nid(size, align,
+   return  memblock_alloc_try_nid(size, align,
__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d63b488..2fb4781 

[RFC PATCH 05/29] mm: nobootmem: remove dead code

2018-09-05 Thread Mike Rapoport
Several bootmem functions and macros are not used. Remove them.

Signed-off-by: Mike Rapoport 
---
 include/linux/bootmem.h | 26 --
 mm/nobootmem.c  | 35 ---
 2 files changed, 61 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index fce6278..b74bafd1 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -36,17 +36,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
 
-/*
- * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
- * the architecture-specific code should honor this).
- *
- * If flags is BOOTMEM_DEFAULT, then the return value is always 0 (success).
- * If flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the memory
- * already was reserved.
- */
-#define BOOTMEM_DEFAULT0
-#define BOOTMEM_EXCLUSIVE  (1<<0)
-
 extern void *__alloc_bootmem(unsigned long size,
 unsigned long align,
 unsigned long goal);
@@ -73,13 +62,6 @@ void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 extern void *__alloc_bootmem_low(unsigned long size,
 unsigned long align,
 unsigned long goal) __malloc;
-void *__alloc_bootmem_low_nopanic(unsigned long size,
-unsigned long align,
-unsigned long goal) __malloc;
-extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal) __malloc;
 
 /* We are using top down, so it is safe to use 0 here */
 #define BOOTMEM_LOW_LIMIT 0
@@ -92,8 +74,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_align(x, align) \
__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_nopanic(x) \
-   __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages(x) \
__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_nopanic(x) \
@@ -104,17 +84,11 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, 
BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
-   __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 
 #define alloc_bootmem_low(x) \
__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
-#define alloc_bootmem_low_pages_nopanic(x) \
-   __alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0)
 #define alloc_bootmem_low_pages(x) \
__alloc_bootmem_low(x, PAGE_SIZE, 0)
-#define alloc_bootmem_low_pages_node(pgdat, x) \
-   __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
 /* FIXME: use MEMBLOCK_ALLOC_* variants here */
 #define BOOTMEM_ALLOC_ACCESSIBLE   0
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index d4d0cd4..44ce7de 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -404,38 +404,3 @@ void * __init __alloc_bootmem_low(unsigned long size, 
unsigned long align,
 {
return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
 }
-
-void * __init __alloc_bootmem_low_nopanic(unsigned long size,
- unsigned long align,
- unsigned long goal)
-{
-   return ___alloc_bootmem_nopanic(size, align, goal,
-   ARCH_LOW_ADDRESS_LIMIT);
-}
-
-/**
- * __alloc_bootmem_low_node - allocate low boot memory from a specific node
- * @pgdat: node to allocate from
- * @size: size of the request in bytes
- * @align: alignment of the region
- * @goal: preferred starting address of the region
- *
- * The goal is dropped if it can not be satisfied and the allocation will
- * fall back to memory below @goal.
- *
- * Allocation may fall back to any node in the system if the specified node
- * can not hold the requested memory.
- *
- * The function panics if the request can not be satisfied.
- *
- * Return: address of the allocated region.
- */
-void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
-  unsigned long align, unsigned long goal)
-{
-   if (WARN_ON_ONCE(slab_is_available()))
-   return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-   return ___alloc_bootmem_node(pgdat, size, align, goal,
-ARCH_LOW_ADDRESS_LIMIT);
-}
-- 
2.7.4



[RFC PATCH 06/29] memblock: rename memblock_alloc{_nid, _try_nid} to memblock_phys_alloc*

2018-09-05 Thread Mike Rapoport
This will allow using memblock_alloc() for memblock allocations that return a
virtual address.

The conversion is done using the following semantic patch:

@@
expression e1, e2, e3;
@@
(
- memblock_alloc(e1, e2)
+ memblock_phys_alloc(e1, e2)
|
- memblock_alloc_nid(e1, e2, e3)
+ memblock_phys_alloc_nid(e1, e2, e3)
|
- memblock_alloc_try_nid(e1, e2, e3)
+ memblock_phys_alloc_try_nid(e1, e2, e3)
)

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/mmu.c |  2 +-
 arch/arm64/mm/mmu.c   |  2 +-
 arch/arm64/mm/numa.c  |  2 +-
 arch/c6x/mm/dma-coherent.c|  4 ++--
 arch/nds32/mm/init.c  |  8 
 arch/openrisc/mm/init.c   |  2 +-
 arch/openrisc/mm/ioremap.c|  2 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c |  4 +---
 arch/powerpc/kernel/paca.c|  2 +-
 arch/powerpc/kernel/prom.c|  2 +-
 arch/powerpc/kernel/setup-common.c|  3 +--
 arch/powerpc/kernel/setup_32.c| 10 +-
 arch/powerpc/mm/numa.c|  2 +-
 arch/powerpc/mm/pgtable_32.c  |  2 +-
 arch/powerpc/mm/ppc_mmu_32.c  |  2 +-
 arch/powerpc/platforms/pasemi/iommu.c |  2 +-
 arch/powerpc/platforms/powernv/opal.c |  2 +-
 arch/powerpc/sysdev/dart_iommu.c  |  2 +-
 arch/s390/kernel/crash_dump.c |  2 +-
 arch/s390/kernel/setup.c  |  3 ++-
 arch/s390/mm/vmem.c   |  4 ++--
 arch/s390/numa/numa.c |  2 +-
 arch/sparc/kernel/mdesc.c |  2 +-
 arch/sparc/kernel/prom_64.c   |  2 +-
 arch/sparc/mm/init_64.c   | 11 ++-
 arch/unicore32/mm/mmu.c   |  2 +-
 arch/x86/mm/numa.c|  2 +-
 drivers/firmware/efi/memmap.c |  2 +-
 include/linux/memblock.h  |  6 +++---
 mm/memblock.c |  8 
 30 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e46a6a4..f5cc1cc 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -721,7 +721,7 @@ EXPORT_SYMBOL(phys_mem_access_prot);
 
 static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
 {
-   void *ptr = __va(memblock_alloc(sz, align));
+   void *ptr = __va(memblock_phys_alloc(sz, align));
memset(ptr, 0, sz);
return ptr;
 }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 65f8627..33558f4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -83,7 +83,7 @@ static phys_addr_t __init early_pgtable_alloc(void)
phys_addr_t phys;
void *ptr;
 
-   phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+   phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
 
/*
 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 146c04c..e5aacd6 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -237,7 +237,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, 
u64 end_pfn)
if (start_pfn >= end_pfn)
pr_info("Initmem setup node %d []\n", nid);
 
-   nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+   nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
nd = __va(nd_pa);
 
/* report and initialize */
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index d0a8e0c..01305c7 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -135,8 +135,8 @@ void __init coherent_mem_init(phys_addr_t start, u32 size)
if (dma_size & (PAGE_SIZE - 1))
++dma_pages;
 
-   bitmap_phys = memblock_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long),
-sizeof(long));
+   bitmap_phys = memblock_phys_alloc(BITS_TO_LONGS(dma_pages) * 
sizeof(long),
+ sizeof(long));
 
dma_bitmap = phys_to_virt(bitmap_phys);
memset(dma_bitmap, 0, dma_pages * PAGE_SIZE);
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index c713d2a..5af81b8 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -81,7 +81,7 @@ static void __init map_ram(void)
}
 
/* Alloc one page for holding PTE's... */
-   pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+   pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
memset(pte, 0, PAGE_SIZE);
set_pmd(pme, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
 
@@ -114,7 +114,7 @@ static void __init fixedrange_init(void)
pgd = swapper_pg_dir + pgd_index(vaddr);
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
-   fixmap_pmd_p = (pmd_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+   fixmap_pmd_p = (pmd_t *) __va(memblock_phys_alloc(PAGE_SIZE, 
PAGE_SIZE));
memset(fixmap_pmd_p, 0, PAGE_SIZE);

[RFC PATCH 03/29] mm: remove CONFIG_HAVE_MEMBLOCK

2018-09-05 Thread Mike Rapoport
All architectures use memblock for early memory management. There is no need
for the CONFIG_HAVE_MEMBLOCK configuration option.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/Kconfig  |   1 -
 arch/arc/Kconfig|   1 -
 arch/arm/Kconfig|   1 -
 arch/arm64/Kconfig  |   1 -
 arch/c6x/Kconfig|   1 -
 arch/h8300/Kconfig  |   1 -
 arch/hexagon/Kconfig|   1 -
 arch/ia64/Kconfig   |   1 -
 arch/m68k/Kconfig   |   1 -
 arch/microblaze/Kconfig |   1 -
 arch/mips/Kconfig   |   1 -
 arch/nds32/Kconfig  |   1 -
 arch/nios2/Kconfig  |   1 -
 arch/openrisc/Kconfig   |   1 -
 arch/parisc/Kconfig |   1 -
 arch/powerpc/Kconfig|   1 -
 arch/riscv/Kconfig  |   1 -
 arch/s390/Kconfig   |   1 -
 arch/sh/Kconfig |   1 -
 arch/sparc/Kconfig  |   1 -
 arch/um/Kconfig |   1 -
 arch/unicore32/Kconfig  |   1 -
 arch/x86/Kconfig|   1 -
 arch/xtensa/Kconfig |   1 -
 drivers/of/fdt.c|   2 -
 drivers/of/of_reserved_mem.c|  13 +
 drivers/staging/android/ion/Kconfig |   2 +-
 fs/pstore/Kconfig   |   1 -
 include/linux/bootmem.h | 112 
 include/linux/memblock.h|   2 -
 include/linux/mm.h  |   2 +-
 lib/Kconfig.debug   |   3 +-
 mm/Kconfig  |   5 +-
 mm/Makefile |   2 +-
 mm/nobootmem.c  |   4 --
 mm/page_alloc.c |   4 +-
 36 files changed, 8 insertions(+), 168 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 04de6be..5b4f883 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -31,7 +31,6 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
-   select HAVE_MEMBLOCK
help
  The Alpha is a 64-bit general-purpose processor designed and
  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 04ebead..5260440 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -37,7 +37,6 @@ config ARC
select HAVE_KERNEL_LZMA
select HAVE_KPROBES
select HAVE_KRETPROBES
-   select HAVE_MEMBLOCK
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_OPROFILE
select HAVE_PERF_EVENTS
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61ea3dd..07468e6 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -82,7 +82,6 @@ config ARM
select HAVE_KERNEL_XZ
select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
select HAVE_KRETPROBES if (HAVE_KPROBES)
-   select HAVE_MEMBLOCK
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0065653..7d7d813 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -133,7 +133,6 @@ config ARM64
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
-   select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
select HAVE_NMI
select HAVE_PATA_PLATFORM
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index a641b0b..833fdb0 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -13,7 +13,6 @@ config C6X
select GENERIC_ATOMIC64
select GENERIC_IRQ_SHOW
select HAVE_ARCH_TRACEHOOK
-   select HAVE_MEMBLOCK
select SPARSE_IRQ
select IRQ_DOMAIN
select OF
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e89d40..d19c6b16 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -15,7 +15,6 @@ config H8300
select OF
select OF_IRQ
select OF_EARLY_FLATTREE
-   select HAVE_MEMBLOCK
select TIMER_OF
select H8300_TMR8
select HAVE_KERNEL_GZIP
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index fb7e0ba..d86e134 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -29,7 +29,6 @@ config HEXAGON
select GENERIC_CLOCKEVENTS_BROADCAST
select MODULES_USE_ELF_RELA
select GENERIC_CPU_DEVICES
-   select HAVE_MEMBLOCK
select ARCH_DISCARD_MEMBLOCK
---help---
  Qualcomm Hexagon is a processor architecture designed for high
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2bf4ef7..36773de 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -26,7 +26,6 @@ config IA64
select HAVE_FUNCTION_TRACER
select TTY
select HAVE_ARCH_TRACEHOOK
- 

[RFC PATCH 04/29] mm: remove bootmem allocator implementation.

2018-09-05 Thread Mike Rapoport
All architectures have been converted to use MEMBLOCK + NO_BOOTMEM. The
bootmem allocator implementation can be removed.

Signed-off-by: Mike Rapoport 
---
 include/linux/bootmem.h |  16 -
 mm/bootmem.c| 811 
 2 files changed, 827 deletions(-)
 delete mode 100644 mm/bootmem.c

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index ee61ac3..fce6278 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -26,14 +26,6 @@ extern unsigned long max_pfn;
  */
 extern unsigned long long max_possible_pfn;
 
-extern unsigned long bootmem_bootmap_pages(unsigned long);
-
-extern unsigned long init_bootmem_node(pg_data_t *pgdat,
-  unsigned long freepfn,
-  unsigned long startpfn,
-  unsigned long endpfn);
-extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
-
 extern unsigned long free_all_bootmem(void);
 extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
@@ -55,14 +47,6 @@ extern void free_bootmem_late(unsigned long physaddr, 
unsigned long size);
 #define BOOTMEM_DEFAULT0
 #define BOOTMEM_EXCLUSIVE  (1<<0)
 
-extern int reserve_bootmem(unsigned long addr,
-  unsigned long size,
-  int flags);
-extern int reserve_bootmem_node(pg_data_t *pgdat,
-   unsigned long physaddr,
-   unsigned long size,
-   int flags);
-
 extern void *__alloc_bootmem(unsigned long size,
 unsigned long align,
 unsigned long goal);
diff --git a/mm/bootmem.c b/mm/bootmem.c
deleted file mode 100644
index 97db0e8..000
--- a/mm/bootmem.c
+++ /dev/null
@@ -1,811 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  bootmem - A boot-time physical memory allocator and configurator
- *
- *  Copyright (C) 1999 Ingo Molnar
- *1999 Kanoj Sarcar, SGI
- *2008 Johannes Weiner
- *
- * Access to this subsystem has to be serialized externally (which is true
- * for the boot process anyway).
- */
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "internal.h"
-
-/**
- * DOC: bootmem overview
- *
- * Bootmem is a boot-time physical memory allocator and configurator.
- *
- * It is used early in the boot process before the page allocator is
- * set up.
- *
- * Bootmem is based on the most basic of allocators, a First Fit
- * allocator which uses a bitmap to represent memory. If a bit is 1,
- * the page is allocated and 0 if unallocated. To satisfy allocations
- * of sizes smaller than a page, the allocator records the Page Frame
- * Number (PFN) of the last allocation and the offset the allocation
- * ended at. Subsequent small allocations are merged together and
- * stored on the same page.
- *
- * The information used by the bootmem allocator is represented by
- * :c:type:`struct bootmem_data`. An array to hold up to %MAX_NUMNODES
- * such structures is statically allocated and then it is discarded
- * when the system initialization completes. Each entry in this array
- * corresponds to a node with memory. For UMA systems only entry 0 is
- * used.
- *
- * The bootmem allocator is initialized during early architecture
- * specific setup. Each architecture is required to supply a
- * :c:func:`setup_arch` function which, among other tasks, is
- * responsible for acquiring the necessary parameters to initialise
- * the boot memory allocator. These parameters define limits of usable
- * physical memory:
- *
- * * @min_low_pfn - the lowest PFN that is available in the system
- * * @max_low_pfn - the highest PFN that may be addressed by low
- *   memory (%ZONE_NORMAL)
- * * @max_pfn - the last PFN available to the system.
- *
- * After those limits are determined, the :c:func:`init_bootmem` or
- * :c:func:`init_bootmem_node` function should be called to initialize
- * the bootmem allocator. The UMA case should use the `init_bootmem`
- * function. It will initialize ``contig_page_data`` structure that
- * represents the only memory node in the system. In the NUMA case the
- * `init_bootmem_node` function should be called to initialize the
- * bootmem allocator for each node.
- *
- * Once the allocator is set up, it is possible to use either single
- * node or NUMA variant of the allocation APIs.
- */
-
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = {
-   .bdata = &bootmem_node_data[0]
-};
-EXPORT_SYMBOL(contig_page_data);
-#endif
-
-unsigned long max_low_pfn;
-unsigned long min_low_pfn;
-unsigned long max_pfn;
-unsigned long long max_possible_pfn;
-
-bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
-
-static struct list_head bdata_list __initdata = 

[RFC PATCH 02/29] mm: remove CONFIG_NO_BOOTMEM

2018-09-05 Thread Mike Rapoport
All architectures select NO_BOOTMEM, which essentially becomes 'Y' for any
kernel configuration, and therefore it can be removed.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/Kconfig  |  1 -
 arch/arc/Kconfig|  1 -
 arch/arm/Kconfig|  1 -
 arch/arm64/Kconfig  |  1 -
 arch/c6x/Kconfig|  1 -
 arch/h8300/Kconfig  |  1 -
 arch/hexagon/Kconfig|  1 -
 arch/ia64/Kconfig   |  1 -
 arch/m68k/Kconfig   |  1 -
 arch/microblaze/Kconfig |  1 -
 arch/mips/Kconfig   |  1 -
 arch/nds32/Kconfig  |  1 -
 arch/nios2/Kconfig  |  1 -
 arch/openrisc/Kconfig   |  1 -
 arch/parisc/Kconfig |  1 -
 arch/powerpc/Kconfig|  1 -
 arch/riscv/Kconfig  |  1 -
 arch/s390/Kconfig   |  1 -
 arch/sh/Kconfig |  1 -
 arch/sparc/Kconfig  |  1 -
 arch/um/Kconfig |  1 -
 arch/unicore32/Kconfig  |  1 -
 arch/x86/Kconfig|  3 ---
 arch/xtensa/Kconfig |  1 -
 include/linux/bootmem.h | 36 ++--
 include/linux/mmzone.h  |  5 +
 mm/Kconfig  |  3 ---
 mm/Makefile |  7 +--
 mm/memblock.c   |  2 --
 29 files changed, 4 insertions(+), 75 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 620b0a7..04de6be 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -32,7 +32,6 @@ config ALPHA
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
select HAVE_MEMBLOCK
-   select NO_BOOTMEM
help
  The Alpha is a 64-bit general-purpose processor designed and
  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index b4441b0..04ebead 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -44,7 +44,6 @@ config ARC
select HANDLE_DOMAIN_IRQ
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
-   select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
select OF_RESERVED_MEM
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ed74be4..61ea3dd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -100,7 +100,6 @@ config ARM
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
-   select NO_BOOTMEM
select OF_EARLY_FLATTREE if OF
select OF_RESERVED_MEM if OF
select OLD_SIGACTION
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c05ab9e..0065653 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -154,7 +154,6 @@ config ARM64
select MULTI_IRQ_HANDLER
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
-   select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
select OF_RESERVED_MEM
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 85ed568..a641b0b 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -14,7 +14,6 @@ config C6X
select GENERIC_IRQ_SHOW
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
-   select NO_BOOTMEM
select SPARSE_IRQ
select IRQ_DOMAIN
select OF
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 0b334b6..5e89d40 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -16,7 +16,6 @@ config H8300
select OF_IRQ
select OF_EARLY_FLATTREE
select HAVE_MEMBLOCK
-   select NO_BOOTMEM
select TIMER_OF
select H8300_TMR8
select HAVE_KERNEL_GZIP
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index f793499..fb7e0ba 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -31,7 +31,6 @@ config HEXAGON
select GENERIC_CPU_DEVICES
select HAVE_MEMBLOCK
select ARCH_DISCARD_MEMBLOCK
-   select NO_BOOTMEM
---help---
  Qualcomm Hexagon is a processor architecture designed for high
  performance and low power across a wide variety of applications.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8b4a0c17..2bf4ef7 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,7 +28,6 @@ config IA64
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
-   select NO_BOOTMEM
select HAVE_VIRT_CPU_ACCOUNTING
select ARCH_HAS_DMA_MARK_CLEAN
select ARCH_HAS_SG_CHAIN
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 0705537..8c7111d 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -29,7 +29,6 @@ config M68K
select DMA_NONCOHERENT_OPS if HAS_DMA
select HAVE_MEMBLOCK
select ARCH_DISCARD_MEMBLOCK
-   select NO_BOOTMEM
 
 config CPU_BIG_ENDIAN
def_bool y
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index ace5c5b..56379b9 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -28,7 +28,6 @@ config MICROBLAZE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
-   

[RFC PATCH 00/29] mm: remove bootmem allocator

2018-09-05 Thread Mike Rapoport
Hi,

These patches switch early memory management to use memblock directly
without any bootmem compatibility wrappers. As a result, both bootmem and
nobootmem are removed.

There are still a couple of things to sort out; the most important is the
removal of bootmem usage in MIPS.

Still, IMHO, the series is in a sufficient state to post and get early
feedback.

The patches are build-tested with defconfig for most architectures (I
couldn't find a compiler for nds32 and unicore32) and boot-tested on an x86
VM.
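
To make the target style concrete, here is a rough, hypothetical sketch of
the kind of call site the series converts bootmem users to (assuming the
post-series memblock_alloc() that returns a virtual address; it is not
taken from any individual patch):

#include <linux/memblock.h>

/* replaces a bootmem-style "buf = alloc_bootmem(size);" call site */
static void * __init example_early_alloc(unsigned long size)
{
        void *buf = memblock_alloc(size, SMP_CACHE_BYTES);

        if (!buf)
                panic("%s: failed to allocate %lu bytes\n", __func__, size);

        return buf;
}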

Mike Rapoport (29):
  mips: switch to NO_BOOTMEM
  mm: remove CONFIG_NO_BOOTMEM
  mm: remove CONFIG_HAVE_MEMBLOCK
  mm: remove bootmem allocator implementation.
  mm: nobootmem: remove dead code
  memblock: rename memblock_alloc{_nid,_try_nid} to memblock_phys_alloc*
  memblock: remove _virt from APIs returning virtual address
  memblock: replace alloc_bootmem_align with memblock_alloc
  memblock: replace alloc_bootmem_low with memblock_alloc_low
  memblock: replace __alloc_bootmem_node_nopanic with
memblock_alloc_try_nid_nopanic
  memblock: replace alloc_bootmem_pages_nopanic with
memblock_alloc_nopanic
  memblock: replace alloc_bootmem_low with memblock_alloc_low
  memblock: replace __alloc_bootmem_nopanic with
memblock_alloc_from_nopanic
  memblock: add align parameter to memblock_alloc_node()
  memblock: replace alloc_bootmem_pages_node with memblock_alloc_node
  memblock: replace __alloc_bootmem_node with appropriate memblock_ API
  memblock: replace alloc_bootmem_node with memblock_alloc_node
  memblock: replace alloc_bootmem_low_pages with memblock_alloc_low
  memblock: replace alloc_bootmem_pages with memblock_alloc
  memblock: replace __alloc_bootmem with memblock_alloc_from
  memblock: replace alloc_bootmem with memblock_alloc
  mm: nobootmem: remove bootmem allocation APIs
  memblock: replace free_bootmem{_node} with memblock_free
  memblock: replace free_bootmem_late with memblock_free_late
  memblock: rename free_all_bootmem to memblock_free_all
  memblock: rename __free_pages_bootmem to memblock_free_pages
  mm: remove nobootmem
  memblock: replace BOOTMEM_ALLOC_* with MEMBLOCK variants
  mm: remove include/linux/bootmem.h

 arch/alpha/Kconfig  |   2 -
 arch/alpha/kernel/core_cia.c|   4 +-
 arch/alpha/kernel/core_irongate.c   |   4 +-
 arch/alpha/kernel/core_marvel.c |   6 +-
 arch/alpha/kernel/core_titan.c  |   2 +-
 arch/alpha/kernel/core_tsunami.c|   2 +-
 arch/alpha/kernel/pci-noop.c|   6 +-
 arch/alpha/kernel/pci.c |   6 +-
 arch/alpha/kernel/pci_iommu.c   |  14 +-
 arch/alpha/kernel/setup.c   |   3 +-
 arch/alpha/kernel/sys_nautilus.c|   2 +-
 arch/alpha/mm/init.c|   4 +-
 arch/alpha/mm/numa.c|   1 -
 arch/arc/Kconfig|   2 -
 arch/arc/kernel/unwind.c|   6 +-
 arch/arc/mm/highmem.c   |   4 +-
 arch/arc/mm/init.c  |   3 +-
 arch/arm/Kconfig|   2 -
 arch/arm/kernel/devtree.c   |   1 -
 arch/arm/kernel/setup.c |   5 +-
 arch/arm/mach-omap2/omap_hwmod.c|   4 +-
 arch/arm/mm/dma-mapping.c   |   1 -
 arch/arm/mm/init.c  |   3 +-
 arch/arm/mm/mmu.c   |   2 +-
 arch/arm/xen/mm.c   |   1 -
 arch/arm/xen/p2m.c  |   2 +-
 arch/arm64/Kconfig  |   2 -
 arch/arm64/kernel/acpi.c|   1 -
 arch/arm64/kernel/acpi_numa.c   |   1 -
 arch/arm64/kernel/setup.c   |   3 +-
 arch/arm64/mm/dma-mapping.c |   2 +-
 arch/arm64/mm/init.c|   5 +-
 arch/arm64/mm/kasan_init.c  |   3 +-
 arch/arm64/mm/mmu.c |   2 +-
 arch/arm64/mm/numa.c|   5 +-
 arch/c6x/Kconfig|   2 -
 arch/c6x/kernel/setup.c |   1 -
 arch/c6x/mm/dma-coherent.c  |   4 +-
 arch/c6x/mm/init.c  |   7 +-
 arch/h8300/Kconfig  |   2 -
 arch/h8300/kernel/setup.c   |   1 -
 arch/h8300/mm/init.c|   6 +-
 arch/hexagon/Kconfig|   2 -
 arch/hexagon/kernel/dma.c   |   2 +-
 arch/hexagon/kernel/setup.c |   2 +-
 arch/hexagon/mm/init.c  |   3 +-
 arch/ia64/Kconfig   |   2 -
 arch/ia64/kernel/crash.c|   2 +-
 arch/ia64/kernel/efi.c  |   2 +-
 arch/ia64/kernel/ia64_ksyms.c   |   2 +-
 arch/ia64/kernel/iosapic.c  |   2 +-
 arch/ia64/kernel/mca.c  |  10 +-
 

[RFC PATCH 01/29] mips: switch to NO_BOOTMEM

2018-09-05 Thread Mike Rapoport
MIPS already has memblock support and all the memory is already registered
with it.

This patch replaces bootmem memory reservations with memblock ones and
removes the bootmem initialization.

Signed-off-by: Mike Rapoport 
---
 arch/mips/Kconfig  |  1 +
 arch/mips/kernel/setup.c   | 91 ++
 arch/mips/loongson64/loongson-3/numa.c | 34 ++---
 arch/mips/sgi-ip27/ip27-memory.c   | 11 ++--
 4 files changed, 35 insertions(+), 102 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 1a119fd..f744d25 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -78,6 +78,7 @@ config MIPS
select RTC_LIB if !MACH_LOONGSON64
select SYSCTL_EXCEPTION_TRACE
select VIRT_TO_BUS
+   select NO_BOOTMEM
 
 menu "Machine selection"
 
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 32fc11d..08f8251 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -333,7 +333,7 @@ static void __init finalize_initrd(void)
 
maybe_bswap_initrd();
 
-   reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT);
+   memblock_reserve(__pa(initrd_start), size);
initrd_below_start_ok = 1;
 
pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n",
@@ -370,20 +370,10 @@ static void __init bootmem_init(void)
 
 #else  /* !CONFIG_SGI_IP27 */
 
-static unsigned long __init bootmap_bytes(unsigned long pages)
-{
-   unsigned long bytes = DIV_ROUND_UP(pages, 8);
-
-   return ALIGN(bytes, sizeof(long));
-}
-
 static void __init bootmem_init(void)
 {
unsigned long reserved_end;
-   unsigned long mapstart = ~0UL;
-   unsigned long bootmap_size;
phys_addr_t ramstart = PHYS_ADDR_MAX;
-   bool bootmap_valid = false;
int i;
 
/*
@@ -395,6 +385,8 @@ static void __init bootmem_init(void)
init_initrd();
reserved_end = (unsigned long) PFN_UP(__pa_symbol(&_end));
 
+   memblock_reserve(PHYS_OFFSET, reserved_end << PAGE_SHIFT);
+
/*
 * max_low_pfn is not a number of pages. The number of pages
 * of the system is given by 'max_low_pfn - min_low_pfn'.
@@ -442,9 +434,6 @@ static void __init bootmem_init(void)
if (initrd_end && end <= (unsigned 
long)PFN_UP(__pa(initrd_end)))
continue;
 #endif
-   if (start >= mapstart)
-   continue;
-   mapstart = max(reserved_end, start);
}
 
if (min_low_pfn >= max_low_pfn)
@@ -456,9 +445,11 @@ static void __init bootmem_init(void)
/*
 * Reserve any memory between the start of RAM and PHYS_OFFSET
 */
-   if (ramstart > PHYS_OFFSET)
+   if (ramstart > PHYS_OFFSET) {
add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
  BOOT_MEM_RESERVED);
+   memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET);
+   }
 
if (min_low_pfn > ARCH_PFN_OFFSET) {
pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
@@ -483,52 +474,6 @@ static void __init bootmem_init(void)
max_low_pfn = PFN_DOWN(HIGHMEM_START);
}
 
-#ifdef CONFIG_BLK_DEV_INITRD
-   /*
-* mapstart should be after initrd_end
-*/
-   if (initrd_end)
-   mapstart = max(mapstart, (unsigned 
long)PFN_UP(__pa(initrd_end)));
-#endif
-
-   /*
-* check that mapstart doesn't overlap with any of
-* memory regions that have been reserved through eg. DTB
-*/
-   bootmap_size = bootmap_bytes(max_low_pfn - min_low_pfn);
-
-   bootmap_valid = memory_region_available(PFN_PHYS(mapstart),
-   bootmap_size);
-   for (i = 0; i < boot_mem_map.nr_map && !bootmap_valid; i++) {
-   unsigned long mapstart_addr;
-
-   switch (boot_mem_map.map[i].type) {
-   case BOOT_MEM_RESERVED:
-   mapstart_addr = PFN_ALIGN(boot_mem_map.map[i].addr +
-   boot_mem_map.map[i].size);
-   if (PHYS_PFN(mapstart_addr) < mapstart)
-   break;
-
-   bootmap_valid = memory_region_available(mapstart_addr,
-   bootmap_size);
-   if (bootmap_valid)
-   mapstart = PHYS_PFN(mapstart_addr);
-   break;
-   default:
-   break;
-   }
-   }
-
-   if (!bootmap_valid)
-   panic("No memory area to place a bootmap bitmap");
-
-   /*
-* Initialize the boot-time allocator with low memory only.
-*/
-   if (bootmap_size != init_bootmem_node(NODE_DATA(0), mapstart,
-min_low_pfn, max_low_pfn))
-  

[PATCH 5/5] PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

2018-09-05 Thread Sergey Miroshnichenko
Reading an empty slot returns all ones, which triggers a false
EEH error event on PowerNV.

New callbacks pcibios_rescan_prepare/done are introduced to
pause/resume EEH during the rescan.
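
For context on the first paragraph: a config read from an empty slot returns
all ones, which is exactly the pattern EEH treats as a possible frozen PE
(see the EEH_IO_ERROR_VALUE() check in patch 1/5 of this series), hence the
need to pause EEH around the rescan. A minimal sketch of that comparison,
assuming the usual definition of the macro:

/* assumed to mirror EEH_IO_ERROR_VALUE(size) from asm/eeh.h */
#define EEH_IO_ERROR_VALUE(size)  (~0U >> ((4 - (size)) * 8))

static bool looks_like_eeh_error(u32 val, int size)
{
        /* an absent device answers every config read with all ones */
        return val == EEH_IO_ERROR_VALUE(size);
}

Pausing EEH before pci_scan_child_bus() and resuming it afterwards keeps
this all-ones pattern from being escalated into eeh_dev_check_failure().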

Signed-off-by: Sergey Miroshnichenko 
---
 arch/powerpc/include/asm/eeh.h   |  2 ++
 arch/powerpc/kernel/eeh.c| 14 ++
 arch/powerpc/platforms/powernv/eeh-powernv.c | 20 
 drivers/pci/probe.c  | 14 ++
 include/linux/pci.h  |  2 ++
 5 files changed, 52 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 219637ea69a1..926c3e31df99 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -219,6 +219,8 @@ struct eeh_ops {
int (*next_error)(struct eeh_pe **pe);
int (*restore_config)(struct pci_dn *pdn);
int (*notify_resume)(struct pci_dn *pdn);
+   int (*pause)(struct pci_bus *bus);
+   int (*resume)(struct pci_bus *bus);
 };
 
 extern int eeh_subsystem_flags;
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6ebba3e48b01..dce9b0978cb5 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1831,3 +1831,17 @@ static int __init eeh_init_proc(void)
return 0;
 }
 __initcall(eeh_init_proc);
+
+void pcibios_rescan_prepare(struct pci_bus *bus)
+{
+   if (eeh_ops && eeh_ops->pause) {
+   eeh_ops->pause(bus);
+   }
+}
+
+void pcibios_rescan_done(struct pci_bus *bus)
+{
+   if (eeh_ops && eeh_ops->resume) {
+   eeh_ops->resume(bus);
+   }
+}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3c1beae29f2d..9c9213d92550 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -59,6 +59,24 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
eeh_sysfs_add_device(pdev);
 }
 
+static int pnv_eeh_pause(struct pci_bus *bus)
+{
+   struct pci_controller *hose = pci_bus_to_host(bus);
+   struct pnv_phb *phb = hose->private_data;
+   phb->flags &= ~PNV_PHB_FLAG_EEH;
+   disable_irq(eeh_event_irq);
+   return 0;
+}
+
+static int pnv_eeh_resume(struct pci_bus *bus)
+{
+   struct pci_controller *hose = pci_bus_to_host(bus);
+   struct pnv_phb *phb = hose->private_data;
+   enable_irq(eeh_event_irq);
+   phb->flags |= PNV_PHB_FLAG_EEH;
+   return 0;
+}
+
 static int pnv_eeh_init(void)
 {
struct pci_controller *hose;
@@ -1710,6 +1728,8 @@ static struct eeh_ops pnv_eeh_ops = {
.write_config   = pnv_eeh_write_config,
.next_error = pnv_eeh_next_error,
.restore_config = pnv_eeh_restore_config,
+   .pause  = pnv_eeh_pause,
+   .resume = pnv_eeh_resume,
.notify_resume  = NULL
 };
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ec784009a36b..203368566896 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2893,6 +2893,14 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
 {
 }
 
+void __weak pcibios_rescan_prepare(struct pci_bus *bus)
+{
+}
+
+void __weak pcibios_rescan_done(struct pci_bus *bus)
+{
+}
+
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
@@ -3147,9 +3155,15 @@ unsigned int pci_rescan_bus_bridge_resize(struct pci_dev 
*bridge)
 unsigned int pci_rescan_bus(struct pci_bus *bus)
 {
unsigned int max;
+   struct pci_bus *root = bus;
+   while (!pci_is_root_bus(root)) {
+   root = root->parent;
+   }
 
+   pcibios_rescan_prepare(root);
max = pci_scan_child_bus(bus);
pci_assign_unassigned_bus_resources(bus);
+   pcibios_rescan_done(root);
pci_bus_add_devices(bus);
 
return max;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e72ca8dd6241..d7fe72aa53b3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1952,6 +1952,8 @@ void pcibios_penalize_isa_irq(int irq, int active);
 int pcibios_alloc_irq(struct pci_dev *dev);
 void pcibios_free_irq(struct pci_dev *dev);
 resource_size_t pcibios_default_alignment(void);
+void pcibios_rescan_prepare(struct pci_bus *bus);
+void pcibios_rescan_done(struct pci_bus *bus);
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 extern struct dev_pm_ops pcibios_pm_ops;
-- 
2.17.1



[PATCH 0/5] powerpc/pci/hotplug: Discover surprise-hotplugged PCIe devices during rescan

2018-09-05 Thread Sergey Miroshnichenko
This patchset allows hotplugged PCIe devices to be enumerated during a bus
rescan issued via sysfs, when the "Presence Detect Changed" interrupt is not
available.

As a first part of our work on adding support for hotplugging PCIe bridges
full of devices without special requirements such as a Hot-Plug Controller,
reservation of bus numbers and memory regions by firmware, etc., this series
is intended to solve the first two problems from the list below:

 I   PowerNV doesn't discover new hotplugged PCIe devices
 II  EEH is falsely triggered when poking empty slots during PCIe rescan on 
PowerNV
 III The PCI subsystem is not prepared to runtime changes of BAR addresses
 IV  Device drivers don't track changes of their BAR addresses
 V   Move BARs of working devices to make space for new ones
 VI  Add support for PCIe bridge hotplug

Sergey Miroshnichenko (5):
  powerpc/pci: Access PCI config space directly w/o pci_dn
  powerpc/pci: Create pci_dn on demand
  powerpc/pci: Use DT to create pci_dn for root bridges only
  powerpc/powernv/pci: Enable reassigning the bus numbers
  PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

 arch/powerpc/include/asm/eeh.h   |  2 +
 arch/powerpc/kernel/eeh.c| 14 +++
 arch/powerpc/kernel/pci_dn.c | 75 -
 arch/powerpc/kernel/rtas_pci.c   | 89 
 arch/powerpc/platforms/powernv/eeh-powernv.c | 20 +
 arch/powerpc/platforms/powernv/pci.c | 55 +++-
 drivers/pci/probe.c  | 14 +++
 include/linux/pci.h  |  2 +
 8 files changed, 197 insertions(+), 74 deletions(-)

-- 
2.17.1



[PATCH 4/5] powerpc/powernv/pci: Enable reassigning the bus numbers

2018-09-05 Thread Sergey Miroshnichenko
PowerNV doesn't depend on PCIe topology info from DT anymore, and now
it is able to enumerate the fabric and assign the bus numbers.

Signed-off-by: Sergey Miroshnichenko 
---
 arch/powerpc/platforms/powernv/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 6d4280086a08..f6eaca3123cd 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1104,6 +1104,7 @@ void __init pnv_pci_init(void)
struct device_node *np;
 
pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
+   pci_add_flags(PCI_REASSIGN_ALL_BUS);
 
/* If we don't have OPAL, eg. in sim, just skip PCI probe */
if (!firmware_has_feature(FW_FEATURE_OPAL))
-- 
2.17.1



[PATCH 3/5] powerpc/pci: Use DT to create pci_dn for root bridges only

2018-09-05 Thread Sergey Miroshnichenko
An endpoint's pci_dn can be created dynamically.

Signed-off-by: Sergey Miroshnichenko 
---
 arch/powerpc/kernel/pci_dn.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 5ce752874827..1b9f563d9461 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -530,8 +530,10 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
phb->pci_data = pdn;
}
 
-   /* Update dn->phb ptrs for new phb and children devices */
-   pci_traverse_device_nodes(dn, add_pdn, phb);
+   if (!pci_has_flag(PCI_REASSIGN_ALL_BUS)) {
+   /* Update dn->phb ptrs for new phb and children devices */
+   pci_traverse_device_nodes(dn, add_pdn, phb);
+   }
 }
 
 /** 
-- 
2.17.1



[PATCH 2/5] powerpc/pci: Create pci_dn on demand

2018-09-05 Thread Sergey Miroshnichenko
The pci_dn structures can be created not only from DT, but also
directly from newly discovered PCIe devices, so allocate them
dynamically.

Signed-off-by: Sergey Miroshnichenko 
---
 arch/powerpc/kernel/pci_dn.c | 69 +++-
 1 file changed, 52 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index ab147a1909c8..5ce752874827 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -33,6 +33,8 @@
 #include 
 #include 
 
+static struct pci_dn* create_pdn(struct pci_dev *pdev, struct pci_dn *parent);
+
 /*
  * The function is used to find the firmware data of one
  * specific PCI device, which is attached to the indicated
@@ -58,6 +60,10 @@ static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
pbus = pbus->parent;
}
 
+   if (!pbus->self && !pci_is_root_bus(pbus)) {
+   return NULL;
+   }
+
/*
 * Except virtual bus, all PCI buses should
 * have device nodes.
@@ -65,13 +71,16 @@ static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
dn = pci_bus_to_OF_node(pbus);
pdn = dn ? PCI_DN(dn) : NULL;
 
+   if (!pdn && pbus->self) {
+   pdn = pbus->self->dev.archdata.pci_data;
+   }
+
return pdn;
 }
 
 struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
int devfn)
 {
-   struct device_node *dn = NULL;
struct pci_dn *parent, *pdn;
struct pci_dev *pdev = NULL;
 
@@ -80,17 +89,10 @@ struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
if (pdev->devfn == devfn) {
if (pdev->dev.archdata.pci_data)
return pdev->dev.archdata.pci_data;
-
-   dn = pci_device_to_OF_node(pdev);
break;
}
}
 
-   /* Fast path: fetch from device node */
-   pdn = dn ? PCI_DN(dn) : NULL;
-   if (pdn)
-   return pdn;
-
/* Slow path: fetch from firmware data hierarchy */
parent = pci_bus_to_pdn(bus);
if (!parent)
@@ -128,16 +130,9 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
if (!parent)
return NULL;
 
-   list_for_each_entry(pdn, &parent->child_list, list) {
-   if (pdn->busno == pdev->bus->number &&
-   pdn->devfn == pdev->devfn)
-   return pdn;
-   }
-
-   return NULL;
+   return create_pdn(pdev, parent);
 }
 
-#ifdef CONFIG_PCI_IOV
 static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
   int vf_index,
   int busno, int devfn)
@@ -164,7 +159,47 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn 
*parent,
 
return pdn;
 }
-#endif
+
+static struct pci_dn* create_pdn(struct pci_dev *pdev, struct pci_dn *parent)
+{
+   struct pci_dn *pdn = NULL;
+
+   pdn = add_one_dev_pci_data(parent, 0, pdev->bus->number, pdev->devfn);
+   dev_info(&pdev->dev, "Create a new pdn for devfn %2x\n", pdev->devfn / 8);
+
+   if (pdn)
+   {
+   u32 class_code;
+   u16 device_id;
+   u16 vendor_id;
+
+   struct eeh_dev *edev = eeh_dev_init(pdn);
+   if (!edev) {
+   kfree(pdn);
+   dev_err(&pdev->dev, "%s:%d: Failed to allocate edev\n", __func__, __LINE__);
+   return NULL;
+   }
+
+   pdn->busno = pdev->bus->busn_res.start;
+
+   pci_bus_read_config_word(pdev->bus, pdev->devfn, PCI_VENDOR_ID, &vendor_id);
+   pdn->vendor_id = vendor_id;
+
+   pci_bus_read_config_word(pdev->bus, pdev->devfn, PCI_DEVICE_ID, &device_id);
+   pdn->device_id = device_id;
+
+   pci_bus_read_config_dword(pdev->bus, pdev->devfn, PCI_CLASS_REVISION, &class_code);
+   class_code >>= 8;
+   pdn->class_code = class_code;
+
+   pdn->pci_ext_config_space = 0;
+   pdev->dev.archdata.pci_data = pdn;
+   } else {
+   dev_err(&pdev->dev, "%s:%d: Failed to allocate pdn\n", __func__, __LINE__);
+   }
+
+   return pdn;
+}
 
 struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
 {
-- 
2.17.1



[PATCH 1/5] powerpc/pci: Access PCI config space directly w/o pci_dn

2018-09-05 Thread Sergey Miroshnichenko
The pci_dn structures are retrieved from a DT, but hot-plugged PCIe
devices don't have them. Don't stop PCIe I/O in the absence of a pci_dn, so
that new devices can now be discovered.

Signed-off-by: Sergey Miroshnichenko 
---
 arch/powerpc/kernel/rtas_pci.c   | 89 +---
 arch/powerpc/platforms/powernv/pci.c | 54 ++---
 2 files changed, 88 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index c2b148b1634a..0da2c3732acd 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -55,10 +55,25 @@ static inline int config_access_valid(struct pci_dn *dn, 
int where)
return 0;
 }
 
-int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+static int rtas_read_raw_config(unsigned long buid, int busno, unsigned int 
devfn, int where, int size, u32 *val)
 {
int returnval = -1;
-   unsigned long buid, addr;
+   unsigned long addr = rtas_config_addr(busno, devfn, where);
+   int ret;
+
+   if (buid) {
+   ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+   addr, BUID_HI(buid), BUID_LO(buid), size);
+   } else {
+   ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+   }
+   *val = returnval;
+
+   return ret;
+}
+
+int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
int ret;
 
if (!pdn)
@@ -71,16 +86,7 @@ int rtas_read_config(struct pci_dn *pdn, int where, int 
size, u32 *val)
return PCIBIOS_SET_FAILED;
 #endif
 
-   addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
-   buid = pdn->phb->buid;
-   if (buid) {
-   ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
-   addr, BUID_HI(buid), BUID_LO(buid), size);
-   } else {
-   ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
-   }
-   *val = returnval;
-
+   ret = rtas_read_raw_config(pdn->phb->buid, pdn->busno, pdn->devfn, 
where, size, val);
if (ret)
return PCIBIOS_DEVICE_NOT_FOUND;
 
@@ -91,25 +97,43 @@ static int rtas_pci_read_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 *val)
 {
-   struct pci_dn *pdn;
-   int ret;
+   struct pci_dn *pdn = pci_get_pdn_by_devfn(bus, devfn);
 
*val = 0x;
 
-   pdn = pci_get_pdn_by_devfn(bus, devfn);
+   if (pdn && eeh_enabled()) {
+   /* Validity of pdn is checked in here */
+   int ret = rtas_read_config(pdn, where, size, val);
+   if (*val == EEH_IO_ERROR_VALUE(size) &&
+   eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
+   return PCIBIOS_DEVICE_NOT_FOUND;
+   return ret;
+   } else {
+   struct pci_controller *phb = pci_bus_to_host(bus);
+   return rtas_read_raw_config(phb->buid, bus->number, devfn, 
where, size, val);
+   }
+}
 
-   /* Validity of pdn is checked in here */
-   ret = rtas_read_config(pdn, where, size, val);
-   if (*val == EEH_IO_ERROR_VALUE(size) &&
-   eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
+static int rtas_write_raw_config(unsigned long buid, int busno, unsigned int 
devfn, int where, int size, u32 val)
+{
+   unsigned long addr = rtas_config_addr(busno, devfn, where);
+   int ret;
+
+   if (buid) {
+   ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+   BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+   } else {
+   ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, 
(ulong)val);
+   }
+
+   if (ret)
return PCIBIOS_DEVICE_NOT_FOUND;
 
-   return ret;
+   return PCIBIOS_SUCCESSFUL;
 }
 
 int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
 {
-   unsigned long buid, addr;
int ret;
 
if (!pdn)
@@ -122,15 +146,7 @@ int rtas_write_config(struct pci_dn *pdn, int where, int 
size, u32 val)
return PCIBIOS_SET_FAILED;
 #endif
 
-   addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
-   buid = pdn->phb->buid;
-   if (buid) {
-   ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
-   BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
-   } else {
-   ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, 
(ulong)val);
-   }
-
+   ret = rtas_write_raw_config(pdn->phb->buid, pdn->busno, pdn->devfn, 
where, size, val);
if (ret)
return PCIBIOS_DEVICE_NOT_FOUND;
 
@@ -141,12 +157,15 @@ static int rtas_pci_write_config(struct pci_bus *bus,
 unsigned int devfn,
 int where, int size, u32 val)
 {
-   struct pci_dn *pdn;
+   struct 

Re: [RFC PATCH v1 00/17] ban the use of _PAGE_XXX flags outside platform specific code

2018-09-05 Thread Christophe Leroy




On 09/05/2018 02:03 PM, Aneesh Kumar K.V wrote:

On 09/05/2018 06:06 PM, Christophe Leroy wrote:

Today flags like for instance _PAGE_RW or _PAGE_USER are used through
common parts of code.
Using those directly in common parts of code has proven to lead to
mistakes or misbehaviour, because their use is not always as trivial
as one could think.

For instance, (flags & _PAGE_USER) == 0 isn't enough to tell
that a page is a kernel page, because some targets are using
_PAGE_PRIVILEGED and not _PAGE_USER, so the test has to be
(flags & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_PRIVILEGED
This has two (bad) consequences:

  - All targets must define every bit, even the unsupported ones,
    leading to a lot of useless #define _PAGE_XXX 0
  - If someone forgets to take into account all possible _PAGE_XXX bits
    for the case, we can get unexpected behaviour on some targets.

This becomes even more complex when we come to using _PAGE_RW.
Testing (flags & _PAGE_RW) is not enough to test whether a page
is writable or not, because:

  - Some targets have _PAGE_RO instead, which has to be unset to tell
    a page is writable
  - Some targets have _PAGE_R and _PAGE_W, in which case
    _PAGE_RW = _PAGE_R | _PAGE_W
  - Even knowing whether a page is readable is not always trivial 
because:

    - Some targets require checking that _PAGE_R is set to ensure the page
    is readable
    - Some targets require checking that _PAGE_NA is not set
    - Some targets require checking that _PAGE_RO or _PAGE_RW is set

Etc 

In order to work around all those issues and minimise the risks of errors,
this series aims at removing all use of _PAGE_XXX flags from powerpc code
and always use pte_xxx() and pte_mkxxx() accessors instead. Those accessors
are then defined in target specific parts of the kernel code.


We recently did on book3s 64.

static inline int pte_present(pte_t pte)
{
 /*
  * A pte is considered present if _PAGE_PRESENT is set.
  * We also need to consider the pte present which is marked
  * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID
  * if we find _PAGE_PRESENT cleared.
  */
 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
}

So I guess with that pte_present conversion we need to be careful.

Do you have a git tree which I can use to double check?


I pushed on branch 'helpers' on https://github.com/chleroy/linux.git

Christophe


Re: [PATCH 3/3] mm: optimise pte dirty/accessed bit setting by demand based pte insertion

2018-09-05 Thread Guenter Roeck
Hi,

On Tue, Aug 28, 2018 at 09:20:34PM +1000, Nicholas Piggin wrote:
> Similarly to the previous patch, this tries to optimise dirty/accessed
> bits in ptes to avoid access costs of hardware setting them.
> 

This patch results in silent nios2 boot failures, silent meaning that
the boot stalls.

...
Unpacking initramfs...
Freeing initrd memory: 2168K
workingset: timestamp_bits=30 max_order=15 bucket_order=0
jffs2: version 2.2. (NAND) © 2001-2006 Red Hat, Inc.
random: fast init done
random: crng init done

[no further activity until the qemu session is aborted]

Reverting the patch fixes the problem. Bisect log is attached.

Guenter

---
# bad: [387ac6229ecf6e012649d4fc409c5352655a4cf0] Add linux-next specific files 
for 20180905
# good: [57361846b52bc686112da6ca5368d11210796804] Linux 4.19-rc2
git bisect start 'HEAD' 'v4.19-rc2'
# good: [668570e8389bb076bea9b7531553e1362f5abd11] Merge remote-tracking branch 
'net-next/master'
git bisect good 668570e8389bb076bea9b7531553e1362f5abd11
# good: [7f2f69ebf0bcf3e9bcff7d560ba92cee960a66a6] Merge remote-tracking branch 
'battery/for-next'
git bisect good 7f2f69ebf0bcf3e9bcff7d560ba92cee960a66a6
# good: [c31458d3e03e3a2edeaab225a22eaf68c07c8290] Merge remote-tracking branch 
'rpmsg/for-next'
git bisect good c31458d3e03e3a2edeaab225a22eaf68c07c8290
# good: [e0f43dcbe9af8ac72f39fe92c5d0ee1883546427] Merge remote-tracking branch 
'nvdimm/libnvdimm-for-next'
git bisect good e0f43dcbe9af8ac72f39fe92c5d0ee1883546427
# bad: [f509e2c0f3cd11df238f0f1b5ba013fe726decdf] of: ignore sub-page memory 
regions
git bisect bad f509e2c0f3cd11df238f0f1b5ba013fe726decdf
# good: [2f7eebf30b87534f7e4c3982307579d9adc782a5] ocfs2: fix clusters leak in 
ocfs2_defrag_extent()
git bisect good 2f7eebf30b87534f7e4c3982307579d9adc782a5
# good: [119eb88c9dd23e305939ad748237100078e304a8] mm/swapfile.c: call 
free_swap_slot() in __swap_entry_free()
git bisect good 119eb88c9dd23e305939ad748237100078e304a8
# good: [21d64d37adf3ab20b4c3a1951018e84bf815c887] mm: remove vm_insert_pfn()
git bisect good 21d64d37adf3ab20b4c3a1951018e84bf815c887
# good: [90cd1a69010844e9dbfc43279d681d798812b962] cramfs: convert to use 
vmf_insert_mixed
git bisect good 90cd1a69010844e9dbfc43279d681d798812b962
# good: [c7dd91289b4bb4c400a8a71953511991815f8e6f] mm/cow: optimise pte 
dirty/accessed bits handling in fork
git bisect good c7dd91289b4bb4c400a8a71953511991815f8e6f
# bad: [87d74ae75700a39effcb8c9ed8a8445e719ac369] hexagon: switch to NO_BOOTMEM
git bisect bad 87d74ae75700a39effcb8c9ed8a8445e719ac369
# bad: [3d1d5b26ac5b4d4193dc618a50cd88de1fb0d360] mm: optimise pte 
dirty/accessed bit setting by demand based pte insertion
git bisect bad 3d1d5b26ac5b4d4193dc618a50cd88de1fb0d360
# first bad commit: [3d1d5b26ac5b4d4193dc618a50cd88de1fb0d360] mm: optimise pte 
dirty/accessed bit setting by demand based pte insertion


RE: FSL/NXP P5020: USB problems with the latest Git kernels

2018-09-05 Thread Laurentiu Tudor
Hi Scott,

> -Original Message- 
> On Mon, 2018-08-27 at 20:15 +0200, Christian Zigotzky wrote:
> > Hello,
> >
> > Our users tested the RC1 of kernel 4.19 on their P5020 boards today.
> > Unfortunately the USB bug still exists. With mem values bigger than
> 4096M,
> > the USB mouse and keyboard doesn’t work. With the bootarg mem=4096M, the
> USB
> > devices work without any problems. Please compile the RC1 and test it on
> > your P5020 board. There is a problem with the memory management since
> > 22/08/18.
> 
> I just tested 4.19-rc1 on a T4240 and got a similar problem with MMC.  MMC
> and
> USB on these chips both have a 32-bit DMA limitation.  I'll look into it.

I encountered similar issues on LS104xA chips. This is the workaround for MMC:
https://patchwork.kernel.org/patch/10506627/
On the USB side I didn't find a proper place in the USB subsystem code for a
workaround, but I did find an undocumented kernel arg (*) that limits the DMA
mask to 32 bits: xhci-hcd.quirks=0x80.

(*) https://patchwork.kernel.org/patch/10509159/
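
For comparison, the explicit way a driver or platform fixup would impose the
same limit through the generic DMA API looks roughly like this (a sketch
only, not code from the patches referenced above):

#include <linux/dma-mapping.h>

/* cap a device to 32-bit DMA, which is what the xhci-hcd quirk above
 * effectively achieves for the xHCI controller */
static int limit_to_32bit_dma(struct device *dev)
{
        return dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
}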

---
Best Regards, Laurentiu


Re: [RFC PATCH v1 00/17] ban the use of _PAGE_XXX flags outside platform specific code

2018-09-05 Thread Aneesh Kumar K.V

On 09/05/2018 06:06 PM, Christophe Leroy wrote:

Today flags like for instance _PAGE_RW or _PAGE_USER are used through
common parts of code.
Using those directly in common parts of code have proven to lead to
mistakes or misbehaviour, because their use is not always as trivial
as one could think.

For instance, (flags & _PAGE_USER) == 0 isn't enough to tell
that a page is a kernel page, because some targets are using
_PAGE_PRIVILEGED and not _PAGE_USER, so the test has to be
(flags & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_PRIVILEGED
This has two (bad) consequences:

  - All targets must define every bit, even the unsupported ones,
leading to a lot of useless #define _PAGE_XXX 0
  - If someone forgets to take into account all possible _PAGE_XXX bits
for the case, we can get unexpected behaviour on some targets.

This becomes even more complex when we come to using _PAGE_RW.
Testing (flags & _PAGE_RW) is not enough to test whether a page
is writable or not, because:

  - Some targets have _PAGE_RO instead, which has to be unset to tell
a page is writable
  - Some targets have _PAGE_R and _PAGE_W, in which case
_PAGE_RW = _PAGE_R | _PAGE_W
  - Even knowing whether a page is readable is not always trivial because:
- Some targets require checking that _PAGE_R is set to ensure the page
is readable
- Some targets require checking that _PAGE_NA is not set
- Some targets require checking that _PAGE_RO or _PAGE_RW is set

Etc 

In order to work around all those issues and minimise the risks of errors,
this series aims at removing all use of _PAGE_XXX flags from powerpc code
and always use pte_xxx() and pte_mkxxx() accessors instead. Those accessors
are then defined in target specific parts of the kernel code.


We recently did on book3s 64.

static inline int pte_present(pte_t pte)
{
/*
 * A pte is considered present if _PAGE_PRESENT is set.
 * We also need to consider the pte present which is marked
 * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID
 * if we find _PAGE_PRESENT cleared.
 */
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
}

So I guess with that pte_present conversion we need to be careful.

Do you have a git tree which I can use to double check?

-aneesh



Re: [PATCH] powerpc64s: Print exception vector name alongside the trap number

2018-09-05 Thread Christophe LEROY




On 18/04/2018 at 11:02, Naveen N. Rao wrote:

Print a small help text indicating the exception vector alongside the
trap number to make it easier while analyzing back traces. As an
example:

 Unable to handle kernel paging request for data at address 0x
 Faulting instruction address: 0xc06e3728
 Oops: Kernel access of bad area, sig: 11 [#1]
 LE SMP NR_CPUS=2048 NUMA PowerNV
 Modules linked in:
 CPU: 0 PID: 1 Comm: bash Not tainted 4.16.0-nnr #226
 NIP:  c06e3728 LR: c06e4774 CTR: c06e3700
 REGS: c000f0aa3980 TRAP: 0300 (DSI)  Not tainted  (4.16.0-nnr)
 MSR:  90009033   CR: 2822  XER: 
2000
 CFAR: c06e4770 DAR:  DSISR: 4200 SOFTE: 0

Signed-off-by: Naveen N. Rao 
---
I find this useful to have in backtraces, instead of having to look it
up. Some of the names could probably be tweaked a bit to be more
sensible.

- Naveen

  arch/powerpc/kernel/process.c | 42 +++
  1 file changed, 42 insertions(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1237f13fed51..71bfe29af456 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1414,6 +1414,42 @@ static void print_msr_bits(unsigned long val)
  #define LAST_VOLATILE 12
  #endif
  
+#ifdef CONFIG_PPC_BOOK3S_64


Why not make something generic, applicable to all platforms?

Christophe


+static char *print_trap(unsigned long trapno)
+{
+   trapno &= 0xff0;
+   switch (trapno) {
+   case 0x100: return "SRESET";
+   case 0x200: return "MCE";
+   case 0x300: return "DSI";
+   case 0x380: return "DSISLB";
+   case 0x400: return "ISI";
+   case 0x480: return "ISISLB";
+   case 0x500: return "EXT";
+   case 0x600: return "ALIGN";
+   case 0x700: return "PCHECK";
+   case 0x800: return "FP";
+   case 0x900: return "DEC";
+   case 0x980: return "HDEC";
+   case 0xa00: return "DBELL";
+   case 0xc00: return "SC";
+   case 0xd00: return "SSTEP";
+   case 0xe00: return "HDSI";
+   case 0xe20: return "HISI";
+   case 0xe40: return "HEMUL";
+   case 0xe60: return "HMI";
+   case 0xe80: return "HDBELL";
+   case 0xea0: return "HVIRT";
+   case 0xf00: return "PMI";
+   case 0xf20: return "ALTIVEC";
+   case 0xf40: return "VSX";
+   case 0xf60: return "UNAVAIL";
+   case 0xf80: return "HUNAVAIL";
+   }
+   return "UNKNOWN";
+}
+#endif
+
  void show_regs(struct pt_regs * regs)
  {
int i, trap;
@@ -1422,8 +1458,14 @@ void show_regs(struct pt_regs * regs)
  
  	printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",

   regs->nip, regs->link, regs->ctr);
+#ifdef CONFIG_PPC_BOOK3S_64
+   printk("REGS: %px TRAP: %04lx (%s)  %s  (%s)\n",
+  regs, regs->trap, print_trap(regs->trap), print_tainted(),
+  init_utsname()->release);
+#else
printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
   regs, regs->trap, print_tainted(), init_utsname()->release);
+#endif
printk("MSR:  "REG" ", regs->msr);
print_msr_bits(regs->msr);
pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);



Re: [PATCH] [RFC v2] Drop all 00-INDEX files from Documentation/

2018-09-05 Thread Paul Moore
On Mon, Sep 3, 2018 at 6:15 PM Henrik Austad  wrote:
> This is a respin with a wider audience (all that get_maintainer returned)
> and I know this spams a *lot* of people. Not sure what would be the correct
> way, so my apologies for ruining your inbox.
>
> The 00-INDEX files are supposed to give a summary of all files present
> in a directory, but these files are horribly out of date and their
> usefulness is brought into question. Often a simple "ls" would reveal
> the same information as the filenames are generally quite descriptive as
> a short introduction to what the file covers (it should not surprise
> anyone what Documentation/sched/sched-design-CFS.txt covers)
>
> A few years back it was mentioned that these files were no longer really
> needed, and they have since then grown further out of date, so perhaps
> it is time to just throw them out.
>
> A short status yields the following _outdated_ 00-INDEX files, first
> counter is files listed in 00-INDEX but missing in the directory, last
> is files present but not listed in 00-INDEX.
>
> List of outdated 00-INDEX:
> Documentation: (4/10)
> Documentation/sysctl: (0/1)
> Documentation/timers: (1/0)
> Documentation/blockdev: (3/1)
> Documentation/w1/slaves: (0/1)
> Documentation/locking: (0/1)
> Documentation/devicetree: (0/5)
> Documentation/power: (1/1)
> Documentation/powerpc: (0/5)
> Documentation/arm: (1/0)
> Documentation/x86: (0/9)
> Documentation/x86/x86_64: (1/1)
> Documentation/scsi: (4/4)
> Documentation/filesystems: (2/9)
> Documentation/filesystems/nfs: (0/2)
> Documentation/cgroup-v1: (0/2)
> Documentation/kbuild: (0/4)
> Documentation/spi: (1/0)
> Documentation/virtual/kvm: (1/0)
> Documentation/scheduler: (0/2)
> Documentation/fb: (0/1)
> Documentation/block: (0/1)
> Documentation/networking: (6/37)
> Documentation/vm: (1/3)
>
> Then there are 364 subdirectories in Documentation/ with several files that
> are missing 00-INDEX alltogether (and another 120 with a single file and no
> 00-INDEX).
>
> I don't really have an opinion to whether or not we /should/ have 00-INDEX,
> but the above 00-INDEX should either be removed or be kept up to date. If
> we should keep the files, I can try to keep them updated, but I rather not
> if we just want to delete them anyway.
>
> As a starting point, remove all index-files and references to 00-INDEX and
> see where the discussion is going.
>
> Again, sorry for the insanely wide distribution.
>
> Signed-off-by: Henrik Austad 
...
> Signed-off-by: Henrik Austad 
> ---
>  Documentation/00-INDEX  | 428 
> 
...

Looks reasonable to me, you can add my ACK for the NetLabel bits.

Acked-by: Paul Moore 

-- 
paul moore
www.paul-moore.com


[RFC PATCH v1 17/17] powerpc/8xx: change name of a few page flags to avoid confusion

2018-09-05 Thread Christophe Leroy
_PAGE_PRIVILEGED corresponds to the SH bit which doesn't protect
against user access but only disables ASID verification on kernel
accesses. User access is controlled with the _PMD_USER flag.

Name it _PAGE_SH instead of _PAGE_PRIVILEGED

_PAGE_HUGE corresponds to the SPS bit which doesn't really tell
that it is a huge page, but only that it is not a 4k page.

Name it _PAGE_SPS instead of _PAGE_HUGE

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/32/pte-8xx.h | 28 ++--
 arch/powerpc/kernel/head_8xx.S   |  6 +++---
 arch/powerpc/mm/8xx_mmu.c|  2 +-
 arch/powerpc/mm/dump_linuxpagetables-8xx.c   |  2 +-
 4 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h 
b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index 2b4669b3badb..1c57efac089d 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -29,10 +29,10 @@
  */
 
 /* Definitions for 8xx embedded chips. */
-#define _PAGE_PRESENT  0x0001  /* Page is valid */
-#define _PAGE_NO_CACHE 0x0002  /* I: cache inhibit */
-#define _PAGE_PRIVILEGED   0x0004  /* No ASID (context) compare */
-#define _PAGE_HUGE 0x0008  /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
+#define _PAGE_PRESENT  0x0001  /* V: Page is valid */
+#define _PAGE_NO_CACHE 0x0002  /* CI: cache inhibit */
+#define _PAGE_SH   0x0004  /* SH: No ASID (context) compare */
+#define _PAGE_SPS  0x0008  /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
 #define _PAGE_DIRTY0x0100  /* C: page changed */
 
 /* These 4 software bits must be masked out when the L2 entry is loaded
@@ -50,15 +50,15 @@
 #define _PAGE_COHERENT 0
 #define _PAGE_WRITETHRU0
 
-#define _PAGE_KERNEL_RO(_PAGE_PRIVILEGED | _PAGE_RO)
-#define _PAGE_KERNEL_ROX   (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
-#define _PAGE_KERNEL_RW(_PAGE_PRIVILEGED | _PAGE_DIRTY)
-#define _PAGE_KERNEL_RWX   (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_EXEC)
+#define _PAGE_KERNEL_RO(_PAGE_SH | _PAGE_RO)
+#define _PAGE_KERNEL_ROX   (_PAGE_SH | _PAGE_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RW(_PAGE_SH | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RWX   (_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
 
 /* Mask of bits returned by pte_pgprot() */
 #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_NO_CACHE | \
 _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
-_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_EXEC)
+_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
 
 #define _PMD_PRESENT   0x0001
 #define _PMD_PRESENT_MASK  _PMD_PRESENT
@@ -74,7 +74,7 @@
 #define PTE_ATOMIC_UPDATES 1
 
 #ifdef CONFIG_PPC_16K_PAGES
-#define _PAGE_PSIZE_PAGE_HUGE
+#define _PAGE_PSIZE_PAGE_SPS
 #else
 #define _PAGE_PSIZE0
 #endif
@@ -115,28 +115,28 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline bool pte_user(pte_t pte)
 {
-   return !(pte_val(pte) & _PAGE_PRIVILEGED);
+   return !(pte_val(pte) & _PAGE_SH);
 }
 
 #define pte_user pte_user
 
 static inline pte_t pte_mkprivileged(pte_t pte)
 {
-   return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+   return __pte(pte_val(pte) | _PAGE_SH);
 }
 
 #define pte_mkprivileged pte_mkprivileged
 
 static inline pte_t pte_mkuser(pte_t pte)
 {
-   return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+   return __pte(pte_val(pte) & ~_PAGE_SH);
 }
 
 #define pte_mkuser pte_mkuser
 
 static inline pte_t pte_mkhuge(pte_t pte)
 {
-   return __pte(pte_val(pte) | _PAGE_HUGE);
+   return __pte(pte_val(pte) | _PAGE_SPS);
 }
 
 #define pte_mkhuge pte_mkhuge
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 6582f824d620..134a573a9f2d 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -642,7 +642,7 @@ DTLBMissIMMR:
mtspr   SPRN_MD_TWC, r10
mfspr   r10, SPRN_IMMR  /* Get current IMMR */
rlwinm  r10, r10, 0, 0xfff8 /* Get 512 kbytes boundary */
-   ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+   ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
  _PAGE_PRESENT | _PAGE_NO_CACHE
mtspr   SPRN_MD_RPN, r10/* Update TLB entry */
 
@@ -660,7 +660,7 @@ DTLBMissLinear:
li  r11, MD_PS8MEG | MD_SVALID | M_APG2
mtspr   SPRN_MD_TWC, r11
rlwinm  r10, r10, 0, 0x0f80 /* 8xx supports max 256Mb RAM */
-   ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+   ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
  _PAGE_PRESENT
mtspr   SPRN_MD_RPN, r10/* Update TLB entry */
 
@@ -679,7 +679,7 @@ ITLBMissLinear:
li  r11, MI_PS8MEG | MI_SVALID | M_APG2
mtspr   SPRN_MI_TWC, r11
  

[RFC PATCH v1 16/17] powerpc/mm: Get rid of pte-common.h

2018-09-05 Thread Christophe Leroy
Do not include pte-common.h in nohash/32/pgtable.h

As that was the last includer, get rid of pte-common.h

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 23 +--
 arch/powerpc/include/asm/pte-common.h| 25 -
 2 files changed, 21 insertions(+), 27 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/pte-common.h

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 1577260641c9..562659b2f62b 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -128,8 +128,27 @@ extern int icache_44x_need_flush;
 #include 
 #endif
 
-/* And here we include common definitions */
-#include 
+/* Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as _PAGE_SHIFT here (ie, naturally aligned).
+ * Platform who don't just pre-define the value so we don't override it here
+ */
+#ifndef PTE_RPN_SHIFT
+#define PTE_RPN_SHIFT  (PAGE_SHIFT)
+#endif
+
+/* The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs
+ */
+#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+#define PTE_RPN_MASK   (~((1ULL << PTE_RPN_SHIFT) - 1))
+#else
+#define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+#endif
+
+/* _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | 
_PAGE_SPECIAL)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/include/asm/pte-common.h 
b/arch/powerpc/include/asm/pte-common.h
deleted file mode 100644
index ff01368a175a..
--- a/arch/powerpc/include/asm/pte-common.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Included from asm/pgtable-*.h only ! */
-
-/* Location of the PFN in the PTE. Most 32-bit platforms use the same
- * as _PAGE_SHIFT here (ie, naturally aligned).
- * Platform who don't just pre-define the value so we don't override it here
- */
-#ifndef PTE_RPN_SHIFT
-#define PTE_RPN_SHIFT  (PAGE_SHIFT)
-#endif
-
-/* The mask covered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
- */
-#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-#define PTE_RPN_MASK   (~((1ULL<

[RFC PATCH v1 15/17] powerpc/mm: Define platform default caches related flags

2018-09-05 Thread Christophe Leroy
Cache related flags like _PAGE_COHERENT and _PAGE_WRITETHRU
are defined on most platforms. The platforms not defining
them don't define any alternative, so we can directly give
them a value of 0 on those platforms.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/32/pte-40x.h |  3 +++
 arch/powerpc/include/asm/nohash/32/pte-8xx.h |  4 
 arch/powerpc/include/asm/pte-common.h| 11 ---
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h 
b/arch/powerpc/include/asm/nohash/32/pte-40x.h
index ab043b3e9b99..7a8b3c94592f 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -53,6 +53,9 @@
 /* No page size encoding in the linux PTE */
 #define _PAGE_PSIZE0
 
+/* cache related flags non existing on 40x */
+#define _PAGE_COHERENT 0
+
 #define _PAGE_KERNEL_RO0
 #define _PAGE_KERNEL_ROX   _PAGE_EXEC
 #define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h 
b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index b899c3c877ac..2b4669b3badb 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -46,6 +46,10 @@
 #define _PAGE_NA   0x0200  /* Supervisor NA, User no access */
 #define _PAGE_RO   0x0600  /* Supervisor RO, User no access */
 
+/* cache related flags non existing on 8xx */
+#define _PAGE_COHERENT 0
+#define _PAGE_WRITETHRU0
+
 #define _PAGE_KERNEL_RO(_PAGE_PRIVILEGED | _PAGE_RO)
 #define _PAGE_KERNEL_ROX   (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
 #define _PAGE_KERNEL_RW(_PAGE_PRIVILEGED | _PAGE_DIRTY)
diff --git a/arch/powerpc/include/asm/pte-common.h 
b/arch/powerpc/include/asm/pte-common.h
index 1a2102f8b1e7..ff01368a175a 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -1,17 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Included from asm/pgtable-*.h only ! */
 
-/*
- * Some bits are only used on some cpu families... Make sure that all
- * the undefined gets a sensible default
- */
-#ifndef _PAGE_COHERENT
-#define _PAGE_COHERENT 0
-#endif
-#ifndef _PAGE_WRITETHRU
-#define _PAGE_WRITETHRU0
-#endif
-
 /* Location of the PFN in the PTE. Most 32-bit platforms use the same
  * as _PAGE_SHIFT here (ie, naturally aligned).
  * Platform who don't just pre-define the value so we don't override it here
-- 
2.13.3



[RFC PATCH v1 14/17] powerpc/mm: Allow platforms to redefine some helpers

2018-09-05 Thread Christophe Leroy
The 40xx defines _PAGE_HWWRITE while others don't.
The 8xx defines _PAGE_RO instead of _PAGE_RW.
The 8xx defines _PAGE_PRIVILEGED instead of _PAGE_USER.
The 8xx defines _PAGE_HUGE and _PAGE_NA while others don't.

Let those platforms redefine pte_write(), pte_wrprotect() and
pte_mkwrite(), and get _PAGE_RO and _PAGE_HWWRITE out of the common
helpers.

Let the 8xx redefine pte_user(), pte_mkprivileged() and pte_mkuser()
and get rid of the _PAGE_PRIVILEGED and _PAGE_USER default values.

Let the 8xx redefine pte_mkhuge() and get rid of the
_PAGE_HUGE default value.
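For clarity, the override mechanism relies on the usual kernel idiom of
defining a macro with the same name as the inline helper; a minimal sketch
(illustrative only, the real definitions are in the diff below):

/* Platform header (e.g. pte-8xx.h): provide the platform version and
 * define a same-named macro to signal that an override exists. */
static inline pte_t pte_wrprotect(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_RO);	/* 8xx: set RO instead of clearing RW */
}
#define pte_wrprotect pte_wrprotect

/* Common header: only provide the generic fallback when no platform
 * override was defined. */
#ifndef pte_wrprotect
static inline pte_t pte_wrprotect(pte_t pte)
{
	return __pte(pte_val(pte) & ~_PAGE_RW);
}
#endif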

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 16 -
 arch/powerpc/include/asm/nohash/32/pte-40x.h | 16 +
 arch/powerpc/include/asm/nohash/32/pte-8xx.h | 51 
 arch/powerpc/include/asm/nohash/64/pgtable.h |  4 ---
 arch/powerpc/include/asm/nohash/pgtable.h| 24 +
 arch/powerpc/include/asm/pte-common.h| 24 -
 6 files changed, 91 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 4fab3a7d764b..1577260641c9 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -136,14 +136,12 @@ extern int icache_44x_need_flush;
 #define pte_clear(mm, addr, ptep) \
do { pte_update(ptep, ~0, 0); } while (0)
 
+#ifndef pte_mkwrite
 static inline pte_t pte_mkwrite(pte_t pte)
 {
-   pte_basic_t ptev;
-
-   ptev = pte_val(pte) & ~_PAGE_RO;
-   ptev |= _PAGE_RW;
-   return __pte(ptev);
+   return __pte(pte_val(pte) | _PAGE_RW);
 }
+#endif
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
@@ -155,14 +153,12 @@ static inline pte_t pte_mkyoung(pte_t pte)
return __pte(pte_val(pte) | _PAGE_ACCESSED);
 }
 
+#ifndef pte_wrprotect
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-   pte_basic_t ptev;
-
-   ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
-   ptev |= _PAGE_RO;
-   return __pte(ptev);
+   return __pte(pte_val(pte) & ~_PAGE_RW);
 }
+#endif
 
 static inline pte_t pte_mkexec(pte_t pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h 
b/arch/powerpc/include/asm/nohash/32/pte-40x.h
index 2b48bc289a4d..ab043b3e9b99 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -87,5 +87,21 @@
 #define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
 #define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
 
+#ifndef __ASSEMBLY__
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE));
+}
+
+#define pte_wrprotect pte_wrprotect
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
+}
+
+#define pte_mkclean pte_mkclean
+#endif
+
 #endif /* __KERNEL__ */
 #endif /*  _ASM_POWERPC_NOHASH_32_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h 
b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index d06fc45bd9ac..b899c3c877ac 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -87,5 +87,56 @@
 #define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_RO)
 #define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
 
+#ifndef __ASSEMBLY__
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_RO);
+}
+
+#define pte_wrprotect pte_wrprotect
+
+static inline int pte_write(pte_t pte)
+{
+   return !(pte_val(pte) & _PAGE_RO);
+}
+
+#define pte_write pte_write
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~_PAGE_RO);
+}
+
+#define pte_mkwrite pte_mkwrite
+
+static inline bool pte_user(pte_t pte)
+{
+   return !(pte_val(pte) & _PAGE_PRIVILEGED);
+}
+
+#define pte_user pte_user
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+}
+
+#define pte_mkprivileged pte_mkprivileged
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+}
+
+#define pte_mkuser pte_mkuser
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_HUGE);
+}
+
+#define pte_mkhuge pte_mkhuge
+#endif
+
 #endif /* __KERNEL__ */
 #endif /*  _ASM_POWERPC_NOHASH_32_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 40162de77c3a..c9e6626093ea 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -90,11 +90,7 @@
  */
 #include 
 
-#define _PAGE_HWWRITE  0
 #define _PAGE_SAO  0
-#define _PAGE_RO 0
-#define _PAGE_NA 0
-#define _PAGE_HUGE 0
 
 #define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
 
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 

[RFC PATCH v1 13/17] powerpc/nohash/64: do not include pte-common.h

2018-09-05 Thread Christophe Leroy
nohash/64 only uses book3e PTE flags, so it doesn't need pte-common.h.

This also allows dropping _PAGE_SAO and H_PAGE_4K_PFN from pte-common.h,
as they are only used by PPC64.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/64/pgtable.h | 16 -
 arch/powerpc/include/asm/nohash/pgtable.h| 27 +
 arch/powerpc/include/asm/pte-common.h| 35 
 3 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 6b50aa864e12..40162de77c3a 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -89,7 +89,21 @@
  * Include the PTE bits definitions
  */
 #include 
-#include 
+
+#define _PAGE_HWWRITE  0
+#define _PAGE_SAO  0
+#define _PAGE_RO 0
+#define _PAGE_NA 0
+#define _PAGE_HUGE 0
+
+#define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+
+/* _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | 
_PAGE_SPECIAL)
+
+#define H_PAGE_4K_PFN 0
 
 #ifndef __ASSEMBLY__
 /* pte_clear moved to later in this file */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index 38fdf513afa1..a40ab294d541 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -8,6 +8,33 @@
 #include 
 #endif
 
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | 
_PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+_PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X  __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/* Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) 
||\
+   defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT   PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT   PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC   PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP   (PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
+
 #ifndef __ASSEMBLY__
 
 /* Generic accessors to PTE bits */
diff --git a/arch/powerpc/include/asm/pte-common.h 
b/arch/powerpc/include/asm/pte-common.h
index cce60b3ba7d4..4d594039bca5 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -14,9 +14,6 @@
 #ifndef _PAGE_WRITETHRU
 #define _PAGE_WRITETHRU0
 #endif
-#ifndef _PAGE_SAO
-#define _PAGE_SAO  0
-#endif
 /* _PAGE_RO and _PAGE_RW shall not be defined at the same time */
 #ifndef _PAGE_RO
 #define _PAGE_RO 0
@@ -61,35 +58,3 @@
  */
 #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | \
  _PAGE_ACCESSED | _PAGE_SPECIAL)
-
-/* Permission masks used for kernel mappings */
-#define PAGE_KERNEL__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-_PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-_PAGE_NO_CACHE | _PAGE_GUARDED)
-#define PAGE_KERNEL_X  __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
-#define PAGE_KERNEL_ROX__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
-
-/* Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) 
||\
-   defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT   PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT   PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC   PAGE_KERNEL_X
-
-/* Advertise special mapping type for AGP */
-#define PAGE_AGP   (PAGE_KERNEL_NC)
-#define HAVE_PAGE_AGP
-
-#ifndef H_PAGE_4K_PFN
-#define H_PAGE_4K_PFN 0
-#endif
-- 
2.13.3



[RFC PATCH v1 12/17] powerpc/mm: Distribute platform specific PAGE and PMD flags and definitions

2018-09-05 Thread Christophe Leroy
The base kernel PAGE_ definition sets are more or less platform
specific. Let's distribute them close to the platform _PAGE_XXX flags
definitions, and customise them to the exact platform flags.

Also define _PAGE_PSIZE and _PTE_NONE_MASK for each platform,
although they are defined as 0.

Do the same with _PMD flags like _PMD_USER and _PMD_PRESENT_MASK.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/32/pte-40x.h   | 29 ++
 arch/powerpc/include/asm/nohash/32/pte-44x.h   | 35 
 arch/powerpc/include/asm/nohash/32/pte-8xx.h   | 27 +
 arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h | 38 +
 arch/powerpc/include/asm/nohash/pte-book3e.h   | 30 ++
 arch/powerpc/include/asm/pte-common.h  | 66 --
 6 files changed, 159 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h 
b/arch/powerpc/include/asm/nohash/32/pte-40x.h
index bb4b3a4b92a0..2b48bc289a4d 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -50,13 +50,42 @@
 #define _PAGE_EXEC 0x200   /* hardware: EX permission */
 #define _PAGE_ACCESSED 0x400   /* software: R: page referenced */
 
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE0
+
+#define _PAGE_KERNEL_RO0
+#define _PAGE_KERNEL_ROX   _PAGE_EXEC
+#define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RWX   (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | 
_PAGE_EXEC)
+
 #define _PMD_PRESENT   0x400   /* PMD points to page of PTEs */
+#define _PMD_PRESENT_MASK  _PMD_PRESENT
 #define _PMD_BAD   0x802
 #define _PMD_SIZE_4M   0x0c0
 #define _PMD_SIZE_16M  0x0e0
+#define _PMD_USER  0
+
+#define _PTE_NONE_MASK 0
 
 /* Until my rework is finished, 40x still needs atomic PTE updates */
 #define PTE_ATOMIC_UPDATES 1
 
+/* Mask of bits returned by pte_pgprot() */
+#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_NO_CACHE | \
+_PAGE_WRITETHRU | _PAGE_USER | _PAGE_ACCESSED | \
+_PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
+
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE (_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE  __pgprot(_PAGE_BASE)
+#define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | 
_PAGE_EXEC)
+#define PAGE_COPY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
 #endif /* __KERNEL__ */
 #endif /*  _ASM_POWERPC_NOHASH_32_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h 
b/arch/powerpc/include/asm/nohash/32/pte-44x.h
index f812c0272364..8d6b268a986f 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h
@@ -85,14 +85,49 @@
 #define _PAGE_NO_CACHE 0x0400  /* H: I bit */
 #define _PAGE_WRITETHRU0x0800  /* H: W bit */
 
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE0
+
+#define _PAGE_KERNEL_RO0
+#define _PAGE_KERNEL_ROX   _PAGE_EXEC
+#define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX   (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
+/* Mask of bits returned by pte_pgprot() */
+#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
+_PAGE_WRITETHRU | _PAGE_USER | _PAGE_ACCESSED | \
+_PAGE_RW | _PAGE_DIRTY | _PAGE_EXEC)
+
 /* TODO: Add large page lowmem mapping support */
 #define _PMD_PRESENT   0
 #define _PMD_PRESENT_MASK (PAGE_MASK)
 #define _PMD_BAD   (~PAGE_MASK)
+#define _PMD_USER  0
 
 /* ERPN in a PTE never gets cleared, ignore it */
 #define _PTE_NONE_MASK 0xULL
 
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE (_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE  __pgprot(_PAGE_BASE)
+#define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | 
_PAGE_EXEC)
+#define PAGE_COPY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X__pgprot(_PAGE_BASE | 

[RFC PATCH v1 11/17] powerpc/mm: Move pte_user() into nohash/pgtable.h

2018-09-05 Thread Christophe Leroy
Now that pte-common.h is only for nohash platforms, let's
move the pte_user() helper out of pte-common.h and put it
together with the other helpers.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/pgtable.h | 10 ++
 arch/powerpc/include/asm/pte-common.h | 13 -
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index 0c63d10b8631..38fdf513afa1 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -47,6 +47,16 @@ static inline int pte_present(pte_t pte)
 }
 
 /*
+ * Don't just check for any non zero bits in __PAGE_USER, since for book3e
+ * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
+ * _PAGE_USER.  Need to explicitly match _PAGE_BAP_UR bit in that case too.
+ */
+static inline bool pte_user(pte_t pte)
+{
+   return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
+}
+
+/*
  * We only find page table entry in the last level
  * Hence no need for other accessors
  */
diff --git a/arch/powerpc/include/asm/pte-common.h 
b/arch/powerpc/include/asm/pte-common.h
index 3a8ec18ffd22..556a914ff845 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -66,19 +66,6 @@
 #define _PTE_NONE_MASK 0
 #endif
 
-#ifndef __ASSEMBLY__
-
-/*
- * Don't just check for any non zero bits in __PAGE_USER, since for book3e
- * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
- * _PAGE_USER.  Need to explicitly match _PAGE_BAP_UR bit in that case too.
- */
-static inline bool pte_user(pte_t pte)
-{
-   return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
-}
-#endif /* __ASSEMBLY__ */
-
 /* Location of the PFN in the PTE. Most 32-bit platforms use the same
  * as _PAGE_SHIFT here (ie, naturally aligned).
  * Platform who don't just pre-define the value so we don't override it here
-- 
2.13.3



[RFC PATCH v1 10/17] powerpc/book3s/32: do not include pte-common.h

2018-09-05 Thread Christophe Leroy
As done for book3s/64, add the necessary flags/defines in
book3s/32/pgtable.h and do not include pte-common.h.

In the meantime, this allows removing all related hash
definitions from pte-common.h, and also removing the
_PAGE_EXEC default, as _PAGE_EXEC is defined on all
platforms except book3s/32.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 97 ++--
 arch/powerpc/include/asm/pte-common.h| 16 +
 2 files changed, 96 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 90991dce63e9..6f56dfa25716 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -8,7 +8,97 @@
 #include 
 
 /* And here we include common definitions */
-#include 
+
+#define _PAGE_KERNEL_RO0
+#define _PAGE_KERNEL_ROX   0
+#define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX   (_PAGE_DIRTY | _PAGE_RW)
+
+#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
+
+#ifndef __ASSEMBLY__
+
+static inline bool pte_user(pte_t pte)
+{
+   return pte_val(pte) & _PAGE_USER;
+}
+#endif /* __ASSEMBLY__ */
+
+/* Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as _PAGE_SHIFT here (ie, naturally aligned).
+ * Platform who don't just pre-define the value so we don't override it here
+ */
+#define PTE_RPN_SHIFT  (PAGE_SHIFT)
+
+/* The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs
+ */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_RPN_MASK   (~((1ULL << PTE_RPN_SHIFT) - 1))
+#else
+#define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+#endif
+
+/* _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HASHPTE | _PAGE_DIRTY | \
+_PAGE_ACCESSED | _PAGE_SPECIAL)
+
+/* Mask of bits returned by pte_pgprot() */
+#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
+_PAGE_WRITETHRU | _PAGE_USER | _PAGE_ACCESSED | \
+_PAGE_RW | _PAGE_DIRTY)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+
+/* Permission masks used to generate the __P and __S table,
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now.
+ */
+#define PAGE_NONE  __pgprot(_PAGE_BASE)
+#define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_COPY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_USER)
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | 
_PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+_PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X  __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/* Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) 
||\
+   defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT   PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT   PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC   PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP   (PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
 
 #define PTE_INDEX_SIZE PTE_SHIFT
 #define PMD_INDEX_SIZE 0
@@ -219,7 +309,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct 
*mm, unsigned long addr,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
  pte_t *ptep)
 {
-   pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+   pte_update(ptep, _PAGE_RW, 0);
 }
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
@@ -235,9 +325,8 @@ static inline void 

[RFC PATCH v1 09/17] powerpc/mm: move __P and __S tables in the common pgtable.h

2018-09-05 Thread Christophe Leroy
The __P and __S flags are the same for all platforms and should remain
so in the future, so avoid duplication.
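For context (not part of this patch), the generic mm code of this era
consumes these macros to build the protection_map[] table, indexed by the
VM_READ/VM_WRITE/VM_EXEC/VM_SHARED bits; a simplified sketch:

/* Simplified sketch of how generic code uses __P/__S (see mm/mmap.c);
 * the real vm_get_page_prot() also merges in arch-specific bits. */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return protection_map[vm_flags &
			      (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
}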

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 20 
 arch/powerpc/include/asm/pgtable.h   | 19 +++
 arch/powerpc/include/asm/pte-common.h| 20 
 3 files changed, 19 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 23299e1a2c08..ecb831400291 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -151,8 +151,6 @@
  * Write permissions imply read permissions for now (we could make write-only
  * pages on BookE but we don't bother for now). Execute permission control is
  * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
  */
 #define PAGE_NONE  __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
 #define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_RW)
@@ -162,24 +160,6 @@
 #define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_READ)
 #define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
 #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 14c79a7dc855..fb4b85bba110 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -20,6 +20,25 @@ struct mm_struct;
 #include 
 #endif /* !CONFIG_PPC_BOOK3S */
 
+/* Note due to the way vm flags are laid out, the bits are XWR */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
+#define __P101 PAGE_READONLY_X
+#define __P110 PAGE_COPY_X
+#define __P111 PAGE_COPY_X
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
+#define __S101 PAGE_READONLY_X
+#define __S110 PAGE_SHARED_X
+#define __S111 PAGE_SHARED_X
+
 #ifndef __ASSEMBLY__
 
 #include 
diff --git a/arch/powerpc/include/asm/pte-common.h 
b/arch/powerpc/include/asm/pte-common.h
index 5a5ba43bdf98..4860dae76dae 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -139,8 +139,6 @@ static inline bool pte_user(pte_t pte)
  * Write permissions imply read permissions for now (we could make write-only
  * pages on BookE but we don't bother for now). Execute permission control is
  * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
  */
 #define PAGE_NONE  __pgprot(_PAGE_BASE | _PAGE_NA)
 #define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
@@ -153,24 +151,6 @@ static inline bool pte_user(pte_t pte)
 #define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
 _PAGE_EXEC)
 
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
 #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-- 
2.13.3



[RFC PATCH v1 08/17] powerpc/mm: drop unused page flags

2018-09-05 Thread Christophe Leroy
The following page flags in pte-common.h can be dropped:

_PAGE_ENDIAN is only used in mm/fsl_booke_mmu.c and is defined in
asm/nohash/32/pte-fsl-booke.h

_PAGE_4K_PFN is neither defined nor used anywhere

_PAGE_READ, _PAGE_WRITE and _PAGE_PTE are only defined and used
in book3s/64

The following page flags in book3s/64/pgtable.h can be dropped, as
they are used neither on this platform nor by common code.

_PAGE_NA, _PAGE_RO, _PAGE_USER and _PAGE_PSIZE

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +-
 arch/powerpc/include/asm/pte-common.h| 17 +
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index b8a88c6d34ff..23299e1a2c08 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -14,10 +14,6 @@
  */
 #define _PAGE_BIT_SWAP_TYPE0
 
-#define _PAGE_NA   0
-#define _PAGE_RO   0
-#define _PAGE_USER 0
-
 #define _PAGE_EXEC 0x1 /* execute permission */
 #define _PAGE_WRITE0x2 /* write access allowed */
 #define _PAGE_READ 0x4 /* read access allowed */
@@ -123,10 +119,6 @@
 #define _PAGE_KERNEL_RWX   (_PAGE_PRIVILEGED | _PAGE_DIRTY |   \
 _PAGE_RW | _PAGE_EXEC)
 /*
- * No page size encoding in the linux PTE
- */
-#define _PAGE_PSIZE0
-/*
  * _PAGE_CHG_MASK masks of bits that are to be preserved across
  * pgprot changes
  */
@@ -149,7 +141,7 @@
  * pages. We always set _PAGE_COHERENT when SMP is enabled or
  * the processor might need it for DMA coherency.
  */
-#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
 #define _PAGE_BASE (_PAGE_BASE_NC)
 
 /* Permission masks used to generate the __P and __S table,
diff --git a/arch/powerpc/include/asm/pte-common.h 
b/arch/powerpc/include/asm/pte-common.h
index bef56141a549..5a5ba43bdf98 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -14,18 +14,12 @@
 #ifndef _PAGE_EXEC
 #define _PAGE_EXEC 0
 #endif
-#ifndef _PAGE_ENDIAN
-#define _PAGE_ENDIAN   0
-#endif
 #ifndef _PAGE_COHERENT
 #define _PAGE_COHERENT 0
 #endif
 #ifndef _PAGE_WRITETHRU
 #define _PAGE_WRITETHRU0
 #endif
-#ifndef _PAGE_4K_PFN
-#define _PAGE_4K_PFN   0
-#endif
 #ifndef _PAGE_SAO
 #define _PAGE_SAO  0
 #endif
@@ -39,9 +33,6 @@
 #define _PAGE_RW 0
 #endif
 
-#ifndef _PAGE_PTE
-#define _PAGE_PTE 0
-#endif
 /* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */
 #ifndef _PAGE_PRIVILEGED
 #define _PAGE_PRIVILEGED 0
@@ -122,7 +113,7 @@ static inline bool pte_user(pte_t pte)
 
 /* Mask of bits returned by pte_pgprot() */
 #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
-_PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
+_PAGE_WRITETHRU | \
 _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
 _PAGE_PRIVILEGED | \
 _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
@@ -208,12 +199,6 @@ static inline bool pte_user(pte_t pte)
 #define PAGE_AGP   (PAGE_KERNEL_NC)
 #define HAVE_PAGE_AGP
 
-#ifndef _PAGE_READ
-/* if not defined, we should not find _PAGE_WRITE too */
-#define _PAGE_READ 0
-#define _PAGE_WRITE _PAGE_RW
-#endif
-
 #ifndef H_PAGE_4K_PFN
 #define H_PAGE_4K_PFN 0
 #endif
-- 
2.13.3



[RFC PATCH v1 07/17] powerpc/mm: Split dump_linuxpagetables flag_array table

2018-09-05 Thread Christophe Leroy
To reduce the complexity of flag_array, and to allow the removal of
the default 0 value for non-existing flags, let's have one flag_array
table for each platform family, with only the flags that really exist.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/Makefile|   7 ++
 arch/powerpc/mm/dump_linuxpagetables-8xx.c  |  82 +
 arch/powerpc/mm/dump_linuxpagetables-book3s64.c | 115 ++
 arch/powerpc/mm/dump_linuxpagetables-generic.c  |  82 +
 arch/powerpc/mm/dump_linuxpagetables.c  | 155 +---
 arch/powerpc/mm/dump_linuxpagetables.h  |  19 +++
 6 files changed, 307 insertions(+), 153 deletions(-)
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables-8xx.c
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables-book3s64.c
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables-generic.c
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables.h

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index cdf6a9960046..3c844bdd16c4 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -43,5 +43,12 @@ obj-$(CONFIG_HIGHMEM)+= highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
 obj-$(CONFIG_SPAPR_TCE_IOMMU)  += mmu_context_iommu.o
 obj-$(CONFIG_PPC_PTDUMP)   += dump_linuxpagetables.o
+ifdef CONFIG_PPC_PTDUMP
+obj-$(CONFIG_4xx)  += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_8xx)  += dump_linuxpagetables-8xx.o
+obj-$(CONFIG_PPC_BOOK3E_MMU)   += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_BOOK3S_32)+= dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_BOOK3S_64)+= dump_linuxpagetables-book3s64.o
+endif
 obj-$(CONFIG_PPC_HTDUMP)   += dump_hashpagetable.o
 obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
diff --git a/arch/powerpc/mm/dump_linuxpagetables-8xx.c 
b/arch/powerpc/mm/dump_linuxpagetables-8xx.c
new file mode 100644
index ..33f52a97975b
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-8xx.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include 
+#include 
+
+#include "dump_linuxpagetables.h"
+
+static const struct flag_info flag_array[] = {
+   {
+   .mask   = _PAGE_PRIVILEGED,
+   .val= 0,
+   .set= "user",
+   .clear  = "",
+   }, {
+   .mask   = _PAGE_RO | _PAGE_NA,
+   .val= 0,
+   .set= "rw",
+   }, {
+   .mask   = _PAGE_RO | _PAGE_NA,
+   .val= _PAGE_RO,
+   .set= "r ",
+   }, {
+   .mask   = _PAGE_RO | _PAGE_NA,
+   .val= _PAGE_NA,
+   .set= "  ",
+   }, {
+   .mask   = _PAGE_EXEC,
+   .val= _PAGE_EXEC,
+   .set= " X ",
+   .clear  = "   ",
+   }, {
+   .mask   = _PAGE_PRESENT,
+   .val= _PAGE_PRESENT,
+   .set= "present",
+   .clear  = "   ",
+   }, {
+   .mask   = _PAGE_GUARDED,
+   .val= _PAGE_GUARDED,
+   .set= "guarded",
+   .clear  = "   ",
+   }, {
+   .mask   = _PAGE_DIRTY,
+   .val= _PAGE_DIRTY,
+   .set= "dirty",
+   .clear  = " ",
+   }, {
+   .mask   = _PAGE_ACCESSED,
+   .val= _PAGE_ACCESSED,
+   .set= "accessed",
+   .clear  = "",
+   }, {
+   .mask   = _PAGE_NO_CACHE,
+   .val= _PAGE_NO_CACHE,
+   .set= "no cache",
+   .clear  = "",
+   }, {
+   .mask   = _PAGE_SPECIAL,
+   .val= _PAGE_SPECIAL,
+   .set= "special",
+   }
+};
+
+struct pgtable_level pg_level[5] = {
+   {
+   }, { /* pgd */
+   .flag   = flag_array,
+   .num= ARRAY_SIZE(flag_array),
+   }, { /* pud */
+   .flag   = flag_array,
+   .num= ARRAY_SIZE(flag_array),
+   }, { /* pmd */
+   .flag   = flag_array,
+   .num= ARRAY_SIZE(flag_array),
+   }, { /* pte */
+   .flag   = flag_array,
+   .num= ARRAY_SIZE(flag_array),
+   },
+};
diff --git a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c 
b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c
new file mode 100644
index ..78aabdd63dc0
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include 
+#include 
+
+#include "dump_linuxpagetables.h"
+
+static const struct flag_info flag_array[] = {
+   {
+ 

[RFC PATCH v1 06/17] powerpc/mm: use pte helpers in generic code

2018-09-05 Thread Christophe Leroy
Get rid of platform specific _PAGE_XXX flags in common powerpc code and
use the pte helpers instead.

mm/dump_linuxpagetables.c will be handled separately.
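One pattern in this patch is worth calling out (sketch of the idea only,
the real code is in the diff below): instead of naming
_PAGE_RW/_PAGE_RO/_PAGE_HWWRITE, the pte_update() callers derive the clear
and set masks from the helper itself:

/* Bits that pte_wrprotect() clears: apply it to an all-ones PTE and
 * invert the result; bits it sets: apply it to an all-zeroes PTE. */
unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
unsigned long set = pte_val(pte_wrprotect(__pte(0)));

pte_update(ptep, clr, set);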

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  9 +++--
 arch/powerpc/include/asm/nohash/32/pgtable.h | 13 +
 arch/powerpc/include/asm/nohash/pgtable.h|  3 +--
 arch/powerpc/mm/mem.c|  2 +-
 arch/powerpc/mm/pgtable.c| 19 ++-
 arch/powerpc/mm/pgtable_32.c | 26 ++
 arch/powerpc/mm/pgtable_64.c | 21 +++--
 arch/powerpc/xmon/xmon.c | 12 +++-
 8 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index daebb4cde626..90991dce63e9 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -318,17 +318,14 @@ static inline int pte_present(pte_t pte)
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
-   unsigned long pteval = pte_val(pte);
/*
 * A read-only access is controlled by _PAGE_USER bit.
 * We have _PAGE_READ set for WRITE and EXECUTE
 */
-   unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
-
-   if (write)
-   need_pte_bits |= _PAGE_WRITE;
+   if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
+   return false;
 
-   if ((pteval & need_pte_bits) != need_pte_bits)
+   if (write && !pte_write(pte))
return false;
 
return true;
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 3be2109719ed..4fab3a7d764b 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -277,8 +277,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct 
*mm, unsigned long addr,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
  pte_t *ptep)
 {
-   pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+   unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
+   unsigned long set = pte_val(pte_wrprotect(__pte(0)));
+
+   pte_update(ptep, clr, set);
 }
+
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
@@ -291,9 +295,10 @@ static inline void __ptep_set_access_flags(struct 
vm_area_struct *vma,
   unsigned long address,
   int psize)
 {
-   unsigned long set = pte_val(entry) &
-   (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-   unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA);
+   pte_t pte_set = 
pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(0);
+   pte_t pte_clr = 
pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0);
+   unsigned long set = pte_val(entry) & pte_val(pte_set);
+   unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr);
 
pte_update(ptep, clr, set);
 
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index 49417b8b49e9..0c63d10b8631 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -32,8 +32,7 @@ static inline pgprot_t pte_pgprot(pte_t pte)  { return 
__pgprot(pte_val(pte) & PA
  */
 static inline int pte_protnone(pte_t pte)
 {
-   return (pte_val(pte) &
-   (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+   return pte_present(pte) && !pte_user(pte);
 }
 
 static inline int pmd_protnone(pmd_t pmd)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5c8530d0c611..24b2ddba0d4d 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -532,7 +532,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned 
long address,
access = 0UL;
break;
case 0x400:
-   access = _PAGE_EXEC;
+   access = pte_val(pte_mkexec(__pte(0)));
break;
default:
return;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 4f788f3762a9..8558f01ed5c8 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -44,20 +44,13 @@ static inline int is_exec_fault(void)
 static inline int pte_looks_normal(pte_t pte)
 {
 
-#if defined(CONFIG_PPC_BOOK3S_64)
-   if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
+   if (pte_present(pte) && !pte_special(pte)) {
if (pte_ci(pte))
return 0;
if (pte_user(pte))
return 

[RFC PATCH v1 05/17] powerpc/mm: add pte helpers to query and change pte flags

2018-09-05 Thread Christophe Leroy
In order to avoid using generic _PAGE_XXX flags in powerpc
core functions, define helpers for all needed flags:
- pte_mkuser() and pte_mkprivileged() to set/clear _PAGE_USER and/or
_PAGE_PRIVILEGED as appropriate
- pte_hashpte() to check if _PAGE_HASHPTE is set.
- pte_mknoncoherent() to make a page non-coherent
- pte_ci() to check if cache is inhibited (already existing on book3s/64)
- pte_exprotect() to protect against execution
- pte_exec() and pte_mkexec() to query and set page execution
- pte_mkpte() to set the _PAGE_PTE flag.

On book3s/32 there is no exec protection, so pte_mkexec() and
pte_exprotect() are nops and pte_exec() always returns true.
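With these accessors, core code can be written without knowing the platform
bit layout; a minimal usage sketch (illustrative only, the function below is
hypothetical and not part of the series):

/* Hypothetical example: build a privileged, executable, non-coherent PTE
 * value using only the new helpers. */
static pte_t make_kernel_exec_pte(pte_t pte)
{
	pte = pte_mkprivileged(pte);	/* kernel-only access */
	pte = pte_mkexec(pte);		/* executable (nop on book3s/32) */
	pte = pte_mknoncoherent(pte);	/* drop the coherency requirement */
	return pte_mkpte(pte);		/* sets _PAGE_PTE where it exists */
}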

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 33 
 arch/powerpc/include/asm/book3s/64/pgtable.h | 30 +
 arch/powerpc/include/asm/nohash/32/pgtable.h |  5 +
 arch/powerpc/include/asm/nohash/64/pgtable.h |  5 +
 arch/powerpc/include/asm/nohash/pgtable.h| 28 +++
 5 files changed, 101 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index b6d3b25d255c..daebb4cde626 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -301,6 +301,9 @@ static inline int pte_dirty(pte_t pte)  { 
return !!(pte_val(pte) & _PAGE_DIRTY);
 static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & 
_PAGE_ACCESSED); }
 static inline int pte_special(pte_t pte)   { return !!(pte_val(pte) & 
_PAGE_SPECIAL); }
 static inline int pte_none(pte_t pte)  { return (pte_val(pte) & 
~_PTE_NONE_MASK) == 0; }
+static inline bool pte_hashpte(pte_t pte)  { return !!(pte_val(pte) & 
_PAGE_HASHPTE); }
+static inline bool pte_ci(pte_t pte)   { return !!(pte_val(pte) & 
_PAGE_NO_CACHE); }
+static inline bool pte_exec(pte_t pte) { return true; }
 static inline pgprot_t pte_pgprot(pte_t pte)   { return __pgprot(pte_val(pte) 
& PAGE_PROT_BITS); }
 
 static inline int pte_present(pte_t pte)
@@ -354,6 +357,11 @@ static inline pte_t pte_wrprotect(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_RW);
 }
 
+static inline pte_t pte_exprotect(pte_t pte)
+{
+   return pte;
+}
+
 static inline pte_t pte_mkclean(pte_t pte)
 {
return __pte(pte_val(pte) & ~_PAGE_DIRTY);
@@ -364,6 +372,16 @@ static inline pte_t pte_mkold(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
 }
 
+static inline pte_t pte_mkexec(pte_t pte)
+{
+   return pte;
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+   return pte;
+}
+
 static inline pte_t pte_mkwrite(pte_t pte)
 {
return __pte(pte_val(pte) | _PAGE_RW);
@@ -389,6 +407,21 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
 }
 
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~_PAGE_USER);
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_USER);
+}
+
+static inline pte_t pte_mknoncoherent(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~_PAGE_COHERENT);
+}
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 13a688fc8cd0..b8a88c6d34ff 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -519,6 +519,11 @@ static inline int pte_special(pte_t pte)
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
 }
 
+static inline bool pte_exec(pte_t pte)
+{
+   return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
+}
+
 static inline pgprot_t pte_pgprot(pte_t pte)   { return __pgprot(pte_val(pte) 
& PAGE_PROT_BITS); }
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -646,6 +651,11 @@ static inline pte_t pte_wrprotect(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_WRITE);
 }
 
+static inline pte_t pte_exprotect(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~_PAGE_EXEC);
+}
+
 static inline pte_t pte_mkclean(pte_t pte)
 {
return __pte(pte_val(pte) & ~_PAGE_DIRTY);
@@ -656,6 +666,16 @@ static inline pte_t pte_mkold(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
 }
 
+static inline pte_t pte_mkexec(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_PTE);
+}
+
 static inline pte_t pte_mkwrite(pte_t pte)
 {
/*
@@ -689,6 +709,16 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
 }
 
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+}
+

[RFC PATCH v1 04/17] powerpc/mm: move some nohash pte helpers in nohash/[32:64]/pgtable.h

2018-09-05 Thread Christophe Leroy
In order to allow their use in nohash/32/pgtable.h, we have to move the
following helpers into nohash/[32:64]/pgtable.h:
- pte_mkwrite()
- pte_mkdirty()
- pte_mkyoung()
- pte_wrprotect()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 28 
 arch/powerpc/include/asm/nohash/64/pgtable.h | 20 
 arch/powerpc/include/asm/nohash/pgtable.h| 28 
 3 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index a507a65b0866..3ee4ae5d28c3 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -136,6 +136,34 @@ extern int icache_44x_need_flush;
 #define pte_clear(mm, addr, ptep) \
do { pte_update(ptep, ~0, 0); } while (0)
 
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+   pte_basic_t ptev;
+
+   ptev = pte_val(pte) & ~_PAGE_RO;
+   ptev |= _PAGE_RW;
+   return __pte(ptev);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+   pte_basic_t ptev;
+
+   ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
+   ptev |= _PAGE_RO;
+   return __pte(ptev);
+}
+
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #definepmd_bad(pmd)(pmd_val(pmd) & _PMD_BAD)
 #definepmd_present(pmd)(pmd_val(pmd) & _PMD_PRESENT_MASK)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 7cd6809f4d33..60d3bdd13ba1 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -94,6 +94,26 @@
 #ifndef __ASSEMBLY__
 /* pte_clear moved to later in this file */
 
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+   return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
 #define PMD_BAD_BITS   (PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS   (PMD_TABLE_SIZE-1)
 
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index 5b82e44c4231..c746e9e784cd 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -77,15 +77,6 @@ static inline unsigned long pte_pfn(pte_t pte)   {
return pte_val(pte) >> PTE_RPN_SHIFT; }
 
 /* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte)
-{
-   pte_basic_t ptev;
-
-   ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
-   ptev |= _PAGE_RO;
-   return __pte(ptev);
-}
-
 static inline pte_t pte_mkclean(pte_t pte)
 {
return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
@@ -96,25 +87,6 @@ static inline pte_t pte_mkold(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
-{
-   pte_basic_t ptev;
-
-   ptev = pte_val(pte) & ~_PAGE_RO;
-   ptev |= _PAGE_RW;
-   return __pte(ptev);
-}
-
-static inline pte_t pte_mkdirty(pte_t pte)
-{
-   return __pte(pte_val(pte) | _PAGE_DIRTY);
-}
-
-static inline pte_t pte_mkyoung(pte_t pte)
-{
-   return __pte(pte_val(pte) | _PAGE_ACCESSED);
-}
-
 static inline pte_t pte_mkspecial(pte_t pte)
 {
return __pte(pte_val(pte) | _PAGE_SPECIAL);
-- 
2.13.3



[RFC PATCH v1 03/17] powerpc/mm: don't use _PAGE_EXEC in book3s/32

2018-09-05 Thread Christophe Leroy
book3s/32 doesn't define _PAGE_EXEC, so there is no need to use it.

All other platforms define _PAGE_EXEC, so there is no need to check
that it is non-zero when not on book3s/32.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 2 +-
 arch/powerpc/mm/pgtable.c| 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 751cf931bb3f..b6d3b25d255c 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -234,7 +234,7 @@ static inline void __ptep_set_access_flags(struct 
vm_area_struct *vma,
   int psize)
 {
unsigned long set = pte_val(entry) &
-   (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+   (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
unsigned long clr = ~pte_val(entry) & _PAGE_RO;
 
pte_update(ptep, clr, set);
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index d71c669c..4f788f3762a9 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -73,7 +73,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
return page;
 }
 
-#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
+#ifdef CONFIG_PPC_BOOK3S
 
 /* Server-style MMU handles coherency when hashing if HW exec permission
  * is supposed per page (currently 64-bit only). If not, then, we always
@@ -106,7 +106,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct 
vm_area_struct *vma,
return pte;
 }
 
-#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
+#else /* CONFIG_PPC_BOOK3S */
 
 /* Embedded type MMU with HW exec support. This is a bit more complicated
  * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
@@ -179,7 +179,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct 
vm_area_struct *vma,
return __pte(pte_val(pte) | _PAGE_EXEC);
 }
 
-#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
+#endif /* CONFIG_PPC_BOOK3S */
 
 /*
  * set_pte stores a linux PTE into the linux page table.
-- 
2.13.3



[RFC PATCH v1 02/17] powerpc/mm: remove direct use of flags related to cache

2018-09-05 Thread Christophe Leroy
As already done for PPC64, use the pgprot_*() cache helpers
instead of raw flags in the ioremap() derived functions.
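The idiom used below may be worth a note (sketch only): applying a
cache-attribute helper to an empty pgprot isolates the platform's
cache-control bits, whatever they are called on that platform.

/* Platform-agnostic equivalent of the old explicit flags: */
unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
/* typically expands to _PAGE_NO_CACHE | _PAGE_GUARDED, as in the
 * removed line of the diff below */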

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/pgtable.h |  2 ++
 arch/powerpc/mm/pgtable_32.c  | 15 +--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index b321c82b3624..5b82e44c4231 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -197,6 +197,8 @@ extern int ptep_set_access_flags(struct vm_area_struct 
*vma, unsigned long addre
 #if _PAGE_WRITETHRU != 0
 #define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & 
~_PAGE_CACHE_CTL) | \
_PAGE_COHERENT | _PAGE_WRITETHRU))
+#else
+#define pgprot_cached_wthru(prot)  pgprot_noncached(prot)
 #endif
 
 #define pgprot_cached_noncoherent(prot) \
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 528999738645..f983ffa24aa0 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -76,24 +76,27 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long 
address)
 void __iomem *
 ioremap(phys_addr_t addr, unsigned long size)
 {
-   return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
-   __builtin_return_address(0));
+   unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
+
+   return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
 ioremap_wc(phys_addr_t addr, unsigned long size)
 {
-   return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
-   __builtin_return_address(0));
+   unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
+
+   return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_wc);
 
 void __iomem *
 ioremap_wt(phys_addr_t addr, unsigned long size)
 {
-   return __ioremap_caller(addr, size, _PAGE_WRITETHRU,
-   __builtin_return_address(0));
+   unsigned long flags = pgprot_val(pgprot_cached_wthru(__pgprot(0)));
+
+   return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_wt);
 
-- 
2.13.3



[RFC PATCH v1 00/17] ban the use of _PAGE_XXX flags outside platform specific code

2018-09-05 Thread Christophe Leroy
Today, flags like _PAGE_RW or _PAGE_USER are used throughout
common parts of the code.
Using them directly in common code has proven to lead to mistakes
or misbehaviour, because their use is not always as trivial as one
might think.

For instance, (flags & _PAGE_USER) == 0 isn't enough to tell
that a page is a kernel page, because some targets use
_PAGE_PRIVILEGED and not _PAGE_USER, so the test has to be
(flags & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_PRIVILEGED.
This has two (bad) consequences:

 - All targets must define every bit, even the unsupported ones,
   leading to a lot of useless #define _PAGE_XXX 0
 - If someone forgets to take all possible _PAGE_XXX bits into account
   for a given case, we can get unexpected behaviour on some targets.

This becomes even more complex when we come to using _PAGE_RW.
Testing (flags & _PAGE_RW) is not enough to tell whether a page
is writable or not, because:

 - Some targets have _PAGE_RO instead, which has to be unset to tell
   that a page is writable
 - Some targets have _PAGE_R and _PAGE_W, in which case
   _PAGE_RW = _PAGE_R | _PAGE_W
 - Even knowing whether a page is readable is not always trivial, because:
   - Some targets require checking that _PAGE_R is set to ensure the page
     is readable
   - Some targets require checking that _PAGE_NA is not set
   - Some targets require checking that _PAGE_RO or _PAGE_RW is set

Etc.

In order to work around all those issues and minimise the risk of errors,
this series aims at removing all use of _PAGE_XXX flags from common powerpc
code and always using pte_xxx() and pte_mkxxx() accessors instead. Those
accessors are then defined in the target-specific parts of the kernel code.
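
As a minimal illustrative sketch (not code taken from the series itself),
the idea is that common code queries a property through an accessor, and
each platform implements that accessor for its own flag layout:

	/* common code: no knowledge of _PAGE_RW vs _PAGE_RO */
	if (pte_write(old_pte))
		new_pte = pte_mkdirty(new_pte);

	/* hypothetical platform using a _PAGE_RO bit */
	static inline int pte_write(pte_t pte)
	{
		return !(pte_val(pte) & _PAGE_RO);
	}

	/* hypothetical platform using a _PAGE_RW bit */
	static inline int pte_write(pte_t pte)
	{
		return !!(pte_val(pte) & _PAGE_RW);
	}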

Christophe Leroy (17):
  powerpc/32: Add ioremap_wt()
  powerpc/mm: remove direct use of flags related to cache
  powerpc/mm: don't use _PAGE_EXEC in book3s/32
  powerpc/mm: move some nohash pte helpers in nohash/[32:64]/pgtable.h
  powerpc/mm: add pte helpers to query and change pte flags
  powerpc/mm: use pte helpers in generic code
  powerpc/mm: Split dump_linuxpagetables flag_array table
  powerpc/mm: drop unused page flags
  powerpc/mm: move __P and __S tables in the common pgtable.h
  powerpc/book3s/32: do not include pte-common.h
  powerpc/mm: Move pte_user() into nohash/pgtable.h
  powerpc/mm: Distribute platform specific PAGE and PMD flags and
definitions
  powerpc/nohash/64: do not include pte-common.h
  powerpc/mm: Allow platforms to redefine some helpers
  powerpc/mm: Define platform default caches related flags
  powerpc/mm: Get rid of pte-common.h
  powerpc/8xx: change name of a few page flags to avoid confusion

 arch/powerpc/include/asm/book3s/32/pgtable.h   | 141 +++--
 arch/powerpc/include/asm/book3s/64/pgtable.h   |  60 +++---
 arch/powerpc/include/asm/io.h  |   6 +
 arch/powerpc/include/asm/nohash/32/pgtable.h   |  65 +-
 arch/powerpc/include/asm/nohash/32/pte-40x.h   |  48 +
 arch/powerpc/include/asm/nohash/32/pte-44x.h   |  35 
 arch/powerpc/include/asm/nohash/32/pte-8xx.h   |  92 -
 arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h |  38 
 arch/powerpc/include/asm/nohash/64/pgtable.h   |  37 +++-
 arch/powerpc/include/asm/nohash/pgtable.h  |  96 ++---
 arch/powerpc/include/asm/nohash/pte-book3e.h   |  30 +++
 arch/powerpc/include/asm/pgtable.h |  19 ++
 arch/powerpc/include/asm/pte-common.h  | 219 -
 arch/powerpc/kernel/head_8xx.S |   6 +-
 arch/powerpc/mm/8xx_mmu.c  |   2 +-
 arch/powerpc/mm/Makefile   |   7 +
 arch/powerpc/mm/dump_linuxpagetables-8xx.c |  82 
 arch/powerpc/mm/dump_linuxpagetables-book3s64.c| 115 +++
 arch/powerpc/mm/dump_linuxpagetables-generic.c |  82 
 arch/powerpc/mm/dump_linuxpagetables.c | 155 +--
 arch/powerpc/mm/dump_linuxpagetables.h |  19 ++
 arch/powerpc/mm/mem.c  |   2 +-
 arch/powerpc/mm/pgtable.c  |  25 +--
 arch/powerpc/mm/pgtable_32.c   |  45 +++--
 arch/powerpc/mm/pgtable_64.c   |  21 +-
 arch/powerpc/xmon/xmon.c   |  12 +-
 26 files changed, 960 insertions(+), 499 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/pte-common.h
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables-8xx.c
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables-book3s64.c
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables-generic.c
 create mode 100644 arch/powerpc/mm/dump_linuxpagetables.h

-- 
2.13.3



[RFC PATCH v1 01/17] powerpc/32: Add ioremap_wt()

2018-09-05 Thread Christophe Leroy
Other arches have ioremap_wt() to map IO areas write-through.
Implement it on PPC as well, in order to avoid drivers using
__ioremap(_PAGE_WRITETHRU).
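
A hedged example of the intended use in a driver (the resource handling and
variable names here are illustrative, not taken from this patch):

	struct resource *res = ...;	/* the device's MMIO/framebuffer range */
	void __iomem *fb;

	/* write-through mapping, instead of __ioremap(..., _PAGE_WRITETHRU) */
	fb = ioremap_wt(res->start, resource_size(res));
	if (!fb)
		return -ENOMEM;

	/* ... access the mapping ... */

	iounmap(fb);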

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/io.h | 6 ++
 arch/powerpc/mm/pgtable_32.c  | 8 
 2 files changed, 14 insertions(+)

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index e0331e754568..3380b5b22450 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -3,6 +3,9 @@
 #ifdef __KERNEL__
 
 #define ARCH_HAS_IOREMAP_WC
+#ifdef CONFIG_PPC32
+#define ARCH_HAS_IOREMAP_WT
+#endif
 
 /*
  * This program is free software; you can redistribute it and/or
@@ -746,6 +749,8 @@ static inline void iosync(void)
  *
  * * ioremap_wc enables write combining
  *
+ * * ioremap_wt enables write through
+ *
  * * iounmap undoes such a mapping and can be hooked
  *
  * * __ioremap_at (and the pending __iounmap_at) are low level functions to
@@ -767,6 +772,7 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned 
long size);
 extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
  unsigned long flags);
 extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
+void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
#define ioremap_nocache(addr, size)	ioremap((addr), (size))
 #define ioremap_uc(addr, size) ioremap((addr), (size))
 #define ioremap_cache(addr, size) \
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..528999738645 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -90,6 +90,14 @@ ioremap_wc(phys_addr_t addr, unsigned long size)
 EXPORT_SYMBOL(ioremap_wc);
 
 void __iomem *
+ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+   return __ioremap_caller(addr, size, _PAGE_WRITETHRU,
+   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
 ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
/* writeable implies dirty for kernel addresses */
-- 
2.13.3



[PATCH 3/3] tty: hvc: hvc_write() fix break condition

2018-09-05 Thread Nicholas Piggin
Commit 550ddadcc758 ("tty: hvc: hvc_write() may sleep") broke the
termination condition for the case where the driver stops accepting
characters, which can result in unnecessary polling of the busy driver.

Restore it by testing the hvc_push() return value.
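
A hedged sketch of the restored termination logic (condensed; initialising
ret for the n_outbuf == 0 case is an assumption of this sketch, not something
shown in the diff below):

	while (count > 0) {
		int ret = 1;	/* assume progress unless hvc_push() says otherwise */

		spin_lock_irqsave(&hp->lock, flags);
		/* ... copy up to rsize bytes from buf into hp->outbuf ... */
		if (hp->n_outbuf > 0)
			ret = hvc_push(hp);
		spin_unlock_irqrestore(&hp->lock, flags);

		if (!ret)
			break;	/* driver accepted nothing, stop polling it busily */

		/* ... flush and retry the remainder ... */
	}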

Fixes: 550ddadcc758 ("tty: hvc: hvc_write() may sleep")
Tested-by: Matteo Croce 
Tested-by: Leon Romanovsky 
Signed-off-by: Nicholas Piggin 
---
 drivers/tty/hvc/hvc_console.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index bacf9b73ec98..c09a38fb0d66 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -522,6 +522,8 @@ static int hvc_write(struct tty_struct *tty, const unsigned 
char *buf, int count
return -EIO;
 
while (count > 0) {
+   int ret;
+
spin_lock_irqsave(&hp->lock, flags);
 
rsize = hp->outbuf_size - hp->n_outbuf;
@@ -537,10 +539,13 @@ static int hvc_write(struct tty_struct *tty, const 
unsigned char *buf, int count
}
 
if (hp->n_outbuf > 0)
-   hvc_push(hp);
+   ret = hvc_push(hp);
 
spin_unlock_irqrestore(&hp->lock, flags);
 
+   if (!ret)
+   break;
+
if (count) {
if (hp->n_outbuf > 0)
hvc_flush(hp);
-- 
2.18.0



[PATCH 2/3] tty: hvc: hvc_poll() fix read loop batching

2018-09-05 Thread Nicholas Piggin
Patch ec97eaad1383 ("tty: hvc: hvc_poll() break hv read loop") removes
get_chars batching entirely, which slows down large console operations
like paste -- virtio console "feels worse than a 9600 baud serial
line," reports Matteo.

This adds back batching in a more latency-friendly way. If the caller
can sleep, we try to fill the entire flip buffer, releasing the lock
and scheduling between each iteration. If it cannot sleep, batches are
limited to 128 bytes. Matteo confirms this fixes the performance
problem.

Latency testing the powerpc OPAL console with OpenBMC UART with a
large paste shows about 0.25ms latency, which seems reasonable. 10ms
latencies were typical for this case before the latency breaking work,
so we still see most of the benefit.

  kopald-12040d.h.5us : hvc_poll <-hvc_handle_interrupt
  kopald-12040d.h.5us : __hvc_poll <-hvc_handle_interrupt
  kopald-12040d.h.5us : _raw_spin_lock_irqsave <-__hvc_poll
  kopald-12040d.h.5us : tty_port_tty_get <-__hvc_poll
  kopald-12040d.h.6us : _raw_spin_lock_irqsave <-tty_port_tty_get
  kopald-12040d.h.6us : _raw_spin_unlock_irqrestore <-tty_port_tty_get
  kopald-12040d.h.6us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.7us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.7us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.   36us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.   36us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.   36us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.   65us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.   65us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.   66us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.   94us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.   95us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.   95us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.  124us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  124us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  125us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.  154us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  154us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  154us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.  183us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  184us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  184us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.  213us : tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  213us : __tty_buffer_request_room <-__hvc_poll
  kopald-12040d.h.  213us+: opal_get_chars <-__hvc_poll
  kopald-12040d.h.  242us : _raw_spin_unlock_irqrestore <-__hvc_poll
  kopald-12040d.h.  242us : tty_flip_buffer_push <-__hvc_poll
  kopald-12040d.h.  243us : queue_work_on <-tty_flip_buffer_push
  kopald-12040d.h.  243us : tty_kref_put <-__hvc_poll
  kopald-12040d.h.  243us : hvc_kick <-hvc_handle_interrupt
  kopald-12040d.h.  243us : wake_up_process <-hvc_kick
  kopald-12040d.h.  244us : try_to_wake_up <-hvc_kick
  kopald-12040d.h.  244us : _raw_spin_lock_irqsave <-try_to_wake_up
  kopald-12040d.h.  244us : _raw_spin_unlock_irqrestore <-try_to_wake_up

Fixes: ec97eaad1383 ("tty: hvc: hvc_poll() break hv read loop")
Reported-by: Matteo Croce 
Tested-by: Matteo Croce 
Tested-by: Leon Romanovsky 
Signed-off-by: Nicholas Piggin 
---
 drivers/tty/hvc/hvc_console.c | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index c917749708d2..bacf9b73ec98 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -623,6 +623,15 @@ static int hvc_chars_in_buffer(struct tty_struct *tty)
#define MAX_TIMEOUT	(2000)
 static u32 timeout = MIN_TIMEOUT;
 
+/*
+ * Maximum number of bytes to get from the console driver if hvc_poll is
+ * called from the driver (and can't sleep). Any more than this and we break
+ * and start polling with khvcd. This value was derived from an OpenBMC
+ * console with the OPAL driver, which results in about 0.25ms interrupts-off
+ * latency.
+ */
+#define HVC_ATOMIC_READ_MAX	128
+
 #define HVC_POLL_READ  0x0001
 #define HVC_POLL_WRITE 0x0002
 
@@ -669,8 +678,8 @@ static int __hvc_poll(struct hvc_struct *hp, bool may_sleep)
if (!hp->irq_requested)
poll_mask |= HVC_POLL_READ;
 
+ read_again:
/* Read data if any */
-
count = tty_buffer_request_room(&hp->port, N_INBUF);
 
/* If flip is full, just reschedule a later read */
@@ -717,7 +726,18 @@ static int __hvc_poll(struct hvc_struct *hp, bool 
may_sleep)
 #endif /* CONFIG_MAGIC_SYSRQ */
tty_insert_flip_char(&hp->port, buf[i], 0);
}
-   read_total = n;
+   read_total 
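
The rest of this hunk is cut off in the archive; the following is only a
hedged sketch of the batching behaviour described above (assumed control flow
and comments, not the verbatim patch):

	read_total += n;

	if (may_sleep) {
		/* Keep refilling the flip buffer, with a latency break
		 * (unlock + reschedule) between each iteration. */
		spin_unlock_irqrestore(&hp->lock, flags);
		cond_resched();
		spin_lock_irqsave(&hp->lock, flags);
		goto read_again;
	} else if (read_total < HVC_ATOMIC_READ_MAX) {
		/* Bounded batch when we cannot sleep; anything beyond
		 * this is left to khvcd polling. */
		goto read_again;
	}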

[PATCH 1/3] tty: hvc: hvc_poll() fix read loop hang

2018-09-05 Thread Nicholas Piggin
Patch ec97eaad1383 ("tty: hvc: hvc_poll() break hv read loop") causes
the virtio console to hang at times (e.g., if you paste a bunch of
characters to it).

The reason is that get_chars must return 0 before we can be sure the
driver will kick or poll input again, but this change only scheduled a
poll if get_chars had returned a full count. Change this to poll on
any > 0 count.
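
A hedged sketch of the contract being relied on here (condensed; hp, buf,
count and poll_mask are the locals used by the surrounding driver code):

	/* Only a return value of 0 guarantees the backend will kick or poll
	 * us again when new input arrives, so any n > 0 means we must
	 * schedule another poll ourselves. */
	n = hp->ops->get_chars(hp->vtermno, buf, count);
	if (n > 0)
		poll_mask |= HVC_POLL_READ;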

Fixes: ec97eaad1383 ("tty: hvc: hvc_poll() break hv read loop")
Reported-by: Matteo Croce 
Reported-by: Jason Gunthorpe 
Tested-by: Matteo Croce 
Tested-by: Leon Romanovsky 
Signed-off-by: Nicholas Piggin 
---
 drivers/tty/hvc/hvc_console.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 5414c4a87bea..c917749708d2 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -717,10 +717,13 @@ static int __hvc_poll(struct hvc_struct *hp, bool 
may_sleep)
 #endif /* CONFIG_MAGIC_SYSRQ */
tty_insert_flip_char(&hp->port, buf[i], 0);
}
-   if (n == count)
-   poll_mask |= HVC_POLL_READ;
read_total = n;
 
+   /*
+* Latency break, schedule another poll immediately.
+*/
+   poll_mask |= HVC_POLL_READ;
+
  out:
/* Wakeup write queue if necessary */
if (hp->do_wakeup) {
-- 
2.18.0



[PATCH 0/3] tty: hvc: latency break regression fixes

2018-09-05 Thread Nicholas Piggin
Hi Greg,

Here are fixes for a few regressions that came in through my
carelessness with the irq latency work for the hvc subsystem.
These were independently reported in two configurations, and I
confirmed them with another.

I think the original changes went upstream via Michael's tree, but he's
away at the moment, so if you would be able to consider these for the
tty tree it would be appreciated.

Thanks,
Nick

Nicholas Piggin (3):
  tty: hvc: hvc_poll() fix read loop hang
  tty: hvc: hvc_poll() fix read loop batching
  tty: hvc: hvc_write() fix break condition

 drivers/tty/hvc/hvc_console.c | 38 ++-
 1 file changed, 33 insertions(+), 5 deletions(-)

-- 
2.18.0



Re: [PATCH 6/6] powerpc sstep: Add modsd, modud instruction emulation

2018-09-05 Thread Sandipan Das
Hi Segher,

On Wednesday 05 September 2018 02:51 AM, Segher Boessenkool wrote:
> On Mon, Sep 03, 2018 at 08:49:38PM +0530, Sandipan Das wrote:
>> +#ifdef __powerpc64__
>> +case 265:   /* modud */
>> +if (!cpu_has_feature(CPU_FTR_ARCH_300))
>> +return -1;
>> +op->val = regs->gpr[ra] % regs->gpr[rb];
>> +goto compute_done;
>> +#endif
> 
> The mod instruction has special cases that aren't handled by this C code,
> too (divide by 0, or signed division of the most negative number by -1).
> For the mod instruction the behaviour is undefined in those cases, but you
> probably should force some specific behaviour.  You don't want the kernel
> to execute a trap instruction, etc. :-)
> 

Agreed. In that case, the same would apply to the divw, divwu, divd and divdu
instructions as well, right? Because I don't see these cases being handled for
them currently.

Also, if I execute a modulo or division instruction for any of these special
cases in a userspace binary, I don't see any exceptions being generated. It's
just that the result is undefined (usually the same as one of the source
operands; I don't remember whether it was the dividend or the divisor). So I'm
wondering whether this would be necessary.
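
For reference, a hedged sketch of how the boundary case could be pinned down
for the unsigned variant (illustrative only, not part of the posted patch):

#ifdef __powerpc64__
	case 265:	/* modud */
		if (!cpu_has_feature(CPU_FTR_ARCH_300))
			return -1;
		/* Only division by zero is undefined in C for the unsigned
		 * case; return a fixed value rather than letting the compiler
		 * emit a trapping sequence. */
		if (regs->gpr[rb] == 0)
			op->val = 0;
		else
			op->val = regs->gpr[ra] % regs->gpr[rb];
		goto compute_done;
#endif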

- Sandipan



Re: [PATCH 1/6] powerpc sstep: Add maddhd, maddhdu, maddld instruction emulation

2018-09-05 Thread Sandipan Das
Hi Segher,

On Wednesday 05 September 2018 03:42 AM, Segher Boessenkool wrote:
> On Mon, Sep 03, 2018 at 08:49:33PM +0530, Sandipan Das wrote:
>> +#ifdef __powerpc64__
>> +case 4:
>> +if (!cpu_has_feature(CPU_FTR_ARCH_300))
>> +return -1;
>> +
>> +switch (instr & 0x3f) {
>> +case 48:/* maddhd */
>> +asm("maddhd %0,%1,%2,%3" : "=r" (op->val) :
>> +"r" (regs->gpr[ra]), "r" (regs->gpr[rb]),
>> +"r" (regs->gpr[rc]));
>> +goto compute_done;
> 
> If running maddhd does not work, will running it in kernel mode work?
> 

Not sure what you meant here, but one of the scenarios I'm aware of
where this will be used is when we place a probe at a location holding
a maddhd instruction. The kernel would first attempt to emulate its
behaviour, which in this case is done by executing the same instruction
(similar to what is done for mulhd and mulhw), and if that fails, try to
execute the instruction natively.

- Sandipan



Re: v4.17 regression: PowerMac G3 won't boot, was Re: [PATCH v5 1/3] of: cache phandle nodes to reduce cost of of_find_node_by_phandle()

2018-09-05 Thread Benjamin Herrenschmidt
On Fri, 2018-08-31 at 14:35 +1000, Benjamin Herrenschmidt wrote:
> 
> > If I force output with "-f", the resulting file has no occurrences 
> > of "phandle".
> 
> Are you booting with BootX or Open Firmware ?

Assuming you are using BootX (or miBoot), can you try this patch ?

--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -37,6 +37,7 @@ static unsigned long __initdata bootx_dt_strend;
 static unsigned long __initdata bootx_node_chosen;
 static boot_infos_t * __initdata bootx_info;
 static char __initdata bootx_disp_path[256];
+static int __initdata bootx_phandle;
 
 /* Is boot-info compatible ? */
 #define BOOT_INFO_IS_COMPATIBLE(bi) \
@@ -258,6 +259,8 @@ static void __init bootx_scan_dt_build_strings(unsigned 
long base,
namep = pp->name ? (char *)(base + pp->name) : NULL;
if (namep == NULL || strcmp(namep, "name") == 0)
goto next;
+   if (!strcmp(namep, "phandle") || !strcmp(namep, 
"linux,phandle"))
+   bootx_phandle = -1;
/* get/create string entry */
soff = bootx_dt_find_string(namep);
if (soff == 0)
@@ -330,6 +333,12 @@ static void __init bootx_scan_dt_build_struct(unsigned 
long base,
ppp = &pp->next;
}
 
+   /* add a phandle */
+   if (bootx_phandle > 0) {
+   bootx_dt_add_prop("phandle", &bootx_phandle, 4, mem_end);
+   bootx_phandle++;
+   }
+
if (node == bootx_node_chosen) {
bootx_add_chosen_props(base, mem_end);
if (bootx_info->dispDeviceRegEntryOffset == 0)
@@ -385,6 +394,8 @@ static unsigned long __init bootx_flatten_dt(unsigned long 
start)
bootx_dt_add_string("linux,bootx-height", _end);
bootx_dt_add_string("linux,bootx-linebytes", _end);
bootx_dt_add_string("linux,bootx-addr", _end);
+   if (bootx_phandle > 0)
+   bootx_dt_add_string("phandle", &mem_end);
/* Wrap up strings */
hdr->off_dt_strings = bootx_dt_strbase - mem_start;
hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase;
@@ -482,6 +493,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
bootx_dt_strbase = bootx_dt_strend = 0;
bootx_node_chosen = 0;
bootx_disp_path[0] = 0;
+   bootx_phandle = 1;
 
if (!BOOT_INFO_IS_V2_COMPATIBLE(bi))
bi->logicalDisplayBase = bi->dispDeviceBase;




Re: [PATCH 3/6] powerpc sstep: Add cnttzw, cnttzd instruction emulation

2018-09-05 Thread Paul Mackerras
On Tue, Sep 04, 2018 at 04:12:07PM -0500, Segher Boessenkool wrote:
> On Mon, Sep 03, 2018 at 08:49:35PM +0530, Sandipan Das wrote:
> > +   case 538:   /* cnttzw */
> > +   if (!cpu_has_feature(CPU_FTR_ARCH_300))
> > +   return -1;
> > +   val = (unsigned int) regs->gpr[rd];
> > +   op->val = ( val ? __builtin_ctz(val) : 32 );
> > +   goto logical_done;
> > +#ifdef __powerpc64__
> > +   case 570:   /* cnttzd */
> > +   if (!cpu_has_feature(CPU_FTR_ARCH_300))
> > +   return -1;
> > +   val = regs->gpr[rd];
> > +   op->val = ( val ? __builtin_ctzl(val) : 64 );
> > +   goto logical_done;
> 
> __builtin_ctz(val) is undefined for val == 0.

Which would be why he only calls it when val != 0, presumably, and
uses 64 when val == 0.  Apart from idiosyncratic whitespace his code
looks correct to me.

Are you saying there is a bug in his code, or that his patch
description is incomplete, or what?

Paul.


Re: [PATCH v2] powerpc/powernv: Make possible for user to force a full ipl cec reboot

2018-09-05 Thread Vasant Hegde

On 09/03/2018 03:56 PM, Vaibhav Jain wrote:

Ever since fast reboot was enabled by default in OPAL,
opal_cec_reboot() uses fast-reset instead of a full IPL to perform a
system reboot. This leaves the user with no direct way to force a full
IPL reboot, other than changing an nvram setting that persistently
disables fast-reset for all subsequent reboots.

This patch provides a more direct way for the user to force a one-shot
full IPL reboot by passing the command line argument 'full' to the
reboot command. So the user will be able to tweak the reboot behavior
via:



.../...



  /* Argument to OPAL_PCI_TCE_KILL */
diff --git a/arch/powerpc/platforms/powernv/setup.c 
b/arch/powerpc/platforms/powernv/setup.c
index ae023622..650484e0940b 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -224,7 +224,22 @@ static void  __noreturn pnv_restart(char *cmd)
pnv_prepare_going_down();

while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
-   rc = opal_cec_reboot();
+   /* See if we need to do a full IPL reboot */
+   if (cmd && strcmp(cmd, "full") == 0)
+   rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
+   else
+   rc = OPAL_UNSUPPORTED;


As discussed offline, please handle the OPAL_SUCCESS case as well.
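
A hedged sketch of what that might look like (illustrative only; the fallback
structure is an assumption, not taken from the posted patch):

	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		if (cmd && strcmp(cmd, "full") == 0)
			rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
		else
			rc = OPAL_UNSUPPORTED;

		/* Fall back to the normal reboot call when a full IPL was
		 * not requested or the firmware does not support it. */
		if (rc == OPAL_UNSUPPORTED || rc == OPAL_PARAMETER)
			rc = opal_cec_reboot();

		/* OPAL_SUCCESS and hard errors leave the loop; only
		 * OPAL_BUSY / OPAL_BUSY_EVENT retry. */
	}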

-Vasant