Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash.h    |   1 +
 arch/powerpc/include/asm/nohash/64/pgtable.h |   2 +
 arch/powerpc/mm/Makefile                     |   3 +-
 arch/powerpc/mm/init_64.c                    | 114 +------------
 arch/powerpc/mm/mem.c                        |  29 +---
 arch/powerpc/mm/mmu_decl.h                   |   4 -
 arch/powerpc/mm/pgtable-book3e.c             | 163 ++++++++++++++++++
 arch/powerpc/mm/pgtable-hash64.c             | 246 +++++++++++++++++++++++++++
 arch/powerpc/mm/pgtable.c                    |   9 +
 arch/powerpc/mm/pgtable_64.c                 |  88 ----------
 arch/powerpc/mm/ppc_mmu_32.c                 |  30 ++++
 11 files changed, 461 insertions(+), 228 deletions(-)
 create mode 100644 arch/powerpc/mm/pgtable-book3e.c
 create mode 100644 arch/powerpc/mm/pgtable-hash64.c

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index ee8dd7e561b0..d51709dad729 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -604,6 +604,7 @@ static inline void hpte_do_hugepage_flush(struct mm_struct 
*mm,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index b9f734dd5b81..a68e809d7739 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -359,6 +359,8 @@ static inline void __ptep_set_access_flags(pte_t *ptep, 
pte_t entry)
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
+extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 1ffeda85c086..6b5cc805c7ba 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,7 +13,8 @@ obj-$(CONFIG_PPC_MMU_NOHASH)  += mmu_context_nohash.o 
tlb_nohash.o \
                                   tlb_nohash_low.o
 obj-$(CONFIG_PPC_BOOK3E)       += tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)    := hash_native_64.o
-obj-$(CONFIG_PPC_STD_MMU_64)   += hash_utils_64.o slb_low.o slb.o $(hash64-y)
+obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
+obj-$(CONFIG_PPC_STD_MMU_64)   += pgtable-hash64.o hash_utils_64.o slb_low.o 
slb.o $(hash64-y)
 obj-$(CONFIG_PPC_STD_MMU_32)   += ppc_mmu_32.o hash_low_32.o
 obj-$(CONFIG_PPC_STD_MMU)      += tlb_hash$(CONFIG_WORD_SIZE).o \
                                   mmu_context_hash$(CONFIG_WORD_SIZE).o
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 8ce1ec24d573..05b025a0efe6 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -65,38 +65,10 @@
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_PPC_STD_MMU_64
-#if PGTABLE_RANGE > USER_VSID_RANGE
-#warning Limited user VSID range means pagetable space is wasted
-#endif
-
-#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
-#warning TASK_SIZE is smaller than it needs to be.
-#endif
-#endif /* CONFIG_PPC_STD_MMU_64 */
-
 phys_addr_t memstart_addr = ~0;
 EXPORT_SYMBOL_GPL(memstart_addr);
 phys_addr_t kernstart_addr;
 EXPORT_SYMBOL_GPL(kernstart_addr);
-
-static void pgd_ctor(void *addr)
-{
-       memset(addr, 0, PGD_TABLE_SIZE);
-}
-
-static void pud_ctor(void *addr)
-{
-       memset(addr, 0, PUD_TABLE_SIZE);
-}
-
-static void pmd_ctor(void *addr)
-{
-       memset(addr, 0, PMD_TABLE_SIZE);
-}
-
-struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
-
 /*
  * Create a kmem_cache() for pagetables.  This is not used for PTE
  * pages - they're linked to struct page, come from the normal free
@@ -104,6 +76,7 @@ struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
  * everything else.  Caches created by this function are used for all
  * the higher level pagetables, and for hugepage pagetables.
  */
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
 {
        char *name;
@@ -138,25 +111,6 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void 
*))
        pr_debug("Allocated pgtable cache for order %d\n", shift);
 }
 
-
-void pgtable_cache_init(void)
-{
-       pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
-       pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
-       /*
-        * In all current configs, when the PUD index exists it's the
-        * same size as either the pgd or pmd index except with THP enabled
-        * on book3s 64
-        */
-       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
-               pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
-
-       if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
-               panic("Couldn't allocate pgtable caches");
-       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
-               panic("Couldn't allocate pud pgtable caches");
-}
-
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Given an address within the vmemmap, determine the pfn of the page that
@@ -189,67 +143,6 @@ static int __meminit vmemmap_populated(unsigned long 
start, int page_size)
        return 0;
 }
 
-/* On hash-based CPUs, the vmemmap is bolted in the hash table.
- *
- * On Book3E CPUs, the vmemmap is currently mapped in the top half of
- * the vmalloc space using normal page tables, though the size of
- * pages encoded in the PTEs can be different
- */
-
-#ifdef CONFIG_PPC_BOOK3E
-static void __meminit vmemmap_create_mapping(unsigned long start,
-                                            unsigned long page_size,
-                                            unsigned long phys)
-{
-       /* Create a PTE encoding without page size */
-       unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
-               _PAGE_KERNEL_RW;
-
-       /* PTEs only contain page size encodings up to 32M */
-       BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
-
-       /* Encode the size in the PTE */
-       flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
-
-       /* For each PTE for that area, map things. Note that we don't
-        * increment phys because all PTEs are of the large size and
-        * thus must have the low bits clear
-        */
-       for (i = 0; i < page_size; i += PAGE_SIZE)
-               BUG_ON(map_kernel_page(start + i, phys, flags));
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void vmemmap_remove_mapping(unsigned long start,
-                                  unsigned long page_size)
-{
-}
-#endif
-#else /* CONFIG_PPC_BOOK3E */
-static void __meminit vmemmap_create_mapping(unsigned long start,
-                                            unsigned long page_size,
-                                            unsigned long phys)
-{
-       int  mapped = htab_bolt_mapping(start, start + page_size, phys,
-                                       pgprot_val(PAGE_KERNEL),
-                                       mmu_vmemmap_psize,
-                                       mmu_kernel_ssize);
-       BUG_ON(mapped < 0);
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void vmemmap_remove_mapping(unsigned long start,
-                                  unsigned long page_size)
-{
-       int mapped = htab_remove_mapping(start, start + page_size,
-                                        mmu_vmemmap_psize,
-                                        mmu_kernel_ssize);
-       BUG_ON(mapped < 0);
-}
-#endif
-
-#endif /* CONFIG_PPC_BOOK3E */
-
 struct vmemmap_backing *vmemmap_list;
 static struct vmemmap_backing *next;
 static int num_left;
@@ -301,6 +194,9 @@ static __meminit void vmemmap_list_populate(unsigned long 
phys,
        vmemmap_list = vmem_back;
 }
 
+extern void __meminit vmemmap_create_mapping(unsigned long start,
+                                            unsigned long page_size,
+                                            unsigned long phys);
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int 
node)
 {
        unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@@ -332,6 +228,8 @@ int __meminit vmemmap_populate(unsigned long start, 
unsigned long end, int node)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+extern void vmemmap_remove_mapping(unsigned long start,
+                                  unsigned long page_size);
 static unsigned long vmemmap_list_free(unsigned long start)
 {
        struct vmemmap_backing *vmem_back, *vmem_back_prev;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22d94c3e6fc4..b9cab99fb303 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -476,6 +476,7 @@ void flush_icache_user_range(struct vm_area_struct *vma, 
struct page *page,
 }
 EXPORT_SYMBOL(flush_icache_user_range);
 
+#ifndef CONFIG_PPC_STD_MMU
 /*
  * This is called at the end of handling a user page fault, when the
  * fault has been handled by updating a PTE in the linux page tables.
@@ -487,39 +488,13 @@ EXPORT_SYMBOL(flush_icache_user_range);
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
                      pte_t *ptep)
 {
-#ifdef CONFIG_PPC_STD_MMU
-       /*
-        * We don't need to worry about _PAGE_PRESENT here because we are
-        * called with either mm->page_table_lock held or ptl lock held
-        */
-       unsigned long access = 0, trap;
-
-       /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
-       if (!pte_young(*ptep) || address >= TASK_SIZE)
-               return;
-
-       /* We try to figure out if we are coming from an instruction
-        * access fault and pass that down to __hash_page so we avoid
-        * double-faulting on execution of fresh text. We have to test
-        * for regs NULL since init will get here first thing at boot
-        *
-        * We also avoid filling the hash if not coming from a fault
-        */
-       if (current->thread.regs == NULL)
-               return;
-       trap = TRAP(current->thread.regs);
-       if (trap == 0x400)
-               access |= _PAGE_EXEC;
-       else if (trap != 0x300)
-               return;
-       hash_preload(vma->vm_mm, address, access, trap);
-#endif /* CONFIG_PPC_STD_MMU */
 #if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
        && defined(CONFIG_HUGETLB_PAGE)
        if (is_vm_hugetlb_page(vma))
                book3e_hugetlb_preload(vma, address, *ptep);
 #endif
 }
+#endif /* !CONFIG_PPC_STD_MMU */
 
 /*
  * System memory should not be in /proc/iomem but various tools expect it
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9f58ff44a075..6360f54ef2d0 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -109,10 +109,6 @@ extern unsigned long Hash_size, Hash_mask;
 
 #endif /* CONFIG_PPC32 */
 
-#ifdef CONFIG_PPC64
-extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
-#endif /* CONFIG_PPC64 */
-
 extern unsigned long ioremap_bot;
 extern unsigned long __max_low_memory;
 extern phys_addr_t __initial_memory_limit_addr;
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
new file mode 100644
index 000000000000..cfea44b66eac
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -0,0 +1,163 @@
+
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 THP Support for hash based MMUs
+ */
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/dma.h>
+
+#include "mmu_decl.h"
+
+#if (TASK_SIZE_USER64 > PGTABLE_RANGE)
+#warning TASK_SIZE is larger than page table range
+#endif
+
+static void pgd_ctor(void *addr)
+{
+       memset(addr, 0, PGD_TABLE_SIZE);
+}
+
+static void pud_ctor(void *addr)
+{
+       memset(addr, 0, PUD_TABLE_SIZE);
+}
+
+static void pmd_ctor(void *addr)
+{
+       memset(addr, 0, PMD_TABLE_SIZE);
+}
+
+void pgtable_cache_init(void)
+{
+       pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+       pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+       /*
+        * In all current configs, when the PUD index exists it's the
+        * same size as either the pgd or pmd index except with THP enabled
+        * on book3s 64
+        */
+       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+               pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+
+       if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
+               panic("Couldn't allocate pgtable caches");
+       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+               panic("Couldn't allocate pud pgtable caches");
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On Book3E CPUs, the vmemmap is currently mapped in the top half of
+ * the vmalloc space using normal page tables, though the size of
+ * pages encoded in the PTEs can be different
+ */
+void __meminit vmemmap_create_mapping(unsigned long start,
+                                      unsigned long page_size,
+                                      unsigned long phys)
+{
+       /* Create a PTE encoding without page size */
+       unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
+               _PAGE_KERNEL_RW;
+
+       /* PTEs only contain page size encodings up to 32M */
+       BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
+
+       /* Encode the size in the PTE */
+       flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
+
+       /* For each PTE for that area, map things. Note that we don't
+        * increment phys because all PTEs are of the large size and
+        * thus must have the low bits clear
+        */
+       for (i = 0; i < page_size; i += PAGE_SIZE)
+               BUG_ON(map_kernel_page(start + i, phys, flags));
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+                            unsigned long page_size)
+{
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+static __ref void *early_alloc_pgtable(unsigned long size)
+{
+       void *pt;
+
+       pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
+       memset(pt, 0, size);
+
+       return pt;
+}
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
+{
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
+
+       if (slab_is_available()) {
+               pgdp = pgd_offset_k(ea);
+               pudp = pud_alloc(&init_mm, pgdp, ea);
+               if (!pudp)
+                       return -ENOMEM;
+               pmdp = pmd_alloc(&init_mm, pudp, ea);
+               if (!pmdp)
+                       return -ENOMEM;
+               ptep = pte_alloc_kernel(pmdp, ea);
+               if (!ptep)
+                       return -ENOMEM;
+               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+                                                         __pgprot(flags)));
+       } else {
+               pgdp = pgd_offset_k(ea);
+#ifndef __PAGETABLE_PUD_FOLDED
+               if (pgd_none(*pgdp)) {
+                       pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+                       BUG_ON(pudp == NULL);
+                       pgd_populate(&init_mm, pgdp, pudp);
+               }
+#endif /* !__PAGETABLE_PUD_FOLDED */
+               pudp = pud_offset(pgdp, ea);
+               if (pud_none(*pudp)) {
+                       pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+                       BUG_ON(pmdp == NULL);
+                       pud_populate(&init_mm, pudp, pmdp);
+               }
+               pmdp = pmd_offset(pudp, ea);
+               if (!pmd_present(*pmdp)) {
+                       ptep = early_alloc_pgtable(PAGE_SIZE);
+                       BUG_ON(ptep == NULL);
+                       pmd_populate_kernel(&init_mm, pmdp, ptep);
+               }
+               ptep = pte_offset_kernel(pmdp, ea);
+               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+                                                         __pgprot(flags)));
+       }
+
+       smp_wmb();
+       return 0;
+}
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
new file mode 100644
index 000000000000..1ab62bd102c7
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 THP Support for hash based MMUs
+ */
+#include <linux/sched.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
+#if (TASK_SIZE_USER64 > PGTABLE_RANGE)
+#warning TASK_SIZE is larger than page table range
+#endif
+
+static void pgd_ctor(void *addr)
+{
+       memset(addr, 0, PGD_TABLE_SIZE);
+}
+
+static void pud_ctor(void *addr)
+{
+       memset(addr, 0, PUD_TABLE_SIZE);
+}
+
+static void pmd_ctor(void *addr)
+{
+       memset(addr, 0, PMD_TABLE_SIZE);
+}
+
+
+void pgtable_cache_init(void)
+{
+       pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+       pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+       /*
+        * In all current configs, when the PUD index exists it's the
+        * same size as either the pgd or pmd index except with THP enabled
+        * on book3s 64
+        */
+       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+               pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+
+       if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
+               panic("Couldn't allocate pgtable caches");
+       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+               panic("Couldn't allocate pud pgtable caches");
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On hash-based CPUs, the vmemmap is bolted in the hash table.
+ *
+ */
+void __meminit vmemmap_create_mapping(unsigned long start,
+                                     unsigned long page_size,
+                                     unsigned long phys)
+{
+       int  mapped = htab_bolt_mapping(start, start + page_size, phys,
+                                       pgprot_val(PAGE_KERNEL),
+                                       mmu_vmemmap_psize,
+                                       mmu_kernel_ssize);
+       BUG_ON(mapped < 0);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+                           unsigned long page_size)
+{
+       int mapped = htab_remove_mapping(start, start + page_size,
+                                        mmu_vmemmap_psize,
+                                        mmu_kernel_ssize);
+       BUG_ON(mapped < 0);
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+                     pte_t *ptep)
+{
+       /*
+        * We don't need to worry about _PAGE_PRESENT here because we are
+        * called with either mm->page_table_lock held or ptl lock held
+        */
+       unsigned long access = 0, trap;
+
+       /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+       if (!pte_young(*ptep) || address >= TASK_SIZE)
+               return;
+
+       /* We try to figure out if we are coming from an instruction
+        * access fault and pass that down to __hash_page so we avoid
+        * double-faulting on execution of fresh text. We have to test
+        * for regs NULL since init will get here first thing at boot
+        *
+        * We also avoid filling the hash if not coming from a fault
+        */
+       if (current->thread.regs == NULL)
+               return;
+       trap = TRAP(current->thread.regs);
+       if (trap == 0x400)
+               access |= _PAGE_EXEC;
+       else if (trap != 0x300)
+               return;
+       hash_preload(vma->vm_mm, address, access, trap);
+}
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
+{
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
+
+       if (slab_is_available()) {
+               pgdp = pgd_offset_k(ea);
+               pudp = pud_alloc(&init_mm, pgdp, ea);
+               if (!pudp)
+                       return -ENOMEM;
+               pmdp = pmd_alloc(&init_mm, pudp, ea);
+               if (!pmdp)
+                       return -ENOMEM;
+               ptep = pte_alloc_kernel(pmdp, ea);
+               if (!ptep)
+                       return -ENOMEM;
+               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+                                                         __pgprot(flags)));
+       } else {
+               /*
+                * If the mm subsystem is not fully up, we cannot create a
+                * linux page table entry for this mapping.  Simply bolt an
+                * entry in the hardware page table.
+                *
+                */
+               if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+                                     mmu_io_psize, mmu_kernel_ssize)) {
+                       printk(KERN_ERR "Failed to do bolted mapping IO "
+                              "memory at %016lx !\n", pa);
+                       return -ENOMEM;
+               }
+       }
+
+       smp_wmb();
+       return 0;
+}
+
+/*
+ * We only try to do i/d cache coherency on stuff that looks like
+ * reasonably "normal" PTEs. We currently require a PTE to be present
+ * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
+ * on userspace PTEs
+ */
+static inline int pte_looks_normal(pte_t pte)
+{
+       return (pte_val(pte) &
+           (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
+           (_PAGE_PRESENT | _PAGE_USER);
+}
+
+static struct page *maybe_pte_to_page(pte_t pte)
+{
+       unsigned long pfn = pte_pfn(pte);
+       struct page *page;
+
+       if (unlikely(!pfn_valid(pfn)))
+               return NULL;
+       page = pfn_to_page(pfn);
+       if (PageReserved(page))
+               return NULL;
+       return page;
+}
+
+/* Server-style MMU handles coherency when hashing if HW exec permission
+ * is supposed per page (currently 64-bit only). If not, then, we always
+ * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec
+ * support falls into the same category.
+ */
+static pte_t set_pte_filter(pte_t pte)
+{
+       pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+       if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) 
||
+                                      cpu_has_feature(CPU_FTR_NOEXECUTE))) {
+               struct page *pg = maybe_pte_to_page(pte);
+               if (!pg)
+                       return pte;
+               if (!test_bit(PG_arch_1, &pg->flags)) {
+                       flush_dcache_icache_page(pg);
+                       set_bit(PG_arch_1, &pg->flags);
+               }
+       }
+       return pte;
+}
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+               pte_t pte)
+{
+       /*
+        * When handling numa faults, we already have the pte marked
+        * _PAGE_PRESENT, but we can be sure that it is not in hpte.
+        * Hence we can use set_pte_at for them.
+        */
+       VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
+               (_PAGE_PRESENT | _PAGE_USER));
+
+       /*
+        * Add the pte bit when tryint set a pte
+        */
+       pte = __pte(pte_val(pte) | _PAGE_PTE);
+
+       /* Note: mm->context.id might not yet have been assigned as
+        * this context might not have been activated yet when this
+        * is called.
+        */
+       pte = set_pte_filter(pte);
+
+       /* Perform the setting of the PTE */
+       __set_pte_at(mm, addr, ptep, pte, 0);
+}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfd7925c72..c73f8017bf7d 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -31,6 +31,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 
+#ifndef CONFIG_PPC_BOOK3S_64
+/* We have alternate definition for the below in pgtable-hash64.c */
 static inline int is_exec_fault(void)
 {
        return current->thread.regs && TRAP(current->thread.regs) == 0x400;
@@ -193,6 +195,13 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, 
pte_t *ptep,
        /* Perform the setting of the PTE */
        __set_pte_at(mm, addr, ptep, pte, 0);
 }
+#else
+static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
+                                     int dirty)
+{
+        return pte;
+}
+#endif /* !CONFIG_PPC_BOOK3S_64 */
 
 /*
  * This is called when relaxing access to a PTE. It's also called in the page
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 6d161cec2e32..21a9a171c267 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -58,11 +58,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/thp.h>
 
-/* Some sanity checking */
-#if TASK_SIZE_USER64 > PGTABLE_RANGE
-#error TASK_SIZE_USER64 exceeds pagetable range
-#endif
-
 #ifdef CONFIG_PPC_STD_MMU_64
 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
 #error TASK_SIZE_USER64 exceeds user VSID range
@@ -71,89 +66,6 @@
 
 unsigned long ioremap_bot = IOREMAP_BASE;
 
-#ifdef CONFIG_PPC_MMU_NOHASH
-static __ref void *early_alloc_pgtable(unsigned long size)
-{
-       void *pt;
-
-       pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
-       memset(pt, 0, size);
-
-       return pt;
-}
-#endif /* CONFIG_PPC_MMU_NOHASH */
-
-/*
- * map_kernel_page currently only called by __ioremap
- * map_kernel_page adds an entry to the ioremap page table
- * and adds an entry to the HPT, possibly bolting it
- */
-int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
-{
-       pgd_t *pgdp;
-       pud_t *pudp;
-       pmd_t *pmdp;
-       pte_t *ptep;
-
-       if (slab_is_available()) {
-               pgdp = pgd_offset_k(ea);
-               pudp = pud_alloc(&init_mm, pgdp, ea);
-               if (!pudp)
-                       return -ENOMEM;
-               pmdp = pmd_alloc(&init_mm, pudp, ea);
-               if (!pmdp)
-                       return -ENOMEM;
-               ptep = pte_alloc_kernel(pmdp, ea);
-               if (!ptep)
-                       return -ENOMEM;
-               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-                                                         __pgprot(flags)));
-       } else {
-#ifdef CONFIG_PPC_MMU_NOHASH
-               pgdp = pgd_offset_k(ea);
-#ifdef PUD_TABLE_SIZE
-               if (pgd_none(*pgdp)) {
-                       pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-                       BUG_ON(pudp == NULL);
-                       pgd_populate(&init_mm, pgdp, pudp);
-               }
-#endif /* PUD_TABLE_SIZE */
-               pudp = pud_offset(pgdp, ea);
-               if (pud_none(*pudp)) {
-                       pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-                       BUG_ON(pmdp == NULL);
-                       pud_populate(&init_mm, pudp, pmdp);
-               }
-               pmdp = pmd_offset(pudp, ea);
-               if (!pmd_present(*pmdp)) {
-                       ptep = early_alloc_pgtable(PAGE_SIZE);
-                       BUG_ON(ptep == NULL);
-                       pmd_populate_kernel(&init_mm, pmdp, ptep);
-               }
-               ptep = pte_offset_kernel(pmdp, ea);
-               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-                                                         __pgprot(flags)));
-#else /* CONFIG_PPC_MMU_NOHASH */
-               /*
-                * If the mm subsystem is not fully up, we cannot create a
-                * linux page table entry for this mapping.  Simply bolt an
-                * entry in the hardware page table.
-                *
-                */
-               if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
-                                     mmu_io_psize, mmu_kernel_ssize)) {
-                       printk(KERN_ERR "Failed to do bolted mapping IO "
-                              "memory at %016lx !\n", pa);
-                       return -ENOMEM;
-               }
-#endif /* !CONFIG_PPC_MMU_NOHASH */
-       }
-
-       smp_wmb();
-       return 0;
-}
-
-
 /**
  * __ioremap_at - Low level function to establish the page tables
  *                for an IO mapping
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 6b2f3e457171..51ffa60001f2 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -174,6 +174,36 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
                add_hash_page(mm->context.id, ea, pmd_val(*pmd));
 }
 
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+                      pte_t *ptep)
+{
+        /*
+         * We don't need to worry about _PAGE_PRESENT here because we are
+         * called with either mm->page_table_lock held or ptl lock held
+         */
+        unsigned long access = 0, trap;
+
+        /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+        if (!pte_young(*ptep) || address >= TASK_SIZE)
+                return;
+
+        /* We try to figure out if we are coming from an instruction
+         * access fault and pass that down to __hash_page so we avoid
+         * double-faulting on execution of fresh text. We have to test
+         * for regs NULL since init will get here first thing at boot
+         *
+         * We also avoid filling the hash if not coming from a fault
+         */
+        if (current->thread.regs == NULL)
+                return;
+        trap = TRAP(current->thread.regs);
+        if (trap == 0x400)
+                access |= _PAGE_EXEC;
+        else if (trap != 0x300)
+                return;
+        hash_preload(vma->vm_mm, address, access, trap);
+}
+
 /*
  * Initialize the hash table and patch the instructions in hashtable.S.
  */
-- 
2.5.0

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to