Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=ee4f2ea48674b6c9d91bc854edc51a3e6a7168c4
Commit:     ee4f2ea48674b6c9d91bc854edc51a3e6a7168c4
Parent:     3be4e6990edf65624cfcbf8f7e33810626b2eefa
Author:     Benjamin Herrenschmidt <[EMAIL PROTECTED]>
AuthorDate: Thu Apr 12 15:30:22 2007 +1000
Committer:  Paul Mackerras <[EMAIL PROTECTED]>
CommitDate: Fri Apr 13 04:09:39 2007 +1000

    [POWERPC] Fix 32-bit mm operations when not using BATs
    
    On hash-table-based 32-bit powerpcs, the hash management code runs
    under a big spinlock, so it is important that it never takes a hash
    fault itself. That code is generally safe (it does its memory
    accesses in real mode, among other things), with the exception of
    accesses to the code itself: the kernel text needs to be accessible
    without taking a hash miss exception.
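    
    (Illustration, not part of the patch: the deadlock being avoided is
    plain lock re-entry. The miss handler takes a non-recursive lock;
    if executing its own instructions can itself raise a hash miss, the
    handler recurses and blocks on the lock it already holds. A rough
    user-space analogy with a pthread spinlock:
    
        #include <pthread.h>
        #include <stdio.h>
        
        static pthread_spinlock_t hash_lock;   /* stands in for the hash table lock */
        
        /* stands in for the hash miss handler; "nested" simulates the
         * handler taking a hash miss on its own text */
        static void hash_miss(int nested)
        {
                pthread_spin_lock(&hash_lock);
                if (nested)
                        hash_miss(0);          /* re-entry: spins forever */
                pthread_spin_unlock(&hash_lock);
        }
        
        int main(void)
        {
                pthread_spin_init(&hash_lock, PTHREAD_PROCESS_PRIVATE);
                hash_miss(0);                  /* normal case: completes */
                puts("non-nested miss handled");
                hash_miss(1);                  /* deadlocks, never returns */
                return 0;
        }
    
    Build with "cc -pthread"; the second call never returns, which is
    the situation the kernel must avoid while holding the hash table
    lock.)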
    
    This is currently guaranteed by having a BAT register permanently
    mapping part of the linear mapping, which includes the kernel text.
    But this is not true when using the "nobats" kernel command line
    option (which can be useful for debugging), and it will not be true
    when using DEBUG_PAGEALLOC, implemented in a subsequent patch.
    
    This patch fixes this by pre-faulting into the hash table the pages
    that cover the kernel text, and by making sure we never evict such
    a page under hash pressure.
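    
    (Sketch of the idea, not the applied code: the real change is the
    mapin_ram() hunk in pgtable_32.c below, and hash_preload(), _stext,
    etext and init_mm are the symbols it uses; the function name here
    is made up for the sketch. The kernel text pages are simply walked
    at boot and pushed into the hash table up front:
    
        static void __init preload_kernel_text(void)
        {
                unsigned long v;
        
                for (v = (unsigned long)_stext; v < (unsigned long)etext;
                     v += PAGE_SIZE)
                        /* 0x300 is the data storage interrupt vector,
                         * i.e. "act as if a normal data access faulted" */
                        hash_preload(&init_mm, v, 0, 0x300);
        }
    
    Combined with the eviction change in hash_low_32.S, those entries
    then stay resident for the life of the system.)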
    
    Signed-off-by: Benjamin Herrenschmidt <[EMAIL PROTECTED]>
    Signed-off-by: Paul Mackerras <[EMAIL PROTECTED]>
---
 arch/powerpc/mm/hash_low_32.S |   22 ++++++++++++++++++++--
 arch/powerpc/mm/mem.c         |    3 ---
 arch/powerpc/mm/mmu_decl.h    |    4 ++++
 arch/powerpc/mm/pgtable_32.c  |   11 +++++++----
 4 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index bd68df5..ddceefc 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -283,6 +283,7 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
 #define PTEG_SIZE      64
 #define LG_PTEG_SIZE   6
 #define LDPTEu         lwzu
+#define LDPTE          lwz
 #define STPTE          stw
 #define CMPPTE         cmpw
 #define PTE_H          0x40
@@ -389,13 +390,30 @@ _GLOBAL(hash_page_patch_C)
         * and we know there is a definite (although small) speed
         * advantage to putting the PTE in the primary PTEG, we always
         * put the PTE in the primary PTEG.
+        *
+        * In addition, we skip any slot that is mapping kernel text in
+        * order to avoid a deadlock when not using BAT mappings if
+        * trying to hash in the kernel hash code itself after it has
+        * already taken the hash table lock. This works in conjunction
+        * with pre-faulting of the kernel text.
+        *
+        * If the hash table bucket is full of kernel text entries, we'll
+        * lockup here but that shouldn't happen
         */
-       addis   r4,r7,next_slot@ha
+
+1:     addis   r4,r7,next_slot@ha              /* get next evict slot */
        lwz     r6,next_slot@l(r4)
-       addi    r6,r6,PTE_SIZE
+       addi    r6,r6,PTE_SIZE                  /* search for candidate */
        andi.   r6,r6,7*PTE_SIZE
        stw     r6,next_slot@l(r4)
        add     r4,r3,r6
+       LDPTE   r0,PTE_SIZE/2(r4)               /* get PTE second word */
+       clrrwi  r0,r0,12
+       lis     r6,etext@h
+       ori     r6,r6,etext@l                   /* get etext */
+       tophys(r6,r6)
+       cmpl    cr0,r0,r6                       /* compare and try again */
+       blt     1b
 
 #ifndef CONFIG_SMP
        /* Store PTE in PTEG */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 52f397c..c4bcd75 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -58,9 +58,6 @@ int init_bootmem_done;
 int mem_init_done;
 unsigned long memory_limit;
 
-extern void hash_preload(struct mm_struct *mm, unsigned long ea,
-                        unsigned long access, unsigned long trap);
-
 int page_is_ram(unsigned long pfn)
 {
        unsigned long paddr = (pfn << PAGE_SHIFT);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index bea2d21..ee55e0b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -22,6 +22,10 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu.h>
 
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+                        unsigned long access, unsigned long trap);
+
+
 #ifdef CONFIG_PPC32
 extern void mapin_ram(void);
 extern int map_page(unsigned long va, phys_addr_t pa, int flags);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 95d3afe..f75f2fc 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -282,16 +282,19 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
 void __init mapin_ram(void)
 {
        unsigned long v, p, s, f;
+       int ktext;
 
        s = mmu_mapin_ram();
        v = KERNELBASE + s;
        p = PPC_MEMSTART + s;
        for (; s < total_lowmem; s += PAGE_SIZE) {
-               if ((char *) v >= _stext && (char *) v < etext)
-                       f = _PAGE_RAM_TEXT;
-               else
-                       f = _PAGE_RAM;
+               ktext = ((char *) v >= _stext && (char *) v < etext);
+               f = ktext ?_PAGE_RAM_TEXT : _PAGE_RAM;
                map_page(v, p, f);
+#ifdef CONFIG_PPC_STD_MMU_32
+               if (ktext)
+                       hash_preload(&init_mm, v, 0, 0x300);
+#endif
                v += PAGE_SIZE;
                p += PAGE_SIZE;
        }
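
The eviction logic added in hash_low_32.S above, rendered in C
(illustrative only: the names hash_pte, pteg, pa_etext and
pick_evict_slot are made up for the sketch, and only the control flow
mirrors the assembly):

        /* A 32-bit hash PTE is two words; the second word carries the
         * real page number in its upper 20 bits. */
        struct hash_pte {
                unsigned int v;         /* word 0: VSID, API, V, H */
                unsigned int rpn_flags; /* word 1: RPN | WIMG | PP */
        };

        static unsigned int next_slot;  /* round-robin evict pointer */

        static struct hash_pte *pick_evict_slot(struct hash_pte *pteg,
                                                unsigned long pa_etext)
        {
                struct hash_pte *slot;

                do {
                        /* advance round-robin through the 8-slot PTEG */
                        next_slot = (next_slot + 1) & 7;
                        slot = &pteg[next_slot];
                        /* retry while the slot's physical page lies below
                         * __pa(etext), i.e. while it maps kernel text */
                } while ((slot->rpn_flags & ~0xfffu) < pa_etext);

                return slot;
        }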