ChangeSet 1.2181.2.55, 2005/03/28 15:29:13-08:00, [EMAIL PROTECTED]

        [SPARC64]: Support >=cheetah+ dual-dtlbs properly.
        
        UltraSPARC-III+ (a.k.a. cheetah+) and later chips have three
        D-TLB units.  One is fully associative and has 16 entries;
        the other two have 512 entries each, are 2-way sub-blocked,
        and are indexed by the PAGE_MASK bits.  This implies that
        the two 512-entry TLBs need to know the page size in order
        to index things properly.
        
        The page size to use is encoded in page size fields within
        the TLB context registers.
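        
        As a rough sketch, condensed from the new include/asm-sparc64/mmu.h
        added below (not a complete listing), the page size selections for
        the two 512-entry D-TLBs live in the context register value right
        next to the context number itself:
        
                /* Condensed from include/asm-sparc64/mmu.h in this patch. */
                #define CTX_PGSZ_8KB            0x0UL
                #define CTX_PGSZ_64KB           0x1UL
                #define CTX_PGSZ_512KB          0x2UL
                #define CTX_PGSZ_4MB            0x3UL
        
                #define CTX_PGSZ0_NUC_SHIFT     61  /* dTLB512_0, nucleus (TL > 0) */
                #define CTX_PGSZ1_NUC_SHIFT     58  /* dTLB512_1, nucleus (TL > 0) */
                #define CTX_PGSZ0_SHIFT         16  /* dTLB512_0, normal context   */
                #define CTX_PGSZ1_SHIFT         19  /* dTLB512_1, normal context   */
        
                /* CTX_PGSZ_BASE is the CTX_PGSZ_* value matching the
                 * configured PAGE_SIZE.  Kernel context zero thus gets 4MB
                 * in dTLB512_0 and the base page size in dTLB512_1:
                 */
                #define CTX_CHEETAH_PLUS_CTX0 \
                        ((CTX_PGSZ_4MB  << CTX_PGSZ0_SHIFT) | \
                         (CTX_PGSZ_BASE << CTX_PGSZ1_SHIFT))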
        
        Up until this point we just left them at zero, which meant
        that both 512-entry TLBs were used for 8K translations only.
        The rest (including the 4MB kernel mappings) went into the
        16-entry fully associative TLB.
        
        Now, for the kernel, we use the first 512 entries for 4MB
        mappings and the second 512 entries for PAGE_SIZE mappings
        (i.e. for vmalloc and modules).  For the user, we use the
        first 512 entries for PAGE_SIZE and the second 512 entries
        for HPAGE_SIZE if the user maps any hugetlb pages; otherwise
        we use the second half for user PAGE_SIZE mappings as well.
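        
        The user-side switch of the second half to HPAGE_SIZE happens the
        first time a process prefaults hugetlb mappings; a condensed sketch
        of that path, taken from the arch/sparc64/mm/hugetlbpage.c hunk
        below, looks like this:
        
                /* Condensed from hugetlb_prefault() in this changeset. */
                if (tlb_type == cheetah_plus) {
                        unsigned long ctx;
        
                        spin_lock(&ctx_alloc_lock);
                        ctx = mm->context.sparc64_ctx_val & ~CTX_PGSZ_MASK;
                        ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
                        ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
        
                        if (ctx != mm->context.sparc64_ctx_val) {
                                /* Flush while the old page size settings are
                                 * still in effect, then reload the context
                                 * register on every cpu running this mm.
                                 */
                                do_flush_tlb_mm(mm);
                                mm->context.sparc64_ctx_val = ctx;
                                on_each_cpu(context_reload, mm, 0, 0);
                        }
                        spin_unlock(&ctx_alloc_lock);
                }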
        
        Most of this changeset is clerical.  We move most of the
        mm->context layout defines into asm/mmu.h and abstract the
        mm->context type so that it is now much easier to see and
        trap accesses to the context values.  In particular, one
        can find all context handling by grepping the sparc64 sources
        for CTX_* and {PRIMARY,SECONDARY}_CONTEXT.
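        
        The new type and its accessors, condensed from the mmu.h additions
        below, look like this; code that needs the full value programmed
        into the hardware uses CTX_HWBITS(), code that only needs the TLB
        context number uses CTX_NRBITS():
        
                typedef struct {
                        unsigned long   sparc64_ctx_val;
                } mm_context_t;
        
                #define CTX_NR_MASK        TAG_CONTEXT_BITS
                #define CTX_HW_MASK        (CTX_NR_MASK | CTX_PGSZ_MASK)
        
                #define CTX_HWBITS(__ctx)  ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
                #define CTX_NRBITS(__ctx)  ((__ctx.sparc64_ctx_val) & CTX_NR_MASK)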
        
        Happily, the fast paths of the kernel are mostly untouched
        by this new stuff.  We add 2 cycles to trap entry, 1 cycle
        to trap exit, and 2 cycles to the window trap fixup code.
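        
        The cost stays this small because nothing is computed at trap time:
        on cheetah+ the required constants are patched directly into the
        trap paths during boot.  A condensed sketch of the idiom, taken from
        the etrap.S hunk below (rtrap.S and winfixup.S use the same trick):
        
                cplus_etrap_insn_1:
                        sethi   %hi(0), %g3     ! placeholder, overwritten at boot
                        ...
                cplus_einsn_1:
                        sethi   %uhi(CTX_CHEETAH_PLUS_NUC), %g3 ! template insn
        
                cheetah_plus_patch_etrap:
                        sethi   %hi(cplus_einsn_1), %o0
                        lduw    [%o0 + %lo(cplus_einsn_1)], %o1 ! load template word
                        sethi   %hi(cplus_etrap_insn_1), %o2
                        or      %o2, %lo(cplus_etrap_insn_1), %o2
                        stw     %o1, [%o2]                      ! overwrite placeholder
                        flush   %o2                             ! flush I-cache line
                        retl
                         nop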
        
        Signed-off-by: David S. Miller <[EMAIL PROTECTED]>



 arch/sparc64/kernel/dtlb_backend.S |    2 
 arch/sparc64/kernel/dtlb_base.S    |    2 
 arch/sparc64/kernel/etrap.S        |   53 +++++++++++++++++++-
 arch/sparc64/kernel/head.S         |   37 ++++++++++++--
 arch/sparc64/kernel/rtrap.S        |   22 ++++++++
 arch/sparc64/kernel/setup.c        |   16 ++++--
 arch/sparc64/kernel/trampoline.S   |   15 +++++
 arch/sparc64/kernel/unaligned.c    |    4 -
 arch/sparc64/kernel/winfixup.S     |   53 ++++++++++++++++----
 arch/sparc64/mm/fault.c            |    4 +
 arch/sparc64/mm/hugetlbpage.c      |   39 +++++++++++++++
 arch/sparc64/mm/init.c             |   55 +++++++++++++++++----
 arch/sparc64/mm/tlb.c              |    8 +--
 arch/sparc64/mm/ultra.S            |   10 +++
 arch/sparc64/prom/p1275.c          |   20 ++++---
 include/asm-sparc64/mmu.h          |   96 ++++++++++++++++++++++++++++++++++++-
 include/asm-sparc64/mmu_context.h  |   31 +----------
 include/asm-sparc64/spitfire.h     |   40 ---------------
 include/asm-sparc64/tlb.h          |    4 -
 19 files changed, 391 insertions(+), 120 deletions(-)


diff -Nru a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
--- a/arch/sparc64/kernel/dtlb_backend.S        2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/dtlb_backend.S        2005-04-03 21:16:17 -07:00
@@ -7,7 +7,7 @@
  */
 
 #include <asm/pgtable.h>
-#include <asm/mmu_context.h>
+#include <asm/mmu.h>
 
 #if PAGE_SHIFT == 13
 #define SZ_BITS                _PAGE_SZ8K
diff -Nru a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S
--- a/arch/sparc64/kernel/dtlb_base.S   2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/dtlb_base.S   2005-04-03 21:16:17 -07:00
@@ -7,7 +7,7 @@
  */
 
 #include <asm/pgtable.h>
-#include <asm/mmu_context.h>
+#include <asm/mmu.h>
 
 /* %g1 TLB_SFSR        (%g1 + %g1 == TLB_TAG_ACCESS)
  * %g2 (KERN_HIGHBITS | KERN_LOWBITS)
diff -Nru a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
--- a/arch/sparc64/kernel/etrap.S       2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/etrap.S       2005-04-03 21:16:17 -07:00
@@ -14,6 +14,7 @@
 #include <asm/spitfire.h>
 #include <asm/head.h>
 #include <asm/processor.h>
+#include <asm/mmu.h>
 
 #define                TASK_REGOFF             (THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ)
 #define                ETRAP_PSTATE1           (PSTATE_RMO | PSTATE_PRIV)
@@ -67,7 +68,13 @@
 
                wrpr    %g3, 0, %otherwin
                wrpr    %g2, 0, %wstate
-               stxa    %g0, [%l4] ASI_DMMU
+cplus_etrap_insn_1:
+               sethi   %hi(0), %g3
+               sllx    %g3, 32, %g3
+cplus_etrap_insn_2:
+               sethi   %hi(0), %g2
+               or      %g3, %g2, %g3
+               stxa    %g3, [%l4] ASI_DMMU
                flush   %l6
                wr      %g0, ASI_AIUS, %asi
 2:             wrpr    %g0, 0x0, %tl
@@ -207,7 +214,13 @@
                mov     PRIMARY_CONTEXT, %l4
                wrpr    %g3, 0, %otherwin
                wrpr    %g2, 0, %wstate
-               stxa    %g0, [%l4] ASI_DMMU
+cplus_etrap_insn_3:
+               sethi   %hi(0), %g3
+               sllx    %g3, 32, %g3
+cplus_etrap_insn_4:
+               sethi   %hi(0), %g2
+               or      %g3, %g2, %g3
+               stxa    %g3, [%l4] ASI_DMMU
                flush   %l6
 
                mov     ASI_AIUS, %l7
@@ -248,4 +261,38 @@
 
 #undef TASK_REGOFF
 #undef ETRAP_PSTATE1
-#undef ETRAP_PSTATE2
+
+cplus_einsn_1:
+               sethi                   %uhi(CTX_CHEETAH_PLUS_NUC), %g3
+cplus_einsn_2:
+               sethi                   %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+
+               .globl                  cheetah_plus_patch_etrap
+cheetah_plus_patch_etrap:
+               /* We configure the dTLB512_0 for 4MB pages and the
+                * dTLB512_1 for 8K pages when in context zero.
+                */
+               sethi                   %hi(cplus_einsn_1), %o0
+               sethi                   %hi(cplus_etrap_insn_1), %o2
+               lduw                    [%o0 + %lo(cplus_einsn_1)], %o1
+               or                      %o2, %lo(cplus_etrap_insn_1), %o2
+               stw                     %o1, [%o2]
+               flush                   %o2
+               sethi                   %hi(cplus_etrap_insn_3), %o2
+               or                      %o2, %lo(cplus_etrap_insn_3), %o2
+               stw                     %o1, [%o2]
+               flush                   %o2
+
+               sethi                   %hi(cplus_einsn_2), %o0
+               sethi                   %hi(cplus_etrap_insn_2), %o2
+               lduw                    [%o0 + %lo(cplus_einsn_2)], %o1
+               or                      %o2, %lo(cplus_etrap_insn_2), %o2
+               stw                     %o1, [%o2]
+               flush                   %o2
+               sethi                   %hi(cplus_etrap_insn_4), %o2
+               or                      %o2, %lo(cplus_etrap_insn_4), %o2
+               stw                     %o1, [%o2]
+               flush                   %o2
+
+               retl
+                nop
diff -Nru a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
--- a/arch/sparc64/kernel/head.S        2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/head.S        2005-04-03 21:16:17 -07:00
@@ -25,6 +25,7 @@
 #include <asm/dcu.h>
 #include <asm/head.h>
 #include <asm/ttable.h>
+#include <asm/mmu.h>
        
 /* This section from _start to sparc64_boot_end should fit into
  * 0x0000.0000.0040.4000 to 0x0000.0000.0040.8000 and will be sharing space
@@ -515,14 +516,29 @@
        membar  #Sync
 
        mov     2, %g2          /* Set TLB type to cheetah+. */
-       BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g5,g7,1f)
+       BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
 
        mov     1, %g2          /* Set TLB type to cheetah. */
 
 1:     sethi   %hi(tlb_type), %g5
        stw     %g2, [%g5 + %lo(tlb_type)]
 
-       /* Patch copy/page operations to cheetah optimized versions. */
+       BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
+       ba,pt   %xcc, 2f
+        nop
+
+1:     /* Patch context register writes to support nucleus page
+        * size correctly.
+        */
+       call    cheetah_plus_patch_etrap
+        nop
+       call    cheetah_plus_patch_rtrap
+        nop
+       call    cheetah_plus_patch_winfixup
+        nop
+       
+
+2:     /* Patch copy/page operations to cheetah optimized versions. */
        call    cheetah_patch_copyops
         nop
        call    cheetah_patch_cachetlbops
@@ -685,10 +701,23 @@
        call    init_irqwork_curcpu
         nop
 
-       sethi   %hi(sparc64_ttable_tl0), %g5
        call    prom_set_trap_table
-        mov    %g5, %o0
+        sethi  %hi(sparc64_ttable_tl0), %o0
+
+       BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f)
+       ba,pt   %xcc, 2f
+        nop
 
+1:     /* Start using proper page size encodings in ctx register.  */
+       sethi   %uhi(CTX_CHEETAH_PLUS_NUC), %g3
+       mov     PRIMARY_CONTEXT, %g1
+       sllx    %g3, 32, %g3
+       sethi   %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+       or      %g3, %g2, %g3
+       stxa    %g3, [%g1] ASI_DMMU
+       membar  #Sync
+
+2:
        rdpr    %pstate, %o1
        or      %o1, PSTATE_IE, %o1
        wrpr    %o1, 0, %pstate
diff -Nru a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
--- a/arch/sparc64/kernel/rtrap.S       2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/rtrap.S       2005-04-03 21:16:17 -07:00
@@ -250,6 +250,10 @@
                brnz,pn                 %l3, kern_rtt
                 mov                    PRIMARY_CONTEXT, %l7
                ldxa                    [%l7 + %l7] ASI_DMMU, %l0
+cplus_rtrap_insn_1:
+               sethi                   %hi(0), %l1
+               sllx                    %l1, 32, %l1
+               or                      %l0, %l1, %l0
                stxa                    %l0, [%l7] ASI_DMMU
                flush                   %g6
                rdpr                    %wstate, %l1
@@ -335,3 +339,21 @@
                wr                      %g0, FPRS_DU, %fprs
                ba,pt                   %xcc, rt_continue
                 stb                    %l5, [%g6 + TI_FPDEPTH]
+
+cplus_rinsn_1:
+               sethi                   %uhi(CTX_CHEETAH_PLUS_NUC), %l1
+
+               .globl                  cheetah_plus_patch_rtrap
+cheetah_plus_patch_rtrap:
+               /* We configure the dTLB512_0 for 4MB pages and the
+                * dTLB512_1 for 8K pages when in context zero.
+                */
+               sethi                   %hi(cplus_rinsn_1), %o0
+               sethi                   %hi(cplus_rtrap_insn_1), %o2
+               lduw                    [%o0 + %lo(cplus_rinsn_1)], %o1
+               or                      %o2, %lo(cplus_rtrap_insn_1), %o2
+               stw                     %o1, [%o2]
+               flush                   %o2
+
+               retl
+                nop
diff -Nru a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
--- a/arch/sparc64/kernel/setup.c       2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/setup.c       2005-04-03 21:16:17 -07:00
@@ -47,6 +47,7 @@
 #include <asm/timer.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/mmu.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
@@ -157,11 +158,11 @@
 
                        for_each_process(p) {
                                mm = p->mm;
-                               if (CTX_HWBITS(mm->context) == ctx)
+                               if (CTX_NRBITS(mm->context) == ctx)
                                        break;
                        }
                        if (!mm ||
-                           CTX_HWBITS(mm->context) != ctx)
+                           CTX_NRBITS(mm->context) != ctx)
                                goto done;
 
                        pgdp = pgd_offset(mm, va);
@@ -187,12 +188,19 @@
                }
 
                if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) {
+                       unsigned long kernel_pctx = 0;
+
+                       if (tlb_type == cheetah_plus)
+                               kernel_pctx |= (CTX_CHEETAH_PLUS_NUC |
+                                               CTX_CHEETAH_PLUS_CTX0);
+
                        /* Spitfire Errata #32 workaround */
                        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                             "flush     %%g6"
                                             : /* No outputs */
-                                            : "r" (0),
-                                            "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+                                            : "r" (kernel_pctx),
+                                              "r" (PRIMARY_CONTEXT),
+                                              "i" (ASI_DMMU));
 
                        /*
                         * Locked down tlb entry.
diff -Nru a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
--- a/arch/sparc64/kernel/trampoline.S  2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/trampoline.S  2005-04-03 21:16:17 -07:00
@@ -15,6 +15,7 @@
 #include <asm/spitfire.h>
 #include <asm/processor.h>
 #include <asm/thread_info.h>
+#include <asm/mmu.h>
 
        .data
        .align  8
@@ -334,6 +335,20 @@
        call            init_irqwork_curcpu
         nop
 
+       BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f)
+       ba,pt   %xcc, 2f
+        nop
+
+1:     /* Start using proper page size encodings in ctx register.  */
+       sethi   %uhi(CTX_CHEETAH_PLUS_NUC), %g3
+       mov     PRIMARY_CONTEXT, %g1
+       sllx    %g3, 32, %g3
+       sethi   %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+       or      %g3, %g2, %g3
+       stxa    %g3, [%g1] ASI_DMMU
+       membar  #Sync
+
+2:
        rdpr            %pstate, %o1
        or              %o1, PSTATE_IE, %o1
        wrpr            %o1, 0, %pstate
diff -Nru a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
--- a/arch/sparc64/kernel/unaligned.c   2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/unaligned.c   2005-04-03 21:16:17 -07:00
@@ -379,8 +379,8 @@
                        printk(KERN_ALERT "Unable to handle kernel paging request in mna handler");
                printk(KERN_ALERT " at virtual address %016lx\n",address);
                printk(KERN_ALERT "current->{mm,active_mm}->context = %016lx\n",
-                       (current->mm ? current->mm->context :
-                       current->active_mm->context));
+                       (current->mm ? CTX_HWBITS(current->mm->context) :
+                       CTX_HWBITS(current->active_mm->context)));
                printk(KERN_ALERT "current->{mm,active_mm}->pgd = %016lx\n",
                        (current->mm ? (unsigned long) current->mm->pgd :
                        (unsigned long) current->active_mm->pgd));
diff -Nru a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
--- a/arch/sparc64/kernel/winfixup.S    2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/winfixup.S    2005-04-03 21:16:17 -07:00
@@ -14,6 +14,25 @@
 #include <asm/thread_info.h>
 
        .text
+
+set_pcontext:
+cplus_winfixup_insn_1:
+       sethi   %hi(0), %l1
+       mov     PRIMARY_CONTEXT, %g1
+       sllx    %l1, 32, %l1
+cplus_winfixup_insn_2:
+       sethi   %hi(0), %g2
+       or      %l1, %g2, %l1
+       stxa    %l1, [%g1] ASI_DMMU
+       flush   %g6
+       retl
+        nop
+
+cplus_wfinsn_1:
+       sethi   %uhi(CTX_CHEETAH_PLUS_NUC), %l1
+cplus_wfinsn_2:
+       sethi   %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+
        .align  32
 
        /* Here are the rules, pay attention.
@@ -62,9 +81,8 @@
        wrpr            %g0, 0x0, %canrestore           ! Standard etrap stuff.
        wrpr            %g2, 0x0, %wstate               ! This must be consistent.
        wrpr            %g0, 0x0, %otherwin             ! We know this.
-       mov             PRIMARY_CONTEXT, %g1            ! Change contexts...
-       stxa            %g0, [%g1] ASI_DMMU             ! Back into the nucleus.
-       flush           %g6                             ! Flush instruction buffers
+       call            set_pcontext                    ! Change contexts...
+        nop
        rdpr            %pstate, %l1                    ! Prepare to change globals.
        mov             %g6, %o7                        ! Get current.
 
@@ -183,9 +201,8 @@
 
        wrpr            %g2, 0x0, %wstate               ! This must be consistent.
        wrpr            %g0, 0x0, %otherwin             ! We know this.
-       mov             PRIMARY_CONTEXT, %g1            ! Change contexts...
-       stxa            %g0, [%g1] ASI_DMMU             ! Back into the nucleus.
-       flush           %g6                             ! Flush instruction buffers
+       call            set_pcontext                    ! Change contexts...
+        nop
        rdpr            %pstate, %l1                    ! Prepare to change globals.
        mov             %g4, %o2                        ! Setup args for
        mov             %g5, %o1                        ! final call to mem_address_unaligned.
@@ -289,9 +306,8 @@
 
        wrpr            %g2, 0x0, %wstate               ! This must be consistent.
        wrpr            %g0, 0x0, %otherwin             ! We know this.
-       mov             PRIMARY_CONTEXT, %g1            ! Change contexts...
-       stxa            %g0, [%g1] ASI_DMMU             ! Back into the nucleus.
-       flush           %g6                             ! Flush instruction buffers
+       call            set_pcontext                    ! Change contexts...
+        nop
        rdpr            %pstate, %l1                    ! Prepare to change globals.
        mov             %g4, %o1                        ! Setup args for
        mov             %g5, %o2                        ! final call to data_access_exception.
@@ -368,3 +384,22 @@
        ba,pt           %xcc, rtrap
         clr            %l6
        
+
+       .globl          cheetah_plus_patch_winfixup
+cheetah_plus_patch_winfixup:
+       sethi                   %hi(cplus_wfinsn_1), %o0
+       sethi                   %hi(cplus_winfixup_insn_1), %o2
+       lduw                    [%o0 + %lo(cplus_wfinsn_1)], %o1
+       or                      %o2, %lo(cplus_winfixup_insn_1), %o2
+       stw                     %o1, [%o2]
+       flush                   %o2
+
+       sethi                   %hi(cplus_wfinsn_2), %o0
+       sethi                   %hi(cplus_winfixup_insn_2), %o2
+       lduw                    [%o0 + %lo(cplus_wfinsn_2)], %o1
+       or                      %o2, %lo(cplus_winfixup_insn_2), %o2
+       stw                     %o1, [%o2]
+       flush                   %o2
+
+       retl
+        nop
diff -Nru a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
--- a/arch/sparc64/mm/fault.c   2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/fault.c   2005-04-03 21:16:17 -07:00
@@ -144,7 +144,9 @@
                       "at virtual address %016lx\n", (unsigned long)address);
        }
        printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n",
-              (tsk->mm ? tsk->mm->context : tsk->active_mm->context));
+              (tsk->mm ?
+               CTX_HWBITS(tsk->mm->context) :
+               CTX_HWBITS(tsk->active_mm->context)));
        printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n",
               (tsk->mm ? (unsigned long) tsk->mm->pgd :
                          (unsigned long) tsk->active_mm->pgd));
diff -Nru a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
--- a/arch/sparc64/mm/hugetlbpage.c     2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/hugetlbpage.c     2005-04-03 21:16:17 -07:00
@@ -20,6 +20,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
 
 static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
@@ -217,11 +218,49 @@
        flush_tlb_range(vma, start, end);
 }
 
+static void context_reload(void *__data)
+{
+       struct mm_struct *mm = __data;
+
+       if (mm == current->mm)
+               load_secondary_context(mm);
+}
+
 int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
 {
        struct mm_struct *mm = current->mm;
        unsigned long addr;
        int ret = 0;
+
+       /* On UltraSPARC-III+ and later, configure the second half of
+        * the Data-TLB for huge pages.
+        */
+       if (tlb_type == cheetah_plus) {
+               unsigned long ctx;
+
+               spin_lock(&ctx_alloc_lock);
+               ctx = mm->context.sparc64_ctx_val;
+               ctx &= ~CTX_PGSZ_MASK;
+               ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+               ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
+
+               if (ctx != mm->context.sparc64_ctx_val) {
+                       /* When changing the page size fields, we
+                        * must perform a context flush so that no
+                        * stale entries match.  This flush must
+                        * occur with the original context register
+                        * settings.
+                        */
+                       do_flush_tlb_mm(mm);
+
+                       /* Reload the context register of all processors
+                        * also executing in this address space.
+                        */
+                       mm->context.sparc64_ctx_val = ctx;
+                       on_each_cpu(context_reload, mm, 0, 0);
+               }
+               spin_unlock(&ctx_alloc_lock);
+       }
 
        BUG_ON(vma->vm_start & ~HPAGE_MASK);
        BUG_ON(vma->vm_end & ~HPAGE_MASK);
diff -Nru a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
--- a/arch/sparc64/mm/init.c    2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/init.c    2005-04-03 21:16:17 -07:00
@@ -61,7 +61,7 @@
 /* get_new_mmu_context() uses "cache + 1".  */
 DEFINE_SPINLOCK(ctx_alloc_lock);
 unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
-#define CTX_BMAP_SLOTS (1UL << (CTX_VERSION_SHIFT - 6))
+#define CTX_BMAP_SLOTS (1UL << (CTX_NR_BITS - 6))
 unsigned long mmu_context_bmap[CTX_BMAP_SLOTS];
 
 /* References to special section boundaries */
@@ -195,7 +195,7 @@
        }
 
        if (get_thread_fault_code())
-               __update_mmu_cache(vma->vm_mm->context & TAG_CONTEXT_BITS,
+               __update_mmu_cache(CTX_NRBITS(vma->vm_mm->context),
                                   address, pte, get_thread_fault_code());
 }
 
@@ -421,11 +421,15 @@
        prom_printf("Remapping the kernel... ");
 
        /* Spitfire Errata #32 workaround */
+       /* NOTE: Using plain zero for the context value is
+        *       correct here, we are not using the Linux trap
+        *       tables yet so we should not use the special
+        *       UltraSPARC-III+ page size encodings yet.
+        */
        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                             "flush     %%g6"
                             : /* No outputs */
-                            : "r" (0),
-                            "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+                            : "r" (0), "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
 
        switch (tlb_type) {
        default:
@@ -485,6 +489,11 @@
        tte_vaddr = (unsigned long) KERNBASE;
 
        /* Spitfire Errata #32 workaround */
+       /* NOTE: Using plain zero for the context value is
+        *       correct here, we are not using the Linux trap
+        *       tables yet so we should not use the special
+        *       UltraSPARC-III+ page size encodings yet.
+        */
        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                             "flush     %%g6"
                             : /* No outputs */
@@ -503,6 +512,11 @@
 
 
        /* Spitfire Errata #32 workaround */
+       /* NOTE: Using plain zero for the context value is
+        *       correct here, we are not using the Linux trap
+        *       tables yet so we should not use the special
+        *       UltraSPARC-III+ page size encodings yet.
+        */
        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                             "flush     %%g6"
                             : /* No outputs */
@@ -589,6 +603,9 @@
                        unsigned long tag;
 
                        /* Spitfire Errata #32 workaround */
+                       /* NOTE: Always runs on spitfire, so no cheetah+
+                        *       page size encodings.
+                        */
                        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                             "flush     %%g6"
                                             : /* No outputs */
@@ -755,6 +772,9 @@
                        unsigned long data;
 
                        /* Spitfire Errata #32 workaround */
+                       /* NOTE: Always runs on spitfire, so no cheetah+
+                        *       page size encodings.
+                        */
                        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                             "flush     %%g6"
                                             : /* No outputs */
@@ -766,6 +786,9 @@
                                unsigned long tag;
 
                                /* Spitfire Errata #32 workaround */
+                               /* NOTE: Always runs on spitfire, so no
+                                *       cheetah+ page size encodings.
+                                */
                                __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                                     "flush     %%g6"
                                                     : /* No outputs */
@@ -793,6 +816,9 @@
                        unsigned long data;
 
                        /* Spitfire Errata #32 workaround */
+                       /* NOTE: Always runs on spitfire, so no
+                        *       cheetah+ page size encodings.
+                        */
                        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                             "flush     %%g6"
                                             : /* No outputs */
@@ -804,6 +830,9 @@
                                unsigned long tag;
 
                                /* Spitfire Errata #32 workaround */
+                               /* NOTE: Always runs on spitfire, so no
+                                *       cheetah+ page size encodings.
+                                */
                                __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                                     "flush     %%g6"
                                                     : /* No outputs */
@@ -959,6 +988,9 @@
        if (tlb_type == spitfire) {
                for (i = 0; i < 64; i++) {
                        /* Spitfire Errata #32 workaround */
+                       /* NOTE: Always runs on spitfire, so no
+                        *       cheetah+ page size encodings.
+                        */
                        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                             "flush     %%g6"
                                             : /* No outputs */
@@ -974,6 +1006,9 @@
                        }
 
                        /* Spitfire Errata #32 workaround */
+                       /* NOTE: Always runs on spitfire, so no
+                        *       cheetah+ page size encodings.
+                        */
                        __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
                                             "flush     %%g6"
                                             : /* No outputs */
@@ -1007,11 +1042,14 @@
 void get_new_mmu_context(struct mm_struct *mm)
 {
        unsigned long ctx, new_ctx;
+       unsigned long orig_pgsz_bits;
        
+
        spin_lock(&ctx_alloc_lock);
-       ctx = CTX_HWBITS(tlb_context_cache + 1);
-       new_ctx = find_next_zero_bit(mmu_context_bmap, 1UL << CTX_VERSION_SHIFT, ctx);
-       if (new_ctx >= (1UL << CTX_VERSION_SHIFT)) {
+       orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
+       ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
+       new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+       if (new_ctx >= (1 << CTX_NR_BITS)) {
                new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
                if (new_ctx >= ctx) {
                        int i;
@@ -1040,9 +1078,8 @@
        new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
 out:
        tlb_context_cache = new_ctx;
+       mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
        spin_unlock(&ctx_alloc_lock);
-
-       mm->context = new_ctx;
 }
 
 #ifndef CONFIG_SMP
diff -Nru a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
--- a/arch/sparc64/mm/tlb.c     2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/tlb.c     2005-04-03 21:16:17 -07:00
@@ -26,15 +26,13 @@
        struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
 
        if (mp->tlb_nr) {
-               unsigned long context = mp->mm->context;
-
-               if (CTX_VALID(context)) {
+               if (CTX_VALID(mp->mm->context)) {
 #ifdef CONFIG_SMP
                        smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
                                              &mp->vaddrs[0]);
 #else
-                       __flush_tlb_pending(CTX_HWBITS(context), mp->tlb_nr,
-                                           &mp->vaddrs[0]);
+                       __flush_tlb_pending(CTX_HWBITS(mp->mm->context),
+                                           mp->tlb_nr, &mp->vaddrs[0]);
 #endif
                }
                mp->tlb_nr = 0;
diff -Nru a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
--- a/arch/sparc64/mm/ultra.S   2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/ultra.S   2005-04-03 21:16:17 -07:00
@@ -223,7 +223,15 @@
         or             %o5, %o0, %o5
        ba,a,pt         %xcc, __prefill_itlb
 
-       /* Cheetah specific versions, patched at boot time.  */
+       /* Cheetah specific versions, patched at boot time.
+        *
+        * The writes of the PRIMARY_CONTEXT register in this file are
+        * safe even on Cheetah+ and later wrt. the page size fields.
+        * The nucleus page size fields do not matter because we make
+        * no data references, and these instructions execute out of a
+        * locked I-TLB entry sitting in the fully associative I-TLB.
+        * This sequence should also never trap.
+        */
 __cheetah_flush_tlb_mm: /* 15 insns */
        rdpr            %pstate, %g7
        andn            %g7, PSTATE_IE, %g2
diff -Nru a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c
--- a/arch/sparc64/prom/p1275.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/prom/p1275.c 2005-04-03 21:16:17 -07:00
@@ -30,6 +30,16 @@
 extern void prom_cif_interface(void);
 extern void prom_cif_callback(void);
 
+static inline unsigned long spitfire_get_primary_context(void)
+{
+       unsigned long ctx;
+
+       __asm__ __volatile__("ldxa      [%1] %2, %0"
+                            : "=r" (ctx)
+                            : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+       return ctx;
+}
+
 /*
  * This provides SMP safety on the p1275buf. prom_callback() drops this lock
  * to allow recursive acquisition.
@@ -43,14 +53,9 @@
        int nargs, nrets, i;
        va_list list;
        long attrs, x;
-       long ctx = 0;
        
        p = p1275buf.prom_buffer;
-       ctx = spitfire_get_primary_context ();
-       if (ctx) {
-               flushw_user ();
-               spitfire_set_primary_context (0);
-       }
+       BUG_ON((spitfire_get_primary_context() & CTX_NR_MASK) != 0);
 
        spin_lock_irqsave(&prom_entry_lock, flags);
 
@@ -145,9 +150,6 @@
        x = p1275buf.prom_args [nargs + 3];
 
        spin_unlock_irqrestore(&prom_entry_lock, flags);
-
-       if (ctx)
-               spitfire_set_primary_context (ctx);
 
        return x;
 }
diff -Nru a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
--- a/include/asm-sparc64/mmu.h 2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/mmu.h 2005-04-03 21:16:17 -07:00
@@ -1,7 +1,99 @@
 #ifndef __MMU_H
 #define __MMU_H
 
-/* Default "unsigned long" context */
-typedef unsigned long mm_context_t;
+#include <linux/config.h>
+#include <asm/page.h>
+#include <asm/const.h>
 
+/*
+ * For the 8k pagesize kernel, use only 10 hw context bits to optimize some
+ * shifts in the fast tlbmiss handlers, instead of all 13 bits (specifically
+ * for vpte offset calculation). For other pagesizes, this optimization in
+ * the tlbhandlers can not be done; but still, all 13 bits can not be used
+ * because the tlb handlers use "andcc" instruction which sign extends 13
+ * bit arguments.
+ */
+#if PAGE_SHIFT == 13
+#define CTX_NR_BITS            10
+#else
+#define CTX_NR_BITS            12
 #endif
+
+#define TAG_CONTEXT_BITS       ((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL))
+
+/* UltraSPARC-III+ and later have a feature whereby you can
+ * select what page size the various Data-TLB instances in the
+ * chip will use.  In order to gracefully support this, we put the
+ * version field in a spot outside of the areas of the context register
+ * where this parameter is specified.
+ */
+#define CTX_VERSION_SHIFT      22
+#define CTX_VERSION_MASK       ((~0UL) << CTX_VERSION_SHIFT)
+
+#define CTX_PGSZ_8KB           _AC(0x0,UL)
+#define CTX_PGSZ_64KB          _AC(0x1,UL)
+#define CTX_PGSZ_512KB         _AC(0x2,UL)
+#define CTX_PGSZ_4MB           _AC(0x3,UL)
+#define CTX_PGSZ_BITS          _AC(0x7,UL)
+#define CTX_PGSZ0_NUC_SHIFT    61
+#define CTX_PGSZ1_NUC_SHIFT    58
+#define CTX_PGSZ0_SHIFT                16
+#define CTX_PGSZ1_SHIFT                19
+#define CTX_PGSZ_MASK          ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \
+                                (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT))
+
+#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
+#define CTX_PGSZ_BASE  CTX_PGSZ_8KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
+#define CTX_PGSZ_BASE  CTX_PGSZ_64KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
+#define CTX_PGSZ_BASE  CTX_PGSZ_512KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
+#define CTX_PGSZ_BASE  CTX_PGSZ_4MB
+#else
+#error No page size specified in kernel configuration
+#endif
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
+#define CTX_PGSZ_HUGE          CTX_PGSZ_4MB
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+#define CTX_PGSZ_HUGE          CTX_PGSZ_512KB
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+#define CTX_PGSZ_HUGE          CTX_PGSZ_64KB
+#endif
+
+#define CTX_PGSZ_KERN  CTX_PGSZ_4MB
+
+/* Thus, when running on UltraSPARC-III+ and later, we use the following
+ * PRIMARY_CONTEXT register values for the kernel context.
+ */
+#define CTX_CHEETAH_PLUS_NUC \
+       ((CTX_PGSZ_KERN << CTX_PGSZ0_NUC_SHIFT) | \
+        (CTX_PGSZ_BASE << CTX_PGSZ1_NUC_SHIFT))
+
+#define CTX_CHEETAH_PLUS_CTX0 \
+       ((CTX_PGSZ_KERN << CTX_PGSZ0_SHIFT) | \
+        (CTX_PGSZ_BASE << CTX_PGSZ1_SHIFT))
+
+/* If you want "the TLB context number" use CTX_NR_MASK.  If you
+ * want "the bits I program into the context registers" use
+ * CTX_HW_MASK.
+ */
+#define CTX_NR_MASK            TAG_CONTEXT_BITS
+#define CTX_HW_MASK            (CTX_NR_MASK | CTX_PGSZ_MASK)
+
+#define CTX_FIRST_VERSION      ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
+#define CTX_VALID(__ctx)       \
+        (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
+#define CTX_HWBITS(__ctx)      ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
+#define CTX_NRBITS(__ctx)      ((__ctx.sparc64_ctx_val) & CTX_NR_MASK)
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+       unsigned long   sparc64_ctx_val;
+} mm_context_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __MMU_H */
diff -Nru a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
--- a/include/asm-sparc64/mmu_context.h 2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/mmu_context.h 2005-04-03 21:16:17 -07:00
@@ -4,23 +4,6 @@
 
 /* Derived heavily from Linus's Alpha/AXP ASN code... */
 
-#include <asm/page.h>
-
-/*
- * For the 8k pagesize kernel, use only 10 hw context bits to optimize some shifts in
- * the fast tlbmiss handlers, instead of all 13 bits (specifically for vpte offset
- * calculation). For other pagesizes, this optimization in the tlbhandlers can not be 
- * done; but still, all 13 bits can not be used because the tlb handlers use "andcc"
- * instruction which sign extends 13 bit arguments.
- */
-#if PAGE_SHIFT == 13
-#define CTX_VERSION_SHIFT      10
-#define TAG_CONTEXT_BITS       0x3ff
-#else
-#define CTX_VERSION_SHIFT      12
-#define TAG_CONTEXT_BITS       0xfff
-#endif
-
 #ifndef __ASSEMBLY__
 
 #include <linux/spinlock.h>
@@ -35,19 +18,14 @@
 extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
-#define CTX_VERSION_MASK       ((~0UL) << CTX_VERSION_SHIFT)
-#define CTX_FIRST_VERSION      ((1UL << CTX_VERSION_SHIFT) + 1UL)
-#define CTX_VALID(__ctx)       \
-        (!(((__ctx) ^ tlb_context_cache) & CTX_VERSION_MASK))
-#define CTX_HWBITS(__ctx)      ((__ctx) & ~CTX_VERSION_MASK)
-
 extern void get_new_mmu_context(struct mm_struct *mm);
 
 /* Initialize a new mmu context.  This is invoked when a new
  * address space instance (unique or shared) is instantiated.
  * This just needs to set mm->context to an invalid context.
  */
-#define init_new_context(__tsk, __mm)  (((__mm)->context = 0UL), 0)
+#define init_new_context(__tsk, __mm)  \
+       (((__mm)->context.sparc64_ctx_val = 0UL), 0)
 
 /* Destroy a dead context.  This occurs when mmput drops the
  * mm_users count to zero, the mmaps have been released, and
@@ -59,7 +37,7 @@
 #define destroy_context(__mm)                                  \
 do {   spin_lock(&ctx_alloc_lock);                             \
        if (CTX_VALID((__mm)->context)) {                       \
-               unsigned long nr = CTX_HWBITS((__mm)->context); \
+               unsigned long nr = CTX_NRBITS((__mm)->context); \
                mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \
        }                                                       \
        spin_unlock(&ctx_alloc_lock);                           \
@@ -135,7 +113,8 @@
                 */
                if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
                        cpu_set(cpu, mm->cpu_vm_mask);
-                       __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
+                       __flush_tlb_mm(CTX_HWBITS(mm->context),
+                                      SECONDARY_CONTEXT);
                }
        }
        spin_unlock(&mm->page_table_lock);
diff -Nru a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
--- a/include/asm-sparc64/spitfire.h    2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/spitfire.h    2005-04-03 21:16:17 -07:00
@@ -99,46 +99,6 @@
                             : "r" (sfsr), "r" (TLB_SFSR), "i" (ASI_DMMU));
 }
 
-static __inline__ unsigned long spitfire_get_primary_context(void)
-{
-       unsigned long ctx;
-
-       __asm__ __volatile__("ldxa      [%1] %2, %0"
-                            : "=r" (ctx)
-                            : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-       return ctx;
-}
-
-static __inline__ void spitfire_set_primary_context(unsigned long ctx)
-{
-       __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
-                            "membar    #Sync"
-                            : /* No outputs */
-                            : "r" (ctx & 0x3ff),
-                              "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-       __asm__ __volatile__ ("membar #Sync" : : : "memory");
-}
-
-static __inline__ unsigned long spitfire_get_secondary_context(void)
-{
-       unsigned long ctx;
-
-       __asm__ __volatile__("ldxa      [%1] %2, %0"
-                            : "=r" (ctx)
-                            : "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU));
-       return ctx;
-}
-
-static __inline__ void spitfire_set_secondary_context(unsigned long ctx)
-{
-       __asm__ __volatile__("stxa      %0, [%1] %2\n\t"
-                            "membar    #Sync"
-                            : /* No outputs */
-                            : "r" (ctx & 0x3ff),
-                              "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU));
-       __asm__ __volatile__ ("membar #Sync" : : : "memory");
-}
-
 /* The data cache is write through, so this just invalidates the
  * specified line.
  */
diff -Nru a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
--- a/include/asm-sparc64/tlb.h 2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/tlb.h 2005-04-03 21:16:17 -07:00
@@ -89,9 +89,7 @@
        tlb_flush_mmu(mp);
 
        if (mp->tlb_frozen) {
-               unsigned long context = mm->context;
-
-               if (CTX_VALID(context))
+               if (CTX_VALID(mm->context))
                        do_flush_tlb_mm(mm);
                mp->tlb_frozen = 0;
        } else