ChangeSet 1.2181.2.55, 2005/03/28 15:29:13-08:00, [EMAIL PROTECTED]
[SPARC64]: Support >=cheetah+ dual-dtlbs properly.
UltraSPARC-III+ (aka cheetah+) and later chips have three
D-TLB units. One is fully associative with 16 entries; the
other two have 512 entries each, are 2-way sub-blocked, and
are indexed by the PAGE_MASK bits. This implies that the
two 512-entry TLBs need to know the page size in order to
index things properly.
The page size to use is encoded in page size fields within
the TLB context registers.
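(Illustrative sketch only, not part of the patch: decoding the page
size fields using the bit positions this changeset adds to
include/asm-sparc64/mmu.h. The selector encoding is 0 = 8KB, 1 = 64KB,
2 = 512KB, 3 = 4MB, and a 64-bit unsigned long is assumed.)

    #include <stdio.h>

    /* PGSZ0 drives the first 512-entry D-TLB, PGSZ1 the second; the
     * *_NUC copies select the sizes used while executing in the
     * nucleus (trap-level) context.
     */
    static void show_dtlb_pgsz(unsigned long ctx)
    {
            unsigned long pgsz0 = (ctx >> 16) & 0x7;  /* CTX_PGSZ0_SHIFT */
            unsigned long pgsz1 = (ctx >> 19) & 0x7;  /* CTX_PGSZ1_SHIFT */
            unsigned long nuc0  = (ctx >> 61) & 0x7;  /* CTX_PGSZ0_NUC_SHIFT */
            unsigned long nuc1  = (ctx >> 58) & 0x7;  /* CTX_PGSZ1_NUC_SHIFT */

            printf("pgsz0=%lu pgsz1=%lu nuc0=%lu nuc1=%lu\n",
                   pgsz0, pgsz1, nuc0, nuc1);
    }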
Up until this point we just left these fields at zero, which
meant that both 512-entry TLBs were used for 8K translations
only. Everything else (including the 4MB kernel mappings)
went into the 16-entry fully associative TLB.
Now, for the kernel, we use the first 512-entry TLB for 4MB
mappings and the second for PAGE_SIZE mappings (i.e. for
vmalloc and modules). For the user, we use the first
512-entry TLB for PAGE_SIZE and, if the process maps any
hugetlb pages, the second for HPAGE_SIZE; otherwise the
second half handles user PAGE_SIZE mappings as well.
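Expressed with the new macros (a sketch assuming an 8K base page size
and 4MB huge pages; set_user_huge_pgsz() is a made-up name that mirrors
the update done in hugetlb_prefault() below, minus the required context
flush and cross-call):

    #include <asm/mmu.h>

    /* Kernel context zero: dTLB512_0 gets 4MB pages, dTLB512_1 gets
     * PAGE_SIZE pages, with matching nucleus fields.  This is what
     * head.S and trampoline.S write into PRIMARY_CONTEXT on cheetah+.
     */
    unsigned long kernel_ctx0 = CTX_CHEETAH_PLUS_NUC | CTX_CHEETAH_PLUS_CTX0;

    /* User address space with hugetlb mappings: dTLB512_0 gets
     * PAGE_SIZE, dTLB512_1 gets HPAGE_SIZE.
     */
    static void set_user_huge_pgsz(mm_context_t *mmc)
    {
            unsigned long ctx = mmc->sparc64_ctx_val;

            ctx &= ~CTX_PGSZ_MASK;
            ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
            ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
            mmc->sparc64_ctx_val = ctx;
    }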
Most of this changeset is clerical. We move most of the
mm->context layout defines into asm/mmu.h and abstract the
mm->context type, so it is now much easier to see and trap
accesses to the context values. In particular, all context
handling can be found by grepping the sparc64 sources for
CTX_* and {PRIMARY,SECONDARY}_CONTEXT.
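For example (a sketch against the new include/asm-sparc64/mmu.h; the
function name is made up):

    #include <asm/mmu.h>

    /* CTX_HWBITS() is what gets programmed into the hardware context
     * registers (context number plus page size fields); CTX_NRBITS()
     * is just the context number, e.g. for indexing mmu_context_bmap.
     */
    static unsigned long context_views(mm_context_t ctx)
    {
            unsigned long hw = CTX_HWBITS(ctx);
            unsigned long nr = CTX_NRBITS(ctx);

            return hw ^ nr;   /* == sparc64_ctx_val & CTX_PGSZ_MASK */
    }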
Happily, the fast paths of the kernel are mostly untouched
by this new stuff. We add 2 cycles to trap entry, 1 cycle
to trap exit, and 2 cycles to the window trap fixup code.
Signed-off-by: David S. Miller <[EMAIL PROTECTED]>
arch/sparc64/kernel/dtlb_backend.S | 2
arch/sparc64/kernel/dtlb_base.S | 2
arch/sparc64/kernel/etrap.S | 53 +++++++++++++++++++-
arch/sparc64/kernel/head.S | 37 ++++++++++++--
arch/sparc64/kernel/rtrap.S | 22 ++++++++
arch/sparc64/kernel/setup.c | 16 ++++--
arch/sparc64/kernel/trampoline.S | 15 +++++
arch/sparc64/kernel/unaligned.c | 4 -
arch/sparc64/kernel/winfixup.S | 53 ++++++++++++++++----
arch/sparc64/mm/fault.c | 4 +
arch/sparc64/mm/hugetlbpage.c | 39 +++++++++++++++
arch/sparc64/mm/init.c | 55 +++++++++++++++++----
arch/sparc64/mm/tlb.c | 8 +--
arch/sparc64/mm/ultra.S | 10 +++
arch/sparc64/prom/p1275.c | 20 ++++---
include/asm-sparc64/mmu.h | 96 ++++++++++++++++++++++++++++++++++++-
include/asm-sparc64/mmu_context.h | 31 +----------
include/asm-sparc64/spitfire.h | 40 ---------------
include/asm-sparc64/tlb.h | 4 -
19 files changed, 391 insertions(+), 120 deletions(-)
diff -Nru a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
--- a/arch/sparc64/kernel/dtlb_backend.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/dtlb_backend.S 2005-04-03 21:16:17 -07:00
@@ -7,7 +7,7 @@
*/
#include <asm/pgtable.h>
-#include <asm/mmu_context.h>
+#include <asm/mmu.h>
#if PAGE_SHIFT == 13
#define SZ_BITS _PAGE_SZ8K
diff -Nru a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S
--- a/arch/sparc64/kernel/dtlb_base.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/dtlb_base.S 2005-04-03 21:16:17 -07:00
@@ -7,7 +7,7 @@
*/
#include <asm/pgtable.h>
-#include <asm/mmu_context.h>
+#include <asm/mmu.h>
/* %g1 TLB_SFSR (%g1 + %g1 == TLB_TAG_ACCESS)
* %g2 (KERN_HIGHBITS | KERN_LOWBITS)
diff -Nru a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
--- a/arch/sparc64/kernel/etrap.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/etrap.S 2005-04-03 21:16:17 -07:00
@@ -14,6 +14,7 @@
#include <asm/spitfire.h>
#include <asm/head.h>
#include <asm/processor.h>
+#include <asm/mmu.h>
#define TASK_REGOFF (THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ)
#define ETRAP_PSTATE1 (PSTATE_RMO | PSTATE_PRIV)
@@ -67,7 +68,13 @@
wrpr %g3, 0, %otherwin
wrpr %g2, 0, %wstate
- stxa %g0, [%l4] ASI_DMMU
+cplus_etrap_insn_1:
+ sethi %hi(0), %g3
+ sllx %g3, 32, %g3
+cplus_etrap_insn_2:
+ sethi %hi(0), %g2
+ or %g3, %g2, %g3
+ stxa %g3, [%l4] ASI_DMMU
flush %l6
wr %g0, ASI_AIUS, %asi
2: wrpr %g0, 0x0, %tl
@@ -207,7 +214,13 @@
mov PRIMARY_CONTEXT, %l4
wrpr %g3, 0, %otherwin
wrpr %g2, 0, %wstate
- stxa %g0, [%l4] ASI_DMMU
+cplus_etrap_insn_3:
+ sethi %hi(0), %g3
+ sllx %g3, 32, %g3
+cplus_etrap_insn_4:
+ sethi %hi(0), %g2
+ or %g3, %g2, %g3
+ stxa %g3, [%l4] ASI_DMMU
flush %l6
mov ASI_AIUS, %l7
@@ -248,4 +261,38 @@
#undef TASK_REGOFF
#undef ETRAP_PSTATE1
-#undef ETRAP_PSTATE2
+
+cplus_einsn_1:
+ sethi %uhi(CTX_CHEETAH_PLUS_NUC), %g3
+cplus_einsn_2:
+ sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+
+ .globl cheetah_plus_patch_etrap
+cheetah_plus_patch_etrap:
+ /* We configure the dTLB512_0 for 4MB pages and the
+ * dTLB512_1 for 8K pages when in context zero.
+ */
+ sethi %hi(cplus_einsn_1), %o0
+ sethi %hi(cplus_etrap_insn_1), %o2
+ lduw [%o0 + %lo(cplus_einsn_1)], %o1
+ or %o2, %lo(cplus_etrap_insn_1), %o2
+ stw %o1, [%o2]
+ flush %o2
+ sethi %hi(cplus_etrap_insn_3), %o2
+ or %o2, %lo(cplus_etrap_insn_3), %o2
+ stw %o1, [%o2]
+ flush %o2
+
+ sethi %hi(cplus_einsn_2), %o0
+ sethi %hi(cplus_etrap_insn_2), %o2
+ lduw [%o0 + %lo(cplus_einsn_2)], %o1
+ or %o2, %lo(cplus_etrap_insn_2), %o2
+ stw %o1, [%o2]
+ flush %o2
+ sethi %hi(cplus_etrap_insn_4), %o2
+ or %o2, %lo(cplus_etrap_insn_4), %o2
+ stw %o1, [%o2]
+ flush %o2
+
+ retl
+ nop
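(For readers unfamiliar with this boot-time patching idiom, here is a
conceptual C sketch of what cheetah_plus_patch_etrap does above; the
function name is made up and the real work is the sparc64 assembly.)

    #include <stdint.h>

    /* Each cplus_etrap_insn_N label marks a placeholder "sethi %hi(0)"
     * in the trap entry path; each cplus_einsn_N label holds a template
     * sethi carrying the real CTX_CHEETAH_PLUS_* immediate.  When head.S
     * detects a cheetah+ chip it copies the template instruction word
     * over the placeholder, then issues a "flush" so the rewritten
     * instruction is refetched.
     */
    static void patch_one_insn(uint32_t *placeholder, const uint32_t *template_insn)
    {
            *placeholder = *template_insn;  /* the lduw/stw pair above */
            /* followed by "flush placeholder" in the real assembly */
    }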
diff -Nru a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
--- a/arch/sparc64/kernel/head.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/head.S 2005-04-03 21:16:17 -07:00
@@ -25,6 +25,7 @@
#include <asm/dcu.h>
#include <asm/head.h>
#include <asm/ttable.h>
+#include <asm/mmu.h>
/* This section from from _start to sparc64_boot_end should fit into
* 0x0000.0000.0040.4000 to 0x0000.0000.0040.8000 and will be sharing space
@@ -515,14 +516,29 @@
membar #Sync
mov 2, %g2 /* Set TLB type to cheetah+. */
- BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g5,g7,1f)
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
mov 1, %g2 /* Set TLB type to cheetah. */
1: sethi %hi(tlb_type), %g5
stw %g2, [%g5 + %lo(tlb_type)]
- /* Patch copy/page operations to cheetah optimized versions. */
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
+ ba,pt %xcc, 2f
+ nop
+
+1: /* Patch context register writes to support nucleus page
+ * size correctly.
+ */
+ call cheetah_plus_patch_etrap
+ nop
+ call cheetah_plus_patch_rtrap
+ nop
+ call cheetah_plus_patch_winfixup
+ nop
+
+
+2: /* Patch copy/page operations to cheetah optimized versions. */
call cheetah_patch_copyops
nop
call cheetah_patch_cachetlbops
@@ -685,10 +701,23 @@
call init_irqwork_curcpu
nop
- sethi %hi(sparc64_ttable_tl0), %g5
call prom_set_trap_table
- mov %g5, %o0
+ sethi %hi(sparc64_ttable_tl0), %o0
+
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f)
+ ba,pt %xcc, 2f
+ nop
+1: /* Start using proper page size encodings in ctx register. */
+ sethi %uhi(CTX_CHEETAH_PLUS_NUC), %g3
+ mov PRIMARY_CONTEXT, %g1
+ sllx %g3, 32, %g3
+ sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+ or %g3, %g2, %g3
+ stxa %g3, [%g1] ASI_DMMU
+ membar #Sync
+
+2:
rdpr %pstate, %o1
or %o1, PSTATE_IE, %o1
wrpr %o1, 0, %pstate
diff -Nru a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
--- a/arch/sparc64/kernel/rtrap.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/rtrap.S 2005-04-03 21:16:17 -07:00
@@ -250,6 +250,10 @@
brnz,pn %l3, kern_rtt
mov PRIMARY_CONTEXT, %l7
ldxa [%l7 + %l7] ASI_DMMU, %l0
+cplus_rtrap_insn_1:
+ sethi %hi(0), %l1
+ sllx %l1, 32, %l1
+ or %l0, %l1, %l0
stxa %l0, [%l7] ASI_DMMU
flush %g6
rdpr %wstate, %l1
@@ -335,3 +339,21 @@
wr %g0, FPRS_DU, %fprs
ba,pt %xcc, rt_continue
stb %l5, [%g6 + TI_FPDEPTH]
+
+cplus_rinsn_1:
+ sethi %uhi(CTX_CHEETAH_PLUS_NUC), %l1
+
+ .globl cheetah_plus_patch_rtrap
+cheetah_plus_patch_rtrap:
+ /* We configure the dTLB512_0 for 4MB pages and the
+ * dTLB512_1 for 8K pages when in context zero.
+ */
+ sethi %hi(cplus_rinsn_1), %o0
+ sethi %hi(cplus_rtrap_insn_1), %o2
+ lduw [%o0 + %lo(cplus_rinsn_1)], %o1
+ or %o2, %lo(cplus_rtrap_insn_1), %o2
+ stw %o1, [%o2]
+ flush %o2
+
+ retl
+ nop
diff -Nru a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
--- a/arch/sparc64/kernel/setup.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/setup.c 2005-04-03 21:16:17 -07:00
@@ -47,6 +47,7 @@
#include <asm/timer.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/mmu.h>
#ifdef CONFIG_IP_PNP
#include <net/ipconfig.h>
@@ -157,11 +158,11 @@
for_each_process(p) {
mm = p->mm;
- if (CTX_HWBITS(mm->context) == ctx)
+ if (CTX_NRBITS(mm->context) == ctx)
break;
}
if (!mm ||
- CTX_HWBITS(mm->context) != ctx)
+ CTX_NRBITS(mm->context) != ctx)
goto done;
pgdp = pgd_offset(mm, va);
@@ -187,12 +188,19 @@
}
if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) {
+ unsigned long kernel_pctx = 0;
+
+ if (tlb_type == cheetah_plus)
+ kernel_pctx |= (CTX_CHEETAH_PLUS_NUC |
+ CTX_CHEETAH_PLUS_CTX0);
+
/* Spitfire Errata #32 workaround */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i"
(ASI_DMMU));
+ : "r" (kernel_pctx),
+ "r" (PRIMARY_CONTEXT),
+ "i" (ASI_DMMU));
/*
* Locked down tlb entry.
diff -Nru a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
--- a/arch/sparc64/kernel/trampoline.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/trampoline.S 2005-04-03 21:16:17 -07:00
@@ -15,6 +15,7 @@
#include <asm/spitfire.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
+#include <asm/mmu.h>
.data
.align 8
@@ -334,6 +335,20 @@
call init_irqwork_curcpu
nop
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f)
+ ba,pt %xcc, 2f
+ nop
+
+1: /* Start using proper page size encodings in ctx register. */
+ sethi %uhi(CTX_CHEETAH_PLUS_NUC), %g3
+ mov PRIMARY_CONTEXT, %g1
+ sllx %g3, 32, %g3
+ sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+ or %g3, %g2, %g3
+ stxa %g3, [%g1] ASI_DMMU
+ membar #Sync
+
+2:
rdpr %pstate, %o1
or %o1, PSTATE_IE, %o1
wrpr %o1, 0, %pstate
diff -Nru a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
--- a/arch/sparc64/kernel/unaligned.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/unaligned.c 2005-04-03 21:16:17 -07:00
@@ -379,8 +379,8 @@
printk(KERN_ALERT "Unable to handle kernel paging
request in mna handler");
printk(KERN_ALERT " at virtual address %016lx\n",address);
printk(KERN_ALERT "current->{mm,active_mm}->context = %016lx\n",
- (current->mm ? current->mm->context :
- current->active_mm->context));
+ (current->mm ? CTX_HWBITS(current->mm->context) :
+ CTX_HWBITS(current->active_mm->context)));
printk(KERN_ALERT "current->{mm,active_mm}->pgd = %016lx\n",
(current->mm ? (unsigned long) current->mm->pgd :
(unsigned long) current->active_mm->pgd));
diff -Nru a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
--- a/arch/sparc64/kernel/winfixup.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/kernel/winfixup.S 2005-04-03 21:16:17 -07:00
@@ -14,6 +14,25 @@
#include <asm/thread_info.h>
.text
+
+set_pcontext:
+cplus_winfixup_insn_1:
+ sethi %hi(0), %l1
+ mov PRIMARY_CONTEXT, %g1
+ sllx %l1, 32, %l1
+cplus_winfixup_insn_2:
+ sethi %hi(0), %g2
+ or %l1, %g2, %l1
+ stxa %l1, [%g1] ASI_DMMU
+ flush %g6
+ retl
+ nop
+
+cplus_wfinsn_1:
+ sethi %uhi(CTX_CHEETAH_PLUS_NUC), %l1
+cplus_wfinsn_2:
+ sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2
+
.align 32
/* Here are the rules, pay attention.
@@ -62,9 +81,8 @@
wrpr %g0, 0x0, %canrestore ! Standard etrap stuff.
wrpr %g2, 0x0, %wstate ! This must be consistent.
wrpr %g0, 0x0, %otherwin ! We know this.
- mov PRIMARY_CONTEXT, %g1 ! Change contexts...
- stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus.
- flush %g6 ! Flush instruction buffers
+ call set_pcontext ! Change contexts...
+ nop
rdpr %pstate, %l1 ! Prepare to change globals.
mov %g6, %o7 ! Get current.
@@ -183,9 +201,8 @@
wrpr %g2, 0x0, %wstate ! This must be consistent.
wrpr %g0, 0x0, %otherwin ! We know this.
- mov PRIMARY_CONTEXT, %g1 ! Change contexts...
- stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus.
- flush %g6 ! Flush instruction buffers
+ call set_pcontext ! Change contexts...
+ nop
rdpr %pstate, %l1 ! Prepare to change globals.
mov %g4, %o2 ! Setup args for
mov %g5, %o1 ! final call to mem_address_unaligned.
@@ -289,9 +306,8 @@
wrpr %g2, 0x0, %wstate ! This must be consistent.
wrpr %g0, 0x0, %otherwin ! We know this.
- mov PRIMARY_CONTEXT, %g1 ! Change contexts...
- stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus.
- flush %g6 ! Flush instruction buffers
+ call set_pcontext ! Change contexts...
+ nop
rdpr %pstate, %l1 ! Prepare to change globals.
mov %g4, %o1 ! Setup args for
mov %g5, %o2 ! final call to data_access_exception.
@@ -368,3 +384,22 @@
ba,pt %xcc, rtrap
clr %l6
+
+ .globl cheetah_plus_patch_winfixup
+cheetah_plus_patch_winfixup:
+ sethi %hi(cplus_wfinsn_1), %o0
+ sethi %hi(cplus_winfixup_insn_1), %o2
+ lduw [%o0 + %lo(cplus_wfinsn_1)], %o1
+ or %o2, %lo(cplus_winfixup_insn_1), %o2
+ stw %o1, [%o2]
+ flush %o2
+
+ sethi %hi(cplus_wfinsn_2), %o0
+ sethi %hi(cplus_winfixup_insn_2), %o2
+ lduw [%o0 + %lo(cplus_wfinsn_2)], %o1
+ or %o2, %lo(cplus_winfixup_insn_2), %o2
+ stw %o1, [%o2]
+ flush %o2
+
+ retl
+ nop
diff -Nru a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
--- a/arch/sparc64/mm/fault.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/fault.c 2005-04-03 21:16:17 -07:00
@@ -144,7 +144,9 @@
"at virtual address %016lx\n", (unsigned long)address);
}
printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n",
- (tsk->mm ? tsk->mm->context : tsk->active_mm->context));
+ (tsk->mm ?
+ CTX_HWBITS(tsk->mm->context) :
+ CTX_HWBITS(tsk->active_mm->context)));
printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n",
(tsk->mm ? (unsigned long) tsk->mm->pgd :
(unsigned long) tsk->active_mm->pgd));
diff -Nru a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
--- a/arch/sparc64/mm/hugetlbpage.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/hugetlbpage.c 2005-04-03 21:16:17 -07:00
@@ -20,6 +20,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
@@ -217,11 +218,49 @@
flush_tlb_range(vma, start, end);
}
+static void context_reload(void *__data)
+{
+ struct mm_struct *mm = __data;
+
+ if (mm == current->mm)
+ load_secondary_context(mm);
+}
+
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret = 0;
+
+ /* On UltraSPARC-III+ and later, configure the second half of
+ * the Data-TLB for huge pages.
+ */
+ if (tlb_type == cheetah_plus) {
+ unsigned long ctx;
+
+ spin_lock(&ctx_alloc_lock);
+ ctx = mm->context.sparc64_ctx_val;
+ ctx &= ~CTX_PGSZ_MASK;
+ ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+ ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
+
+ if (ctx != mm->context.sparc64_ctx_val) {
+ /* When changing the page size fields, we
+ * must perform a context flush so that no
+ * stale entries match. This flush must
+ * occur with the original context register
+ * settings.
+ */
+ do_flush_tlb_mm(mm);
+
+ /* Reload the context register of all processors
+ * also executing in this address space.
+ */
+ mm->context.sparc64_ctx_val = ctx;
+ on_each_cpu(context_reload, mm, 0, 0);
+ }
+ spin_unlock(&ctx_alloc_lock);
+ }
BUG_ON(vma->vm_start & ~HPAGE_MASK);
BUG_ON(vma->vm_end & ~HPAGE_MASK);
diff -Nru a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
--- a/arch/sparc64/mm/init.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/init.c 2005-04-03 21:16:17 -07:00
@@ -61,7 +61,7 @@
/* get_new_mmu_context() uses "cache + 1". */
DEFINE_SPINLOCK(ctx_alloc_lock);
unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
-#define CTX_BMAP_SLOTS (1UL << (CTX_VERSION_SHIFT - 6))
+#define CTX_BMAP_SLOTS (1UL << (CTX_NR_BITS - 6))
unsigned long mmu_context_bmap[CTX_BMAP_SLOTS];
/* References to special section boundaries */
@@ -195,7 +195,7 @@
}
if (get_thread_fault_code())
- __update_mmu_cache(vma->vm_mm->context & TAG_CONTEXT_BITS,
+ __update_mmu_cache(CTX_NRBITS(vma->vm_mm->context),
address, pte, get_thread_fault_code());
}
@@ -421,11 +421,15 @@
prom_printf("Remapping the kernel... ");
/* Spitfire Errata #32 workaround */
+ /* NOTE: Using plain zero for the context value is
+ * correct here, we are not using the Linux trap
+ * tables yet so we should not use the special
+ * UltraSPARC-III+ page size encodings yet.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+ : "r" (0), "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
switch (tlb_type) {
default:
@@ -485,6 +489,11 @@
tte_vaddr = (unsigned long) KERNBASE;
/* Spitfire Errata #32 workaround */
+ /* NOTE: Using plain zero for the context value is
+ * correct here, we are not using the Linux trap
+ * tables yet so we should not use the special
+ * UltraSPARC-III+ page size encodings yet.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -503,6 +512,11 @@
/* Spitfire Errata #32 workaround */
+ /* NOTE: Using plain zero for the context value is
+ * correct here, we are not using the Linux trap
+ * tables yet so we should not use the special
+ * UltraSPARC-III+ page size encodings yet.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -589,6 +603,9 @@
unsigned long tag;
/* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no cheetah+
+ * page size encodings.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -755,6 +772,9 @@
unsigned long data;
/* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no cheetah+
+ * page size encodings.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -766,6 +786,9 @@
unsigned long tag;
/* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no
+ * cheetah+ page size encodings.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -793,6 +816,9 @@
unsigned long data;
/* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no
+ * cheetah+ page size encodings.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -804,6 +830,9 @@
unsigned long tag;
/* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no
+ * cheetah+ page size encodings.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -959,6 +988,9 @@
if (tlb_type == spitfire) {
for (i = 0; i < 64; i++) {
/* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no
+ * cheetah+ page size encodings.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -974,6 +1006,9 @@
}
/* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no
+ * cheetah+ page size encodings.
+ */
__asm__ __volatile__("stxa %0, [%1] %2\n\t"
"flush %%g6"
: /* No outputs */
@@ -1007,11 +1042,14 @@
void get_new_mmu_context(struct mm_struct *mm)
{
unsigned long ctx, new_ctx;
+ unsigned long orig_pgsz_bits;
+
spin_lock(&ctx_alloc_lock);
- ctx = CTX_HWBITS(tlb_context_cache + 1);
- new_ctx = find_next_zero_bit(mmu_context_bmap, 1UL << CTX_VERSION_SHIFT, ctx);
- if (new_ctx >= (1UL << CTX_VERSION_SHIFT)) {
+ orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
+ ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
+ new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+ if (new_ctx >= (1 << CTX_NR_BITS)) {
new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
if (new_ctx >= ctx) {
int i;
@@ -1040,9 +1078,8 @@
new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
out:
tlb_context_cache = new_ctx;
+ mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
spin_unlock(&ctx_alloc_lock);
-
- mm->context = new_ctx;
}
#ifndef CONFIG_SMP
diff -Nru a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
--- a/arch/sparc64/mm/tlb.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/tlb.c 2005-04-03 21:16:17 -07:00
@@ -26,15 +26,13 @@
struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
if (mp->tlb_nr) {
- unsigned long context = mp->mm->context;
-
- if (CTX_VALID(context)) {
+ if (CTX_VALID(mp->mm->context)) {
#ifdef CONFIG_SMP
smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
&mp->vaddrs[0]);
#else
- __flush_tlb_pending(CTX_HWBITS(context), mp->tlb_nr,
- &mp->vaddrs[0]);
+ __flush_tlb_pending(CTX_HWBITS(mp->mm->context),
+ mp->tlb_nr, &mp->vaddrs[0]);
#endif
}
mp->tlb_nr = 0;
diff -Nru a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
--- a/arch/sparc64/mm/ultra.S 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/mm/ultra.S 2005-04-03 21:16:17 -07:00
@@ -223,7 +223,15 @@
or %o5, %o0, %o5
ba,a,pt %xcc, __prefill_itlb
- /* Cheetah specific versions, patched at boot time. */
+ /* Cheetah specific versions, patched at boot time.
+ *
+ * The writes to the PRIMARY_CONTEXT register in this file are
+ * safe even on Cheetah+ and later wrt. the page size fields.
+ * The nucleus page size fields do not matter because we make
+ * no data references, and these instructions execute out of a
+ * locked I-TLB entry sitting in the fully associative I-TLB.
+ * This sequence should also never trap.
+ */
__cheetah_flush_tlb_mm: /* 15 insns */
rdpr %pstate, %g7
andn %g7, PSTATE_IE, %g2
diff -Nru a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c
--- a/arch/sparc64/prom/p1275.c 2005-04-03 21:16:17 -07:00
+++ b/arch/sparc64/prom/p1275.c 2005-04-03 21:16:17 -07:00
@@ -30,6 +30,16 @@
extern void prom_cif_interface(void);
extern void prom_cif_callback(void);
+static inline unsigned long spitfire_get_primary_context(void)
+{
+ unsigned long ctx;
+
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (ctx)
+ : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+ return ctx;
+}
+
/*
* This provides SMP safety on the p1275buf. prom_callback() drops this lock
* to allow recursuve acquisition.
@@ -43,14 +53,9 @@
int nargs, nrets, i;
va_list list;
long attrs, x;
- long ctx = 0;
p = p1275buf.prom_buffer;
- ctx = spitfire_get_primary_context ();
- if (ctx) {
- flushw_user ();
- spitfire_set_primary_context (0);
- }
+ BUG_ON((spitfire_get_primary_context() & CTX_NR_MASK) != 0);
spin_lock_irqsave(&prom_entry_lock, flags);
@@ -145,9 +150,6 @@
x = p1275buf.prom_args [nargs + 3];
spin_unlock_irqrestore(&prom_entry_lock, flags);
-
- if (ctx)
- spitfire_set_primary_context (ctx);
return x;
}
diff -Nru a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
--- a/include/asm-sparc64/mmu.h 2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/mmu.h 2005-04-03 21:16:17 -07:00
@@ -1,7 +1,99 @@
#ifndef __MMU_H
#define __MMU_H
-/* Default "unsigned long" context */
-typedef unsigned long mm_context_t;
+#include <linux/config.h>
+#include <asm/page.h>
+#include <asm/const.h>
+/*
+ * For the 8k pagesize kernel, use only 10 hw context bits to optimize some
+ * shifts in the fast tlbmiss handlers, instead of all 13 bits (specifically
+ * for vpte offset calculation). For other pagesizes, this optimization in
+ * the tlbhandlers can not be done; but still, all 13 bits can not be used
+ * because the tlb handlers use "andcc" instruction which sign extends 13
+ * bit arguments.
+ */
+#if PAGE_SHIFT == 13
+#define CTX_NR_BITS 10
+#else
+#define CTX_NR_BITS 12
#endif
+
+#define TAG_CONTEXT_BITS ((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL))
+
+/* UltraSPARC-III+ and later have a feature whereby you can
+ * select what page size the various Data-TLB instances in the
+ * chip use. In order to gracefully support this, we put the version
+ * field in a spot outside of the areas of the context register
+ * where this parameter is specified.
+ */
+#define CTX_VERSION_SHIFT 22
+#define CTX_VERSION_MASK ((~0UL) << CTX_VERSION_SHIFT)
+
+#define CTX_PGSZ_8KB _AC(0x0,UL)
+#define CTX_PGSZ_64KB _AC(0x1,UL)
+#define CTX_PGSZ_512KB _AC(0x2,UL)
+#define CTX_PGSZ_4MB _AC(0x3,UL)
+#define CTX_PGSZ_BITS _AC(0x7,UL)
+#define CTX_PGSZ0_NUC_SHIFT 61
+#define CTX_PGSZ1_NUC_SHIFT 58
+#define CTX_PGSZ0_SHIFT 16
+#define CTX_PGSZ1_SHIFT 19
+#define CTX_PGSZ_MASK ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \
+ (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT))
+
+#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
+#define CTX_PGSZ_BASE CTX_PGSZ_8KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
+#define CTX_PGSZ_BASE CTX_PGSZ_64KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
+#define CTX_PGSZ_BASE CTX_PGSZ_512KB
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
+#define CTX_PGSZ_BASE CTX_PGSZ_4MB
+#else
+#error No page size specified in kernel configuration
+#endif
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
+#define CTX_PGSZ_HUGE CTX_PGSZ_4MB
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+#define CTX_PGSZ_HUGE CTX_PGSZ_512KB
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+#define CTX_PGSZ_HUGE CTX_PGSZ_64KB
+#endif
+
+#define CTX_PGSZ_KERN CTX_PGSZ_4MB
+
+/* Thus, when running on UltraSPARC-III+ and later, we use the following
+ * PRIMARY_CONTEXT register values for the kernel context.
+ */
+#define CTX_CHEETAH_PLUS_NUC \
+ ((CTX_PGSZ_KERN << CTX_PGSZ0_NUC_SHIFT) | \
+ (CTX_PGSZ_BASE << CTX_PGSZ1_NUC_SHIFT))
+
+#define CTX_CHEETAH_PLUS_CTX0 \
+ ((CTX_PGSZ_KERN << CTX_PGSZ0_SHIFT) | \
+ (CTX_PGSZ_BASE << CTX_PGSZ1_SHIFT))
+
+/* If you want "the TLB context number" use CTX_NR_MASK. If you
+ * want "the bits I program into the context registers" use
+ * CTX_HW_MASK.
+ */
+#define CTX_NR_MASK TAG_CONTEXT_BITS
+#define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK)
+
+#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
+#define CTX_VALID(__ctx) \
+ (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
+#define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
+#define CTX_NRBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_NR_MASK)
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+ unsigned long sparc64_ctx_val;
+} mm_context_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __MMU_H */
diff -Nru a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
--- a/include/asm-sparc64/mmu_context.h 2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/mmu_context.h 2005-04-03 21:16:17 -07:00
@@ -4,23 +4,6 @@
/* Derived heavily from Linus's Alpha/AXP ASN code... */
-#include <asm/page.h>
-
-/*
- * For the 8k pagesize kernel, use only 10 hw context bits to optimize some shifts in
- * the fast tlbmiss handlers, instead of all 13 bits (specifically for vpte offset
- * calculation). For other pagesizes, this optimization in the tlbhandlers can not be
- * done; but still, all 13 bits can not be used because the tlb handlers use "andcc"
- * instruction which sign extends 13 bit arguments.
- */
-#if PAGE_SHIFT == 13
-#define CTX_VERSION_SHIFT 10
-#define TAG_CONTEXT_BITS 0x3ff
-#else
-#define CTX_VERSION_SHIFT 12
-#define TAG_CONTEXT_BITS 0xfff
-#endif
-
#ifndef __ASSEMBLY__
#include <linux/spinlock.h>
@@ -35,19 +18,14 @@
extern unsigned long tlb_context_cache;
extern unsigned long mmu_context_bmap[];
-#define CTX_VERSION_MASK ((~0UL) << CTX_VERSION_SHIFT)
-#define CTX_FIRST_VERSION ((1UL << CTX_VERSION_SHIFT) + 1UL)
-#define CTX_VALID(__ctx) \
- (!(((__ctx) ^ tlb_context_cache) & CTX_VERSION_MASK))
-#define CTX_HWBITS(__ctx) ((__ctx) & ~CTX_VERSION_MASK)
-
extern void get_new_mmu_context(struct mm_struct *mm);
/* Initialize a new mmu context. This is invoked when a new
* address space instance (unique or shared) is instantiated.
* This just needs to set mm->context to an invalid context.
*/
-#define init_new_context(__tsk, __mm) (((__mm)->context = 0UL), 0)
+#define init_new_context(__tsk, __mm) \
+ (((__mm)->context.sparc64_ctx_val = 0UL), 0)
/* Destroy a dead context. This occurs when mmput drops the
* mm_users count to zero, the mmaps have been released, and
@@ -59,7 +37,7 @@
#define destroy_context(__mm) \
do { spin_lock(&ctx_alloc_lock); \
if (CTX_VALID((__mm)->context)) { \
- unsigned long nr = CTX_HWBITS((__mm)->context); \
+ unsigned long nr = CTX_NRBITS((__mm)->context); \
mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \
} \
spin_unlock(&ctx_alloc_lock); \
@@ -135,7 +113,8 @@
*/
if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
cpu_set(cpu, mm->cpu_vm_mask);
- __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
+ __flush_tlb_mm(CTX_HWBITS(mm->context),
+ SECONDARY_CONTEXT);
}
}
spin_unlock(&mm->page_table_lock);
diff -Nru a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
--- a/include/asm-sparc64/spitfire.h 2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/spitfire.h 2005-04-03 21:16:17 -07:00
@@ -99,46 +99,6 @@
: "r" (sfsr), "r" (TLB_SFSR), "i" (ASI_DMMU));
}
-static __inline__ unsigned long spitfire_get_primary_context(void)
-{
- unsigned long ctx;
-
- __asm__ __volatile__("ldxa [%1] %2, %0"
- : "=r" (ctx)
- : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
- return ctx;
-}
-
-static __inline__ void spitfire_set_primary_context(unsigned long ctx)
-{
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : /* No outputs */
- : "r" (ctx & 0x3ff),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
- __asm__ __volatile__ ("membar #Sync" : : : "memory");
-}
-
-static __inline__ unsigned long spitfire_get_secondary_context(void)
-{
- unsigned long ctx;
-
- __asm__ __volatile__("ldxa [%1] %2, %0"
- : "=r" (ctx)
- : "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU));
- return ctx;
-}
-
-static __inline__ void spitfire_set_secondary_context(unsigned long ctx)
-{
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : /* No outputs */
- : "r" (ctx & 0x3ff),
- "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU));
- __asm__ __volatile__ ("membar #Sync" : : : "memory");
-}
-
/* The data cache is write through, so this just invalidates the
* specified line.
*/
diff -Nru a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
--- a/include/asm-sparc64/tlb.h 2005-04-03 21:16:17 -07:00
+++ b/include/asm-sparc64/tlb.h 2005-04-03 21:16:17 -07:00
@@ -89,9 +89,7 @@
tlb_flush_mmu(mp);
if (mp->tlb_frozen) {
- unsigned long context = mm->context;
-
- if (CTX_VALID(context))
+ if (CTX_VALID(mm->context))
do_flush_tlb_mm(mm);
mp->tlb_frozen = 0;
} else