ChangeSet 1.2226, 2005/03/31 14:56:22-08:00, [EMAIL PROTECTED]
[SPARC64]: Put per-cpu area base into register g5.
FINALLY, we can put the per-cpu base into register
g5 on SMP. Many simplifications and improvements become
possible on top of this, but this is the base implementation.
Thanks to Rusty and the IA64 folks for urging that I pursue
this kind of scheme instead of locking stuff into the TLB
at some fixed virtual address.
Signed-off-by: David S. Miller <[EMAIL PROTECTED]>
 arch/sparc64/Makefile           |    4 +--
 arch/sparc64/kernel/etrap.S     |   19 +++++++++++++-----
 arch/sparc64/kernel/head.S      |   42 ++++++++++++++++++++++------------------
 arch/sparc64/kernel/rtrap.S     |    5 ++--
 arch/sparc64/kernel/semaphore.c |    6 ++---
 arch/sparc64/kernel/smp.c       |    9 ++++++++
 arch/sparc64/kernel/unaligned.c |    2 -
 arch/sparc64/kernel/winfixup.S  |   21 ++++++++++++++++++++
 arch/sparc64/prom/map.S         |    2 +
 include/asm-sparc64/cpudata.h   |    3 +-
 include/asm-sparc64/percpu.h    |   41 ++++++++++++++++++++++++++++++++++++++-
 include/asm-sparc64/pgalloc.h   |    2 -
 include/asm-sparc64/system.h    |    2 -
 include/asm-sparc64/tlb.h       |    2 -
 14 files changed, 124 insertions(+), 36 deletions(-)
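
For context on the scheme itself: every per-cpu variable has a canonical copy in
the kernel image plus one relocated copy per CPU, __per_cpu_offset[cpu] records
how far that CPU's copy sits from the canonical one, and keeping the current
CPU's offset in a reserved global register turns every local access into a
single add. Below is a minimal user-space C sketch of that layout; the names
(my_counter, percpu_copies) and the plain variable standing in for %g5 are
illustrative only, not kernel code.

/*
 * User-space sketch of the per-cpu-offset scheme (illustrative names,
 * not kernel code).  local_per_cpu_offset plays the role of %g5; the
 * cross-object pointer arithmetic below is what RELOC_HIDE() papers
 * over in the real kernel.
 */
#include <stdio.h>

#define NCPUS	4

static int my_counter;                  /* canonical in-image copy      */
static int percpu_copies[NCPUS];        /* one relocated copy per "CPU" */
static long __per_cpu_offset[NCPUS];    /* distance to each CPU's copy  */
static long local_per_cpu_offset;       /* stands in for register %g5   */

#define per_cpu(var, cpu) \
	(*(__typeof__(&(var)))((char *)&(var) + __per_cpu_offset[cpu]))
#define __get_cpu_var(var) \
	(*(__typeof__(&(var)))((char *)&(var) + local_per_cpu_offset))

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		__per_cpu_offset[cpu] = (char *)&percpu_copies[cpu] -
					(char *)&my_counter;
		percpu_copies[cpu] = my_counter;	/* copy the initial value */
	}

	local_per_cpu_offset = __per_cpu_offset[2];	/* pretend we are cpu 2 */
	__get_cpu_var(my_counter)++;			/* base + "%g5", one add */

	for (cpu = 0; cpu < NCPUS; cpu++)
		printf("cpu%d: %d\n", cpu, per_cpu(my_counter, cpu));
	return 0;
}
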
diff -Nru a/arch/sparc64/Makefile b/arch/sparc64/Makefile
--- a/arch/sparc64/Makefile 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/Makefile 2005-04-03 21:20:29 -07:00
@@ -41,10 +41,10 @@
ifneq ($(NEW_GCC),y)
CFLAGS := $(CFLAGS) -pipe -mno-fpu -mtune=ultrasparc -mmedlow \
- -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare
+ -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
else
CFLAGS := $(CFLAGS) -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow \
- -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare \
+ -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare \
$(CC_UNDECL)
AFLAGS += -m64 -mcpu=ultrasparc $(CC_UNDECL)
endif
diff -Nru a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
--- a/arch/sparc64/kernel/etrap.S 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/etrap.S 2005-04-03 21:20:29 -07:00
@@ -102,11 +102,15 @@
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
wrpr %g0, ETRAP_PSTATE2, %pstate
mov %l6, %g6
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g3
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g3, 3, %g3
+ ldx [%g2 + %g3], %g5
+#endif
jmpl %l2 + 0x4, %g0
ldx [%g6 + TI_TASK], %g4
- nop
- nop
- nop
3: ldub [%l6 + TI_FPDEPTH], %l5
add %l6, TI_FPSAVED + 1, %l4
@@ -254,10 +258,15 @@
stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
mov %l6, %g6
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g3
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g3, 3, %g3
+ ldx [%g2 + %g3], %g5
+#endif
ldx [%g6 + TI_TASK], %g4
done
- nop
- nop
#undef TASK_REGOFF
#undef ETRAP_PSTATE1
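
In C terms, the CONFIG_SMP block added to both trap-entry paths above reloads
%g5 from the offset table using the cpu number cached in thread_info. A rough
sketch of the computation follows; the simplified thread_info_sketch struct is
hypothetical, shown only to mirror the TI_CPU load.

/*
 * What the added instructions compute, one by one:
 *   ldub  [%g6 + TI_CPU], %g3             cpu = ti->cpu
 *   sethi %hi(__per_cpu_offset), %g2      \ %g2 = &__per_cpu_offset[0]
 *   or    %g2, %lo(__per_cpu_offset), %g2 /
 *   sllx  %g3, 3, %g3                     cpu * sizeof(unsigned long)
 *   ldx   [%g2 + %g3], %g5                %g5 = __per_cpu_offset[cpu]
 */
struct thread_info_sketch {		/* simplified stand-in for thread_info */
	unsigned char cpu;
	/* ... */
};

extern unsigned long __per_cpu_offset[];

static unsigned long reload_percpu_base(struct thread_info_sketch *ti)
{
	return __per_cpu_offset[ti->cpu];
}
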
diff -Nru a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
--- a/arch/sparc64/kernel/head.S 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/head.S 2005-04-03 21:20:29 -07:00
@@ -89,8 +89,8 @@
* PROM entry point is on %o4
*/
sparc64_boot:
- BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_boot)
- BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_boot)
+ BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
ba,pt %xcc, spitfire_boot
nop
@@ -103,11 +103,11 @@
mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
wr %g1, %asr18
- sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
- or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
- sllx %g5, 32, %g5
- or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
- stxa %g5, [%g0] ASI_DCU_CONTROL_REG
+ sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ or %g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+ sllx %g7, 32, %g7
+ or %g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7
+ stxa %g7, [%g0] ASI_DCU_CONTROL_REG
membar #Sync
cheetah_generic_boot:
@@ -492,7 +492,7 @@
stxa %g3, [%g2] ASI_DMMU
membar #Sync
- BRANCH_IF_ANY_CHEETAH(g1,g5,cheetah_tlb_fixup)
+ BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup)
ba,pt %xcc, spitfire_tlb_fixup
nop
@@ -520,8 +520,8 @@
mov 1, %g2 /* Set TLB type to cheetah. */
-1: sethi %hi(tlb_type), %g5
- stw %g2, [%g5 + %lo(tlb_type)]
+1: sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
ba,pt %xcc, 2f
@@ -567,8 +567,8 @@
/* Set TLB type to spitfire. */
mov 0, %g2
- sethi %hi(tlb_type), %g5
- stw %g2, [%g5 + %lo(tlb_type)]
+ sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
tlb_fixup_done:
sethi %hi(init_thread_union), %g6
@@ -596,12 +596,18 @@
#endif
wr %g0, ASI_P, %asi
- mov 1, %g5
- sllx %g5, THREAD_SHIFT, %g5
- sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
- add %g6, %g5, %sp
+ mov 1, %g1
+ sllx %g1, THREAD_SHIFT, %g1
+ sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+ add %g6, %g1, %sp
mov 0, %fp
+ /* Set the per-cpu pointer initially to zero; this makes
+ * the boot-cpu use the in-kernel-image per-cpu areas
+ * before setup_per_cpu_areas() is invoked.
+ */
+ clr %g5
+
wrpr %g0, 0, %wstate
wrpr %g0, 0x0, %tl
@@ -637,8 +643,8 @@
rdpr %pstate, %o1
mov %g6, %o2
wrpr %o1, (PSTATE_AG|PSTATE_IE), %pstate
- sethi %hi(sparc64_ttable_tl0), %g5
- wrpr %g5, %tba
+ sethi %hi(sparc64_ttable_tl0), %g1
+ wrpr %g1, %tba
mov %o2, %g6
/* Set up MMU globals */
diff -Nru a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
--- a/arch/sparc64/kernel/rtrap.S 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/rtrap.S 2005-04-03 21:20:29 -07:00
@@ -222,8 +222,9 @@
ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
- ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
- ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
+ brz,a,pn %l3, 1f
+ ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
+1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
diff -Nru a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
--- a/arch/sparc64/kernel/semaphore.c 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/semaphore.c 2005-04-03 21:20:29 -07:00
@@ -83,7 +83,7 @@
" restore\n"
" .previous\n"
: : "r" (sem), "i" (__up)
- : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+ : "g1", "g2", "g3", "g7", "memory", "cc");
}
static void __sched __down(struct semaphore * sem)
@@ -140,7 +140,7 @@
" restore\n"
" .previous\n"
: : "r" (sem), "i" (__down)
- : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+ : "g1", "g2", "g3", "g7", "memory", "cc");
}
int down_trylock(struct semaphore *sem)
@@ -246,6 +246,6 @@
" .previous\n"
: "=r" (ret)
: "0" (ret), "r" (sem), "i" (__down_interruptible)
- : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+ : "g1", "g2", "g3", "g7", "memory", "cc");
return ret;
}
diff -Nru a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
--- a/arch/sparc64/kernel/smp.c 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/smp.c 2005-04-03 21:20:29 -07:00
@@ -107,6 +107,10 @@
__flush_tlb_all();
+ __asm__ __volatile__("mov %0, %%g5\n\t"
+ : /* no outputs */
+ : "r" (__per_cpu_offset[cpuid]));
+
smp_setup_percpu_timer();
local_irq_enable();
@@ -1115,6 +1119,11 @@
}
current_thread_info()->cpu = hard_smp_processor_id();
+
+ __asm__ __volatile__("mov %0, %%g5\n\t"
+ : /* no outputs */
+ : "r" (__per_cpu_offset[smp_processor_id()]));
+
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), phys_cpu_present_map);
}
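
These two hunks only seed the register once per CPU at bring-up; after that, C
code reads %g5 through the global register variable declared in percpu.h below.
A hedged sketch of that read path (sparc64 gcc with -ffixed-g5 assumed; the
helper name is made up):

/*
 * With -ffixed-g5 in CFLAGS, gcc neither allocates nor clobbers %g5,
 * so a global register variable gives C code a stable view of it.
 */
register unsigned long __local_per_cpu_offset asm("g5");

static inline unsigned long this_cpu_percpu_base(void)
{
	return __local_per_cpu_offset;	/* one register read, no table walk */
}
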
diff -Nru a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
--- a/arch/sparc64/kernel/unaligned.c 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/unaligned.c 2005-04-03 21:20:29 -07:00
@@ -413,7 +413,7 @@
:
: "r" (regs), "r" (insn)
: "o0", "o1", "o2", "o3", "o4", "o5", "o7",
- "g1", "g2", "g3", "g4", "g5", "g7", "cc");
+ "g1", "g2", "g3", "g4", "g7", "cc");
} else {
unsigned long addr = compute_effective_address(regs, insn,
((insn >> 25) & 0x1f));
diff -Nru a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
--- a/arch/sparc64/kernel/winfixup.S 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/winfixup.S 2005-04-03 21:20:29 -07:00
@@ -93,6 +93,13 @@
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6
ldx [%g6 + TI_TASK], %g4
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g1
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g1, 3, %g1
+ ldx [%g2 + %g1], %g5
+#endif
/* This is the same as below, except we handle this a bit special
* since we must preserve %l5 and %l6, see comment above.
@@ -213,6 +220,13 @@
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g1
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g1, 3, %g1
+ ldx [%g2 + %g1], %g5
+#endif
call mem_address_unaligned
add %sp, PTREGS_OFF, %o0
@@ -318,6 +332,13 @@
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
+#ifdef CONFIG_SMP
+ ldub [%g6 + TI_CPU], %g1
+ sethi %hi(__per_cpu_offset), %g2
+ or %g2, %lo(__per_cpu_offset), %g2
+ sllx %g1, 3, %g1
+ ldx [%g2 + %g1], %g5
+#endif
call data_access_exception
add %sp, PTREGS_OFF, %o0
diff -Nru a/arch/sparc64/prom/map.S b/arch/sparc64/prom/map.S
--- a/arch/sparc64/prom/map.S 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/prom/map.S 2005-04-03 21:20:29 -07:00
@@ -32,6 +32,7 @@
ldx [%g2 + 0x08], %l0 ! prom_cif_handler
mov %g6, %i3
mov %g4, %i4
+ mov %g5, %i5
flushw
sethi %hi(prom_remap - call_method), %g7
@@ -62,6 +63,7 @@
/* Restore hard-coded globals. */
mov %i3, %g6
mov %i4, %g4
+ mov %i5, %g5
/* Wheee.... we are done. */
ret
diff -Nru a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
--- a/include/asm-sparc64/cpudata.h 2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/cpudata.h 2005-04-03 21:20:29 -07:00
@@ -25,6 +25,7 @@
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
-#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() __get_cpu_var(__cpu_data)
#endif /* _SPARC64_CPUDATA_H */
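
local_cpu_data() is simply __get_cpu_var(__cpu_data), so hot-path callers no
longer need smp_processor_id(). A hypothetical caller, assuming the clock_tick
member that cpuinfo_sparc carries in this era of the tree:

/* Hypothetical example: read a field of this CPU's cpuinfo_sparc. */
static inline unsigned long my_local_clock_tick(void)
{
	return local_cpu_data().clock_tick;	/* %g5-relative, no cpu id lookup */
}
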
diff -Nru a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
--- a/include/asm-sparc64/percpu.h 2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/percpu.h 2005-04-03 21:20:29 -07:00
@@ -1,6 +1,45 @@
#ifndef __ARCH_SPARC64_PERCPU__
#define __ARCH_SPARC64_PERCPU__
-#include <asm-generic/percpu.h>
+#include <linux/compiler.h>
+
+#define __GENERIC_PER_CPU
+#ifdef CONFIG_SMP
+
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* Separate out the type, so (int[3], foo) works. */
+#define DEFINE_PER_CPU(type, name) \
+ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
+
+register unsigned long __local_per_cpu_offset asm("g5");
+
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
+
+/* A macro to avoid #include hell... */
+#define percpu_modcopy(pcpudst, src, size) \
+do { \
+ unsigned int __i; \
+ for (__i = 0; __i < NR_CPUS; __i++) \
+ if (cpu_possible(__i)) \
+ memcpy((pcpudst)+__per_cpu_offset[__i], \
+ (src), (size)); \
+} while (0)
+#else /* ! SMP */
+
+#define DEFINE_PER_CPU(type, name) \
+ __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
+#define __get_cpu_var(var) per_cpu__##var
+
+#endif /* SMP */
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
#endif /* __ARCH_SPARC64_PERCPU__ */
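
Usage of the macros is unchanged from the generic per-cpu interface; only the
cost of __get_cpu_var() changes, since it now indexes off %g5 instead of going
through smp_processor_id(). A small example with a hypothetical counter:

#include <asm/percpu.h>

/* Hypothetical per-cpu counter; one copy per CPU lands in .data.percpu. */
static DEFINE_PER_CPU(unsigned long, trap_count);

void note_trap_on_this_cpu(void)
{
	__get_cpu_var(trap_count)++;		/* %g5-relative on SMP */
}

unsigned long read_trap_count(int cpu)
{
	return per_cpu(trap_count, cpu);	/* explicit-cpu form: table lookup */
}
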
diff -Nru a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
--- a/include/asm-sparc64/pgalloc.h 2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/pgalloc.h 2005-04-03 21:20:29 -07:00
@@ -14,7 +14,7 @@
/* Page table allocation/freeing. */
#ifdef CONFIG_SMP
/* Sliiiicck */
-#define pgt_quicklists cpu_data(smp_processor_id())
+#define pgt_quicklists local_cpu_data()
#else
extern struct pgtable_cache_struct {
unsigned long *pgd_cache;
diff -Nru a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
--- a/include/asm-sparc64/system.h 2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/system.h 2005-04-03 21:20:29 -07:00
@@ -215,7 +215,7 @@
"i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_FLAGS), "i" (TI_CWP), \
"i" (_TIF_NEWCHILD), "i" (TI_TASK) \
: "cc", \
- "g1", "g2", "g3", "g5", "g7", \
+ "g1", "g2", "g3", "g7", \
"l2", "l3", "l4", "l5", "l6", "l7", \
"i0", "i1", "i2", "i3", "i4", "i5", \
"o0", "o1", "o2", "o3", "o4", "o5", "o7" EXTRA_CLOBBER);\
diff -Nru a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
--- a/include/asm-sparc64/tlb.h 2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/tlb.h 2005-04-03 21:20:29 -07:00
@@ -44,7 +44,7 @@
static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *mp = &per_cpu(mmu_gathers, smp_processor_id());
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
BUG_ON(mp->tlb_nr);
-