ChangeSet 1.2226, 2005/03/31 14:56:22-08:00, [EMAIL PROTECTED]

        [SPARC64]: Put per-cpu area base into register g5.
        
        FINALLY, we can put the per-cpu base into register
        g5 on SMP.  Many further simplifications and improvements
        are now possible, but this is the base implementation.
        
        Thanks to Rusty and the IA64 folks for urging that I pursue
        this kind of scheme instead of locking stuff into the TLB
        at some fixed virtual address.
        
        Signed-off-by: David S. Miller <[EMAIL PROTECTED]>
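
For orientation, here is a minimal sketch of the scheme; the names simply
mirror the include/asm-sparc64/percpu.h hunk below, so nothing in it is new
API.  The local CPU's per-cpu offset lives in the now-fixed global register
%g5, so __get_cpu_var() becomes a plain base-plus-offset access, while
another CPU's copy is reached through the __per_cpu_offset[] table.  The trap
entry paths (etrap.S, winfixup.S) reload %g5 from that table using TI_CPU,
and head.S clears %g5 on the boot cpu so the in-kernel-image copies are used
until setup_per_cpu_area() runs.

	/* Sketch of the access scheme -- see the percpu.h hunk below for
	 * the real definitions.
	 */
	register unsigned long __local_per_cpu_offset asm("g5");
	extern unsigned long __per_cpu_offset[NR_CPUS];

	/* Another CPU's copy: index the offset table by cpu number. */
	#define per_cpu(var, cpu) \
		(*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))

	/* Local CPU's copy: add the offset already sitting in %g5. */
	#define __get_cpu_var(var) \
		(*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))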



 arch/sparc64/Makefile           |    4 +--
 arch/sparc64/kernel/etrap.S     |   19 +++++++++++++-----
 arch/sparc64/kernel/head.S      |   42 ++++++++++++++++++++++------------------
 arch/sparc64/kernel/rtrap.S     |    5 ++--
 arch/sparc64/kernel/semaphore.c |    6 ++---
 arch/sparc64/kernel/smp.c       |    9 ++++++++
 arch/sparc64/kernel/unaligned.c |    2 -
 arch/sparc64/kernel/winfixup.S  |   21 ++++++++++++++++++++
 arch/sparc64/prom/map.S         |    2 +
 include/asm-sparc64/cpudata.h   |    3 +-
 include/asm-sparc64/percpu.h    |   41 ++++++++++++++++++++++++++++++++++++++-
 include/asm-sparc64/pgalloc.h   |    2 -
 include/asm-sparc64/system.h    |    2 -
 include/asm-sparc64/tlb.h       |    2 -
 14 files changed, 124 insertions(+), 36 deletions(-)


diff -Nru a/arch/sparc64/Makefile b/arch/sparc64/Makefile
--- a/arch/sparc64/Makefile     2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/Makefile     2005-04-03 21:20:29 -07:00
@@ -41,10 +41,10 @@
 
 ifneq ($(NEW_GCC),y)
   CFLAGS := $(CFLAGS) -pipe -mno-fpu -mtune=ultrasparc -mmedlow \
-           -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare
+           -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
 else
   CFLAGS := $(CFLAGS) -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow \
-           -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare \
+           -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare \
            $(CC_UNDECL)
   AFLAGS += -m64 -mcpu=ultrasparc $(CC_UNDECL)
 endif
diff -Nru a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
--- a/arch/sparc64/kernel/etrap.S       2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/etrap.S       2005-04-03 21:20:29 -07:00
@@ -102,11 +102,15 @@
                stx     %i7, [%sp + PTREGS_OFF + PT_V9_I7]
                wrpr    %g0, ETRAP_PSTATE2, %pstate
                mov     %l6, %g6
+#ifdef CONFIG_SMP
+               ldub    [%g6 + TI_CPU], %g3
+               sethi   %hi(__per_cpu_offset), %g2
+               or      %g2, %lo(__per_cpu_offset), %g2
+               sllx    %g3, 3, %g3
+               ldx     [%g2 + %g3], %g5
+#endif
                jmpl    %l2 + 0x4, %g0
                 ldx    [%g6 + TI_TASK], %g4
-               nop
-               nop
-               nop
 
 3:             ldub    [%l6 + TI_FPDEPTH], %l5
                add     %l6, TI_FPSAVED + 1, %l4
@@ -254,10 +258,15 @@
                stx     %i6, [%sp + PTREGS_OFF + PT_V9_I6]
                mov     %l6, %g6
                stx     %i7, [%sp + PTREGS_OFF + PT_V9_I7]
+#ifdef CONFIG_SMP
+               ldub    [%g6 + TI_CPU], %g3
+               sethi   %hi(__per_cpu_offset), %g2
+               or      %g2, %lo(__per_cpu_offset), %g2
+               sllx    %g3, 3, %g3
+               ldx     [%g2 + %g3], %g5
+#endif
                ldx     [%g6 + TI_TASK], %g4
                done
-               nop
-               nop
 
 #undef TASK_REGOFF
 #undef ETRAP_PSTATE1
diff -Nru a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
--- a/arch/sparc64/kernel/head.S        2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/head.S        2005-04-03 21:20:29 -07:00
@@ -89,8 +89,8 @@
         * PROM entry point is on %o4
         */
 sparc64_boot:
-       BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_boot)
-       BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_boot)
+       BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
+       BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
        ba,pt   %xcc, spitfire_boot
         nop
 
@@ -103,11 +103,11 @@
        mov     DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
        wr      %g1, %asr18
 
-       sethi   %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
-       or      %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
-       sllx    %g5, 32, %g5
-       or      %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
-       stxa    %g5, [%g0] ASI_DCU_CONTROL_REG
+       sethi   %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+       or      %g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+       sllx    %g7, 32, %g7
+       or      %g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7
+       stxa    %g7, [%g0] ASI_DCU_CONTROL_REG
        membar  #Sync
 
 cheetah_generic_boot:
@@ -492,7 +492,7 @@
        stxa    %g3, [%g2] ASI_DMMU
        membar  #Sync
 
-       BRANCH_IF_ANY_CHEETAH(g1,g5,cheetah_tlb_fixup)
+       BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup)
 
        ba,pt   %xcc, spitfire_tlb_fixup
         nop
@@ -520,8 +520,8 @@
 
        mov     1, %g2          /* Set TLB type to cheetah. */
 
-1:     sethi   %hi(tlb_type), %g5
-       stw     %g2, [%g5 + %lo(tlb_type)]
+1:     sethi   %hi(tlb_type), %g1
+       stw     %g2, [%g1 + %lo(tlb_type)]
 
        BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
        ba,pt   %xcc, 2f
@@ -567,8 +567,8 @@
 
        /* Set TLB type to spitfire. */
        mov     0, %g2
-       sethi   %hi(tlb_type), %g5
-       stw     %g2, [%g5 + %lo(tlb_type)]
+       sethi   %hi(tlb_type), %g1
+       stw     %g2, [%g1 + %lo(tlb_type)]
 
 tlb_fixup_done:
        sethi   %hi(init_thread_union), %g6
@@ -596,12 +596,18 @@
 #endif
 
        wr      %g0, ASI_P, %asi
-       mov     1, %g5
-       sllx    %g5, THREAD_SHIFT, %g5
-       sub     %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
-       add     %g6, %g5, %sp
+       mov     1, %g1
+       sllx    %g1, THREAD_SHIFT, %g1
+       sub     %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+       add     %g6, %g1, %sp
        mov     0, %fp
 
+       /* Set per-cpu pointer initially to zero, this makes
+        * the boot-cpu use the in-kernel-image per-cpu areas
+        * before setup_per_cpu_area() is invoked.
+        */
+       clr     %g5
+
        wrpr    %g0, 0, %wstate
        wrpr    %g0, 0x0, %tl
 
@@ -637,8 +643,8 @@
        rdpr    %pstate, %o1
        mov     %g6, %o2
        wrpr    %o1, (PSTATE_AG|PSTATE_IE), %pstate
-       sethi   %hi(sparc64_ttable_tl0), %g5
-       wrpr    %g5, %tba
+       sethi   %hi(sparc64_ttable_tl0), %g1
+       wrpr    %g1, %tba
        mov     %o2, %g6
 
        /* Set up MMU globals */
diff -Nru a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
--- a/arch/sparc64/kernel/rtrap.S       2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/rtrap.S       2005-04-03 21:20:29 -07:00
@@ -222,8 +222,9 @@
 
                ldx                     [%sp + PTREGS_OFF + PT_V9_G3], %g3
                ldx                     [%sp + PTREGS_OFF + PT_V9_G4], %g4
-               ldx                     [%sp + PTREGS_OFF + PT_V9_G5], %g5
-               ldx                     [%sp + PTREGS_OFF + PT_V9_G6], %g6
+               brz,a,pn                %l3, 1f
+                ldx                    [%sp + PTREGS_OFF + PT_V9_G5], %g5
+1:             ldx                     [%sp + PTREGS_OFF + PT_V9_G6], %g6
                ldx                     [%sp + PTREGS_OFF + PT_V9_G7], %g7
                wrpr                    %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
                ldx                     [%sp + PTREGS_OFF + PT_V9_I0], %i0
diff -Nru a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
--- a/arch/sparc64/kernel/semaphore.c   2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/semaphore.c   2005-04-03 21:20:29 -07:00
@@ -83,7 +83,7 @@
 "       restore\n"
 "      .previous\n"
        : : "r" (sem), "i" (__up)
-       : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+       : "g1", "g2", "g3", "g7", "memory", "cc");
 }
 
 static void __sched __down(struct semaphore * sem)
@@ -140,7 +140,7 @@
 "       restore\n"
 "      .previous\n"
        : : "r" (sem), "i" (__down)
-       : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+       : "g1", "g2", "g3", "g7", "memory", "cc");
 }
 
 int down_trylock(struct semaphore *sem)
@@ -246,6 +246,6 @@
 "      .previous\n"
        : "=r" (ret)
        : "0" (ret), "r" (sem), "i" (__down_interruptible)
-       : "g1", "g2", "g3", "g5", "g7", "memory", "cc");
+       : "g1", "g2", "g3", "g7", "memory", "cc");
        return ret;
 }
diff -Nru a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
--- a/arch/sparc64/kernel/smp.c 2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/smp.c 2005-04-03 21:20:29 -07:00
@@ -107,6 +107,10 @@
 
        __flush_tlb_all();
 
+       __asm__ __volatile__("mov %0, %%g5\n\t"
+                            : /* no outputs */
+                            : "r" (__per_cpu_offset[cpuid]));
+
        smp_setup_percpu_timer();
 
        local_irq_enable();
@@ -1115,6 +1119,11 @@
        }
 
        current_thread_info()->cpu = hard_smp_processor_id();
+
+       __asm__ __volatile__("mov %0, %%g5\n\t"
+                            : /* no outputs */
+                            : "r" (__per_cpu_offset[smp_processor_id()]));
+
        cpu_set(smp_processor_id(), cpu_online_map);
        cpu_set(smp_processor_id(), phys_cpu_present_map);
 }
diff -Nru a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
--- a/arch/sparc64/kernel/unaligned.c   2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/unaligned.c   2005-04-03 21:20:29 -07:00
@@ -413,7 +413,7 @@
                :
                : "r" (regs), "r" (insn)
                : "o0", "o1", "o2", "o3", "o4", "o5", "o7",
-                 "g1", "g2", "g3", "g4", "g5", "g7", "cc");
+                 "g1", "g2", "g3", "g4", "g7", "cc");
        } else {
                unsigned long addr = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f));
 
diff -Nru a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
--- a/arch/sparc64/kernel/winfixup.S    2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/kernel/winfixup.S    2005-04-03 21:20:29 -07:00
@@ -93,6 +93,13 @@
        wrpr            %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
        mov             %o7, %g6
        ldx             [%g6 + TI_TASK], %g4
+#ifdef CONFIG_SMP
+       ldub            [%g6 + TI_CPU], %g1
+       sethi           %hi(__per_cpu_offset), %g2
+       or              %g2, %lo(__per_cpu_offset), %g2
+       sllx            %g1, 3, %g1
+       ldx             [%g2 + %g1], %g5
+#endif
 
        /* This is the same as below, except we handle this a bit special
         * since we must preserve %l5 and %l6, see comment above.
@@ -213,6 +220,13 @@
        wrpr            %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
        mov             %o7, %g6                        ! Get current back.
        ldx             [%g6 + TI_TASK], %g4            ! Finish it.
+#ifdef CONFIG_SMP
+       ldub            [%g6 + TI_CPU], %g1
+       sethi           %hi(__per_cpu_offset), %g2
+       or              %g2, %lo(__per_cpu_offset), %g2
+       sllx            %g1, 3, %g1
+       ldx             [%g2 + %g1], %g5
+#endif
        call            mem_address_unaligned
         add            %sp, PTREGS_OFF, %o0
 
@@ -318,6 +332,13 @@
        wrpr            %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
        mov             %o7, %g6                        ! Get current back.
        ldx             [%g6 + TI_TASK], %g4            ! Finish it.
+#ifdef CONFIG_SMP
+       ldub            [%g6 + TI_CPU], %g1
+       sethi           %hi(__per_cpu_offset), %g2
+       or              %g2, %lo(__per_cpu_offset), %g2
+       sllx            %g1, 3, %g1
+       ldx             [%g2 + %g1], %g5
+#endif
        call            data_access_exception
         add            %sp, PTREGS_OFF, %o0
 
diff -Nru a/arch/sparc64/prom/map.S b/arch/sparc64/prom/map.S
--- a/arch/sparc64/prom/map.S   2005-04-03 21:20:29 -07:00
+++ b/arch/sparc64/prom/map.S   2005-04-03 21:20:29 -07:00
@@ -32,6 +32,7 @@
        ldx     [%g2 + 0x08], %l0               ! prom_cif_handler
        mov     %g6, %i3
        mov     %g4, %i4
+       mov     %g5, %i5
        flushw
 
        sethi   %hi(prom_remap - call_method), %g7
@@ -62,6 +63,7 @@
        /* Restore hard-coded globals. */
        mov     %i3, %g6
        mov     %i4, %g4
+       mov     %i5, %g5
 
        /* Wheee.... we are done. */
        ret
diff -Nru a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
--- a/include/asm-sparc64/cpudata.h     2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/cpudata.h     2005-04-03 21:20:29 -07:00
@@ -25,6 +25,7 @@
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
-#define cpu_data(__cpu)        per_cpu(__cpu_data, (__cpu))
+#define cpu_data(__cpu)                per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data()       __get_cpu_var(__cpu_data)
 
 #endif /* _SPARC64_CPUDATA_H */
diff -Nru a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
--- a/include/asm-sparc64/percpu.h      2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/percpu.h      2005-04-03 21:20:29 -07:00
@@ -1,6 +1,45 @@
 #ifndef __ARCH_SPARC64_PERCPU__
 #define __ARCH_SPARC64_PERCPU__
 
-#include <asm-generic/percpu.h>
+#include <linux/compiler.h>
+
+#define __GENERIC_PER_CPU
+#ifdef CONFIG_SMP
+
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* Separate out the type, so (int[3], foo) works. */
+#define DEFINE_PER_CPU(type, name) \
+    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
+
+register unsigned long __local_per_cpu_offset asm("g5");
+
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
+
+/* A macro to avoid #include hell... */
+#define percpu_modcopy(pcpudst, src, size)                     \
+do {                                                           \
+       unsigned int __i;                                       \
+       for (__i = 0; __i < NR_CPUS; __i++)                     \
+               if (cpu_possible(__i))                          \
+                       memcpy((pcpudst)+__per_cpu_offset[__i], \
+                              (src), (size));                  \
+} while (0)
+#else /* ! SMP */
+
+#define DEFINE_PER_CPU(type, name) \
+    __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu)                      (*((void)cpu, &per_cpu__##var))
+#define __get_cpu_var(var)                     per_cpu__##var
+
+#endif /* SMP */
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
 
 #endif /* __ARCH_SPARC64_PERCPU__ */
diff -Nru a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
--- a/include/asm-sparc64/pgalloc.h     2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/pgalloc.h     2005-04-03 21:20:29 -07:00
@@ -14,7 +14,7 @@
 /* Page table allocation/freeing. */
 #ifdef CONFIG_SMP
 /* Sliiiicck */
-#define pgt_quicklists cpu_data(smp_processor_id())
+#define pgt_quicklists local_cpu_data()
 #else
 extern struct pgtable_cache_struct {
        unsigned long *pgd_cache;
diff -Nru a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
--- a/include/asm-sparc64/system.h      2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/system.h      2005-04-03 21:20:29 -07:00
@@ -215,7 +215,7 @@
          "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_FLAGS), "i" (TI_CWP),  \
          "i" (_TIF_NEWCHILD), "i" (TI_TASK)                            \
        : "cc",                                                         \
-               "g1", "g2", "g3",       "g5",       "g7",               \
+               "g1", "g2", "g3",                   "g7",               \
                      "l2", "l3", "l4", "l5", "l6", "l7",               \
          "i0", "i1", "i2", "i3", "i4", "i5",                           \
          "o0", "o1", "o2", "o3", "o4", "o5",       "o7" EXTRA_CLOBBER);\
diff -Nru a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
--- a/include/asm-sparc64/tlb.h 2005-04-03 21:20:29 -07:00
+++ b/include/asm-sparc64/tlb.h 2005-04-03 21:20:29 -07:00
@@ -44,7 +44,7 @@
 
 static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-       struct mmu_gather *mp = &per_cpu(mmu_gathers, smp_processor_id());
+       struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
 
        BUG_ON(mp->tlb_nr);
 