Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=a0776ec8e97bf109e7d973d09fc3e1814eb32bfb
Commit:     a0776ec8e97bf109e7d973d09fc3e1814eb32bfb
Parent:     62d0cfcb27cf755cebdc93ca95dabc83608007cd
Author:     Chen, Kenneth W <[EMAIL PROTECTED]>
AuthorDate: Fri Oct 13 10:05:45 2006 -0700
Committer:  Tony Luck <[EMAIL PROTECTED]>
CommitDate: Tue Feb 6 15:04:18 2007 -0800

    [IA64] remove per-cpu ia64_phys_stacked_size_p8
    
    It's not efficient to use a per-cpu variable just to store
    how many physical stack registers a cpu has.  Ever since the
    incarnation of ia64 up to the upcoming Montecito processor, that
    variable has been "glued" to 96.  Having a variable in memory means
    that the kernel is burning an extra cacheline access on every
    syscall and kernel exit path.  Such a "static" value is better
    served by the instruction patching utility that exists today.
    Convert ia64_phys_stacked_size_p8 into dynamic insn patching.
    
    This also has the pleasant side effect of eliminating access to
    the per-cpu area while psr.ic=0 in the kernel exit path. (fixable
    for per-cpu DTC work, but why bother?)
    
    There are some concerns about the default value that is encoded
    in the instruction in the kernel image.  It should not be a concern.
    The reasons are:
    
    (1) cpu_init() is called at CPU initialization.  There, we
        find out the physical stack register size from PAL and patch
        two instructions in the kernel exit code.  The code in question
        cannot be executed before the patching is done.
    
    (2) The current implementation stores zero in ia64_phys_stacked_size_p8,
        and that is the value the current kernel exit path loads.
        With the new code, it is equivalent to storing reg size 96
        in ia64_phys_stacked_size_p8, thus creating a better safety net.
        Given that (1) above can never fail, having (2) is just a bonus.
    
    All in all, this patch allows one less memory reference in the kernel
    exit path, thus reducing syscall and interrupt return latency, and
    avoids displacing potentially useful data in the CPU cache.
    
    Signed-off-by: Ken Chen <[EMAIL PROTECTED]>
    Signed-off-by: Tony Luck <[EMAIL PROTECTED]>
---
 arch/ia64/kernel/entry.S       |    7 ++-----
 arch/ia64/kernel/patch.c       |   20 ++++++++++++++++++++
 arch/ia64/kernel/setup.c       |    7 +++++--
 arch/ia64/kernel/vmlinux.lds.S |    7 +++++++
 include/asm-ia64/asmmacro.h    |   10 ++++++++++
 include/asm-ia64/patch.h       |    1 +
 include/asm-ia64/processor.h   |    1 +
 include/asm-ia64/sections.h    |    1 +
 8 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 15234ed..ac4b304 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall)
        ld8.fill r15=[r3]                       // M0|1 restore r15
        mov b6=r18                              // I0   restore b6
 
-       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+       LOAD_PHYS_STACK_REG_SIZE(r17)
        mov f9=f0                                       // F    clear f9
 (pKStk) br.cond.dpnt.many skip_rbs_switch              // B
 
@@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall)
        shr.u r18=r19,16                // I0|1 get byte size of existing 
"dirty" partition
        cover                           // B    add current frame into dirty 
partition & set cr.ifs
        ;;
-(pUStk) ld4 r17=[r17]                  // M0|1 r17 = 
cpu_data->phys_stacked_size_p8
        mov r19=ar.bsp                  // M2   get new backing store pointer
        mov f10=f0                      // F    clear f10
 
@@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        shr.u r18=r19,16        // get byte size of existing "dirty" partition
        ;;
        mov r16=ar.bsp          // get existing backing store pointer
-       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-       ;;
-       ld4 r17=[r17]           // r17 = cpu_data->phys_stacked_size_p8
+       LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)        br.cond.dpnt skip_rbs_switch
 
        /*
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index bc11bb0..e796e29 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -195,3 +195,23 @@ ia64_patch_gate (void)
        ia64_patch_vtop(START(vtop), END(vtop));
        ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
 }
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{
+       s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
+       s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
+       u64 ip, mask, imm;
+
+       /* see instruction format A4: adds r1 = imm13, r3 */
+       mask = (0x3fUL << 27) | (0x7f << 13);
+       imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
+
+       while (offp < end) {
+               ip = (u64) offp + *offp;
+               ia64_patch(ip, mask, imm);
+               ia64_fc(ip);
+               ++offp;
+       }
+       ia64_sync_i();
+       ia64_srlz_i();
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index ad567b8..f167b89 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void);
 
 DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
 DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
 struct screen_info screen_info;
@@ -836,6 +835,7 @@ void __cpuinit
 cpu_init (void)
 {
        extern void __cpuinit ia64_mmu_init (void *);
+       static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
        unsigned long num_phys_stacked;
        pal_vm_info_2_u_t vmi;
        unsigned int max_ctx;
@@ -949,7 +949,10 @@ cpu_init (void)
                num_phys_stacked = 96;
        }
        /* size of physical stacked register partition plus 8 bytes: */
-       __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+       if (num_phys_stacked > max_num_phys_stacked) {
+               ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+               max_num_phys_stacked = num_phys_stacked;
+       }
        platform_cpu_init();
        pm_idle = default_idle;
 }
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index d6083a0..d9599dc 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -78,6 +78,13 @@ SECTIONS
          __stop___mca_table = .;
        }
 
+  .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - 
LOAD_OFFSET)
+       {
+         __start___phys_stack_reg_patchlist = .;
+         *(.data.patch.phys_stack_reg)
+         __end___phys_stack_reg_patchlist = .;
+       }
+
   /* Global data */
   _data = .;
 
diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h
index c22b465..c1642fd 100644
--- a/include/asm-ia64/asmmacro.h
+++ b/include/asm-ia64/asmmacro.h
@@ -104,6 +104,16 @@ name:
 #endif
 
 /*
+ * If physical stack register size is different from IA64_NUM_PHYS_STACK_REG,
+ * dynamically patch the kernel for correct size.
+ */
+       .section ".data.patch.phys_stack_reg", "a"
+       .previous
+#define LOAD_PHYS_STACK_REG_SIZE(reg)                  \
+[1:]   adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0;        \
+       .xdata4 ".data.patch.phys_stack_reg", 1b-.
+
+/*
  * Up until early 2004, use of .align within a function caused bad unwind info.
  * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into 
nothing
  * otherwise.
diff --git a/include/asm-ia64/patch.h b/include/asm-ia64/patch.h
index 4797f35..a715430 100644
--- a/include/asm-ia64/patch.h
+++ b/include/asm-ia64/patch.h
@@ -20,6 +20,7 @@ extern void ia64_patch_imm60 (u64 insn_addr, u64 val);        
        /* patch "brl" w/ip-rel
 
 extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
 extern void ia64_patch_vtop (unsigned long start, unsigned long end);
+extern void ia64_patch_phys_stack_reg(unsigned long val);
 extern void ia64_patch_gate (void);
 
 #endif /* _ASM_IA64_PATCH_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 5830d36..88c728b 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -19,6 +19,7 @@
 #include <asm/ptrace.h>
 #include <asm/ustack.h>
 
+#define IA64_NUM_PHYS_STACK_REG        96
 #define IA64_NUM_DBG_REGS      8
 
 #define DEFAULT_MAP_BASE       __IA64_UL_CONST(0x2000000000000000)
diff --git a/include/asm-ia64/sections.h b/include/asm-ia64/sections.h
index e9eb7f6..dc42a35 100644
--- a/include/asm-ia64/sections.h
+++ b/include/asm-ia64/sections.h
@@ -11,6 +11,7 @@
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
+extern char __start___phys_stack_reg_patchlist[], 
__end___phys_stack_reg_patchlist[];
 extern char __start_gate_section[];
 extern char __start_gate_mckinley_e9_patchlist[], 
__end_gate_mckinley_e9_patchlist[];
 extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to