Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=00b65985fb2fc542b855b03fcda0d0f2bab4f442
Commit:     00b65985fb2fc542b855b03fcda0d0f2bab4f442
Parent:     a0776ec8e97bf109e7d973d09fc3e1814eb32bfb
Author:     Chen, Kenneth W <[EMAIL PROTECTED]>
AuthorDate: Fri Oct 13 10:08:13 2006 -0700
Committer:  Tony Luck <[EMAIL PROTECTED]>
CommitDate: Tue Feb 6 15:04:48 2007 -0800

    [IA64] relax per-cpu TLB requirement to DTC
    
    Instead of pinning per-cpu TLB into a DTR, use DTC.  This will free up
    one TLB entry for application, or even kernel if access pattern to
    per-cpu data area has high temporal locality.
    
    Since per-cpu is mapped at the top of region 7 address, we just need to
    add special case in alt_dtlb_miss.  The physical address of per-cpu data
    is already conveniently stored in IA64_KR(PER_CPU_DATA).  Latency for
    alt_dtlb_miss is not affected as we can hide all the latency.  It was
    measured that alt_dtlb_miss handler has 23 cycles latency before and
    after the patch.
    
    The performance effect is massive for applications that put lots of tlb
    pressure on CPU.  Workload environment like database online transaction
    processing or application uses tera-byte of memory would benefit the most.
    Measurement with industry standard database benchmark shown an upward
    of 1.6% gain.  While smaller workloads like cpu, java also showing small
    improvement.
    
    Signed-off-by: Ken Chen <[EMAIL PROTECTED]>
    Signed-off-by: Tony Luck <[EMAIL PROTECTED]>
---
 arch/ia64/kernel/ivt.S     |   19 ++++++++++++++-----
 arch/ia64/kernel/mca_asm.S |   24 ------------------------
 arch/ia64/mm/init.c        |   11 +----------
 include/asm-ia64/kregs.h   |    3 +--
 4 files changed, 16 insertions(+), 41 deletions(-)

diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 6b7fcbd..34f44d8 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -374,6 +374,7 @@ ENTRY(alt_dtlb_miss)
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        mov r21=cr.ipsr
        mov r31=pr
+       mov r24=PERCPU_ADDR
        ;;
 #ifdef CONFIG_DISABLE_VHPT
        shr.u r22=r16,61                        // get the region number into 
r21
@@ -386,22 +387,30 @@ ENTRY(alt_dtlb_miss)
 (p8)   mov r29=b0                              // save b0
 (p8)   br.cond.dptk dtlb_fault
 #endif
+       cmp.ge p10,p11=r16,r24                  // access to per_cpu_data?
+       tbit.z p12,p0=r16,61                    // access to region 6?
+       mov r25=PERCPU_PAGE_SHIFT << 2
+       mov r26=PERCPU_PAGE_SIZE
+       nop.m 0
+       nop.b 0
+       ;;
+(p10)  mov r19=IA64_KR(PER_CPU_DATA)
+(p11)  and r19=r19,r16                         // clear non-ppn fields
        extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
        and r22=IA64_ISR_CODE_MASK,r20          // get the isr.code field
        tbit.nz p6,p7=r20,IA64_ISR_SP_BIT       // is speculation bit on?
-       shr.u r18=r16,57                        // move address bit 61 to bit 4
-       and r19=r19,r16                         // clear ed, reserved bits, and 
PTE control bits
        tbit.nz p9,p0=r20,IA64_ISR_NA_BIT       // is non-access bit on?
        ;;
-       andcm r18=0x10,r18      // bit 4=~address-bit(61)
+(p10)  sub r19=r19,r26
+(p10)  mov cr.itir=r25
        cmp.ne p8,p0=r0,r23
 (p9)   cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22  // check isr.code field
+(p12)  dep r17=-1,r17,4,1                      // set ma=UC for region 6 addr
 (p8)   br.cond.spnt page_fault
 
        dep r21=-1,r21,IA64_PSR_ED_BIT,1
-       or r19=r19,r17          // insert PTE control bits into r19
        ;;
-       or r19=r19,r18          // set bit 4 (uncached) if the access was to 
region 6
+       or r19=r19,r17          // insert PTE control bits into r19
 (p6)   mov cr.ipsr=r21
        ;;
 (p7)   itc.d r19               // insert the TLB entry
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index c6b607c..8c9c26a 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -101,14 +101,6 @@ ia64_do_tlb_purge:
        ;;
        srlz.d
        ;;
-       // 2. Purge DTR for PERCPU data.
-       movl r16=PERCPU_ADDR
-       mov r18=PERCPU_PAGE_SHIFT<<2
-       ;;
-       ptr.d r16,r18
-       ;;
-       srlz.d
-       ;;
        // 3. Purge ITR for PAL code.
        GET_THIS_PADDR(r2, ia64_mca_pal_base)
        ;;
@@ -196,22 +188,6 @@ ia64_reload_tr:
        srlz.i
        srlz.d
        ;;
-       // 2. Reload DTR register for PERCPU data.
-       GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
-       ;;
-       movl r16=PERCPU_ADDR            // vaddr
-       movl r18=PERCPU_PAGE_SHIFT<<2
-       ;;
-       mov cr.itir=r18
-       mov cr.ifa=r16
-       ;;
-       ld8 r18=[r2]                    // load per-CPU PTE
-       mov r16=IA64_TR_PERCPU_DATA;
-       ;;
-       itr.d dtr[r16]=r18
-       ;;
-       srlz.d
-       ;;
        // 3. Reload ITR for PAL code.
        GET_THIS_PADDR(r2, ia64_mca_pal_pte)
        ;;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1373fae..07d82cd 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -337,7 +337,7 @@ setup_gate (void)
 void __devinit
 ia64_mmu_init (void *my_cpu_data)
 {
-       unsigned long psr, pta, impl_va_bits;
+       unsigned long pta, impl_va_bits;
        extern void __devinit tlb_init (void);
 
 #ifdef CONFIG_DISABLE_VHPT
@@ -346,15 +346,6 @@ ia64_mmu_init (void *my_cpu_data)
 #      define VHPT_ENABLE_BIT  1
 #endif
 
-       /* Pin mapping for percpu area into TLB */
-       psr = ia64_clear_ic();
-       ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
-                pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
-                PERCPU_PAGE_SHIFT);
-
-       ia64_set_psr(psr);
-       ia64_srlz_i();
-
        /*
         * Check if the virtually mapped linear page table (VMLPT) overlaps 
with a mapped
         * address space.  The IA-64 architecture guarantees that at least 50 
bits of
diff --git a/include/asm-ia64/kregs.h b/include/asm-ia64/kregs.h
index 221b5cb..7e55a58 100644
--- a/include/asm-ia64/kregs.h
+++ b/include/asm-ia64/kregs.h
@@ -29,8 +29,7 @@
  */
 #define IA64_TR_KERNEL         0       /* itr0, dtr0: maps kernel image (code 
& data) */
 #define IA64_TR_PALCODE                1       /* itr1: maps PALcode as 
required by EFI */
-#define IA64_TR_PERCPU_DATA    1       /* dtr1: percpu data */
-#define IA64_TR_CURRENT_STACK  2       /* dtr2: maps kernel's memory- & 
register-stacks */
+#define IA64_TR_CURRENT_STACK  1       /* dtr1: maps kernel's memory- & 
register-stacks */
 
 /* Processor status register bits: */
 #define IA64_PSR_BE_BIT                1
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to