[Xen-ia64-devel] [PATCH] hand optimize for hyperprivop

2008-03-07 Thread Kouya Shimura
This patch slightly optimizes hyperprivop emulation,
especially hyper_rfi.
It makes the fstat system call on dom0 about 2% faster.

Signed-off-by: Kouya Shimura [EMAIL PROTECTED]

diff -r 71a8366fb212 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Fri Feb 29 09:19:58 2008 -0700
+++ b/xen/arch/ia64/xen/hyperprivop.S   Fri Mar 07 17:18:44 2008 +0900
@@ -67,19 +67,18 @@
 // r19 == ipsr.cpl
 // r31 == pr
 GLOBAL_ENTRY(fast_hyperprivop)
+   adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18
// HYPERPRIVOP_SSM_I?
// assumes domain interrupts pending, so just do it
cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
 (p7)   br.sptk.many hyper_ssm_i;;
 
// Check pending event indication
-   adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS, r18;;
-   ld8 r20=[r20]
+   ld8 r20=[r20]   // interrupt_mask_addr
;;
ld1 r22=[r20],-1// evtchn_upcall_mask
;;
ld1 r20=[r20]   // evtchn_upcall_pending
-   ;;
 
// HYPERPRIVOP_RFI?
cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
@@ -210,9 +209,8 @@ ENTRY(hyper_ssm_i)
// give up for now if: ipsr.be==1, ipsr.pp==1
mov r30=cr.ipsr
mov r29=cr.iip;;
-   extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-   cmp.ne p7,p0=r21,r0
-(p7)   br.sptk.many dispatch_break_fault ;;
+   tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
+(p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);;
ld4 r21=[r20];;
@@ -220,8 +218,7 @@ ENTRY(hyper_ssm_i)
st4 [r20]=r21;;
 #endif
// set shared_mem iip to instruction after HYPER_SSM_I
-   extr.u r20=r30,IA64_PSR_RI_BIT,2 ;;
-   cmp.eq p6,p7=2,r20 ;;
+   tbit.nz p6,p7=r30,IA64_PSR_RI_BIT+1 ;;  // cr.ipsr.ri = 2 ?
 (p6)   mov r20=0
 (p6)   adds r29=16,r29
 (p7)   adds r20=1,r20 ;;
@@ -346,8 +343,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
 (p6)   br.cond.spnt.few rp;;
mov r17=cr.ipsr;;
// slow path if: ipsr.pp==1
-   extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
-   cmp.ne p6,p0=r21,r0
+   tbit.nz p6,p0=r17,IA64_PSR_PP_BIT
 (p6)   br.cond.spnt.few rp;;
// definitely have a domain tick
mov cr.eoi=r0
@@ -537,8 +533,7 @@ GLOBAL_ENTRY(fast_break_reflect)
 #endif
mov r30=cr.ipsr
mov r29=cr.iip;;
-   extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-   cmp.ne p7,p0=r21,r0
+   tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
 (p7)   br.spnt.few dispatch_break_fault ;;
 movl r20=IA64_PSR_CPL ;; 
 and r22=r20,r30 ;;
@@ -722,8 +717,7 @@ GLOBAL_ENTRY(fast_access_reflect)
 #endif
mov r30=cr.ipsr
mov r29=cr.iip;;
-   extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-   cmp.ne p7,p0=r21,r0
+   tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
 (p7)   br.spnt.few dispatch_reflection ;;
extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
cmp.eq p7,p0=r21,r0
@@ -769,8 +763,7 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
cmp.eq p7,p0=r21,r0
 (p7)   br.spnt.few page_fault ;;
// slow path if strange ipsr or isr bits set
-   extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-   cmp.ne p7,p0=r21,r0
+   tbit.nz p7,p0=r30,IA64_PSR_PP_BIT,1
 (p7)   br.spnt.few page_fault ;;
movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
and r21=r16,r21;;
@@ -1023,45 +1016,27 @@ ENTRY(hyper_rfi)
 #ifndef FAST_RFI
br.spnt.few slow_vcpu_rfi ;;
 #endif
-   // if no interrupts pending, proceed
-   mov r30=r0
-   cmp.eq p7,p0=r20,r0
-(p7)   br.sptk.many 1f
-   ;;
-   adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
-   ld8 r21=[r20];; // r21 = vcr.ipsr
-   extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
-   mov r30=r22;;
-   // r30 determines whether we might deliver an immediate extint
-#ifndef RFI_TO_INTERRUPT // see beginning of file
-   cmp.ne p6,p0=r30,r0
-(p6)   br.cond.spnt.few slow_vcpu_rfi ;;
-#endif
-1:
-   adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
-   ld8 r21=[r20];; // r21 = vcr.ipsr
+   // if interrupts pending and vcr.ipsr.i=1, do it the slow way
+   adds r19=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
+   adds r23=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18
+   cmp.ne p8,p0=r20,r0;;   // evtchn_upcall_pending != 0
// if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
-   movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
-   and r22=r20,r21
-   ;;
-   cmp.ne p7,p0=r22,r20
-(p7)   br.spnt.few slow_vcpu_rfi ;;
+   ld8 r21=[r19],XSI_IIP_OFS-XSI_IPSR_OFS // r21=vcr.ipsr
+   movl r20=~(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
+   or r20=r20,r21
+   // p8 determines whether we might deliver an immediate extint
+(p8)   tbit.nz p8,p0=r21,IA64_PSR_I_BIT;;
+   cmp.ne p7,p0=-1,r20
+   ld4 r23=[r23]   // r23=metaphysical_mode
+#ifndef RFI_TO_INTERRUPT   // see beginning of file
+(p8)   br.cond.spnt.few slow_vcpu_rfi
+#endif
+(p7)   br.spnt.few slow_vcpu_rfi;;
// if was in metaphys mode, do it the 

Re: [Xen-ia64-devel] [PATCH] hand optimize for hyperprivop

2008-03-07 Thread Alex Williamson

On Fri, 2008-03-07 at 17:57 +0900, Kouya Shimura wrote:
> This patch slightly optimizes hyperprivop emulation
> especially hyper_rfi.
> It shows about 2% faster in fstat system call on dom0.

   Nice.  Applied.  Thanks,

Alex

-- 
Alex Williamson HP Open Source & Linux Org.


___
Xen-ia64-devel mailing list
Xen-ia64-devel@lists.xensource.com
http://lists.xensource.com/xen-ia64-devel