Re: [RFC PATCH v2 11/13] powerpc/syscall: Avoid stack frame in likely part of syscall_call_exception()

2020-04-06 Thread Christophe Leroy




Le 06/04/2020 à 03:29, Nicholas Piggin a écrit :

Christophe Leroy's on April 6, 2020 3:44 am:

When r3 is not modified, reload it from regs->orig_gpr3 to free
volatile registers. This avoids a stack frame for the likely part
of system_call_exception()

Before : 353 cycles on null_syscall
After  : 347 cycles on null_syscall



[...]



Signed-off-by: Christophe Leroy 
---
  arch/powerpc/kernel/syscall.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 69d75fc4a5eb..630c423e089a 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -91,6 +91,8 @@ notrace long system_call_exception(long r3, long r4, long r5,
  
 	} else if (unlikely(r0 >= NR_syscalls)) {
 		return -ENOSYS;
+	} else {
+		r3 = regs->orig_gpr3;
 	}


So this just gives enough volatiles to avoid spilling to stack? I wonder
about other various options here if they would cause a spill anyway.

Interesting optimisation, it would definitely need a comment. Would be
nice if we had a way to tell the compiler that a local can be reloaded
from a particular address.


Ok, comment added.

Christophe


Re: [RFC PATCH v2 11/13] powerpc/syscall: Avoid stack frame in likely part of syscall_call_exception()

2020-04-05 Thread Nicholas Piggin
Christophe Leroy's on April 6, 2020 3:44 am:
> When r3 is not modified, reload it from regs->orig_gpr3 to free
> volatile registers. This avoids a stack frame for the likely part
> of system_call_exception()
> 
> Before : 353 cycles on null_syscall
> After  : 347 cycles on null_syscall
> 
> Before the patch:
> 
> c000b4d4 <system_call_exception>:
> c000b4d4:   7c 08 02 a6     mflr    r0
> c000b4d8:   94 21 ff e0     stwu    r1,-32(r1)
> c000b4dc:   93 e1 00 1c     stw     r31,28(r1)
> c000b4e0:   90 01 00 24     stw     r0,36(r1)
> c000b4e4:   90 6a 00 88     stw     r3,136(r10)
> c000b4e8:   81 6a 00 84     lwz     r11,132(r10)
> c000b4ec:   69 6b 00 02     xori    r11,r11,2
> c000b4f0:   55 6b ff fe     rlwinm  r11,r11,31,31,31
> c000b4f4:   0f 0b 00 00     twnei   r11,0
> c000b4f8:   81 6a 00 a0     lwz     r11,160(r10)
> c000b4fc:   55 6b 07 fe     clrlwi  r11,r11,31
> c000b500:   0f 0b 00 00     twnei   r11,0
> c000b504:   7c 0c 42 e6     mftb    r0
> c000b508:   83 e2 00 08     lwz     r31,8(r2)
> c000b50c:   81 82 00 28     lwz     r12,40(r2)
> c000b510:   90 02 00 24     stw     r0,36(r2)
> c000b514:   7d 8c f8 50     subf    r12,r12,r31
> c000b518:   7c 0c 02 14     add     r0,r12,r0
> c000b51c:   90 02 00 08     stw     r0,8(r2)
> c000b520:   7c 10 13 a6     mtspr   80,r0
> c000b524:   81 62 00 70     lwz     r11,112(r2)
> c000b528:   71 60 86 91     andi.   r0,r11,34449
> c000b52c:   40 82 00 34     bne     c000b560 <system_call_exception+0x8c>
> c000b530:   2b 89 01 b6     cmplwi  cr7,r9,438
> c000b534:   41 9d 00 64     bgt     cr7,c000b598 <system_call_exception+0xc4>
> 
> c000b538:   3d 40 c0 5c     lis     r10,-16292
> c000b53c:   55 29 10 3a     rlwinm  r9,r9,2,0,29
> c000b540:   39 4a 41 e8     addi    r10,r10,16872
> c000b544:   80 01 00 24     lwz     r0,36(r1)
> c000b548:   7d 2a 48 2e     lwzx    r9,r10,r9
> c000b54c:   7c 08 03 a6     mtlr    r0
> c000b550:   7d 29 03 a6     mtctr   r9
> c000b554:   83 e1 00 1c     lwz     r31,28(r1)
> c000b558:   38 21 00 20     addi    r1,r1,32
> c000b55c:   4e 80 04 20     bctr
> 
> After the patch:
> 
> c000b4d4 <system_call_exception>:
> c000b4d4:   81 6a 00 84     lwz     r11,132(r10)
> c000b4d8:   90 6a 00 88     stw     r3,136(r10)
> c000b4dc:   69 6b 00 02     xori    r11,r11,2
> c000b4e0:   55 6b ff fe     rlwinm  r11,r11,31,31,31
> c000b4e4:   0f 0b 00 00     twnei   r11,0
> c000b4e8:   80 6a 00 a0     lwz     r3,160(r10)
> c000b4ec:   54 63 07 fe     clrlwi  r3,r3,31
> c000b4f0:   0f 03 00 00     twnei   r3,0
> c000b4f4:   7d 6c 42 e6     mftb    r11
> c000b4f8:   81 82 00 08     lwz     r12,8(r2)
> c000b4fc:   80 02 00 28     lwz     r0,40(r2)
> c000b500:   91 62 00 24     stw     r11,36(r2)
> c000b504:   7c 00 60 50     subf    r0,r0,r12
> c000b508:   7d 60 5a 14     add     r11,r0,r11
> c000b50c:   91 62 00 08     stw     r11,8(r2)
> c000b510:   7c 10 13 a6     mtspr   80,r0
> c000b514:   80 62 00 70     lwz     r3,112(r2)
> c000b518:   70 6b 86 91     andi.   r11,r3,34449
> c000b51c:   40 82 00 28     bne     c000b544 <system_call_exception+0x70>
> c000b520:   2b 89 01 b6     cmplwi  cr7,r9,438
> c000b524:   41 9d 00 84     bgt     cr7,c000b5a8 <system_call_exception+0xd4>
> 
> c000b528:   80 6a 00 88     lwz     r3,136(r10)
> c000b52c:   3d 40 c0 5c     lis     r10,-16292
> c000b530:   55 29 10 3a     rlwinm  r9,r9,2,0,29
> c000b534:   39 4a 41 e4     addi    r10,r10,16868
> c000b538:   7d 2a 48 2e     lwzx    r9,r10,r9
> c000b53c:   7d 29 03 a6     mtctr   r9
> c000b540:   4e 80 04 20     bctr
> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/kernel/syscall.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
> index 69d75fc4a5eb..630c423e089a 100644
> --- a/arch/powerpc/kernel/syscall.c
> +++ b/arch/powerpc/kernel/syscall.c
> @@ -91,6 +91,8 @@ notrace long system_call_exception(long r3, long r4, long r5,
>  
>  	} else if (unlikely(r0 >= NR_syscalls)) {
>  		return -ENOSYS;
> +	} else {
> +		r3 = regs->orig_gpr3;
>  	}

So this just gives enough volatiles to avoid spilling to stack? I wonder
about other various options here if they would cause a spill anyway.

Interesting optimisation, it would definitely need a comment. Would be
nice if we had a way to tell the compiler that a local can be reloaded
from a particular address.

Thanks,
Nick



[RFC PATCH v2 11/13] powerpc/syscall: Avoid stack frame in likely part of syscall_call_exception()

2020-04-05 Thread Christophe Leroy
When r3 is not modified, reload it from regs->orig_gpr3 to free
volatile registers. This avoids a stack frame for the likely part
of system_call_exception()

Before : 353 cycles on null_syscall
After  : 347 cycles on null_syscall

Before the patch:

c000b4d4 <system_call_exception>:
c000b4d4:   7c 08 02 a6     mflr    r0
c000b4d8:   94 21 ff e0     stwu    r1,-32(r1)
c000b4dc:   93 e1 00 1c     stw     r31,28(r1)
c000b4e0:   90 01 00 24     stw     r0,36(r1)
c000b4e4:   90 6a 00 88     stw     r3,136(r10)
c000b4e8:   81 6a 00 84     lwz     r11,132(r10)
c000b4ec:   69 6b 00 02     xori    r11,r11,2
c000b4f0:   55 6b ff fe     rlwinm  r11,r11,31,31,31
c000b4f4:   0f 0b 00 00     twnei   r11,0
c000b4f8:   81 6a 00 a0     lwz     r11,160(r10)
c000b4fc:   55 6b 07 fe     clrlwi  r11,r11,31
c000b500:   0f 0b 00 00     twnei   r11,0
c000b504:   7c 0c 42 e6     mftb    r0
c000b508:   83 e2 00 08     lwz     r31,8(r2)
c000b50c:   81 82 00 28     lwz     r12,40(r2)
c000b510:   90 02 00 24     stw     r0,36(r2)
c000b514:   7d 8c f8 50     subf    r12,r12,r31
c000b518:   7c 0c 02 14     add     r0,r12,r0
c000b51c:   90 02 00 08     stw     r0,8(r2)
c000b520:   7c 10 13 a6     mtspr   80,r0
c000b524:   81 62 00 70     lwz     r11,112(r2)
c000b528:   71 60 86 91     andi.   r0,r11,34449
c000b52c:   40 82 00 34     bne     c000b560 <system_call_exception+0x8c>
c000b530:   2b 89 01 b6     cmplwi  cr7,r9,438
c000b534:   41 9d 00 64     bgt     cr7,c000b598 <system_call_exception+0xc4>

c000b538:   3d 40 c0 5c     lis     r10,-16292
c000b53c:   55 29 10 3a     rlwinm  r9,r9,2,0,29
c000b540:   39 4a 41 e8     addi    r10,r10,16872
c000b544:   80 01 00 24     lwz     r0,36(r1)
c000b548:   7d 2a 48 2e     lwzx    r9,r10,r9
c000b54c:   7c 08 03 a6     mtlr    r0
c000b550:   7d 29 03 a6     mtctr   r9
c000b554:   83 e1 00 1c     lwz     r31,28(r1)
c000b558:   38 21 00 20     addi    r1,r1,32
c000b55c:   4e 80 04 20     bctr

After the patch:

c000b4d4 <system_call_exception>:
c000b4d4:   81 6a 00 84     lwz     r11,132(r10)
c000b4d8:   90 6a 00 88     stw     r3,136(r10)
c000b4dc:   69 6b 00 02     xori    r11,r11,2
c000b4e0:   55 6b ff fe     rlwinm  r11,r11,31,31,31
c000b4e4:   0f 0b 00 00     twnei   r11,0
c000b4e8:   80 6a 00 a0     lwz     r3,160(r10)
c000b4ec:   54 63 07 fe     clrlwi  r3,r3,31
c000b4f0:   0f 03 00 00     twnei   r3,0
c000b4f4:   7d 6c 42 e6     mftb    r11
c000b4f8:   81 82 00 08     lwz     r12,8(r2)
c000b4fc:   80 02 00 28     lwz     r0,40(r2)
c000b500:   91 62 00 24     stw     r11,36(r2)
c000b504:   7c 00 60 50     subf    r0,r0,r12
c000b508:   7d 60 5a 14     add     r11,r0,r11
c000b50c:   91 62 00 08     stw     r11,8(r2)
c000b510:   7c 10 13 a6     mtspr   80,r0
c000b514:   80 62 00 70     lwz     r3,112(r2)
c000b518:   70 6b 86 91     andi.   r11,r3,34449
c000b51c:   40 82 00 28     bne     c000b544 <system_call_exception+0x70>
c000b520:   2b 89 01 b6     cmplwi  cr7,r9,438
c000b524:   41 9d 00 84     bgt     cr7,c000b5a8 <system_call_exception+0xd4>

c000b528:   80 6a 00 88     lwz     r3,136(r10)
c000b52c:   3d 40 c0 5c     lis     r10,-16292
c000b530:   55 29 10 3a     rlwinm  r9,r9,2,0,29
c000b534:   39 4a 41 e4     addi    r10,r10,16868
c000b538:   7d 2a 48 2e     lwzx    r9,r10,r9
c000b53c:   7d 29 03 a6     mtctr   r9
c000b540:   4e 80 04 20     bctr

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/syscall.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 69d75fc4a5eb..630c423e089a 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -91,6 +91,8 @@ notrace long system_call_exception(long r3, long r4, long r5,
 
 	} else if (unlikely(r0 >= NR_syscalls)) {
 		return -ENOSYS;
+	} else {
+		r3 = regs->orig_gpr3;
 	}
 
 	/* May be faster to do array_index_nospec? */
-- 
2.25.0