Unroll the loops in kuep_lock and kuep_unlock.

Benchmarked on an mpc 8321 with a standard kernel having a
3M/1M user/kernel memory split, i.e. 12 segments for user.

Without KUEP, null_syscall benchmark is 220 cycles.
With KUEP, null_syscall benchmark is 439 cycles.

Once loops are unrolled, null_syscall benchmark is 366 cycles.
This is almost a 17% reduction.

It is assumed that userspace covers at least 4 segments and
at most 14 segments.

The isync is removed, it saves 8 cycles. For kuep_unlock, the rfi
will do the synchronisation. For kuep_lock, we get a small window
during which exec is still possible, but it won't last more than a
few instructions.

Both macros are called two times so the size increase is in
the noise (approx 120 instructions).

Signed-off-by: Christophe Leroy <christophe.le...@csgroup.eu>
---
 arch/powerpc/include/asm/book3s/32/kup.h | 67 ++++++++++++++++++------
 1 file changed, 52 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index a0117a9d5b06..e800b515ac02 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -7,21 +7,61 @@
 
 #ifdef __ASSEMBLY__
 
-.macro kuep_update_sr  gpr1, gpr2              /* NEVER use r0 as gpr2 due to addis */
-101:   mtsrin  \gpr1, \gpr2
-       addi    \gpr1, \gpr1, 0x111             /* next VSID */
-       rlwinm  \gpr1, \gpr1, 0, 0xf0ffffff     /* clear VSID overflow */
-       addis   \gpr2, \gpr2, 0x1000            /* address of next segment */
-       bdnz    101b
-       isync
+.macro kuep_increment gpr1, gpr2               /* Advance both VSID registers by two segments */
+       addi    \gpr1, \gpr1, 0x222             /* Skip one VSID: 2 * 0x111 */
+       addi    \gpr2, \gpr2, 0x222             /* Skip one VSID: 2 * 0x111 */
+       rlwinm  \gpr1, \gpr1, 0, 0xf0ffffff     /* Clear VSID overflow */
+       rlwinm  \gpr2, \gpr2, 0, 0xf0ffffff     /* Clear VSID overflow */
+.endm
+
+.macro kuep_update_sr gpr1, gpr2               /* NEVER use r0 as gpr1 or gpr2: addi reads r0 as literal 0 */
+       addi    \gpr2, \gpr1, 0x111             /* gpr2 = gpr1 advanced to the next VSID */
+       rlwinm  \gpr2, \gpr2, 0, 0xf0ffffff     /* Clear VSID overflow */
+       mtsr    0, \gpr1                        /* Even segments are written from gpr1 */
+       mtsr    1, \gpr2                        /* Odd segments are written from gpr2 */
+       kuep_increment \gpr1, \gpr2             /* Step both registers to the next segment pair */
+       mtsr    2, \gpr1
+       mtsr    3, \gpr2
+#if NUM_USER_SEGMENTS > 4                      /* Segments 0-3 are always written; the rest are conditional */
+       kuep_increment \gpr1, \gpr2
+       mtsr    4, \gpr1
+#if NUM_USER_SEGMENTS > 5
+       mtsr    5, \gpr2
+#if NUM_USER_SEGMENTS > 6
+       kuep_increment \gpr1, \gpr2
+       mtsr    6, \gpr1
+#if NUM_USER_SEGMENTS > 7
+       mtsr    7, \gpr2
+#if NUM_USER_SEGMENTS > 8
+       kuep_increment \gpr1, \gpr2
+       mtsr    8, \gpr1
+#if NUM_USER_SEGMENTS > 9
+       mtsr    9, \gpr2
+#if NUM_USER_SEGMENTS > 10
+       kuep_increment \gpr1, \gpr2
+       mtsr    10, \gpr1
+#if NUM_USER_SEGMENTS > 11
+       mtsr    11, \gpr2
+#if NUM_USER_SEGMENTS > 12
+       kuep_increment \gpr1, \gpr2
+       mtsr    12, \gpr1
+#if NUM_USER_SEGMENTS > 13
+       mtsr    13, \gpr2
+#endif /* NUM_USER_SEGMENTS > 13 */
+#endif /* NUM_USER_SEGMENTS > 12 */
+#endif /* NUM_USER_SEGMENTS > 11 */
+#endif /* NUM_USER_SEGMENTS > 10 */
+#endif /* NUM_USER_SEGMENTS > 9 */
+#endif /* NUM_USER_SEGMENTS > 8 */
+#endif /* NUM_USER_SEGMENTS > 7 */
+#endif /* NUM_USER_SEGMENTS > 6 */
+#endif /* NUM_USER_SEGMENTS > 5 */
+#endif /* NUM_USER_SEGMENTS > 4 */
 .endm
 
 .macro kuep_lock       gpr1, gpr2
 #ifdef CONFIG_PPC_KUEP
-       li      \gpr1, NUM_USER_SEGMENTS
-       li      \gpr2, 0
-       mtctr   \gpr1
-       mfsrin  \gpr1, \gpr2
+       mfsr    \gpr1, 0
        oris    \gpr1, \gpr1, SR_NX@h           /* set Nx */
        kuep_update_sr \gpr1, \gpr2
 #endif
@@ -29,10 +69,7 @@
 
 .macro kuep_unlock     gpr1, gpr2
 #ifdef CONFIG_PPC_KUEP
-       li      \gpr1, NUM_USER_SEGMENTS
-       li      \gpr2, 0
-       mtctr   \gpr1
-       mfsrin  \gpr1, \gpr2
+       mfsr    \gpr1, 0
        rlwinm  \gpr1, \gpr1, 0, ~SR_NX         /* Clear Nx */
        kuep_update_sr \gpr1, \gpr2
 #endif
-- 
2.25.0

Reply via email to