Unroll the loops in kuep_lock and kuep_unlock. Benchmarked on an mpc 8321 with a standard kernel having a 3G/1G user/kernel memory split, i.e. 12 segments for user.
Without KUEP, null_syscall benchmark is 220 cycles. With KUEP, null_syscall benchmark is 439 cycles. Once loops are unrolled, null_syscall benchmark is 366 cycles. This is an almost 17% reduction. It is assumed that userspace covers at least 4 segments and at most 14 segments. The isync is removed, which saves 8 cycles. For kuep_unlock, the rfi will do the synchronisation. For kuep_lock, we get a small window during which exec is still possible, but it won't last more than a few instructions. Both macros are called two times so the size increase is in the noise (approx 120 instructions). Signed-off-by: Christophe Leroy <christophe.le...@csgroup.eu> --- arch/powerpc/include/asm/book3s/32/kup.h | 67 ++++++++++++++++++------ 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index a0117a9d5b06..e800b515ac02 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,21 +7,61 @@ #ifdef __ASSEMBLY__ -.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - bdnz 101b - isync +.macro kuep_increment gpr1, gpr2 + addi \gpr1, \gpr1, 0x222 /* Next second VSID */ + addi \gpr2, \gpr2, 0x222 /* Next second VSID */ + rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* Clear VSID overflow */ + rlwinm \gpr2, \gpr2, 0, 0xf0ffffff /* Clear VSID overflow */ +.endm + +.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr1 or gpr2 due to addi */ + addi \gpr2, \gpr1, 0x111 /* Next VSID */ + rlwinm \gpr2, \gpr2, 0, 0xf0ffffff /* Clear VSID overflow */ + mtsr 0, \gpr1 + mtsr 1, \gpr2 + kuep_increment \gpr1, \gpr2 + mtsr 2, \gpr1 + mtsr 3, \gpr2 +#if NUM_USER_SEGMENTS > 4 + kuep_increment \gpr1, \gpr2 + mtsr 4, \gpr1 +#if NUM_USER_SEGMENTS > 5 + mtsr 5, \gpr2 
+#if NUM_USER_SEGMENTS > 6 + kuep_increment \gpr1, \gpr2 + mtsr 6, \gpr1 +#if NUM_USER_SEGMENTS > 7 + mtsr 7, \gpr2 +#if NUM_USER_SEGMENTS > 8 + kuep_increment \gpr1, \gpr2 + mtsr 8, \gpr1 +#if NUM_USER_SEGMENTS > 9 + mtsr 9, \gpr2 +#if NUM_USER_SEGMENTS > 10 + kuep_increment \gpr1, \gpr2 + mtsr 10, \gpr1 +#if NUM_USER_SEGMENTS > 11 + mtsr 11, \gpr2 +#if NUM_USER_SEGMENTS > 12 + kuep_increment \gpr1, \gpr2 + mtsr 12, \gpr1 +#if NUM_USER_SEGMENTS > 13 + mtsr 13, \gpr2 +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif .endm .macro kuep_lock gpr1, gpr2 #ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 + mfsr \gpr1, 0 oris \gpr1, \gpr1, SR_NX@h /* set Nx */ kuep_update_sr \gpr1, \gpr2 #endif @@ -29,10 +69,7 @@ .macro kuep_unlock gpr1, gpr2 #ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 + mfsr \gpr1, 0 rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ kuep_update_sr \gpr1, \gpr2 #endif -- 2.25.0