Le 05/09/2019 à 14:35, Nicholas Piggin a écrit :
System call entry and particularly exit code is beyond the limit of what
is reasonable to implement in asm.

This conversion moves all conditional branches out of the asm code,
except for the case that all GPRs should be restored at exit.

Null syscall test is about 5% faster after this patch, because the exit
work is handled under local_irq_disable, and the hard mask and pending
interrupt replay is handled after that, which avoids games with MSR.

Signed-off-by: Nicholas Piggin <npig...@gmail.com>

v3:
- Fix !KUAP build [mpe]
- Fix BookE build/boot [mpe]
- Don't trace irqs with MSR[RI]=0
- Don't allow syscall_exit_prepare to be ftraced, because function
   graph tracing which traces exits barfs after the IRQ state is
   prepared for kernel exit.
- Fix BE syscall table to use normal function descriptors now that they
   are called from C.
- Comment syscall_exit_prepare.
---
  arch/powerpc/include/asm/asm-prototypes.h     |  11 -
  .../powerpc/include/asm/book3s/64/kup-radix.h |  14 +-
  arch/powerpc/include/asm/cputime.h            |  24 ++
  arch/powerpc/include/asm/hw_irq.h             |   4 +
  arch/powerpc/include/asm/ptrace.h             |   3 +
  arch/powerpc/include/asm/signal.h             |   3 +
  arch/powerpc/include/asm/switch_to.h          |   5 +
  arch/powerpc/include/asm/time.h               |   3 +
  arch/powerpc/kernel/Makefile                  |   3 +-
  arch/powerpc/kernel/entry_64.S                | 337 +++---------------
  arch/powerpc/kernel/signal.h                  |   2 -
  arch/powerpc/kernel/syscall_64.c              | 195 ++++++++++
  arch/powerpc/kernel/systbl.S                  |   9 +-
  13 files changed, 300 insertions(+), 313 deletions(-)
  create mode 100644 arch/powerpc/kernel/syscall_64.c


[...]

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 5a3e0b5c9ad1..15bc2a872a76 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -63,12 +63,6 @@ exception_marker:
.globl system_call_common
  system_call_common:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
-       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
-       bne     .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
        mr      r10,r1
        ld      r1,PACAKSAVE(r13)
        std     r10,0(r1)
@@ -76,350 +70,111 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
        std     r12,_MSR(r1)
        std     r0,GPR0(r1)
        std     r10,GPR1(r1)
+       std     r2,GPR2(r1)
  #ifdef CONFIG_PPC_FSL_BOOK3E
  START_BTB_FLUSH_SECTION
        BTB_FLUSH(r10)
  END_BTB_FLUSH_SECTION
  #endif
-       ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
-       std     r2,GPR2(r1)
+       ld      r2,PACATOC(r13)
+       mfcr    r12
+       li      r11,0
+       /* Can we avoid saving r3-r8 in common case? */
        std     r3,GPR3(r1)
-       mfcr    r2
        std     r4,GPR4(r1)
        std     r5,GPR5(r1)
        std     r6,GPR6(r1)
        std     r7,GPR7(r1)
        std     r8,GPR8(r1)
-       li      r11,0
+       /* Zero r9-r12, this should only be required when restoring all GPRs */
        std     r11,GPR9(r1)
        std     r11,GPR10(r1)
        std     r11,GPR11(r1)
        std     r11,GPR12(r1)
-       std     r11,_XER(r1)
-       std     r11,_CTR(r1)
        std     r9,GPR13(r1)
        SAVE_NVGPRS(r1)
+       std     r11,_XER(r1)
+       std     r11,_CTR(r1)
        mflr    r10
+
        /*
         * This clears CR0.SO (bit 28), which is the error indication on
         * return from this system call.
         */
-       rldimi  r2,r11,28,(63-28)
+       rldimi  r12,r11,28,(63-28)
        li      r11,0xc00
        std     r10,_LINK(r1)
        std     r11,_TRAP(r1)
+       std     r12,_CCR(r1)
        std     r3,ORIG_GPR3(r1)
-       std     r2,_CCR(r1)
-       ld      r2,PACATOC(r13)
-       addi    r9,r1,STACK_FRAME_OVERHEAD
+       addi    r10,r1,STACK_FRAME_OVERHEAD
        ld      r11,exception_marker@toc(r2)
-       std     r11,-16(r9)             /* "regshere" marker */
-
-       kuap_check_amr r10, r11
-
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
-BEGIN_FW_FTR_SECTION
-       /* see if there are any DTL entries to process */
-       ld      r10,PACALPPACAPTR(r13)  /* get ptr to VPA */
-       ld      r11,PACA_DTL_RIDX(r13)  /* get log read index */
-       addi    r10,r10,LPPACA_DTLIDX
-       LDX_BE  r10,0,r10               /* get log write index */
-       cmpd    r11,r10
-       beq+    33f
-       bl      accumulate_stolen_time
-       REST_GPR(0,r1)
-       REST_4GPRS(3,r1)
-       REST_2GPRS(7,r1)
-       addi    r9,r1,STACK_FRAME_OVERHEAD
-33:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
-
-       /*
-        * A syscall should always be called with interrupts enabled
-        * so we just unconditionally hard-enable here. When some kind
-        * of irq tracing is used, we additionally check that condition
-        * is correct
-        */
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
-       lbz     r10,PACAIRQSOFTMASK(r13)
-1:     tdnei   r10,IRQS_ENABLED
-       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
-#ifdef CONFIG_PPC_BOOK3E
-       wrteei  1
-#else
-       li      r11,MSR_RI
-       ori     r11,r11,MSR_EE
-       mtmsrd  r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
-system_call:                   /* label this so stack traces look sane */
-       /* We do need to set SOFTE in the stack frame or the return
-        * from interrupt will be painful
-        */
-       li      r10,IRQS_ENABLED
-       std     r10,SOFTE(r1)
+       std     r11,-16(r10)            /* "regshere" marker */
- ld r11, PACA_THREAD_INFO(r13)
-       ld      r10,TI_FLAGS(r11)
-       andi.   r11,r10,_TIF_SYSCALL_DOTRACE
-       bne     .Lsyscall_dotrace               /* does not return */
-       cmpldi  0,r0,NR_syscalls
-       bge-    .Lsyscall_enosys
+       /* Calling convention has r9 = orig r0, r10 = regs */
+       mr      r9,r0
+       bl      system_call_exception
-.Lsyscall:
-/*
- * Need to vector to 32 Bit or default sys_call_table here,
- * based on caller's run-mode / personality.
- */
-       ld      r11,SYS_CALL_TABLE@toc(2)
-       andis.  r10,r10,_TIF_32BIT@h
-       beq     15f
-       ld      r11,COMPAT_SYS_CALL_TABLE@toc(2)
-       clrldi  r3,r3,32
-       clrldi  r4,r4,32
-       clrldi  r5,r5,32
-       clrldi  r6,r6,32
-       clrldi  r7,r7,32
-       clrldi  r8,r8,32
-15:
-       slwi    r0,r0,3
-
-       barrier_nospec_asm
-       /*
-        * Prevent the load of the handler below (based on the user-passed
-        * system call number) being speculatively executed until the test
-        * against NR_syscalls and branch to .Lsyscall_enosys above has
-        * committed.
-        */
-
-       ldx     r12,r11,r0      /* Fetch system call handler [ptr] */
-       mtctr   r12
-       bctrl                   /* Call handler */
-
-       /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
  .Lsyscall_exit:
-       std     r3,RESULT(r1)
+       addi    r4,r1,STACK_FRAME_OVERHEAD
+       bl      syscall_exit_prepare
-#ifdef CONFIG_DEBUG_RSEQ
-       /* Check whether the syscall is issued inside a restartable sequence */
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      rseq_syscall
-       ld      r3,RESULT(r1)
-#endif
-
-       ld      r12, PACA_THREAD_INFO(r13)
-
-       ld      r8,_MSR(r1)
-
-/*
- * This is a few instructions into the actual syscall exit path (which actually
- * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
- * number of visible symbols for profiling purposes.
- *
- * We can probe from system_call until this point as MSR_RI is set. But once it
- * is cleared below, we won't be able to take a trap.
- *
- * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
- */
-system_call_exit:
-       /*
-        * Disable interrupts so current_thread_info()->flags can't change,
-        * and so that we don't get interrupted after loading SRR0/1.
-        *
-        * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
-        * could fault on the load of the TI_FLAGS below.
-        */
-#ifdef CONFIG_PPC_BOOK3E
-       wrteei  0
-#else
-       li      r11,MSR_RI
-       mtmsrd  r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
-       ld      r9,TI_FLAGS(r12)
-       li      r11,-MAX_ERRNO
-       andi.   
r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
-       bne-    .Lsyscall_exit_work
+       ld      r2,_CCR(r1)
+       ld      r4,_NIP(r1)
+       ld      r5,_MSR(r1)
+       ld      r6,_LINK(r1)
- andi. r0,r8,MSR_FP
-       beq 2f
-#ifdef CONFIG_ALTIVEC
-       andis.  r0,r8,MSR_VEC@h
-       bne     3f
-#endif
-2:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      restore_math
-       ld      r8,_MSR(r1)
-       ld      r3,RESULT(r1)
-       li      r11,-MAX_ERRNO
-
-3:     cmpld   r3,r11
-       ld      r5,_CCR(r1)
-       bge-    .Lsyscall_error
-.Lsyscall_error_cont:
-       ld      r7,_NIP(r1)
  BEGIN_FTR_SECTION
        stdcx.  r0,0,r1                 /* to clear the reservation */
  END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-       andi.   r6,r8,MSR_PR
-       ld      r4,_LINK(r1)
- kuap_check_amr r10, r11
+       mtspr   SPRN_SRR0,r4
+       mtspr   SPRN_SRR1,r5

That looks dangerous. Once you have modified SRR0 and SRR1, your exception becomes unrecoverable, so this should be done as close as possible to the rfi. Here you seem to do many thinks inbetween, including restoring some registers from the stack.

Christophe

Reply via email to