If a system call is made with a transaction active, the kernel
immediately aborts it and returns. scv system calls disable irqs even
earlier in their interrupt handler, and tabort_syscall does not fix this
up.

This can result in irq soft-mask state being messed up on the next
kernel entry, and crashing at BUG_ON(arch_irq_disabled_regs(regs)) in
the kernel exit handlers, or possibly worse.

This can't easily be fixed in asm because at this point an async irq may
have hit, which is soft-masked and marked pending. The pending interrupt
has to be replayed before returning to userspace. The fix is to move the
tabort_syscall code to C in the main syscall handler, and just skip the
system call but otherwise return as usual, which will take care of the
pending irqs. This also does a bunch of other things including possible
signal delivery to the process, but the doomed transaction should still
be aborted when it is eventually returned to.

The sc system call path is changed to use the new C function as well to
reduce code and path differences. This slows down how quickly system
calls are aborted when called while a transaction is active, which could
potentially impact TM performance. But making any system call is already
bad for performance, and TM is on the way out, so go with simpler over
faster.

Reported-by: Eirik Fuller <eful...@redhat.com>
Fixes: 7fa95f9adaee7 ("powerpc/64s: system call support for scv/rfscv 
instructions")
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---

v2 of this fix had a bug where an irq could be soft masked and pending
before we hard disable interrupts in tabort_syscall for the case of
scv (because it enters the kernel with EE enabled). So this actually
requires a pretty large change to fix because we can't replay interrupts
just from this early asm context.

Thanks,
Nick

 arch/powerpc/kernel/interrupt.c    | 29 +++++++++++++++++++++
 arch/powerpc/kernel/interrupt_64.S | 41 ------------------------------
 2 files changed, 29 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 21bbd615ca41..c77c80214ad3 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -19,6 +19,7 @@
 #include <asm/switch_to.h>
 #include <asm/syscall.h>
 #include <asm/time.h>
+#include <asm/tm.h>
 #include <asm/unistd.h>
 
 #if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
@@ -138,6 +139,34 @@ notrace long system_call_exception(long r3, long r4, long 
r5,
         */
        irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
 
+       /*
+        * If the system call was made with a transaction active, doom it and
+        * return without performing the system call. Unless it was an
+        * unsupported scv vector, in which case it's treated like an illegal
+        * instruction.
+        */
+       if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+                       unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
+                       !trap_is_unsupported_scv(regs)) {
+               /* Enable TM in the kernel, and disable EE (for scv) */
+               hard_irq_disable();
+               mtmsr(mfmsr() | MSR_TM);
+
+               /* tabort, this dooms the transaction, nothing else */
+               asm volatile(".long 0x7c00071d | ((%0) << 16)"
+                               :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
+
+               /*
+                * Userspace will never see the return value. Execution will
+                * resume after the tbegin. of the aborted transaction with the
+                * checkpointed register state. A context switch could occur
+                * or signal delivered to the process before resuming the
+                * doomed transaction context, but that should all be handled
+                * as expected.
+                */
+               return -ENOSYS;
+       }
+
        local_irq_enable();
 
        if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index d4212d2ff0b5..ec950b08a8dc 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -12,7 +12,6 @@
 #include <asm/mmu.h>
 #include <asm/ppc_asm.h>
 #include <asm/ptrace.h>
-#include <asm/tm.h>
 
        .section        ".toc","aw"
 SYS_CALL_TABLE:
@@ -55,12 +54,6 @@ COMPAT_SYS_CALL_TABLE:
        .globl system_call_vectored_\name
 system_call_vectored_\name:
 _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
-       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
-       bne     tabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
        SCV_INTERRUPT_TO_KERNEL
        mr      r10,r1
        ld      r1,PACAKSAVE(r13)
@@ -247,12 +240,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_common_real)
        .globl system_call_common
 system_call_common:
 _ASM_NOKPROBE_SYMBOL(system_call_common)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
-       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
-       bne     tabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
        mr      r10,r1
        ld      r1,PACAKSAVE(r13)
        std     r10,0(r1)
@@ -425,34 +412,6 @@ SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b)
 RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart)
 #endif
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-tabort_syscall:
-_ASM_NOKPROBE_SYMBOL(tabort_syscall)
-       /* Firstly we need to enable TM in the kernel */
-       mfmsr   r10
-       li      r9, 1
-       rldimi  r10, r9, MSR_TM_LG, 63-MSR_TM_LG
-       mtmsrd  r10, 0
-
-       /* tabort, this dooms the transaction, nothing else */
-       li      r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
-       TABORT(R9)
-
-       /*
-        * Return directly to userspace. We have corrupted user register state,
-        * but userspace will never see that register state. Execution will
-        * resume after the tbegin of the aborted transaction with the
-        * checkpointed register state.
-        */
-       li      r9, MSR_RI
-       andc    r10, r10, r9
-       mtmsrd  r10, 1
-       mtspr   SPRN_SRR0, r11
-       mtspr   SPRN_SRR1, r12
-       RFI_TO_USER
-       b       .       /* prevent speculative execution */
-#endif
-
        /*
         * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
         * touched, no exit work created, then this can be used.
-- 
2.23.0

Reply via email to