It's not very nice to zero trap for this, because then system calls no
longer have TRAP_IS_SYSCALL(regs) invariant, and we can't distinguish
between sc and scv system calls (in a later patch).

Take one last unused bit from the low bits of the pt_regs.trap word for
this instead. There is not a really good reason why it should be in trap
as opposed to another field, but trap has some concept of flags and it
exists. Ideally I think we would move trap to 2-byte field and have 2
more bytes available independently.

Add a selftests case for this, which can be seen to fail if
TRAP_NORESTART is defined to false.

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 arch/powerpc/include/asm/ptrace.h             |  15 +-
 arch/powerpc/kernel/signal.c                  |   7 +-
 arch/powerpc/kernel/signal_32.c               |   2 +-
 arch/powerpc/kernel/signal_64.c               |  10 +-
 .../testing/selftests/powerpc/signal/Makefile |   2 +-
 .../powerpc/signal/sig_sc_double_restart.c    | 174 ++++++++++++++++++
 6 files changed, 194 insertions(+), 16 deletions(-)
 create mode 100644 
tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c

diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index 5eb249c725bd..5ee7eb128fb9 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -184,13 +184,13 @@ extern int ptrace_put_reg(struct task_struct *task, int 
regno,
 
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3S
-#define TRAP(regs)             ((regs)->trap)
-#define SET_TRAP(regs, val)    ((regs)->trap = (val))
+#define TRAP(regs)             ((regs)->trap & ~0x10)
+#define SET_TRAP(regs, val)    ((regs)->trap = ((regs)->trap & 0x10) | ((val) 
& ~0x10))
 #define FULL_REGS(regs)                true
 #define SET_FULL_REGS(regs)    do { } while (0)
 #else
-#define TRAP(regs)             ((regs)->trap & ~0x1)
-#define SET_TRAP(regs, val)    ((regs)->trap = ((regs)->trap & 0x1) | ((val) & 
~0x1))
+#define TRAP(regs)             ((regs)->trap & ~0x11)
+#define SET_TRAP(regs, val)    ((regs)->trap = ((regs)->trap & 0x11) | ((val) 
& ~0x11))
 #define FULL_REGS(regs)                (((regs)->trap & 1) == 0)
 #define SET_FULL_REGS(regs)    ((regs)->trap |= 1)
 #endif
@@ -204,8 +204,8 @@ extern int ptrace_put_reg(struct task_struct *task, int 
regno,
  * On 4xx we use the next bit to indicate whether the exception
  * is a critical exception (1 means it is).
  */
-#define TRAP(regs)             ((regs)->trap & ~0xF)
-#define SET_TRAP(regs, val)    ((regs)->trap = ((regs)->trap & 0xF) | ((val) & 
~0xF))
+#define TRAP(regs)             ((regs)->trap & ~0x1F)
+#define SET_TRAP(regs, val)    ((regs)->trap = ((regs)->trap & 0x1F) | ((val) 
& ~0x1F))
 #define FULL_REGS(regs)                (((regs)->trap & 1) == 0)
 #define SET_FULL_REGS(regs)    ((regs)->trap |= 1)
 #define IS_CRITICAL_EXC(regs)  (((regs)->trap & 2) != 0)
@@ -219,6 +219,9 @@ do {                                                        
                      \
 } while (0)
 #endif /* __powerpc64__ */
 
+#define TRAP_NORESTART(regs)   (((regs)->trap & 16) == 16)
+#define SET_TRAP_NORESTART(regs) ((regs)->trap |= 16)
+
 #define arch_has_single_step() (1)
 #ifndef CONFIG_BOOK3S_601
 #define arch_has_block_step()  (true)
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 4b74eef1d881..0de314075a8f 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -201,6 +201,9 @@ static void check_syscall_restart(struct pt_regs *regs, 
struct k_sigaction *ka,
        if (!TRAP_IS_SYSCALL(regs))
                return;
 
+       if (TRAP_NORESTART(regs))
+               return;
+
        /* error signalled ? */
        if (!(regs->ccr & 0x10000000))
                return;
@@ -258,7 +261,7 @@ static void do_signal(struct task_struct *tsk)
        if (ksig.sig <= 0) {
                /* No signal to deliver -- put the saved sigmask back */
                restore_saved_sigmask();
-               tsk->thread.regs->trap = 0;
+               SET_TRAP_NORESTART(tsk->thread.regs);
                return;               /* no signals delivered */
        }
 
@@ -285,7 +288,7 @@ static void do_signal(struct task_struct *tsk)
                ret = handle_rt_signal64(&ksig, oldset, tsk);
        }
 
-       tsk->thread.regs->trap = 0;
+       SET_TRAP_NORESTART(tsk->thread.regs);
        signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP));
 }
 
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 4f96d29a22bf..2970e1fd8d02 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -500,7 +500,7 @@ static long restore_user_regs(struct pt_regs *regs,
        if (!sig)
                save_r2 = (unsigned int)regs->gpr[2];
        err = restore_general_regs(regs, sr);
-       regs->trap = 0;
+       SET_TRAP_NORESTART(regs);
        err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
        if (!sig)
                regs->gpr[2] = (unsigned long) save_r2;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index adfde59cf4ba..1f6565dbe248 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -350,8 +350,8 @@ static long restore_sigcontext(struct task_struct *tsk, 
sigset_t *set, int sig,
        err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
        err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
        err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
-       /* skip SOFTE */
-       regs->trap = 0;
+       /* Don't allow userspace to set SOFTE */
+       SET_TRAP_NORESTART(regs);
        err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
        err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
        err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
@@ -472,10 +472,8 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
                          &sc->gp_regs[PT_XER]);
        err |= __get_user(tsk->thread.ckpt_regs.ccr,
                          &sc->gp_regs[PT_CCR]);
-
-       /* Don't allow userspace to set the trap value */
-       regs->trap = 0;
-
+       /* Don't allow userspace to set SOFTE */
+       SET_TRAP_NORESTART(regs);
        /* These regs are not checkpointed; they can go in 'regs'. */
        err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
        err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
diff --git a/tools/testing/selftests/powerpc/signal/Makefile 
b/tools/testing/selftests/powerpc/signal/Makefile
index 932a032bf036..d6ae54663aed 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso
+TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
 
 CFLAGS += -maltivec
 $(OUTPUT)/signal_tm: CFLAGS += -mhtm
diff --git a/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c 
b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
new file mode 100644
index 000000000000..64732adf3d91
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test that a syscall does not get restarted twice.
+ *
+ * Based on Al's description, and a test for the bug fixed in this commit:
+ *
+ * commit 9a81c16b527528ad307843be5571111aa8d35a80
+ * Author: Al Viro <v...@zeniv.linux.org.uk>
+ * Date:   Mon Sep 20 21:48:57 2010 +0100
+ *
+ *  powerpc: fix double syscall restarts
+ *
+ *  Make sigreturn zero regs->trap, make do_signal() do the same on all
+ *  paths.  As it is, signal interrupting e.g. read() from fd 512 (==
+ *  ERESTARTSYS) with another signal getting unblocked when the first
+ *  handler finishes will lead to restart one insn earlier than it ought
+ *  to.  Same for multiple signals with in-kernel handlers interrupting
+ *  that sucker at the same time.  Same for multiple signals of any kind
+ *  interrupting that sucker on 64bit...
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static void SIGUSR1_handler(int sig)
+{
+       kill(getpid(), SIGUSR2);
+       /*
+        * SIGUSR2 is blocked until the handler exits, at which point it will
+        * be raised again and think there is a restart to be done because the
+        * pending restarted syscall has 512 (ERESTARTSYS) in r3. The second
+        * restart will retreat NIP another 4 bytes to fail case branch.
+        */
+}
+
+static void SIGUSR2_handler(int sig)
+{
+}
+
+static ssize_t raw_read(int fd, void *buf, size_t count)
+{
+       register long nr asm("r0") = __NR_read;
+       register long _fd asm("r3") = fd;
+       register void *_buf asm("r4") = buf;
+       register size_t _count asm("r5") = count;
+
+       asm volatile(
+"              b       0f              \n"
+"              b       1f              \n"
+"      0:      sc      0               \n"
+"              bns     2f              \n"
+"              neg     %0,%0           \n"
+"              b       2f              \n"
+"      1:                              \n"
+"              li      %0,%4           \n"
+"      2:                              \n"
+               : "+r"(_fd), "+r"(nr), "+r"(_buf), "+r"(_count)
+               : "i"(-ENOANO)
+               : "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "ctr", 
"cr0");
+
+       if (_fd < 0) {
+               errno = -_fd;
+               _fd = -1;
+       }
+
+       return _fd;
+}
+
+#define DATA "test 123"
+#define DLEN (strlen(DATA)+1)
+
+int test_restart(void)
+{
+       int pipefd[2];
+       pid_t pid;
+       char buf[512];
+
+       if (pipe(pipefd) == -1) {
+               perror("pipe");
+               exit(EXIT_FAILURE);
+       }
+
+       pid = fork();
+       if (pid == -1) {
+               perror("fork");
+               exit(EXIT_FAILURE);
+       }
+
+       if (pid == 0) { /* Child reads from pipe */
+               struct sigaction act;
+               int fd;
+
+               memset(&act, 0, sizeof(act));
+               sigaddset(&act.sa_mask, SIGUSR2);
+               act.sa_handler = SIGUSR1_handler;
+               act.sa_flags = SA_RESTART;
+               if (sigaction(SIGUSR1, &act, NULL) == -1) {
+                       perror("sigaction");
+                       exit(EXIT_FAILURE);
+               }
+
+               memset(&act, 0, sizeof(act));
+               act.sa_handler = SIGUSR2_handler;
+               act.sa_flags = SA_RESTART;
+               if (sigaction(SIGUSR2, &act, NULL) == -1) {
+                       perror("sigaction");
+                       exit(EXIT_FAILURE);
+               }
+
+               /* Let's get ERESTARTSYS into r3 */
+               while ((fd = dup(pipefd[0])) != 512) {
+                       if (fd == -1) {
+                               perror("dup");
+                               exit(EXIT_FAILURE);
+                       }
+               }
+
+               if (raw_read(fd, buf, 512) == -1) {
+                       if (errno == ENOANO) {
+                               fprintf(stderr, "Double restart moved restart 
before sc instruction.\n");
+                               _exit(EXIT_FAILURE);
+                       }
+                       perror("read");
+                       exit(EXIT_FAILURE);
+               }
+
+               if (strncmp(buf, DATA, DLEN)) {
+                       fprintf(stderr, "bad test string %s\n", buf);
+                       exit(EXIT_FAILURE);
+               }
+
+               return 0;
+
+       } else {
+               int wstatus;
+
+               usleep(100000);         /* Hack to get reader waiting */
+               kill(pid, SIGUSR1);
+               usleep(100000);
+               if (write(pipefd[1], DATA, DLEN) != DLEN) {
+                       perror("write");
+                       exit(EXIT_FAILURE);
+               }
+               close(pipefd[0]);
+               close(pipefd[1]);
+               if (wait(&wstatus) == -1) {
+                       perror("wait");
+                       exit(EXIT_FAILURE);
+               }
+               if (!WIFEXITED(wstatus)) {
+                       fprintf(stderr, "child exited abnormally\n");
+                       exit(EXIT_FAILURE);
+               }
+
+               FAIL_IF(WEXITSTATUS(wstatus) != EXIT_SUCCESS);
+
+               return 0;
+       }
+}
+
+int main(void)
+{
+       test_harness_set_timeout(10);
+       return test_harness(test_restart, "sig sys restart");
+}
-- 
2.23.0

Reply via email to