On Wed, 17 Nov 2021 at 16:12, Richard Henderson <richard.hender...@linaro.org> wrote: > > The current api from safe_syscall_base() is to return -errno, which is > the interface provided by *some* linux kernel abis. The wrapper macro, > safe_syscall(), detects error, stores into errno, and returns -1, to > match the api of the system syscall(). > > For those kernel abis that do not return -errno natively, this leads > to double syscall error detection. E.g. Linux ppc64, which sets the > SO flag for error. > > Simplify the usage from C by moving the error detection into assembly. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > linux-user/safe-syscall.h | 20 +++--- > common-user/host/aarch64/safe-syscall.inc.S | 55 +++++++++------- > common-user/host/arm/safe-syscall.inc.S | 58 ++++++++++------- > common-user/host/i386/safe-syscall.inc.S | 51 +++++++++------ > common-user/host/ppc64/safe-syscall.inc.S | 63 +++++++++++-------- > common-user/host/riscv/safe-syscall.inc.S | 50 +++++++++------ > common-user/host/s390x/safe-syscall.inc.S | 50 +++++++++------ > common-user/host/x86_64/safe-syscall.inc.S | 70 ++++++++++++--------- > 8 files changed, 243 insertions(+), 174 deletions(-) > > diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h > index aaa9ffc0e2..ea0e8a8d24 100644 > --- a/linux-user/safe-syscall.h > +++ b/linux-user/safe-syscall.h > @@ -125,23 +125,17 @@ > * kinds of restartability. > */ > #ifdef HAVE_SAFE_SYSCALL > -/* The core part of this function is implemented in assembly */ > -extern long safe_syscall_base(int *pending, long number, ...); > + > +/* The core part of this function is implemented in assembly. */ > +extern long safe_syscall_base(int *pending, int *errnop, long number, ...); > + > /* These are defined by the safe-syscall.inc.S file */ > extern char safe_syscall_start[]; > extern char safe_syscall_end[]; > > -#define safe_syscall(...) \ > - ({ \ > - long ret_; \ > - int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \ > - ret_ = safe_syscall_base(psp_, __VA_ARGS__); \ > - if (is_error(ret_)) { \ > - errno = -ret_; \ > - ret_ = -1; \ > - } \ > - ret_; \ > - }) > +#define safe_syscall(...) \ > + safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \ > + &errno, __VA_ARGS__) > > #else > > diff --git a/common-user/host/aarch64/safe-syscall.inc.S > b/common-user/host/aarch64/safe-syscall.inc.S > index bc1f5a9792..95c60d8609 100644 > --- a/common-user/host/aarch64/safe-syscall.inc.S > +++ b/common-user/host/aarch64/safe-syscall.inc.S > @@ -17,22 +17,21 @@ > .type safe_syscall_start, #function > .type safe_syscall_end, #function > > - /* This is the entry point for making a system call. The calling > + /* > + * This is the entry point for making a system call. The calling > * convention here is that of a C varargs function with the > * first argument an 'int *' to the signal_pending flag, the > * second one the system call number (as a 'long'), and all further > * arguments being syscall arguments (also 'long').
This comment text needs updating to mention the new errnop argument. (Applies to all the similar comments in the files for the other archs.) > - * We return a long which is the syscall's return value, which > - * may be negative-errno on failure. Conversion to the > - * -1-and-errno-set convention is done by the calling wrapper. > */ > safe_syscall_base: > .cfi_startproc > - /* The syscall calling convention isn't the same as the > - * C one: > + /* > + * The syscall calling convention isn't the same as the C one: Looks like the indent here is wrong ? > * we enter with x0 == *signal_pending > - * x1 == syscall number > - * x2 ... x7, (stack) == syscall arguments > + * x1 == errno "int* address of errno" > + * x2 == syscall number > + * x3 ... x7, (stack) == syscall arguments > * and return the result in x0 > * and the syscall instruction needs > * x8 == syscall number > @@ -40,17 +39,18 @@ safe_syscall_base: > * and returns the result in x0 > * Shuffle everything around appropriately. > */ > - mov x9, x0 /* signal_pending pointer */ > - mov x8, x1 /* syscall number */ > - mov x0, x2 /* syscall arguments */ > - mov x1, x3 > - mov x2, x4 > - mov x3, x5 > - mov x4, x6 > - mov x5, x7 > - ldr x6, [sp] > + mov x10, x0 /* signal_pending pointer */ > + mov x11, x1 /* errno pointer */ > + mov x8, x2 /* syscall number */ > + mov x0, x3 /* syscall arguments */ > + mov x1, x4 > + mov x2, x5 > + mov x3, x6 > + mov x4, x7 > + ldp x5, x6, [sp] > > - /* This next sequence of code works in conjunction with the > + /* > + * This next sequence of code works in conjunction with the > * rewind_if_safe_syscall_function(). If a signal is taken > * and the interrupted PC is anywhere between 'safe_syscall_start' > * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. > @@ -59,17 +59,26 @@ safe_syscall_base: > */ > safe_syscall_start: > /* if signal_pending is non-zero, don't do the call */ > - ldr w10, [x9] > - cbnz w10, 0f > + ldr w9, [x10] > + cbnz w9, 2f > svc 0x0 > safe_syscall_end: > + > /* code path for having successfully executed the syscall */ > + cmn x0, #4095 > + b.cs 1f Shouldn't this be going to label 0f ? We need to do the 'neg', and unless I'm misreading the diff there's currently no path of execution that gets to that. Alternatively, branch on the opposite-sense condition to the 'ret' after the set-errno stuff. > ret > > -0: > - /* code path when we didn't execute the syscall */ > - mov x0, #-TARGET_ERESTARTSYS > + /* code path setting errno */ > +0: neg w0, w0 /* create positive errno */ > +1: str w0, [x11] /* store errno */ > + mov x0, #-1 > ret > + > + /* code path when we didn't execute the syscall */ > +2: mov w0, #TARGET_ERESTARTSYS > + b 1b > + > .cfi_endproc > > .size safe_syscall_base, .-safe_syscall_base > diff --git a/common-user/host/arm/safe-syscall.inc.S > b/common-user/host/arm/safe-syscall.inc.S > index 88c4958504..17839c6486 100644 > --- a/common-user/host/arm/safe-syscall.inc.S > +++ b/common-user/host/arm/safe-syscall.inc.S > @@ -22,33 +22,35 @@ > .arm > .align 2 > > - /* This is the entry point for making a system call. The calling > + /* > + * This is the entry point for making a system call. The calling > * convention here is that of a C varargs function with the > * first argument an 'int *' to the signal_pending flag, the > * second one the system call number (as a 'long'), and all further > * arguments being syscall arguments (also 'long'). > - * We return a long which is the syscall's return value, which > - * may be negative-errno on failure. Conversion to the > - * -1-and-errno-set convention is done by the calling wrapper. > */ > safe_syscall_base: > .fnstart > .cfi_startproc > mov r12, sp /* save entry stack */ > - push { r4, r5, r6, r7, r8, lr } > - .save { r4, r5, r6, r7, r8, lr } > - .cfi_adjust_cfa_offset 24 > + push { r4, r5, r6, r7, r8, r9, r10, lr } > + .save { r4, r5, r6, r7, r8, r9, r10, lr } > + .cfi_adjust_cfa_offset 32 > .cfi_rel_offset r4, 0 > .cfi_rel_offset r5, 4 > .cfi_rel_offset r6, 8 > .cfi_rel_offset r7, 12 > .cfi_rel_offset r8, 16 > - .cfi_rel_offset lr, 20 > + .cfi_rel_offset r9, 20 > + .cfi_rel_offset r10, 24 > + .cfi_rel_offset lr, 28 > > - /* The syscall calling convention isn't the same as the C one: > - * we enter with r0 == *signal_pending > - * r1 == syscall number > - * r2, r3, [sp+0] ... [sp+12] == syscall arguments > + /* > + * The syscall calling convention isn't the same as the C one: > + * we enter with r0 == &signal_pending > + * r1 == &errno Odd indent ? > + * r2 == syscall number > + * r3, [sp+0] ... [sp+16] == syscall arguments > * and return the result in r0 Don't we wind up with a potential issue here with 64-bit arguments due to the calling convention wanting to put those in aligned memory/register locations? Previously because we had just two extra arguments the arguments started at r2 and had the same alignment behaviour as the syscall wants for them starting at r0; but now we start at r3 so if for instance the first argument is 64-bit it will be in [sp+0][sp+4] but should go in r0:r1 I think... (Stopped reviewing here because if we need to change the way we call these functions there's no point my reviewing the fine detail of the asm.) -- PMM