https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=10380e9f446640c8d1bd4d60ec085bb31efa26f1
commit 10380e9f446640c8d1bd4d60ec085bb31efa26f1 Author: Thirumalai Nagalingam <thirumalai.nagalin...@multicorewareinc.com> Date: Tue Jul 1 18:17:24 2025 +0000 Cygwin: Aarch64: optimize pthread_wrapper register usage This patch resolves issues related to unsafe access to deallocated stack memory in the pthread wrapper for AArch64. Key changes: - Removed use of x19 by directly loading the thread function and argument using LDP from [WRAPPER_ARG], freeing one register. - Stored thread function and argument in x20 and x21 before VirtualFree to preserve them across calls. - Used x1 as a temporary register to load the stack base, subtract CYGTLS, and update SP. - Moved the thread argument back into x0 after VirtualFree and before calling the thread function. Earlier, `wrapper_arg` lived on the stack, which was freed via `VirtualFree`, risking segfaults on later access. Now, the thread `func` and `arg` are loaded before the stack is freed, stored in callee-saved registers, and restored to `x0` before calling the thread function. Fixes: f4ba145056db ("Aarch64: Add inline assembly pthread wrapper") Signed-off-by: Thirumalai Nagalingam <thirumalai.nagalin...@multicorewareinc.com> Diff: --- winsup/cygwin/create_posix_thread.cc | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/winsup/cygwin/create_posix_thread.cc b/winsup/cygwin/create_posix_thread.cc index 592aaf1a5..17bb607f7 100644 --- a/winsup/cygwin/create_posix_thread.cc +++ b/winsup/cygwin/create_posix_thread.cc @@ -103,18 +103,19 @@ pthread_wrapper (PVOID arg) /* Sets up a new thread stack, frees the original OS stack, * and calls the thread function with its arg using AArch64 ABI. */ __asm__ __volatile__ ("\n\ - mov x19, %[WRAPPER_ARG] // x19 = &wrapper_arg \n\ - ldp x0, x10, [x19, #16] // x0 = stackaddr, x10 = stackbase \n\ - sub sp, x10, %[CYGTLS] // sp = stackbase - (CYGTLS) \n\ - mov fp, xzr // clear frame pointer (x29) \n\ - mov x1, xzr // x1 = 0 (dwSize) \n\ - mov x2, #0x8000 // x2 = MEM_RELEASE \n\ - bl VirtualFree // free original stack \n\ - ldp x19, x0, [x19] // x19 = func, x0 = arg \n\ - blr x19 // call thread function \n" + ldp x20, x21, [%[WRAPPER_ARG]] // x20 = thread func, x21 = thread arg \n\ + ldp x0, x1, [%[WRAPPER_ARG], #16] // x0 = stackaddr, x1 = stackbase \n\ + sub sp, x1, %[CYGTLS] // sp = stackbase - (CYGTLS) \n\ + mov fp, xzr // clear frame pointer (x29) \n\ + // x0 already has stackaddr \n\ + mov x1, xzr // x1 = 0 (dwSize) \n\ + mov x2, #0x8000 // x2 = MEM_RELEASE \n\ + bl VirtualFree // free original stack \n\ + mov x0, x21 // Move arg into x0 \n\ + blr x20 // call thread function \n" : : [WRAPPER_ARG] "r" (&wrapper_arg), [CYGTLS] "r" (__CYGTLS_PADSIZE__) - : "x0", "x1", "x2", "x10", "x19", "x29", "memory"); + : "x0", "x1", "x2", "x20", "x21", "x29", "memory"); #else #error unimplemented for this target #endif