https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=10380e9f446640c8d1bd4d60ec085bb31efa26f1

commit 10380e9f446640c8d1bd4d60ec085bb31efa26f1
Author: Thirumalai Nagalingam <thirumalai.nagalin...@multicorewareinc.com>
Date:   Tue Jul 1 18:17:24 2025 +0000

    Cygwin: Aarch64: optimize pthread_wrapper register usage
    
    This patch resolves issues related to unsafe access to deallocated
    stack memory in the pthread wrapper for AArch64.
    
    Key changes:
    - Removed use of x19 by directly loading the thread function and
      argument using LDP from [WRAPPER_ARG], freeing one register.
    - Stored thread function and argument in x20 and x21 before
      VirtualFree to preserve them across calls.
    - Used x1 as a temporary register to load the stack base,
      subtract CYGTLS, and update SP.
    - Moved the thread argument back into x0 after VirtualFree and
      before calling the thread function.
    
    Earlier, `wrapper_arg` lived on the stack, which was freed via
    `VirtualFree`, risking segfaults on later access. Now, the thread
    `func` and `arg` are loaded before the stack is freed, stored in
    callee-saved registers, and restored to `x0` before calling the
    thread function.
    
    Fixes: f4ba145056db ("Aarch64: Add inline assembly pthread wrapper")
    Signed-off-by: Thirumalai Nagalingam <thirumalai.nagalin...@multicorewareinc.com>

Diff:
---
 winsup/cygwin/create_posix_thread.cc | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/winsup/cygwin/create_posix_thread.cc b/winsup/cygwin/create_posix_thread.cc
index 592aaf1a5..17bb607f7 100644
--- a/winsup/cygwin/create_posix_thread.cc
+++ b/winsup/cygwin/create_posix_thread.cc
@@ -103,18 +103,19 @@ pthread_wrapper (PVOID arg)
   /* Sets up a new thread stack, frees the original OS stack,
    * and calls the thread function with its arg using AArch64 ABI. */
   __asm__ __volatile__ ("\n\
-          mov     x19, %[WRAPPER_ARG]  // x19 = &wrapper_arg              \n\
-          ldp     x0, x10, [x19, #16]  // x0 = stackaddr, x10 = stackbase \n\
-          sub     sp, x10, %[CYGTLS]   // sp = stackbase - (CYGTLS)       \n\
-          mov     fp, xzr              // clear frame pointer (x29)       \n\
-          mov     x1, xzr              // x1 = 0 (dwSize)                 \n\
-          mov     x2, #0x8000          // x2 = MEM_RELEASE                \n\
-          bl      VirtualFree          // free original stack             \n\
-          ldp     x19, x0, [x19]       // x19 = func, x0 = arg            \n\
-          blr     x19                  // call thread function            \n"
+          ldp     x20, x21, [%[WRAPPER_ARG]]    // x20 = thread func, x21 = thread arg \n\
+          ldp     x0, x1, [%[WRAPPER_ARG], #16] // x0 = stackaddr, x1 = stackbase      \n\
+          sub     sp, x1, %[CYGTLS]             // sp = stackbase - (CYGTLS)           \n\
+          mov     fp, xzr                       // clear frame pointer (x29)           \n\
+                                                // x0 already has stackaddr            \n\
+          mov     x1, xzr                       // x1 = 0 (dwSize)                     \n\
+          mov     x2, #0x8000                   // x2 = MEM_RELEASE                    \n\
+          bl      VirtualFree                   // free original stack                 \n\
+          mov     x0, x21                       // Move arg into x0                    \n\
+          blr     x20                           // call thread function                \n"
           : : [WRAPPER_ARG] "r" (&wrapper_arg),
               [CYGTLS] "r" (__CYGTLS_PADSIZE__)
-          : "x0", "x1", "x2", "x10", "x19", "x29", "memory");
+          : "x0", "x1", "x2", "x20", "x21", "x29", "memory");
 #else
 #error unimplemented for this target
 #endif

Reply via email to