Resending as v3: there are no code changes from v2; only the commit message was updated to remove the outdated note about shadow space.
Patch is included below (inline) and attached for convenience. From 84ee99298f1a18d05cf4ef8bb9ae5314cbb78241 Mon Sep 17 00:00:00 2001 From: Thirumalai Nagalingam <thirumalai.nagalin...@multicorewareinc.com> Date: Fri, 20 Jun 2025 01:45:56 +0530 Subject: [PATCH] Aarch64: Add inline assembly pthread wrapper This patch adds an AArch64-specific inline assembly block for the pthread wrapper used to bootstrap new threads. It sets up the thread stack, adjusts for __CYGTLS_PADSIZE__, releases the original stack via VirtualFree, and invokes the target thread function. --- winsup/cygwin/create_posix_thread.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/winsup/cygwin/create_posix_thread.cc b/winsup/cygwin/create_posix_thread.cc index 3fcd61707..d0d3096b2 100644 --- a/winsup/cygwin/create_posix_thread.cc +++ b/winsup/cygwin/create_posix_thread.cc @@ -75,7 +75,7 @@ pthread_wrapper (PVOID arg) /* Initialize new _cygtls. */ _my_tls.init_thread (wrapper_arg.stackbase - __CYGTLS_PADSIZE__, (DWORD (*)(void*, void*)) wrapper_arg.func); -#ifdef __x86_64__ +#if defined(__x86_64__) __asm__ ("\n\ leaq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\ movq (%%rbx), %%r12 # Load thread func into r12 \n\ @@ -99,6 +99,23 @@ pthread_wrapper (PVOID arg) call *%%r12 # Call thread func \n" : : [WRAPPER_ARG] "o" (wrapper_arg), [CYGTLS] "i" (__CYGTLS_PADSIZE__)); +#elif defined(__aarch64__) + /* Sets up a new thread stack, frees the original OS stack, + * and calls the thread function with its arg using AArch64 ABI. 
*/ + __asm__ __volatile__ ("\n\ + mov x19, %[WRAPPER_ARG] // x19 = &wrapper_arg \n\ + ldr x10, [x19, #24] // x10 = wrapper_arg.stackbase \n\ + sub sp, x10, %[CYGTLS] // sp = stackbase - (CYGTLS) \n\ + mov fp, xzr // clear frame pointer (x29) \n\ + mov x0, [x19, #16] // x0 = wrapper_arg.stackaddr \n\ + mov x1, xzr // x1 = 0 (dwSize) \n\ + mov x2, #0x8000 // x2 = MEM_RELEASE \n\ + bl VirtualFree // free original stack \n\ + ldp x19, x0, [x19] // x19 = func, x0 = arg \n\ + blr x19 // call thread function \n" + : : [WRAPPER_ARG] "r" (&wrapper_arg), + [CYGTLS] "r" (__CYGTLS_PADSIZE__) + : "x0", "x1", "x2", "x10", "x19", "x29", "memory"); #else #error unimplemented for this target #endif -- 2.49.0.windows.1 Thanks, Thirumalai Nagalingam -----Original Message----- From: Jeremy Drake <cyg...@jdrake.com> Sent: 18 June 2025 23:22 To: Thirumalai Nagalingam <thirumalai.nagalin...@multicorewareinc.com> Cc: cygwin-patches@cygwin.com Subject: Re: [PATCH] Cygwin: Aarch64: Add inline assembly pthread wrapper On Thu, 5 Jun 2025, Thirumalai Nagalingam wrote: > Hello, > > Please find my patch attached for review. Please either send patches via something like git send-email that puts the patch in the body, or if you can't send patches in that way without some mail software mangling them, please include the patch in the body of the email in addition to attaching it, for easier review. > > This patch adds AArch64-specific inline assembly block for the pthread > wrapper used to bootstrap new threads. It sets up the thread stack, > adjusts for __CYGTLS_PADSIZE__ and shadow space, releases the original > stack via VirtualFree, and invokes the target thread function. 
> > Thanks & regards > Thirumalai Nagalingam > > From c897d7361356c73b5837afa466f78a58520c1e9e Mon Sep 17 00:00:00 2001 > From: Thirumalai Nagalingam > <thirumalai.nagalin...@multicorewareinc.com> > Date: Thu, 5 Jun 2025 00:30:48 -0700 > Subject: [PATCH] Aarch64: Add inline assembly pthread wrapper > > This patch adds AArch64-specific inline assembly block for the pthread > wrapper used to bootstrap new threads. It sets up the thread stack, > adjusts for __CYGTLS_PADSIZE__ and shadow space, releases the original > stack via VirtualFree, and invokes the target thread function. > --- > winsup/cygwin/create_posix_thread.cc | 19 ++++++++++++++++++- > 1 file changed, 18 insertions(+), 1 deletion(-) > > diff --git a/winsup/cygwin/create_posix_thread.cc > b/winsup/cygwin/create_posix_thread.cc > index 8e06099e4..b1d0cbb43 100644 > --- a/winsup/cygwin/create_posix_thread.cc > +++ b/winsup/cygwin/create_posix_thread.cc > @@ -75,7 +75,7 @@ pthread_wrapper (PVOID arg) > /* Initialize new _cygtls. */ > _my_tls.init_thread (wrapper_arg.stackbase - __CYGTLS_PADSIZE__, > (DWORD (*)(void*, void*)) wrapper_arg.func); -#ifdef > __x86_64__ > +#if defined(__x86_64__) > __asm__ ("\n\ > leaq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\ > movq (%%rbx), %%r12 # Load thread func into r12 \n\ > @@ -99,6 +99,23 @@ pthread_wrapper (PVOID arg) > call *%%r12 # Call thread func \n" > : : [WRAPPER_ARG] "o" (wrapper_arg), > [CYGTLS] "i" (__CYGTLS_PADSIZE__)); > +#elif defined(__aarch64__) > + /* Sets up a new thread stack, frees the original OS stack, > + * and calls the thread function with its arg using AArch64 ABI. */ > + __asm__ __volatile__ ("\n\ > + mov x19, %[WRAPPER_ARG] // x19 = &wrapper_arg > \n\ > + ldr x10, [x19, #24] // x10 = wrapper_arg.stackbase > \n\ > + sub sp, x10, %[CYGTLS] // sp = stackbase - (CYGTLS + > 32)\n\ > + mov fp, xzr // clear frame pointer (x29) > \n\ > + mov x0, sp // x0 = new stack pointer > \n\ This seems wrong. 
Shouldn't it be mov x0, [x19, #16] // x0 = wrapper_arg.stackaddr > + mov x1, xzr // x1 = 0 (dwSize) > \n\ > + mov x2, #0x8000 // x2 = MEM_RELEASE > \n\ > + bl VirtualFree // free original stack > \n\ > + ldp x19, x0, [x19] // x19 = func, x0 = arg > \n\ > + blr x19 // call thread function > \n" > + : : [WRAPPER_ARG] "r" (&wrapper_arg), > + [CYGTLS] "r" (__CYGTLS_PADSIZE__ + 32) // add 32 bytes shadow > +space I asked this on another patch, but is the 32-byte shadow area actually part of the aarch64 calling convention, or is this just following what x64 was doing (where it is part of the calling convention)? > + : "x0", "x1", "x2", "x10", "x19", "x29", "memory"); > #else > #error unimplemented for this target > #endif > -- > 2.34.1 >
0001-Aarch64-Add-inline-assembly-pthread-wrapper.patch
Description: 0001-Aarch64-Add-inline-assembly-pthread-wrapper.patch