https://github.com/python/cpython/commit/966260841b69480708f8efa1f8f5738b45fca68d
commit: 966260841b69480708f8efa1f8f5738b45fca68d
branch: main
author: Diego Russo <diego.ru...@arm.com>
committer: brandtbucher <brandtbuc...@gmail.com>
date: 2024-07-01T15:52:33-07:00
summary:

GH-119726: Use LDR for AArch64 trampolines (GH-121001)

files:
A Misc/NEWS.d/next/Core and Builtins/2024-06-25-16-26-44.gh-issue-119726.WqvHxB.rst
M Tools/jit/_stencils.py

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-25-16-26-44.gh-issue-119726.WqvHxB.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-25-16-26-44.gh-issue-119726.WqvHxB.rst
new file mode 100644
index 00000000000000..2e5132f61e504f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-25-16-26-44.gh-issue-119726.WqvHxB.rst
@@ -0,0 +1,2 @@
+Improve the speed and memory use of C function calls from JIT code on AArch64.
+Patch by Diego Russo
diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py
index ede5a9930e7316..68eb1d13394170 100644
--- a/Tools/jit/_stencils.py
+++ b/Tools/jit/_stencils.py
@@ -204,33 +204,20 @@ def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None:
             return
 
         self.disassembly += [
-            f"{base + 4 * 0:x}: d2800008      mov     x8, #0x0",
-            f"{base + 4 * 0:016x}:  R_AARCH64_MOVW_UABS_G0_NC    {hole.symbol}",
-            f"{base + 4 * 1:x}: f2a00008      movk    x8, #0x0, lsl #16",
-            f"{base + 4 * 1:016x}:  R_AARCH64_MOVW_UABS_G1_NC    {hole.symbol}",
-            f"{base + 4 * 2:x}: f2c00008      movk    x8, #0x0, lsl #32",
-            f"{base + 4 * 2:016x}:  R_AARCH64_MOVW_UABS_G2_NC    {hole.symbol}",
-            f"{base + 4 * 3:x}: f2e00008      movk    x8, #0x0, lsl #48",
-            f"{base + 4 * 3:016x}:  R_AARCH64_MOVW_UABS_G3       {hole.symbol}",
-            f"{base + 4 * 4:x}: d61f0100      br      x8",
+            f"{base + 4 * 0:x}: 58000048      ldr     x8, 8",
+            f"{base + 4 * 1:x}: d61f0100      br      x8",
+            f"{base + 4 * 2:x}: 00000000",
+            f"{base + 4 * 2:016x}:  R_AARCH64_ABS64    {hole.symbol}",
+            f"{base + 4 * 3:x}: 00000000",
         ]
         for code in [
-            0xD2800008.to_bytes(4, sys.byteorder),
-            0xF2A00008.to_bytes(4, sys.byteorder),
-            0xF2C00008.to_bytes(4, sys.byteorder),
-            0xF2E00008.to_bytes(4, sys.byteorder),
+            0x58000048.to_bytes(4, sys.byteorder),
             0xD61F0100.to_bytes(4, sys.byteorder),
+            0x00000000.to_bytes(4, sys.byteorder),
+            0x00000000.to_bytes(4, sys.byteorder),
         ]:
             self.body.extend(code)
-        for i, kind in enumerate(
-            [
-                "R_AARCH64_MOVW_UABS_G0_NC",
-                "R_AARCH64_MOVW_UABS_G1_NC",
-                "R_AARCH64_MOVW_UABS_G2_NC",
-                "R_AARCH64_MOVW_UABS_G3",
-            ]
-        ):
-            self.holes.append(hole.replace(offset=base + 4 * i, kind=kind))
+        self.holes.append(hole.replace(offset=base + 8, kind="R_AARCH64_ABS64"))
         self.trampolines[hole.symbol] = base
 
     def remove_jump(self, *, alignment: int = 1) -> None:

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to