https://github.com/python/cpython/commit/ea0453ee979174d6fc14aae0fd85e4ede6742a86
commit: ea0453ee979174d6fc14aae0fd85e4ede6742a86
branch: main
author: Diego Russo <diego.ru...@arm.com>
committer: brandtbucher <brandtbuc...@gmail.com>
date: 2025-03-25T10:15:36-07:00
summary:

GH-130887: Always remove trailing jumps in AArch64 JIT stencils (GH-131042)

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst
M Tools/jit/_stencils.py
M Tools/jit/_targets.py

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst
new file mode 100644
index 00000000000000..daf9c8ea09f081
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst
@@ -0,0 +1 @@
+Optimize the AArch64 code generation for the JIT. Patch by Diego Russo
diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py
index 4ddbe967438bd1..8faa9e8cac2d85 100644
--- a/Tools/jit/_stencils.py
+++ b/Tools/jit/_stencils.py
@@ -209,7 +209,24 @@ def pad(self, alignment: int) -> None:
             self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
         self.body.extend([0] * padding)
 
-    def remove_jump(self, *, alignment: int = 1) -> None:
+    def add_nops(self, nop: bytes, alignment: int) -> None:
+        """Add NOPs until there is alignment. Fail if it is not possible."""
+        offset = len(self.body)
+        nop_size = len(nop)
+
+        # Calculate the gap to the next multiple of alignment.
+        gap = -offset % alignment
+        if gap:
+            if gap % nop_size == 0:
+                count = gap // nop_size
+                self.body.extend(nop * count)
+            else:
+                raise ValueError(
+                    f"Cannot add nops of size '{nop_size}' to a body with "
+                    f"offset '{offset}' to align with '{alignment}'"
+                )
+
+    def remove_jump(self) -> None:
         """Remove a zero-length continuation jump, if it exists."""
         hole = max(self.holes, key=lambda hole: hole.offset)
         match hole:
@@ -244,7 +261,7 @@ def remove_jump(self, *, alignment: int = 1) -> None:
                 jump = b"\x00\x00\x00\x14"
             case _:
                 return
-        if self.body[offset:] == jump and offset % alignment == 0:
+        if self.body[offset:] == jump:
             self.body = self.body[:offset]
             self.holes.remove(hole)
 
@@ -266,10 +283,7 @@ class StencilGroup:
     _trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
 
     def process_relocations(
-        self,
-        known_symbols: dict[str, int],
-        *,
-        alignment: int = 1,
+        self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
     ) -> None:
         """Fix up all GOT and internal relocations for this stencil group."""
         for hole in self.code.holes.copy():
@@ -289,8 +303,8 @@ def process_relocations(
                 self._trampolines.add(ordinal)
                 hole.addend = ordinal
                 hole.symbol = None
-        self.code.remove_jump(alignment=alignment)
-        self.code.pad(alignment)
+        self.code.remove_jump()
+        self.code.add_nops(nop=nop, alignment=alignment)
         self.data.pad(8)
         for stencil in [self.code, self.data]:
             for hole in stencil.holes:
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index aa2b56abf446b1..b5a839e07d4daf 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]):
     verbose: bool = False
     known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
 
+    def _get_nop(self) -> bytes:
+        if re.fullmatch(r"aarch64-.*", self.triple):
+            nop = b"\x1f\x20\x03\xD5"
+        elif re.fullmatch(r"x86_64-.*|i686.*", self.triple):
+            nop = b"\x90"
+        else:
+            raise ValueError(f"NOP not defined for {self.triple}")
+        return nop
+
     def _compute_digest(self, out: pathlib.Path) -> str:
         hasher = hashlib.sha256()
         hasher.update(self.triple.encode())
@@ -172,7 +181,9 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
         stencil_groups = {task.get_name(): task.result() for task in tasks}
         for stencil_group in stencil_groups.values():
             stencil_group.process_relocations(
-                known_symbols=self.known_symbols, alignment=self.alignment
+                known_symbols=self.known_symbols,
+                alignment=self.alignment,
+                nop=self._get_nop(),
             )
         return stencil_groups
 

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to