On 14/12/2025 15:36, Iain Sandoe wrote:
Hi Alfie,

(just a head’s up)

This change breaks bootstrap on my darwin development branch.

The issue is in aarch64_output_mi_thunk ()
and it is that
  -  the thunk is referring to a virtual dtor
  -  the compilation of the virtual dtor is “complete” and function.cc 
<http://function.cc/>:free_after_compilation () is called _before_ the thunk 
output happens.
That resets cfun->machine to nullptr (and then we segv).

I am not sure why this fires on Darwin and not on Linux - but it’s possible 
that the different handling of clones is responsible - since Darwin does not 
have strong symbol aliases, clones are real copies (but this is speculation, 
unproven).

Anyway, I propose to resolve the issue as below … (at least in my local branch) 
- open to better solutions, of course

Ah thank you, this fix looks good to me. I am tracking down a aarch64 linux profiled boostrap failure that may be related.


no action needed - as said, just a head’s up.
thanks
Iain


On 12 Dec 2025, at 20:44, Andrew Pinski <[email protected]> wrote:

On Tue, Nov 18, 2025 at 9:03 AM Alfie Richards <[email protected]> wrote:

Hi All,

This fixes the compile time regression noted by Andrew Pinski.

The compilation speed regression was caused by my patch requirig querying
fndecl_abi signifficantly more than previously. This fixes this by caching
the pcs in the machine_function struct.

Regr tested and bootstrapped on aarch64-linux-gnu.

Okay for master?

Just to confirm this did fix the regression and thanks again for
looking into the compile time regression.

links to the graphs:
-O0: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=358.576.8
-Og: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=349.576.8

Thanks,
Andrew Pinski


Alfie

-- >8 --

As aarch64_function_arg_regno_p is a very hot function called many times for a
function it should not call fndecl_abi every time as it is expensive and
unecessasary.

This caches the result and in doing so fixes a regression in compilation speed
introduced by r16-5076-g7197d8062fddc2.

gcc/ChangeLog:

        * config/aarch64/aarch64.cc (aarch64_fndecl_abi): New function.
        (aarch64_function_arg_regno_p): Update to use aarch64_fndecl_abi.
        (aarch64_output_mi_thunk): Likewise.
        (aarch64_is_variant_pcs): Likewise.
        (aarch64_set_current_function): Update to initialize pcs value.
        * config/aarch64/aarch64.h (enum arm_pcs): Move location earlier in
        file.
        (machine_function) Add pcs value.
---
gcc/config/aarch64/aarch64.cc | 29 +++++++++++++++++++++++++----
gcc/config/aarch64/aarch64.h  | 31 +++++++++++++++++--------------
2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 60608a19078..2e6e468e62b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -728,6 +728,22 @@ aarch64_merge_string_arguments (tree args, tree old_attr,
   return !use_old_attr;
}

+/* Get the PCS for a decl and store the result to avoid needing to do
+   too many calls to fndecl_abi which can be expensive.  */
+
+static arm_pcs
+aarch64_fndecl_abi (tree fn)
+{
+  gcc_assert (TREE_CODE (fn) == FUNCTION_DECL);

^^ I wonder if the asserts  could be gcc_checking_asserts() if this is hot code.
I think this was requested in a review but I forgot it.

+  struct function *fun = DECL_STRUCT_FUNCTION (fn);
+  if (!fun)

I am going to make this ^^^  if (!fun || !fun->machine)
That sounds good thank you!

+    return arm_pcs (fndecl_abi (fn).id ());
+  if (fun->machine->pcs == ARM_PCS_UNKNOWN)
+    fun->machine->pcs = arm_pcs (fndecl_abi (fn).id ());
+
+  return fun->machine->pcs;
+}
+
/* Check whether an 'aarch64_vector_pcs' attribute is valid.  */

static tree
@@ -7896,8 +7912,7 @@ function_arg_preserve_none_regno_p (unsigned regno)
bool
aarch64_function_arg_regno_p (unsigned regno)
{
-  enum arm_pcs pcs
-    = cfun ? (arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64;
+  enum arm_pcs pcs = cfun ? aarch64_fndecl_abi (cfun->decl) : ARM_PCS_AAPCS64;

   switch (pcs)
     {
@@ -10764,7 +10779,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk 
ATTRIBUTE_UNUSED,
   funexp = XEXP (DECL_RTL (function), 0);
   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
   auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function));
-  auto pcs_variant = arm_pcs (fndecl_abi (function).id ());
+  auto pcs_variant = aarch64_fndecl_abi (function);
   bool ir = lookup_attribute ("indirect_return",
                              TYPE_ATTRIBUTES (TREE_TYPE (function)));
   rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant, ir);
@@ -19735,6 +19750,7 @@ aarch64_init_machine_status (void)
{
   struct machine_function *machine;
   machine = ggc_cleared_alloc<machine_function> ();
+  machine->pcs = ARM_PCS_UNKNOWN;
   return machine;
}

@@ -19891,6 +19907,11 @@ aarch64_set_current_function (tree fndecl)

   aarch64_previous_fndecl = fndecl;

+  /* Initialize the PCS value to UNKNOWN.  */
+  if (fndecl && TREE_CODE (fndecl) == FUNCTION_DECL)
+    if (function *fn = DECL_STRUCT_FUNCTION (fndecl))
+      fn->machine->pcs = ARM_PCS_UNKNOWN;
+
   /* First set the target options.  */
   cl_target_option_restore (&global_options, &global_options_set,
                            TREE_TARGET_OPTION (new_tree));
@@ -25648,7 +25669,7 @@ static bool
aarch64_is_variant_pcs (tree fndecl)
{
   /* Check for ABIs that preserve more registers than usual.  */
-  arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id ();
+  arm_pcs pcs = aarch64_fndecl_abi (fndecl);
   if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs == 
ARM_PCS_PRESERVE_NONE)
     return true;

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 0e596b59744..de0db894c7a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -984,6 +984,20 @@ extern enum aarch64_cpu aarch64_tune;

#define DEFAULT_PCC_STRUCT_RETURN 0

+/* The set of available Procedure Call Stardards.  */
+
+enum arm_pcs
+{
+  ARM_PCS_AAPCS64,             /* Base standard AAPCS for 64 bit.  */
+  ARM_PCS_SIMD,                        /* For aarch64_vector_pcs functions.  */
+  ARM_PCS_SVE,                 /* For functions that pass or return
+                                  values in SVE registers.  */
+  ARM_PCS_TLSDESC,             /* For targets of tlsdesc calls.  */
+  ARM_PCS_PRESERVE_NONE,       /* PCS variant with no call-preserved
+                                  registers except X29.  */
+  ARM_PCS_UNKNOWN
+};
+
#if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H)
struct GTY (()) aarch64_frame
{
@@ -1151,6 +1165,9 @@ typedef struct GTY (()) machine_function

   /* During SEH output, this is non-null.  */
   struct seh_frame_state * GTY ((skip (""))) seh;
+
+  /* The Procedure Call Standard for the function.  */
+  enum arm_pcs pcs;
} machine_function;
#endif
#endif
@@ -1168,20 +1185,6 @@ enum aarch64_abi_type

#define TARGET_ILP32   (aarch64_abi & AARCH64_ABI_ILP32)

-enum arm_pcs
-{
-  ARM_PCS_AAPCS64,             /* Base standard AAPCS for 64 bit.  */
-  ARM_PCS_SIMD,                        /* For aarch64_vector_pcs functions.  */
-  ARM_PCS_SVE,                 /* For functions that pass or return
-                                  values in SVE registers.  */
-  ARM_PCS_TLSDESC,             /* For targets of tlsdesc calls.  */
-  ARM_PCS_PRESERVE_NONE,       /* PCS variant with no call-preserved
-                                  registers except X29.  */
-  ARM_PCS_UNKNOWN
-};
-
-
-

/* We can't use machine_mode inside a generator file because it
    hasn't been created yet; we shouldn't be using any code that
--
2.34.1



Reply via email to