On Sun, Dec 14, 2025 at 11:42 AM Alfie Richards <[email protected]> wrote: > > On 14/12/2025 15:36, Iain Sandoe wrote: > > Hi Alfie, > > > > (just a head’s up) > > > > This change breaks bootstrap on my darwin development branch. > > > > The issue is in aarch64_output_mi_thunk () > > and it is that > > - the thunk is referring to a virtual dtor > > - the compilation of the virtual dtor is “complete” and function.cc > > <http://function.cc/>:free_after_compilation () is called _before_ the > > thunk output happens. > > That resets cfun->machine to nullptr (and then we segv). > > > > I am not sure why this fires on Darwin and not on Linux - but it’s possible > > that the different handling of clones is responsible - since Darwin does > > not have strong symbol aliases, clones are real copies (but this is > > speculation, unproven). > > > > Anyway, I propose to resolve the issue as below … (at least in my local > > branch) - open to better solutions, of course > > Ah thank you, this fix looks good to me. I am tracking down a aarch64 > linux profiled boostrap failure that may be related.
That might be https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122456 though. There is a patch from Richi which solves the problem for me. Thanks, Andrew > > > > > no action needed - as said, just a head’s up. > > thanks > > Iain > > > > > >> On 12 Dec 2025, at 20:44, Andrew Pinski <[email protected]> > >> wrote: > >> > >> On Tue, Nov 18, 2025 at 9:03 AM Alfie Richards <[email protected]> > >> wrote: > >>> > >>> Hi All, > >>> > >>> This fixes the compile time regression noted by Andrew Pinski. > >>> > >>> The compilation speed regression was caused by my patch requirig querying > >>> fndecl_abi signifficantly more than previously. This fixes this by caching > >>> the pcs in the machine_function struct. > >>> > >>> Regr tested and bootstrapped on aarch64-linux-gnu. > >>> > >>> Okay for master? > >> > >> Just to confirm this did fix the regression and thanks again for > >> looking into the compile time regression. > >> > >> links to the graphs: > >> -O0: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=358.576.8 > >> -Og: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=349.576.8 > >> > >> Thanks, > >> Andrew Pinski > >> > >>> > >>> Alfie > >>> > >>> -- >8 -- > >>> > >>> As aarch64_function_arg_regno_p is a very hot function called many times > >>> for a > >>> function it should not call fndecl_abi every time as it is expensive and > >>> unecessasary. > >>> > >>> This caches the result and in doing so fixes a regression in compilation > >>> speed > >>> introduced by r16-5076-g7197d8062fddc2. > >>> > >>> gcc/ChangeLog: > >>> > >>> * config/aarch64/aarch64.cc (aarch64_fndecl_abi): New function. > >>> (aarch64_function_arg_regno_p): Update to use aarch64_fndecl_abi. > >>> (aarch64_output_mi_thunk): Likewise. > >>> (aarch64_is_variant_pcs): Likewise. > >>> (aarch64_set_current_function): Update to initialize pcs value. > >>> * config/aarch64/aarch64.h (enum arm_pcs): Move location earlier > >>> in > >>> file. > >>> (machine_function) Add pcs value. > >>> --- > >>> gcc/config/aarch64/aarch64.cc | 29 +++++++++++++++++++++++++---- > >>> gcc/config/aarch64/aarch64.h | 31 +++++++++++++++++-------------- > >>> 2 files changed, 42 insertions(+), 18 deletions(-) > >>> > >>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > >>> index 60608a19078..2e6e468e62b 100644 > >>> --- a/gcc/config/aarch64/aarch64.cc > >>> +++ b/gcc/config/aarch64/aarch64.cc > >>> @@ -728,6 +728,22 @@ aarch64_merge_string_arguments (tree args, tree > >>> old_attr, > >>> return !use_old_attr; > >>> } > >>> > >>> +/* Get the PCS for a decl and store the result to avoid needing to do > >>> + too many calls to fndecl_abi which can be expensive. */ > >>> + > >>> +static arm_pcs > >>> +aarch64_fndecl_abi (tree fn) > >>> +{ > >>> + gcc_assert (TREE_CODE (fn) == FUNCTION_DECL); > > > > ^^ I wonder if the asserts could be gcc_checking_asserts() if this is hot > > code. > I think this was requested in a review but I forgot it. > > > >>> + struct function *fun = DECL_STRUCT_FUNCTION (fn); > >>> + if (!fun) > > > > I am going to make this ^^^ if (!fun || !fun->machine) > That sounds good thank you! > > > >>> + return arm_pcs (fndecl_abi (fn).id ()); > >>> + if (fun->machine->pcs == ARM_PCS_UNKNOWN) > >>> + fun->machine->pcs = arm_pcs (fndecl_abi (fn).id ()); > >>> + > >>> + return fun->machine->pcs; > >>> +} > >>> + > >>> /* Check whether an 'aarch64_vector_pcs' attribute is valid. */ > >>> > >>> static tree > >>> @@ -7896,8 +7912,7 @@ function_arg_preserve_none_regno_p (unsigned regno) > >>> bool > >>> aarch64_function_arg_regno_p (unsigned regno) > >>> { > >>> - enum arm_pcs pcs > >>> - = cfun ? (arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64; > >>> + enum arm_pcs pcs = cfun ? aarch64_fndecl_abi (cfun->decl) : > >>> ARM_PCS_AAPCS64; > >>> > >>> switch (pcs) > >>> { > >>> @@ -10764,7 +10779,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk > >>> ATTRIBUTE_UNUSED, > >>> funexp = XEXP (DECL_RTL (function), 0); > >>> funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); > >>> auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function)); > >>> - auto pcs_variant = arm_pcs (fndecl_abi (function).id ()); > >>> + auto pcs_variant = aarch64_fndecl_abi (function); > >>> bool ir = lookup_attribute ("indirect_return", > >>> TYPE_ATTRIBUTES (TREE_TYPE (function))); > >>> rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant, ir); > >>> @@ -19735,6 +19750,7 @@ aarch64_init_machine_status (void) > >>> { > >>> struct machine_function *machine; > >>> machine = ggc_cleared_alloc<machine_function> (); > >>> + machine->pcs = ARM_PCS_UNKNOWN; > >>> return machine; > >>> } > >>> > >>> @@ -19891,6 +19907,11 @@ aarch64_set_current_function (tree fndecl) > >>> > >>> aarch64_previous_fndecl = fndecl; > >>> > >>> + /* Initialize the PCS value to UNKNOWN. */ > >>> + if (fndecl && TREE_CODE (fndecl) == FUNCTION_DECL) > >>> + if (function *fn = DECL_STRUCT_FUNCTION (fndecl)) > >>> + fn->machine->pcs = ARM_PCS_UNKNOWN; > >>> + > >>> /* First set the target options. */ > >>> cl_target_option_restore (&global_options, &global_options_set, > >>> TREE_TARGET_OPTION (new_tree)); > >>> @@ -25648,7 +25669,7 @@ static bool > >>> aarch64_is_variant_pcs (tree fndecl) > >>> { > >>> /* Check for ABIs that preserve more registers than usual. */ > >>> - arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id (); > >>> + arm_pcs pcs = aarch64_fndecl_abi (fndecl); > >>> if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs == > >>> ARM_PCS_PRESERVE_NONE) > >>> return true; > >>> > >>> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > >>> index 0e596b59744..de0db894c7a 100644 > >>> --- a/gcc/config/aarch64/aarch64.h > >>> +++ b/gcc/config/aarch64/aarch64.h > >>> @@ -984,6 +984,20 @@ extern enum aarch64_cpu aarch64_tune; > >>> > >>> #define DEFAULT_PCC_STRUCT_RETURN 0 > >>> > >>> +/* The set of available Procedure Call Stardards. */ > >>> + > >>> +enum arm_pcs > >>> +{ > >>> + ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ > >>> + ARM_PCS_SIMD, /* For aarch64_vector_pcs > >>> functions. */ > >>> + ARM_PCS_SVE, /* For functions that pass or return > >>> + values in SVE registers. */ > >>> + ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ > >>> + ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved > >>> + registers except X29. */ > >>> + ARM_PCS_UNKNOWN > >>> +}; > >>> + > >>> #if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H) > >>> struct GTY (()) aarch64_frame > >>> { > >>> @@ -1151,6 +1165,9 @@ typedef struct GTY (()) machine_function > >>> > >>> /* During SEH output, this is non-null. */ > >>> struct seh_frame_state * GTY ((skip (""))) seh; > >>> + > >>> + /* The Procedure Call Standard for the function. */ > >>> + enum arm_pcs pcs; > >>> } machine_function; > >>> #endif > >>> #endif > >>> @@ -1168,20 +1185,6 @@ enum aarch64_abi_type > >>> > >>> #define TARGET_ILP32 (aarch64_abi & AARCH64_ABI_ILP32) > >>> > >>> -enum arm_pcs > >>> -{ > >>> - ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ > >>> - ARM_PCS_SIMD, /* For aarch64_vector_pcs > >>> functions. */ > >>> - ARM_PCS_SVE, /* For functions that pass or return > >>> - values in SVE registers. */ > >>> - ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ > >>> - ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved > >>> - registers except X29. */ > >>> - ARM_PCS_UNKNOWN > >>> -}; > >>> - > >>> - > >>> - > >>> > >>> /* We can't use machine_mode inside a generator file because it > >>> hasn't been created yet; we shouldn't be using any code that > >>> -- > >>> 2.34.1 > >>> > > >
