On Tue, Nov 18, 2025 at 9:03 AM Alfie Richards <[email protected]> wrote: > > Hi All, > > This fixes the compile time regression noted by Andrew Pinski. > > The compilation speed regression was caused by my patch requirig querying > fndecl_abi signifficantly more than previously. This fixes this by caching > the pcs in the machine_function struct. > > Regr tested and bootstrapped on aarch64-linux-gnu. > > Okay for master?
Just to confirm this did fix the regression and thanks again for looking into the compile time regression. links to the graphs: -O0: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=358.576.8 -Og: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=349.576.8 Thanks, Andrew Pinski > > Alfie > > -- >8 -- > > As aarch64_function_arg_regno_p is a very hot function called many times for a > function it should not call fndecl_abi every time as it is expensive and > unecessasary. > > This caches the result and in doing so fixes a regression in compilation speed > introduced by r16-5076-g7197d8062fddc2. > > gcc/ChangeLog: > > * config/aarch64/aarch64.cc (aarch64_fndecl_abi): New function. > (aarch64_function_arg_regno_p): Update to use aarch64_fndecl_abi. > (aarch64_output_mi_thunk): Likewise. > (aarch64_is_variant_pcs): Likewise. > (aarch64_set_current_function): Update to initialize pcs value. > * config/aarch64/aarch64.h (enum arm_pcs): Move location earlier in > file. > (machine_function) Add pcs value. > --- > gcc/config/aarch64/aarch64.cc | 29 +++++++++++++++++++++++++---- > gcc/config/aarch64/aarch64.h | 31 +++++++++++++++++-------------- > 2 files changed, 42 insertions(+), 18 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 60608a19078..2e6e468e62b 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -728,6 +728,22 @@ aarch64_merge_string_arguments (tree args, tree old_attr, > return !use_old_attr; > } > > +/* Get the PCS for a decl and store the result to avoid needing to do > + too many calls to fndecl_abi which can be expensive. */ > + > +static arm_pcs > +aarch64_fndecl_abi (tree fn) > +{ > + gcc_assert (TREE_CODE (fn) == FUNCTION_DECL); > + struct function *fun = DECL_STRUCT_FUNCTION (fn); > + if (!fun) > + return arm_pcs (fndecl_abi (fn).id ()); > + if (fun->machine->pcs == ARM_PCS_UNKNOWN) > + fun->machine->pcs = arm_pcs (fndecl_abi (fn).id ()); > + > + return fun->machine->pcs; > +} > + > /* Check whether an 'aarch64_vector_pcs' attribute is valid. */ > > static tree > @@ -7896,8 +7912,7 @@ function_arg_preserve_none_regno_p (unsigned regno) > bool > aarch64_function_arg_regno_p (unsigned regno) > { > - enum arm_pcs pcs > - = cfun ? (arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64; > + enum arm_pcs pcs = cfun ? aarch64_fndecl_abi (cfun->decl) : > ARM_PCS_AAPCS64; > > switch (pcs) > { > @@ -10764,7 +10779,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk > ATTRIBUTE_UNUSED, > funexp = XEXP (DECL_RTL (function), 0); > funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); > auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function)); > - auto pcs_variant = arm_pcs (fndecl_abi (function).id ()); > + auto pcs_variant = aarch64_fndecl_abi (function); > bool ir = lookup_attribute ("indirect_return", > TYPE_ATTRIBUTES (TREE_TYPE (function))); > rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant, ir); > @@ -19735,6 +19750,7 @@ aarch64_init_machine_status (void) > { > struct machine_function *machine; > machine = ggc_cleared_alloc<machine_function> (); > + machine->pcs = ARM_PCS_UNKNOWN; > return machine; > } > > @@ -19891,6 +19907,11 @@ aarch64_set_current_function (tree fndecl) > > aarch64_previous_fndecl = fndecl; > > + /* Initialize the PCS value to UNKNOWN. */ > + if (fndecl && TREE_CODE (fndecl) == FUNCTION_DECL) > + if (function *fn = DECL_STRUCT_FUNCTION (fndecl)) > + fn->machine->pcs = ARM_PCS_UNKNOWN; > + > /* First set the target options. */ > cl_target_option_restore (&global_options, &global_options_set, > TREE_TARGET_OPTION (new_tree)); > @@ -25648,7 +25669,7 @@ static bool > aarch64_is_variant_pcs (tree fndecl) > { > /* Check for ABIs that preserve more registers than usual. */ > - arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id (); > + arm_pcs pcs = aarch64_fndecl_abi (fndecl); > if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs == > ARM_PCS_PRESERVE_NONE) > return true; > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index 0e596b59744..de0db894c7a 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -984,6 +984,20 @@ extern enum aarch64_cpu aarch64_tune; > > #define DEFAULT_PCC_STRUCT_RETURN 0 > > +/* The set of available Procedure Call Stardards. */ > + > +enum arm_pcs > +{ > + ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ > + ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. > */ > + ARM_PCS_SVE, /* For functions that pass or return > + values in SVE registers. */ > + ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ > + ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved > + registers except X29. */ > + ARM_PCS_UNKNOWN > +}; > + > #if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H) > struct GTY (()) aarch64_frame > { > @@ -1151,6 +1165,9 @@ typedef struct GTY (()) machine_function > > /* During SEH output, this is non-null. */ > struct seh_frame_state * GTY ((skip (""))) seh; > + > + /* The Procedure Call Standard for the function. */ > + enum arm_pcs pcs; > } machine_function; > #endif > #endif > @@ -1168,20 +1185,6 @@ enum aarch64_abi_type > > #define TARGET_ILP32 (aarch64_abi & AARCH64_ABI_ILP32) > > -enum arm_pcs > -{ > - ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ > - ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. > */ > - ARM_PCS_SVE, /* For functions that pass or return > - values in SVE registers. */ > - ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ > - ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved > - registers except X29. */ > - ARM_PCS_UNKNOWN > -}; > - > - > - > > /* We can't use machine_mode inside a generator file because it > hasn't been created yet; we shouldn't be using any code that > -- > 2.34.1 >
