On Fri, 2021-05-07 at 12:19 +0930, Alan Modra via Gcc-patches wrote:
> PowerPC64 ELFv2 dual entry point functions have a couple of problems
> with -fpatchable-function-entry. One is that the nops added after the
> global entry land in the global entry code which is constrained to be
> a power of two number of instructions, and zero global entry code has
> special meaning for linkage. The other is that the global entry code
> isn't always used on function entry, and some uses of
> -fpatchable-function-entry might want to affect all entries to the
> function. So this patch arranges to put one batch of nops before the
> global entry, and the other after the local entry point.
>
Hi,
Description good. :-)
> PR target/98125
> * config/rs6000/rs6000.c (rs6000_print_patchable_function_entry): New
> function.
> (TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY): Define.
> * config/rs6000/rs6000-logue.c: Include targhooks.h.
> (rs6000_output_function_prologue): Handle nops for
> -fpatchable-function-entry after local entry point.
>
> diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c
> index b0ac183ceff..ffa3bb3dcf1 100644
> --- a/gcc/config/rs6000/rs6000-logue.c
> +++ b/gcc/config/rs6000/rs6000-logue.c
> @@ -51,6 +51,7 @@
> #include "gstab.h" /* for N_SLINE */
> #include "dbxout.h" /* dbxout_ */
> #endif
> +#include "targhooks.h"
>
> static int rs6000_ra_ever_killed (void);
> static void is_altivec_return_reg (rtx, void *);
> @@ -3991,6 +3992,10 @@ rs6000_output_function_prologue (FILE *file)
> fputs (",1\n", file);
> }
>
> + int nops_after_entry = crtl->patch_area_size - crtl->patch_area_entry;
> + if (nops_after_entry > 0)
> + default_print_patchable_function_entry (file, nops_after_entry, false);
> +
> /* Output -mprofile-kernel code. This needs to be done here instead of
> in output_function_profile since it must go after the ELFv2 ABI
> local entry point. */
ok
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index d43c36e7f1a..97f1b3e0674 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1404,6 +1404,10 @@ static const struct attribute_spec rs6000_attribute_table[] =
> #undef TARGET_ASM_FUNCTION_EPILOGUE
> #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
>
> +#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
> +#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
> + rs6000_print_patchable_function_entry
> +
ok
> #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
> #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
>
> @@ -14953,6 +14957,33 @@ rs6000_assemble_visibility (tree decl, int vis)
> }
> #endif
>
> +/* Write NOPs into the asm outfile FILE around a function entry. This
> + routine may be called twice per function to put NOPs before and after
> + the function entry. If RECORD_P is true the location of the NOPs will
> + be recorded by default_print_patchable_function_entry in a special
> + object section called "__patchable_function_entries". Disable output
> + of any NOPs for the second call. Those, if any, are output by
> + rs6000_output_function_prologue. This means that for ELFv2 any NOPs
> + after the function entry are placed after the local entry point, not
> + the global entry point. NOPs after the entry may be found at
> + record_loc + nops_before * 4 + local_entry_offset. This holds true
> + when nops_before is zero. */
> +
> +static void
> +rs6000_print_patchable_function_entry (FILE *file,
> + unsigned HOST_WIDE_INT patch_area_size ATTRIBUTE_UNUSED,
> + bool record_p)
> +{
> + /* Always call default_print_patchable_function_entry when RECORD_P in
Perhaps "when RECORD_P is true"? (It's implied, but I like to be specific.)
> + order to output the location of the NOPs, but use the size of the
> + area before the entry on both possible calls. If RECORD_P is true
> + on the second call then the area before the entry was zero size and
> + thus no NOPs will be output. */
> + if (record_p)
> + default_print_patchable_function_entry (file, crtl->patch_area_entry,
> + record_p);
> +}
ok.
LGTM, thanks.
-Will
> +
> enum rtx_code
> rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
> {