On Sat, Jul 26, 2025 at 8:15 AM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> When I did r16-1067-gaa935ce40a7, I thought it would be
> enough to mark the decl as mergable to get it to merge on
> all targets. Turns out a few things needed to be changed
> to support it being mergable on all targets.
> The first thing is improve the selecting of the mergable
> section and instead of basing it on the DECL's mode, it
> should be based on the size instead.
> The second thing that needed to be happen is change the
> alignment of the CSWTCH decl to be aligned to the next power
> of 2 compared to the size if the size is less than 32bytes
> (the max mergable size that is supported).
>
> With these changes, cswtch-6.c passes on ia32 and other targets.
> And the new testcase cswtch-7.c will pass now too.
>
> Note I noticed the darwin's darwin_mergeable_constant_section could
> be "fixed" up to use DECL_SIZE instead of the DECL_MODE but I am not
> sure it makes a huge difference.
>
> Bootstrapped and tested on x86_64-linux-gnu.
>
>         PR middle-end/120523
> gcc/ChangeLog:
>
>         * output.h (mergeable_constant_section): New declaration taking
>         unsigned HOST_WIDE_INT for the size.
>         * tree-switch-conversion.cc (switch_conversion::build_one_array):
>         Increase the alignment of CSWTCH for sizes less than 32bytes.
>         * varasm.cc (mergeable_constant_section): Split out twice.
>         One that takes the size in unsigned HOST_WIDE_INT and the
>         other size in a tree.
>         (default_elf_select_section): Pass DECL_SIZE instead of
>         DECL_MODE to mergeable_constant_section.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/cswtch-7.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  gcc/output.h                             |  3 ++
>  gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c | 48 ++++++++++++++++++++++++
>  gcc/tree-switch-conversion.cc            | 11 ++++++
>  gcc/varasm.cc                            | 44 ++++++++++++++++++----
>  4 files changed, 99 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c
>
> diff --git a/gcc/output.h b/gcc/output.h
> index 0c329ffac3c..5cc72ebee37 100644
> --- a/gcc/output.h
> +++ b/gcc/output.h
> @@ -557,6 +557,9 @@ extern rtx get_section_anchor (struct object_block *, 
> HOST_WIDE_INT,
>  extern section *mergeable_constant_section (machine_mode,
>                                             unsigned HOST_WIDE_INT,
>                                             unsigned int);
> +extern section *mergeable_constant_section (unsigned HOST_WIDE_INT,
> +                                           unsigned HOST_WIDE_INT,
> +                                           unsigned int);
>  extern section *function_section (tree);
>  extern section *unlikely_text_section (void);
>  extern section *current_function_section (void);
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c
> new file mode 100644
> index 00000000000..7b79780771b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c
> @@ -0,0 +1,48 @@
> +/* PR tree-optimization/120523 */
> +/* PR tree-optimization/120451 */
> +/* { dg-do compile { target elf } } */
> +/* { dg-options "-O2" } */
> +
> +void foo (int, int);
> +
> +__attribute__((noinline, noclone)) void
> +f1 (int v, int w)
> +{
> +  int i, j;
> +  if (w)
> +    {
> +      i = 129;
> +      j = i - 1;
> +      goto lab;
> +    }
> +  switch (v)
> +    {
> +    case 170:
> +      j = 7;
> +      i = 27;
> +      break;
> +    case 171:
> +      i = 8;
> +      j = 122;
> +      break;
> +    case 172:
> +      i = 21;
> +      j = -19;
> +      break;
> +    case 173:
> +      i = 18;
> +      j = 17;
> +      break;
> +    case 174:
> +      i = 33;
> +      j = 55;
> +      break;
> +    default:
> +      __builtin_abort ();
> +    }
> +
> + lab:
> +  foo (i, j);
> +}
> +
> +/* { dg-final { scan-assembler ".rodata.cst32" } } */
> diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
> index d0882879e61..b3ae93d57e1 100644
> --- a/gcc/tree-switch-conversion.cc
> +++ b/gcc/tree-switch-conversion.cc
> @@ -1033,6 +1033,17 @@ switch_conversion::build_one_array (int num, tree 
> arr_index_type,
>        /* The decl is mergable since we don't take the address ever and
>          just reading from it. */
>        DECL_MERGEABLE (decl) = 1;
> +
> +      /* Increase the alignments as needed. */
> +      if (tree_to_uhwi (DECL_SIZE (decl)) > DECL_ALIGN (decl))
> +       {
> +         unsigned HOST_WIDE_INT s = tree_to_uhwi (DECL_SIZE (decl));
> +         /* Only support up to 32bytes since that is what is
> +            supported for merging. */
> +         if (s <= 256)

Shouldn't this be BIGGEST_ALIGNMENT instead?  Or if 32bytes is really
hard-coded somewhere it should be a #define in output.h instead?

> +           SET_DECL_ALIGN (decl, HOST_WIDE_INT_1U << ceil_log2 (s));
> +       }
> +
>        if (offloading_function_p (cfun->decl))
>         DECL_ATTRIBUTES (decl)
>           = tree_cons (get_identifier ("omp declare target"), NULL_TREE,
> diff --git a/gcc/varasm.cc b/gcc/varasm.cc
> index 8384e8b635c..c6ca9b0e3ec 100644
> --- a/gcc/varasm.cc
> +++ b/gcc/varasm.cc
> @@ -919,14 +919,12 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED,
>  /* Return the section to use for constant merging.  */
>
>  section *
> -mergeable_constant_section (machine_mode mode ATTRIBUTE_UNUSED,
> -                           unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED,
> -                           unsigned int flags ATTRIBUTE_UNUSED)
> +mergeable_constant_section (unsigned HOST_WIDE_INT size_bits,
> +                           unsigned HOST_WIDE_INT align,
> +                           unsigned int flags)
>  {
>    if (HAVE_GAS_SHF_MERGE && flag_merge_constants
> -      && mode != VOIDmode
> -      && mode != BLKmode
> -      && known_le (GET_MODE_BITSIZE (mode), align)
> +      && size_bits <= align
>        && align >= 8
>        && align <= 256

Ah, here ...

Otherwise LGTM.

Richard.

>        && (align & (align - 1)) == 0)
> @@ -940,6 +938,38 @@ mergeable_constant_section (machine_mode mode 
> ATTRIBUTE_UNUSED,
>      }
>    return readonly_data_section;
>  }
> +
> +
> +/* Return the section to use for constant merging. Like the above
> +   but the size stored as a tree.  */
> +static section *
> +mergeable_constant_section (tree size_bits,
> +                           unsigned HOST_WIDE_INT align,
> +                           unsigned int flags)
> +{
> +  if (!size_bits || !tree_fits_uhwi_p (size_bits))
> +    return readonly_data_section;
> +  return mergeable_constant_section (tree_to_uhwi (size_bits), align, flags);
> +}
> +
> +
> +/* Return the section to use for constant merging. Like the above
> +   but given a mode rather than the size.  */
> +
> +section *
> +mergeable_constant_section (machine_mode mode,
> +                           unsigned HOST_WIDE_INT align,
> +                           unsigned int flags)
> +{
> +  /* If the mode is unknown (BLK or VOID), then return a non mergable 
> section.  */
> +  if (mode == BLKmode || mode == VOIDmode)
> +    return readonly_data_section;
> +  unsigned HOST_WIDE_INT size;
> +  if (!GET_MODE_BITSIZE (mode).is_constant (&size))
> +    return readonly_data_section;
> +  return mergeable_constant_section (size, align, flags);
> +}
> +
>
>  /* Given NAME, a putative register name, discard any customary prefixes.  */
>
> @@ -7453,7 +7483,7 @@ default_elf_select_section (tree decl, int reloc,
>      case SECCAT_RODATA_MERGE_STR_INIT:
>        return mergeable_string_section (DECL_INITIAL (decl), align, 0);
>      case SECCAT_RODATA_MERGE_CONST:
> -      return mergeable_constant_section (DECL_MODE (decl), align, 0);
> +      return mergeable_constant_section (DECL_SIZE (decl), align, 0);
>      case SECCAT_SRODATA:
>        sname = ".sdata2";
>        break;
> --
> 2.43.0
>

Reply via email to