On Mon, Jul 28, 2025 at 2:13 AM Richard Biener
<richard.guent...@gmail.com> wrote:
>
> On Sat, Jul 26, 2025 at 8:15 AM Andrew Pinski <quic_apin...@quicinc.com> 
> wrote:
> >
> > When I did r16-1067-gaa935ce40a7, I thought it would be
> > enough to mark the decl as mergable to get it to merge on
> > all targets. Turns out a few things needed to be changed
> > to support it being mergable on all targets.
> > The first thing is improve the selecting of the mergable
> > section and instead of basing it on the DECL's mode, it
> > should be based on the size instead.
> > The second thing that needed to be happen is change the
> > alignment of the CSWTCH decl to be aligned to the next power
> > of 2 compared to the size if the size is less than 32bytes
> > (the max mergable size that is supported).
> >
> > With these changes, cswtch-6.c passes on ia32 and other targets.
> > And the new testcase cswtch-7.c will pass now too.
> >
> > Note I noticed the darwin's darwin_mergeable_constant_section could
> > be "fixed" up to use DECL_SIZE instead of the DECL_MODE but I am not
> > sure it makes a huge difference.
> >
> > Bootstrapped and tested on x86_64-linux-gnu.
> >
> >         PR middle-end/120523
> > gcc/ChangeLog:
> >
> >         * output.h (mergeable_constant_section): New declaration taking
> >         unsigned HOST_WIDE_INT for the size.
> >         * tree-switch-conversion.cc (switch_conversion::build_one_array):
> >         Increase the alignment of CSWTCH for sizes less than 32bytes.
> >         * varasm.cc (mergeable_constant_section): Split out twice.
> >         One that takes the size in unsigned HOST_WIDE_INT and the
> >         other size in a tree.
> >         (default_elf_select_section): Pass DECL_SIZE instead of
> >         DECL_MODE to mergeable_constant_section.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/tree-ssa/cswtch-7.c: New test.
> >
> > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> > ---
> >  gcc/output.h                             |  3 ++
> >  gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c | 48 ++++++++++++++++++++++++
> >  gcc/tree-switch-conversion.cc            | 11 ++++++
> >  gcc/varasm.cc                            | 44 ++++++++++++++++++----
> >  4 files changed, 99 insertions(+), 7 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c
> >
> > diff --git a/gcc/output.h b/gcc/output.h
> > index 0c329ffac3c..5cc72ebee37 100644
> > --- a/gcc/output.h
> > +++ b/gcc/output.h
> > @@ -557,6 +557,9 @@ extern rtx get_section_anchor (struct object_block *, 
> > HOST_WIDE_INT,
> >  extern section *mergeable_constant_section (machine_mode,
> >                                             unsigned HOST_WIDE_INT,
> >                                             unsigned int);
> > +extern section *mergeable_constant_section (unsigned HOST_WIDE_INT,
> > +                                           unsigned HOST_WIDE_INT,
> > +                                           unsigned int);
> >  extern section *function_section (tree);
> >  extern section *unlikely_text_section (void);
> >  extern section *current_function_section (void);
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c
> > new file mode 100644
> > index 00000000000..7b79780771b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c
> > @@ -0,0 +1,48 @@
> > +/* PR tree-optimization/120523 */
> > +/* PR tree-optimization/120451 */
> > +/* { dg-do compile { target elf } } */
> > +/* { dg-options "-O2" } */
> > +
> > +void foo (int, int);
> > +
> > +__attribute__((noinline, noclone)) void
> > +f1 (int v, int w)
> > +{
> > +  int i, j;
> > +  if (w)
> > +    {
> > +      i = 129;
> > +      j = i - 1;
> > +      goto lab;
> > +    }
> > +  switch (v)
> > +    {
> > +    case 170:
> > +      j = 7;
> > +      i = 27;
> > +      break;
> > +    case 171:
> > +      i = 8;
> > +      j = 122;
> > +      break;
> > +    case 172:
> > +      i = 21;
> > +      j = -19;
> > +      break;
> > +    case 173:
> > +      i = 18;
> > +      j = 17;
> > +      break;
> > +    case 174:
> > +      i = 33;
> > +      j = 55;
> > +      break;
> > +    default:
> > +      __builtin_abort ();
> > +    }
> > +
> > + lab:
> > +  foo (i, j);
> > +}
> > +
> > +/* { dg-final { scan-assembler ".rodata.cst32" } } */
> > diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
> > index d0882879e61..b3ae93d57e1 100644
> > --- a/gcc/tree-switch-conversion.cc
> > +++ b/gcc/tree-switch-conversion.cc
> > @@ -1033,6 +1033,17 @@ switch_conversion::build_one_array (int num, tree 
> > arr_index_type,
> >        /* The decl is mergable since we don't take the address ever and
> >          just reading from it. */
> >        DECL_MERGEABLE (decl) = 1;
> > +
> > +      /* Increase the alignments as needed. */
> > +      if (tree_to_uhwi (DECL_SIZE (decl)) > DECL_ALIGN (decl))
> > +       {
> > +         unsigned HOST_WIDE_INT s = tree_to_uhwi (DECL_SIZE (decl));
> > +         /* Only support up to 32bytes since that is what is
> > +            supported for merging. */
> > +         if (s <= 256)
>
> Shouldn't this be BIGGEST_ALIGNMENT instead?  Or if 32bytes is really
> hard-coded somewhere it should be a #define in output.h instead?
>
> > +           SET_DECL_ALIGN (decl, HOST_WIDE_INT_1U << ceil_log2 (s));
> > +       }
> > +
> >        if (offloading_function_p (cfun->decl))
> >         DECL_ATTRIBUTES (decl)
> >           = tree_cons (get_identifier ("omp declare target"), NULL_TREE,
> > diff --git a/gcc/varasm.cc b/gcc/varasm.cc
> > index 8384e8b635c..c6ca9b0e3ec 100644
> > --- a/gcc/varasm.cc
> > +++ b/gcc/varasm.cc
> > @@ -919,14 +919,12 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED,
> >  /* Return the section to use for constant merging.  */
> >
> >  section *
> > -mergeable_constant_section (machine_mode mode ATTRIBUTE_UNUSED,
> > -                           unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED,
> > -                           unsigned int flags ATTRIBUTE_UNUSED)
> > +mergeable_constant_section (unsigned HOST_WIDE_INT size_bits,
> > +                           unsigned HOST_WIDE_INT align,
> > +                           unsigned int flags)
> >  {
> >    if (HAVE_GAS_SHF_MERGE && flag_merge_constants
> > -      && mode != VOIDmode
> > -      && mode != BLKmode
> > -      && known_le (GET_MODE_BITSIZE (mode), align)
> > +      && size_bits <= align
> >        && align >= 8
> >        && align <= 256
>
> Ah, here ...
>
> Otherwise LGTM.

Attached is the follow up patch which I pushed along with this one
that changes to use the macro.
Also I looked into GNU binutils to see if there is a limit there and
the limit is 4GB for the alignment.

Thanks,
Andrew



>
> Richard.
>
> >        && (align & (align - 1)) == 0)
> > @@ -940,6 +938,38 @@ mergeable_constant_section (machine_mode mode 
> > ATTRIBUTE_UNUSED,
> >      }
> >    return readonly_data_section;
> >  }
> > +
> > +
> > +/* Return the section to use for constant merging. Like the above
> > +   but the size stored as a tree.  */
> > +static section *
> > +mergeable_constant_section (tree size_bits,
> > +                           unsigned HOST_WIDE_INT align,
> > +                           unsigned int flags)
> > +{
> > +  if (!size_bits || !tree_fits_uhwi_p (size_bits))
> > +    return readonly_data_section;
> > +  return mergeable_constant_section (tree_to_uhwi (size_bits), align, 
> > flags);
> > +}
> > +
> > +
> > +/* Return the section to use for constant merging. Like the above
> > +   but given a mode rather than the size.  */
> > +
> > +section *
> > +mergeable_constant_section (machine_mode mode,
> > +                           unsigned HOST_WIDE_INT align,
> > +                           unsigned int flags)
> > +{
> > +  /* If the mode is unknown (BLK or VOID), then return a non mergable 
> > section.  */
> > +  if (mode == BLKmode || mode == VOIDmode)
> > +    return readonly_data_section;
> > +  unsigned HOST_WIDE_INT size;
> > +  if (!GET_MODE_BITSIZE (mode).is_constant (&size))
> > +    return readonly_data_section;
> > +  return mergeable_constant_section (size, align, flags);
> > +}
> > +
> >
> >  /* Given NAME, a putative register name, discard any customary prefixes.  
> > */
> >
> > @@ -7453,7 +7483,7 @@ default_elf_select_section (tree decl, int reloc,
> >      case SECCAT_RODATA_MERGE_STR_INIT:
> >        return mergeable_string_section (DECL_INITIAL (decl), align, 0);
> >      case SECCAT_RODATA_MERGE_CONST:
> > -      return mergeable_constant_section (DECL_MODE (decl), align, 0);
> > +      return mergeable_constant_section (DECL_SIZE (decl), align, 0);
> >      case SECCAT_SRODATA:
> >        sname = ".sdata2";
> >        break;
> > --
> > 2.43.0
> >
From 7006fd1c8fc3e69e2de50f179736b68e77afc3f1 Mon Sep 17 00:00:00 2001
From: Andrew Pinski <quic_apinski@quicinc.com>
Date: Mon, 28 Jul 2025 20:22:53 -0700
Subject: [PATCH] output: Move an special # (256) to a new macro

This is a followup to the review of mergability of CSWTCH patch
located at https://gcc.gnu.org/pipermail/gcc-patches/2025-July/690810.html.
Moves the special # (256) to a macro so it is not used bare in the source
and there is only the need to change it in one place.

Pushed as obvious after build and test on x86_64.

gcc/ChangeLog:

	* output.h (MAX_ALIGN_MERGABLE): New define.
	* tree-switch-conversion.cc (switch_conversion::build_one_array):
	Use MAX_ALIGN_MERGABLE instead of 256.
	* varasm.cc (mergeable_string_section): Likewise
	(mergeable_constant_section): Likewise

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/output.h                  | 3 +++
 gcc/tree-switch-conversion.cc | 6 +++---
 gcc/varasm.cc                 | 6 +++---
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/output.h b/gcc/output.h
index 5cc72ebee37..51c2d36f8f6 100644
--- a/gcc/output.h
+++ b/gcc/output.h
@@ -545,6 +545,9 @@ extern GTY(()) section *bss_noswitch_section;
 extern GTY(()) section *in_section;
 extern GTY(()) bool in_cold_section_p;
 
+/* MAX bit alignment for mergable sections. */
+#define MAX_ALIGN_MERGABLE 256
+
 extern section *get_unnamed_section (unsigned int, void (*) (const char *),
 				     const char *);
 extern section *get_section (const char *, unsigned int, tree,
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index b3ae93d57e1..04b357fca4c 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -55,6 +55,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
 #include "hwint.h"
 #include "internal-fn.h"
 #include "diagnostic-core.h"
+#include "output.h"
 
 /* ??? For lang_hooks.types.type_for_mode, but is there a word_mode
    type in the GIMPLE type system that is language-independent?  */
@@ -1038,9 +1039,8 @@ switch_conversion::build_one_array (int num, tree arr_index_type,
       if (tree_to_uhwi (DECL_SIZE (decl)) > DECL_ALIGN (decl))
 	{
 	  unsigned HOST_WIDE_INT s = tree_to_uhwi (DECL_SIZE (decl));
-	  /* Only support up to 32bytes since that is what is
-	     supported for merging. */
-	  if (s <= 256)
+	  /* Only support up to the max supported for merging. */
+	  if (s <= MAX_ALIGN_MERGABLE)
 	    SET_DECL_ALIGN (decl, HOST_WIDE_INT_1U << ceil_log2 (s));
 	}
 
diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index 834cdefbe4f..000ad9e26f6 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -871,7 +871,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED,
   if (HAVE_GAS_SHF_MERGE && flag_merge_constants
       && TREE_CODE (decl) == STRING_CST
       && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE
-      && align <= 256
+      && align <= MAX_ALIGN_MERGABLE
       && (len = int_size_in_bytes (TREE_TYPE (decl))) > 0
       && TREE_STRING_LENGTH (decl) == len)
     {
@@ -885,7 +885,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED,
 
       mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (TREE_TYPE (decl)));
       modesize = GET_MODE_BITSIZE (mode);
-      if (modesize >= 8 && modesize <= 256
+      if (modesize >= 8 && modesize <= MAX_ALIGN_MERGABLE
 	  && (modesize & (modesize - 1)) == 0)
 	{
 	  if (align < modesize)
@@ -926,7 +926,7 @@ mergeable_constant_section (unsigned HOST_WIDE_INT size_bits,
   if (HAVE_GAS_SHF_MERGE && flag_merge_constants
       && size_bits <= align
       && align >= 8
-      && align <= 256
+      && align <= MAX_ALIGN_MERGABLE
       && (align & (align - 1)) == 0)
     {
       const char *prefix = function_mergeable_rodata_prefix ();
-- 
2.43.0

Reply via email to