On Sat, Jul 26, 2025 at 8:15 AM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > When I did r16-1067-gaa935ce40a7, I thought it would be > enough to mark the decl as mergable to get it to merge on > all targets. Turns out a few things needed to be changed > to support it being mergable on all targets. > The first thing is improve the selecting of the mergable > section and instead of basing it on the DECL's mode, it > should be based on the size instead. > The second thing that needed to be happen is change the > alignment of the CSWTCH decl to be aligned to the next power > of 2 compared to the size if the size is less than 32bytes > (the max mergable size that is supported). > > With these changes, cswtch-6.c passes on ia32 and other targets. > And the new testcase cswtch-7.c will pass now too. > > Note I noticed the darwin's darwin_mergeable_constant_section could > be "fixed" up to use DECL_SIZE instead of the DECL_MODE but I am not > sure it makes a huge difference. > > Bootstrapped and tested on x86_64-linux-gnu. > > PR middle-end/120523 > gcc/ChangeLog: > > * output.h (mergeable_constant_section): New declaration taking > unsigned HOST_WIDE_INT for the size. > * tree-switch-conversion.cc (switch_conversion::build_one_array): > Increase the alignment of CSWTCH for sizes less than 32bytes. > * varasm.cc (mergeable_constant_section): Split out twice. > One that takes the size in unsigned HOST_WIDE_INT and the > other size in a tree. > (default_elf_select_section): Pass DECL_SIZE instead of > DECL_MODE to mergeable_constant_section. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/cswtch-7.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/output.h | 3 ++ > gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c | 48 ++++++++++++++++++++++++ > gcc/tree-switch-conversion.cc | 11 ++++++ > gcc/varasm.cc | 44 ++++++++++++++++++---- > 4 files changed, 99 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c > > diff --git a/gcc/output.h b/gcc/output.h > index 0c329ffac3c..5cc72ebee37 100644 > --- a/gcc/output.h > +++ b/gcc/output.h > @@ -557,6 +557,9 @@ extern rtx get_section_anchor (struct object_block *, > HOST_WIDE_INT, > extern section *mergeable_constant_section (machine_mode, > unsigned HOST_WIDE_INT, > unsigned int); > +extern section *mergeable_constant_section (unsigned HOST_WIDE_INT, > + unsigned HOST_WIDE_INT, > + unsigned int); > extern section *function_section (tree); > extern section *unlikely_text_section (void); > extern section *current_function_section (void); > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c > b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c > new file mode 100644 > index 00000000000..7b79780771b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c > @@ -0,0 +1,48 @@ > +/* PR tree-optimization/120523 */ > +/* PR tree-optimization/120451 */ > +/* { dg-do compile { target elf } } */ > +/* { dg-options "-O2" } */ > + > +void foo (int, int); > + > +__attribute__((noinline, noclone)) void > +f1 (int v, int w) > +{ > + int i, j; > + if (w) > + { > + i = 129; > + j = i - 1; > + goto lab; > + } > + switch (v) > + { > + case 170: > + j = 7; > + i = 27; > + break; > + case 171: > + i = 8; > + j = 122; > + break; > + case 172: > + i = 21; > + j = -19; > + break; > + case 173: > + i = 18; > + j = 17; > + break; > + case 174: > + i = 33; > + j = 55; > + break; > + default: > + __builtin_abort (); > + } > + > + lab: > + foo (i, j); > +} > + > +/* { dg-final { scan-assembler ".rodata.cst32" } } */ > diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc > index d0882879e61..b3ae93d57e1 100644 > --- a/gcc/tree-switch-conversion.cc > +++ b/gcc/tree-switch-conversion.cc > @@ -1033,6 +1033,17 @@ switch_conversion::build_one_array (int num, tree > arr_index_type, > /* The decl is mergable since we don't take the address ever and > just reading from it. */ > DECL_MERGEABLE (decl) = 1; > + > + /* Increase the alignments as needed. */ > + if (tree_to_uhwi (DECL_SIZE (decl)) > DECL_ALIGN (decl)) > + { > + unsigned HOST_WIDE_INT s = tree_to_uhwi (DECL_SIZE (decl)); > + /* Only support up to 32bytes since that is what is > + supported for merging. */ > + if (s <= 256)
Shouldn't this be BIGGEST_ALIGNMENT instead? Or if 32bytes is really hard-coded somewhere it should be a #define in output.h instead? > + SET_DECL_ALIGN (decl, HOST_WIDE_INT_1U << ceil_log2 (s)); > + } > + > if (offloading_function_p (cfun->decl)) > DECL_ATTRIBUTES (decl) > = tree_cons (get_identifier ("omp declare target"), NULL_TREE, > diff --git a/gcc/varasm.cc b/gcc/varasm.cc > index 8384e8b635c..c6ca9b0e3ec 100644 > --- a/gcc/varasm.cc > +++ b/gcc/varasm.cc > @@ -919,14 +919,12 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED, > /* Return the section to use for constant merging. */ > > section * > -mergeable_constant_section (machine_mode mode ATTRIBUTE_UNUSED, > - unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED, > - unsigned int flags ATTRIBUTE_UNUSED) > +mergeable_constant_section (unsigned HOST_WIDE_INT size_bits, > + unsigned HOST_WIDE_INT align, > + unsigned int flags) > { > if (HAVE_GAS_SHF_MERGE && flag_merge_constants > - && mode != VOIDmode > - && mode != BLKmode > - && known_le (GET_MODE_BITSIZE (mode), align) > + && size_bits <= align > && align >= 8 > && align <= 256 Ah, here ... Otherwise LGTM. Richard. > && (align & (align - 1)) == 0) > @@ -940,6 +938,38 @@ mergeable_constant_section (machine_mode mode > ATTRIBUTE_UNUSED, > } > return readonly_data_section; > } > + > + > +/* Return the section to use for constant merging. Like the above > + but the size stored as a tree. */ > +static section * > +mergeable_constant_section (tree size_bits, > + unsigned HOST_WIDE_INT align, > + unsigned int flags) > +{ > + if (!size_bits || !tree_fits_uhwi_p (size_bits)) > + return readonly_data_section; > + return mergeable_constant_section (tree_to_uhwi (size_bits), align, flags); > +} > + > + > +/* Return the section to use for constant merging. Like the above > + but given a mode rather than the size. */ > + > +section * > +mergeable_constant_section (machine_mode mode, > + unsigned HOST_WIDE_INT align, > + unsigned int flags) > +{ > + /* If the mode is unknown (BLK or VOID), then return a non mergable > section. */ > + if (mode == BLKmode || mode == VOIDmode) > + return readonly_data_section; > + unsigned HOST_WIDE_INT size; > + if (!GET_MODE_BITSIZE (mode).is_constant (&size)) > + return readonly_data_section; > + return mergeable_constant_section (size, align, flags); > +} > + > > /* Given NAME, a putative register name, discard any customary prefixes. */ > > @@ -7453,7 +7483,7 @@ default_elf_select_section (tree decl, int reloc, > case SECCAT_RODATA_MERGE_STR_INIT: > return mergeable_string_section (DECL_INITIAL (decl), align, 0); > case SECCAT_RODATA_MERGE_CONST: > - return mergeable_constant_section (DECL_MODE (decl), align, 0); > + return mergeable_constant_section (DECL_SIZE (decl), align, 0); > case SECCAT_SRODATA: > sname = ".sdata2"; > break; > -- > 2.43.0 >