[PATCH] i386: Enable small loop unrolling for O2

2022-10-25 Thread Hongyu Wang via Gcc-patches
Hi,

Inspired by rs6000 and s390 port changes, this patch
enables loop unrolling for small size loop at O2 by default.
The default behavior is to unroll loop with unknown trip-count and
less than 4 insns by 1 time.

This improves 548.exchange2 by 3.5% on icelake and 6% on zen3 with
1.2% codesize increment. For other benchmarks the variants are minor
and overall codesize increased by 0.2%.

The kernel image size increased by 0.06%, and no impact on eembc.

Bootstrapped & regrtested on x86_64-pc-linux-gnu.

Ok for trunk?

gcc/ChangeLog:

* common/config/i386/i386-common.cc (ix86_optimization_table):
Enable loop unroll and small loop unroll at O2 by default.
* config/i386/i386-options.cc
(ix86_override_options_after_change):
Disable small loop unroll when funroll-loops enabled, reset
cunroll_grow_size when it is not explicitly enabled.
(ix86_option_override_internal): Call
ix86_override_options_after_change instead of calling
ix86_recompute_optlev_based_flags and ix86_default_align
separately.
* config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll
factor if -munroll-only-small-loops enabled.
* config/i386/i386.opt: Add -munroll-only-small-loops,
-param=x86-small-unroll-ninsns= for loop insn limit,
-param=x86-small-unroll-factor= for unroll factor.
* doc/invoke.texi: Document -munroll-only-small-loops,
x86-small-unroll-ninsns and x86-small-unroll-factor.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr86270.c: Add -mno-unroll-only-small-loops.
* gcc.target/i386/pr93002.c: Likewise.
---
 gcc/common/config/i386/i386-common.cc   |  6 
 gcc/config/i386/i386-options.cc | 40 ++---
 gcc/config/i386/i386.cc | 13 
 gcc/config/i386/i386.opt| 13 
 gcc/doc/invoke.texi | 14 +
 gcc/testsuite/gcc.target/i386/pr86270.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr93002.c |  2 +-
 7 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index d6a68dc9b1d..0e580b39d14 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1686,6 +1686,12 @@ static const struct default_options 
ix86_option_optimization_table[] =
 /* The STC algorithm produces the smallest code at -Os, for x86.  */
 { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL,
   REORDER_BLOCKS_ALGORITHM_STC },
+{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
+/* Turns off -frename-registers and -fweb which are enabled by
+   funroll-loops.  */
+{ OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
+{ OPT_LEVELS_ALL, OPT_fweb, NULL, 0 },
 /* Turn off -fschedule-insns by default.  It tends to make the
problem with not enough registers even worse.  */
 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index acb2291e70f..6ea347c32e1 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1819,8 +1819,43 @@ ix86_recompute_optlev_based_flags (struct gcc_options 
*opts,
 void
 ix86_override_options_after_change (void)
 {
+  /* Default align_* from the processor table.  */
   ix86_default_align (_options);
+
   ix86_recompute_optlev_based_flags (_options, _options_set);
+
+  /* Disable unrolling small loops when there's explicit
+ -f{,no}unroll-loop.  */
+  if ((OPTION_SET_P (flag_unroll_loops))
+ || (OPTION_SET_P (flag_unroll_all_loops)
+&& flag_unroll_all_loops))
+{
+  if (!OPTION_SET_P (ix86_unroll_only_small_loops))
+   ix86_unroll_only_small_loops = 0;
+  /* Re-enable -frename-registers and -fweb if funroll-loops
+enabled.  */
+  if (!OPTION_SET_P (flag_web))
+   flag_web = flag_unroll_loops;
+  if (!OPTION_SET_P (flag_rename_registers))
+   flag_rename_registers = flag_unroll_loops;
+  if (!OPTION_SET_P (flag_cunroll_grow_size))
+   flag_cunroll_grow_size = flag_unroll_loops
+|| flag_peel_loops
+|| optimize >= 3;
+}
+  else
+{
+  if (!OPTION_SET_P (flag_cunroll_grow_size))
+   flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
+  /* Disables loop unrolling if -mno-unroll-only-small-loops is
+explicitly set and -funroll-loops is not enabled.  */
+  if (OPTION_SET_P (ix86_unroll_only_small_loops)
+ && !ix86_unroll_only_small_loops
+ && !(OPTION_SET_P (flag_unroll_loops)
+  || OPTION_SET_P (flag_unroll_all_loops)))
+   flag_unroll_loops = flag_unroll_all_loops = 0;
+}
+
 }
 
 /* Clear stack slot assignments remembered from previous 

Re: [PATCH] RISC-V: Recognized Svinval and Svnapot extensions

2022-10-25 Thread Kito Cheng via Gcc-patches
On Tue, Oct 25, 2022 at 9:37 PM Bernhard Reutner-Fischer via
Gcc-patches  wrote:
>
> On 25 October 2022 08:17:33 CEST, Monk Chiang  wrote:
> >gcc/ChangeLog:
> >
>
> >diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> >index 55e0bc0a0e9..63ac56a8ca0 100644
> >--- a/gcc/config/riscv/riscv-opts.h
> >+++ b/gcc/config/riscv/riscv-opts.h
> >@@ -162,6 +162,12 @@ enum stack_protector_guard {
> > #define MASK_ZMMUL  (1 << 0)
> > #define TARGET_ZMMUL((riscv_zm_subext & MASK_ZMMUL) != 0)
> >
> >+#define MASK_SVINVAL (1 << 0)
> >+#define MASK_SVNAPOT (1 << 1)
> >+
> >+#define TARGET_SVINVAL ((riscv_sv_subext & MASK_SVINVAL) != 0)
> >+#define TARGET_SVNAPOT ((riscv_sv_subext & MASK_SVNAPOT) != 0)
> >+
> > /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
> >set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use
> >popcount to caclulate the minimal VLEN.  */
>
> Preexisting, but the above is hard to parse. contintuly, caclulate, will set, 
> has set ?

Thanks for pointing this out, will prepare a patch to improve that later  :)


>
> thanks,


Re: [PATCH] rs6000: using li/lis+oris/xoris to build constants

2022-10-25 Thread Jiufu Guo via Gcc-patches
"Kewen.Lin"  writes:

> Hi Jeff,
>
> Sorry for late review, some comments are inline.
>
> on 2022/8/24 16:13, Jiufu Guo via Gcc-patches wrote:
>> Hi,
>> 
>> PR106708 constaint some constants which can be support by li/lis + 
>> oris/xoris.
>> 
>> For constant C:
>> if ((c & 0x80008000ULL) == 0x8000ULL) or say:
>> 32(0)+1(1)+15(x)+1(0)+15(x), we could use li+oris to build constant 'C'.
>> 
>> if ((c & 0x8000ULL) == 0x8000ULL) or say:
>> 32(1)+16(x)+1(1)+15(x), using li+xoris would be ok.
>> 
>> if ((c & 0xULL) == 0x) or say:
>> 32(1)+1(0)+15(x)+16(0), using lis+xoris would be ok.
>> 
>
> Maybe it's good to add some explanation on the proposed writing "N(M)"
> N continuous bit M, (x for M means either 1 or 0), and not sure if it's
> good to use "||" for concatenation just like what ISA uses, the con
> is it can be mis-interpreted as logical "or".
>
> Or maybe just expand all the low 32 bits and use "1..." or "0..." for the
> high 32 bits.
>
Great, thanks for sugguestions!
>> This patch update rs6000_emit_set_long_const to support these forms.
>> Bootstrap and regtest pass on ppc64 and ppc64le.
>> 
>> Is this ok for trunk?
>> 
>> BR,
>> Jeff(Jiufu)
>> 
>> 
>>  PR target/106708
>> 
>> gcc/ChangeLog:
>> 
>>  * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Using li/lis +
>>  oris/xoris to build constants.
>
> Nit: Support constants which can be built with li + oris or li/lis +
> xoris?
Thanks.
>
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>  * gcc.target/powerpc/pr106708.c: New test.
>>  * gcc.target/powerpc/pr106708.h: New file.
>>  * gcc.target/powerpc/pr106708_1.c: New test.
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc   | 22 +++
>>  gcc/testsuite/gcc.target/powerpc/pr106708.c   | 10 +
>>  gcc/testsuite/gcc.target/powerpc/pr106708.h   |  9 
>>  gcc/testsuite/gcc.target/powerpc/pr106708_1.c | 17 ++
>>  4 files changed, 58 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708_1.c
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index df491bee2ea..243247fb838 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10112,6 +10112,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
>> c)
>>  {
>>rtx temp;
>>HOST_WIDE_INT ud1, ud2, ud3, ud4;
>> +  HOST_WIDE_INT orig_c = c;
>>  
>>ud1 = c & 0x;
>>c = c >> 16;
>> @@ -10137,6 +10138,27 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
>> c)
>>  gen_rtx_IOR (DImode, copy_rtx (temp),
>>   GEN_INT (ud1)));
>>  }
>> +  else if (ud4 == 0 && ud3 == 0 && (ud2 & 0x8000) && !(ud1 & 0x8000))
>> +{
>> +  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +
>> +  /* li+oris */
>> +  emit_move_insn (copy_rtx (temp), GEN_INT (ud1));
>
> Nit: in previous discussion on some other patch, copy_rtx is not
> necessary?
Yeap, thanks!
>
>> +  emit_move_insn (dest, gen_rtx_IOR (DImode, copy_rtx (temp),
>> + GEN_INT (ud2 << 16)));
>> +}
>
> I think this hunk above can be moved to the existing "(ud3 == 0 && ud4 == 0)"
> handling branch (as the diff context below), and ud2 & 0x8000 is already
> asserted there, it also saves check.
Great, thanks for point out this. You are right, this would save some
checking.
>
>> +  else if ((ud4 == 0x && ud3 == 0x)
>> +   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000
>> +{
>> +  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +
>> +  HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
>> + : ((ud2 << 16) - 0x8000);
>> +  /* li/lis + xoris */
>> +  emit_move_insn (copy_rtx (temp), GEN_INT (imm));
>> +  emit_move_insn (dest, gen_rtx_XOR (DImode, copy_rtx (temp),
>> + GEN_INT (orig_c ^ imm)));
>> +}
>
> Same comment for copy_rtx.
>
>>else if (ud3 == 0 && ud4 == 0)
>>  {
>>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr106708.c
>> new file mode 100644
>> index 000..6445fa47747
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c
>> @@ -0,0 +1,10 @@
>> +/* PR target/106708 */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
>> +/* { dg-do compile { target has_arch_ppc64 } } */
>> +
>
> Put dg-do as the first line, if you want has_arch_ppc64 to be behind 
> dg-options,
> separate it into a dg-require-effective-target.
OK, thanks!

>
>> +#include "pr106708.h"
>> +
>> +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */
>> +/* { 

Re: [PATCH] testsuite: Fix failure in test pr105586.c [PR107171]

2022-10-25 Thread Kewen.Lin via Gcc-patches
Hi Surya,

on 2022/10/14 01:02, Surya Kumari Jangala via Gcc-patches wrote:
> testsuite: Fix failure in test pr105586.c [PR107171]
> 
> The test pr105586.c fails on a big endian system when run in 32bit
> mode. The failure occurs as the test case does not guard against
> unsupported __int128.
> 

I thought this is taken as an obvious fix and it didn't ask for a
review/approval, but I just noticed it's not committed yet, so I
assumed maybe you meant to ask for an approval?

Then this patch is OK, it's even obvious, thanks!

BR,
Kewen

> 2022-10-13  Surya Kumari Jangala  
> 
> gcc/testsuite/
>   PR testsuite/107171
>   * gcc.target/powerpc/pr105586.c: Guard against unsupported
>   __int128.
> 
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr105586.c 
> b/gcc/testsuite/gcc.target/powerpc/pr105586.c
> index bd397f5..3f88a09 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr105586.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr105586.c
> @@ -1,4 +1,5 @@
>  /* { dg-options "-mdejagnu-tune=power4 -O2 -fcompare-debug 
> -fno-if-conversion -fno-guess-branch-probability" } */
> +/* { dg-require-effective-target int128 } */
>  
>  extern int bar(int i);


[PATCH] [x86] Enable V4BFmode and V2BFmode.

2022-10-25 Thread liuhongt via Gcc-patches
Enable V4BFmode and V2BFmode with the same ABI as V4HFmode and
V2HFmode. No real operation is supported for them except for movement.
This should solve PR target/107261.

Also I notice there's redundancy in VALID_AVX512FP16_REG_MODE, and
remove V2BFmode remove it.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

PR target/107261
* config/i386/i386-modes.def (VECTOR_MODE): Support V2BFmode.
* config/i386/i386.cc (classify_argument): Handle V4BFmode and
V2BFmode.
(ix86_convert_const_vector_to_integer): Ditto.
* config/i386/i386.h (VALID_AVX512FP16_REG_MODE): Remove
V2BFmode.
(VALID_SSE2_REG_MODE): Add V4BFmode and V2BFmode.
(VALID_MMX_REG_MODE): Add V4BFmode.
* config/i386/i386.md (mode): Add V4BF and V2BF.
(MODE_SIZE): Ditto.
* config/i386/mmx.md (MMXMODE) Add V4BF.
(V_32): Add V2BF.
(V_16_32_64): Add V4BF and V2BF.
(mmxinsnmode): Add V4BF and V2BF.
(*mov_internal): Hanlde V4BFmode and V2BFmode.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr107261.c: New test.
---
 gcc/config/i386/i386-modes.def   |  1 +
 gcc/config/i386/i386.cc  |  6 
 gcc/config/i386/i386.h   |  9 +++---
 gcc/config/i386/i386.md  |  5 ++--
 gcc/config/i386/mmx.md   | 26 +---
 gcc/testsuite/gcc.target/i386/pr107261.c | 38 
 6 files changed, 68 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107261.c

diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index b49daaef253..dbc3165c5fc 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -93,6 +93,7 @@ VECTOR_MODES (FLOAT, 64); /*  V32HF V16SF V8DF V4TF */
 VECTOR_MODES (FLOAT, 128);/* V64HF V32SF V16DF V8TF */
 VECTOR_MODES (FLOAT, 256);/* V128HF V64SF V32DF V16TF */
 VECTOR_MODE (FLOAT, HF, 2);   /*  V2HF */
+VECTOR_MODE (FLOAT, BF, 2);   /*  V2BF */
 VECTOR_MODE (FLOAT, HF, 6);   /*  V6HF */
 VECTOR_MODE (INT, TI, 1); /*   V1TI */
 VECTOR_MODE (INT, DI, 1); /*   V1DI */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index aeea26ef4be..1aca7d55a09 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2507,7 +2507,9 @@ classify_argument (machine_mode mode, const_tree type,
 case E_V2SImode:
 case E_V4HImode:
 case E_V4HFmode:
+case E_V4BFmode:
 case E_V2HFmode:
+case E_V2BFmode:
 case E_V8QImode:
   classes[0] = X86_64_SSE_CLASS;
   return 1;
@@ -2991,6 +2993,7 @@ pass_in_reg:
 case E_V8QImode:
 case E_V4HImode:
 case E_V4HFmode:
+case E_V4BFmode:
 case E_V2SImode:
 case E_V2SFmode:
 case E_V1TImode:
@@ -3240,6 +3243,7 @@ pass_in_reg:
 case E_V8QImode:
 case E_V4HImode:
 case E_V4HFmode:
+case E_V4BFmode:
 case E_V2SImode:
 case E_V2SFmode:
 case E_V1TImode:
@@ -15810,7 +15814,9 @@ ix86_convert_const_vector_to_integer (rtx op, 
machine_mode mode)
}
   break;
 case E_V2HFmode:
+case E_V2BFmode:
 case E_V4HFmode:
+case E_V4BFmode:
 case E_V2SFmode:
   for (int i = 0; i < nunits; ++i)
{
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index fd7c9df47e5..16d9c606077 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1033,13 +1033,12 @@ extern const char *host_detect_local_cpu (int argc, 
const char **argv);
|| (MODE) == V8BFmode || (MODE) == TImode)
 
 #define VALID_AVX512FP16_REG_MODE(MODE)
\
-  ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode\
-   || (MODE) == V2HFmode)
+  ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
 
 #define VALID_SSE2_REG_MODE(MODE)  \
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode   \
-   || (MODE) == V8BFmode \
+   || (MODE) == V8BFmode || (MODE) == V4BFmode || (MODE) == V2BFmode   \
|| (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode   \
|| (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \
|| (MODE) == HFmode || (MODE) == BFmode)
@@ -1057,7 +1056,7 @@ extern const char *host_detect_local_cpu (int argc, const 
char **argv);
   ((MODE) == V1DImode || (MODE) == DImode  \
|| (MODE) == V2SImode || (MODE) == SImode   \
|| (MODE) == V4HImode || (MODE) == V8QImode \
-   || (MODE) == V4HFmode)
+   || (MODE) == V4HFmode || (MODE) == V4BFmode)
 
 #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
 
@@ -1074,7 +1073,7 @@ extern const char 

[PATCH][PUSHED] Don't force DWARF4 for AutoFDO tests

2022-10-25 Thread Eugene Rozenfeld via Gcc-patches
Support for DWARF5 was added to create_gcov in
https://github.com/google/autofdo so we no longer need
to force DWARF4 for AutoFDO tests.

Tested on x86_64-pc-linux-gnu.

gcc/testsuite/ChangeLog:
* lib/profopt.exp: Don't force DWARF4 for AutoFDO tests
---
 gcc/testsuite/lib/profopt.exp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp
index ac7712a5b42..fe88d2fce37 100644
--- a/gcc/testsuite/lib/profopt.exp
+++ b/gcc/testsuite/lib/profopt.exp
@@ -289,7 +289,7 @@ proc auto-profopt-execute { src } {
 return
 }
 set profile_wrapper [profopt-perf-wrapper]
-set profile_option "-gdwarf-4 -DFOR_AUTOFDO_TESTING"
+set profile_option "-g -DFOR_AUTOFDO_TESTING"
 set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING -fearly-inlining"
 set run_autofdo 1
 profopt-execute $src
-- 
2.25.1


[r13-3476 Regression] FAIL: gcc.dg/vect/pr100756.c scan-tree-dump-not vect "epilog loop required" on Linux/x86_64

2022-10-25 Thread haochen.jiang via Gcc-patches
On Linux/x86_64,

19295e8607da2f743368fe6f5708146616aafa91 is the first bad commit
commit 19295e8607da2f743368fe6f5708146616aafa91
Author: Richard Biener 
Date:   Mon Oct 24 09:51:32 2022 +0200

tree-optimization/100756 - niter analysis and folding

caused

FAIL: gcc.dg/vect/pr100756.c -flto -ffat-lto-objects  scan-tree-dump-not vect 
"epilog loop required"
FAIL: gcc.dg/vect/pr100756.c scan-tree-dump-not vect "epilog loop required"

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r13-3476/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="vect.exp=gcc.dg/vect/pr100756.c --target_board='unix{-m32\ 
-march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="vect.exp=gcc.dg/vect/pr100756.c --target_board='unix{-m64\ 
-march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at haochen dot jiang at intel.com)


Re: [PATCH 1/2] Add gcc/make-unique.h

2022-10-25 Thread David Malcolm via Gcc-patches
On Fri, 2022-10-21 at 12:01 -0400, David Malcolm wrote:
> This patch adds gcc/make-unique.h, containing a minimal C++11
> implementation of make_unique (std::make_unique is C++14).
> 
> The followup patch uses this in dozens of places within the analyzer.
> 
> Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> 
> OK for trunk?

I've now realized that I sent an earlier version of this patch here:
  https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598189.html

Dave

> 
> gcc/ChangeLog:
> * make-unique.h: New file.
> 
> Signed-off-by: David Malcolm 
> ---
>  gcc/make-unique.h | 42 ++
>  1 file changed, 42 insertions(+)
>  create mode 100644 gcc/make-unique.h
> 
> diff --git a/gcc/make-unique.h b/gcc/make-unique.h
> new file mode 100644
> index 000..752a1d3dd30
> --- /dev/null
> +++ b/gcc/make-unique.h
> @@ -0,0 +1,42 @@
> +/* Minimal implementation of make_unique for C++11 compatibility.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT
> ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +.  */
> +
> +#ifndef GCC_MAKE_UNIQUE
> +#define GCC_MAKE_UNIQUE
> +
> +/* This header uses std::unique_ptr, but  can't be directly
> +   included due to issues with macros.  Hence  must be
> included
> +   from system.h by defining INCLUDE_MEMORY in any source file using
> +   make-unique.h.  */
> +
> +#ifndef INCLUDE_MEMORY
> +# error "You must define INCLUDE_MEMORY before including system.h to
> use make-unique.h"
> +#endif
> +
> +/* Minimal implementation of make_unique for C++11 compatibility
> +   (std::make_unique is C++14).  */
> +
> +template
> +inline typename std::enable_if::value,
> std::unique_ptr>::type
> +make_unique(Args&&... args)
> +{
> +  return std::unique_ptr (new T (std::forward (args)...));
> +}
> +
> +#endif /* ! GCC_MAKE_UNIQUE */



[PATCH][PUSHED] Start using discriminators in AutoFDO

2022-10-25 Thread Eugene Rozenfeld via Gcc-patches
Tested on x86_64-pc-linux-gnu.

gcc/ChangeLog:

* auto-profile.cc (get_combined_location): Include discriminator in the
returned combined location.
(read_function_instance): Read discriminators from profiles.
---
 gcc/auto-profile.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index ca48404eaf1..97307321cbf 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -363,7 +363,8 @@ get_combined_location (location_t loc, tree decl)
   /* TODO: allow more bits for line and less bits for discriminator.  */
   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
 warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
-  return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
+  return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16)
+| get_discriminator_from_loc (loc);
 }
 
 /* Return the function decl of a given lexical BLOCK.  */
@@ -652,7 +653,7 @@ function_instance::read_function_instance 
(function_instance_stack *stack,
 
   for (unsigned i = 0; i < num_pos_counts; i++)
 {
-  unsigned offset = gcov_read_unsigned () & 0x;
+  unsigned offset = gcov_read_unsigned ();
   unsigned num_targets = gcov_read_unsigned ();
   gcov_type count = gcov_read_counter ();
   s->pos_counts[offset].count = count;
-- 
2.25.1


Re: [PATCH] diagnostics: Allow FEs to keep customizations for middle end [PR101551, PR106274]

2022-10-25 Thread Lewis Hyatt via Gcc-patches
On Tue, Oct 25, 2022 at 7:35 AM Richard Biener
 wrote:
>
> On Thu, Oct 20, 2022 at 1:09 AM Lewis Hyatt via Gcc-patches
>  wrote:
> >
> > Currently, the ipa-free-lang-data pass resets most of the frontend's
> > diagnostic customizations, such as the diagnostic_finalizer that prints 
> > macro
> > expansion information, which is the subject of the two PRs. In most cases,
> > however, there is no need to reset these customizations; they still work 
> > just
> > fine after the language-specific data has been freed. (Macro tracking
> > information, for instance, only depends on the line_maps instance and does 
> > not
> > use the tree data structures at all.)
> >
> > Add an interface whereby frontends can convey which of their customizations
> > should be preserved by ipa-free-lang-data. Only the macro tracking behavior 
> > is
> > changed for now.  Subsequent patches will add further configurations for 
> > each
> > frontend.
>
> One point of the resetting of the hooks is to avoid crashes due to us zapping
> many of the lang specific data structures.  If the hooks were more resilent
> that wouldn't be an issue.
>

Right. The patch I have for C++ (not sent yet) makes the C++ versions
of decl_printable_name and and the diagnostic starter able to work
after free_lang_data runs.  I just worry that future changes to the
C++ hooks would need to preserve this property, which could be error
prone since issues are not immediately apparent, and most of the
testsuite does not use -flto.

> Now - as for macro tracking, how difficult is it to replicate that in the
> default hook implementation?  Basically we have similar issues for
> late diagnostics of the LTO compile step where only the LTO (aka default)
> variant of the hooks are present - it would be nice to improve that as well.
>

It is easy enough to make the default diagnostic finalizer print the
macro tracking information stored in the global line_table. (It just
needs to check if the global line_table is set, in which case call
virt_loc_aware_diagnostic_finalizer()). This would remove the need for
C-family frontends to override that callback. Fortran would still do
so, since it does other things in its finalizer. However, this would
not help with the LTO frontend because the line_table is not part of
what gets streamed out. Rather the line_table is rebuilt from scratch
when reading the data back in, but the macro tracking information is
not available at that time, just the basic location info (filename and
source location). I am not that familiar with the LTO streaming
process but I feel like streaming the entire line_table would not mesh
well with it (especially since multiple of them from different
translation units would need to be combined back together).

> Note free_lang_data exists to "simplify" the LTO bytecode output - things
> freed do not need to be output.  Of course the "freeing" logic could be
> wired into the LTO bytecode output machinery directly - simply do not
> output what we'd free.  That way all info would prevail for the non-LTO
> compile and the hooks could continue to work as they do without any
> LTO streaming done.
>

Naively (emphasis on the naive, as I don't have any experience with
this part of GCC), that is how I would have guessed it worked. But I
understood there are some benefits to freeing the lang data earlier
(e.g. reduced resource usage), and even a hope to start doing it in
non-LTO builds as well, so I thought some incremental changes as in
this patch to make diagnostics better after free_lang_data could
perhaps be useful. Thanks for taking a look at it!

-Lewis


[PATCH] [PR tree-optimization/107394] Canonicalize global franges as they are read back.

2022-10-25 Thread Aldy Hernandez via Gcc-patches
[Richi/Jakub/FP experts, does this sound like the right solution, or am I
missing some subtle IPA/inlining issue?]

The problem here is that we're inlining a global range with NANs into
a function that has been tagged with __attribute__((optimize
("-ffinite-math-only"))).  As the global range is copied from
SSA_NAME_RANGE_INFO, its NAN bits are copied, which then cause
frange::verify_range() to fail a sanity check making sure no NANs
creep in when !HONOR_NANS.

I think what we should do is nuke the NAN bits as we're restoring the
global range.  For that matter, if we use the frange constructor,
everything except that NAN sign will be done automatically, including
dropping INFs to the min/max representable range when appropriate.

PR tree-optimization/107394

gcc/ChangeLog:

* value-range-storage.cc (frange_storage_slot::get_frange): Use
frange constructor.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr107394.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr107394.c | 22 ++
 gcc/value-range-storage.cc   | 18 +++---
 2 files changed, 29 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr107394.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr107394.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr107394.c
new file mode 100644
index 000..0e1e5ac40ce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr107394.c
@@ -0,0 +1,22 @@
+// { dg-do compile }
+// { dg-options "-O2" }
+
+static double
+quux (double x)
+{
+  return __builtin_fabs (x);
+}
+
+__attribute__ ((flatten, optimize ("-ffinite-math-only"))) static int
+bar (int *p)
+{
+  *p = quux (0.0);
+
+  return 0;
+}
+
+void
+foo (int *p)
+{
+  (void) bar (p);
+}
diff --git a/gcc/value-range-storage.cc b/gcc/value-range-storage.cc
index 6e054622830..b660102e064 100644
--- a/gcc/value-range-storage.cc
+++ b/gcc/value-range-storage.cc
@@ -261,17 +261,13 @@ frange_storage_slot::get_frange (frange , tree type) 
const
 {
   gcc_checking_assert (r.supports_type_p (type));
 
-  r.set_undefined ();
-  r.m_kind = m_kind;
-  r.m_type = type;
-  r.m_min = m_min;
-  r.m_max = m_max;
-  r.m_pos_nan = m_pos_nan;
-  r.m_neg_nan = m_neg_nan;
-  r.normalize_kind ();
-
-  if (flag_checking)
-r.verify_range ();
+  // Use the constructor because it will canonicalize the range.
+  r = frange (type, m_min, m_max, m_kind);
+
+  // The constructor will set the NAN bits for HONOR_NANS, but we must
+  // make sure to set the NAN sign if known.
+  if (HONOR_NANS (type) && (m_pos_nan ^ m_neg_nan) == 1)
+r.update_nan (m_neg_nan);
 }
 
 bool
-- 
2.37.3



[PATCH] Convert flag_finite_math_only uses in frange to HONOR_*.

2022-10-25 Thread Aldy Hernandez via Gcc-patches
[As Richi, and probably Jakub, have mentioned in the past...]

As mentioned earlier, we should be using HONOR_* on types rather than
flag_finite_math_only.

Will commit pending tests.

gcc/ChangeLog:

* value-range.cc (frange::set): Use HONOR_*.
(frange::verify_range): Same.
* value-range.h (frange_val_min): Same.
(frange_val_max): Same.
---
 gcc/value-range.cc |  6 +++---
 gcc/value-range.h  | 12 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index d8ee6ec0d0f..77e5a2cc299 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -341,7 +341,7 @@ frange::set (tree type,
 
   // For -ffinite-math-only we can drop ranges outside the
   // representable numbers to min/max for the type.
-  if (flag_finite_math_only)
+  if (!HONOR_INFINITIES (m_type))
 {
   REAL_VALUE_TYPE min_repr = frange_val_min (m_type);
   REAL_VALUE_TYPE max_repr = frange_val_max (m_type);
@@ -712,8 +712,8 @@ frange::supports_type_p (const_tree type) const
 void
 frange::verify_range ()
 {
-  if (flag_finite_math_only)
-gcc_checking_assert (!maybe_isnan ());
+  if (!undefined_p ())
+gcc_checking_assert (HONOR_NANS (m_type) || !maybe_isnan ());
   switch (m_kind)
 {
 case VR_UNDEFINED:
diff --git a/gcc/value-range.h b/gcc/value-range.h
index b48542a68aa..c87734dd8cd 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -1201,10 +1201,10 @@ real_min_representable (const_tree type)
 inline REAL_VALUE_TYPE
 frange_val_min (const_tree type)
 {
-  if (flag_finite_math_only)
-return real_min_representable (type);
-  else
+  if (HONOR_INFINITIES (type))
 return dconstninf;
+  else
+return real_min_representable (type);
 }
 
 // Return the maximum value for TYPE.
@@ -1212,10 +1212,10 @@ frange_val_min (const_tree type)
 inline REAL_VALUE_TYPE
 frange_val_max (const_tree type)
 {
-  if (flag_finite_math_only)
-return real_max_representable (type);
-  else
+  if (HONOR_INFINITIES (type))
 return dconstinf;
+  else
+return real_max_representable (type);
 }
 
 // Return TRUE if R is the minimum value for TYPE.
-- 
2.37.3



c++: Adjust synthetic template parm creation

2022-10-25 Thread Nathan Sidwell via Gcc-patches


We intend to mark synthetic template parameters (coming from use of auto
parms), as DECL_VIRTUAL_P.  The API of process_template_parm is
awkwardly confusing, and we were marking the previous template parm
(unless this was the first parm).  process_template_parm returns the list
of parms, when most (all?) users really want the newly-added final node.
That's a bigger change, so let's not do it right now.  With this, we
correctly mark such synthetic parms DECL_VIRTUAL_P.

I fell over this trying to update template lambda mangling.  The API of 
process_template_parm is somewhat awkaward, as it returns the whole list, and we 
end up doing a lot of tree_last calls, to get to the newly added node.  I think 
all (or nearly all?) uses could be adapted to it returning the newly added list 
node, but I didn't want to go there right now.


nathan

--
Nathan SidwellFrom 43e654afeba484d75fbee080262a038c1da00ad5 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Tue, 25 Oct 2022 09:39:00 -0400
Subject: [PATCH] c++: Adjust synthetic template parm creation

We intend to mark synthetic template parameters (coming from use of auto
parms), as DECL_VIRTUAL_P.  The API of process_template_parm is
awkwardly confusing, and we were marking the previous template parm
(unless this was the first parm).  process_template_parm returns the list
of parms, when most (all?) users really want the newly-added final node.
That's a bigger change, so let's not do it right now.  With this, we
correctly mark such synthetic parms DECL_VIRTUAL_P.

	gcc/cp/
	* parser.cc (synthesize_implicit_template_parm): Fix thinko about
	mark the new parm DECL_VIRTUAL_P.  Avoid unneccessary tree_last call.
---
 gcc/cp/parser.cc | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index a39c5f0d24b..e685f190b3d 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -48996,12 +48996,11 @@ synthesize_implicit_template_parm  (cp_parser *parser, tree constr)
 
   tree proto = constr ? DECL_INITIAL (constr) : NULL_TREE;
   tree synth_id = make_generic_type_name ();
-  tree synth_tmpl_parm;
   bool non_type = false;
 
   /* Synthesize the type template parameter.  */
   gcc_assert(!proto || TREE_CODE (proto) == TYPE_DECL);
-  synth_tmpl_parm = finish_template_type_parm (class_type_node, synth_id);
+  tree synth_tmpl_parm = finish_template_type_parm (class_type_node, synth_id);
 
   if (become_template)
 current_template_parms = tree_cons (size_int (current_template_depth + 1),
@@ -49016,22 +49015,27 @@ synthesize_implicit_template_parm  (cp_parser *parser, tree constr)
 			 node,
 			 /*non_type=*/non_type,
 			 /*param_pack=*/false);
+  // Process_template_parm returns the list of parms, and
+  // parser->implicit_template_parms holds the final node of the parm
+  // list.  We really want to manipulate the newly appended element.
+  gcc_checking_assert (!parser->implicit_template_parms
+		   || parser->implicit_template_parms == new_parm);
+  if (parser->implicit_template_parms)
+new_parm = TREE_CHAIN (new_parm);
+  gcc_checking_assert (!TREE_CHAIN (new_parm));
+
+  // Record the last implicit parm node
+  parser->implicit_template_parms = new_parm;
 
   /* Mark the synthetic declaration "virtual". This is used when
  comparing template-heads to determine if whether an abbreviated
  function template is equivalent to an explicit template.
 
- Note that DECL_ARTIFICIAL is used elsewhere for template parameters.  */
+ Note that DECL_ARTIFICIAL is used elsewhere for template
+ parameters.  */
   if (TREE_VALUE (new_parm) != error_mark_node)
 DECL_VIRTUAL_P (TREE_VALUE (new_parm)) = true;
 
-  // Chain the new parameter to the list of implicit parameters.
-  if (parser->implicit_template_parms)
-parser->implicit_template_parms
-  = TREE_CHAIN (parser->implicit_template_parms);
-  else
-parser->implicit_template_parms = new_parm;
-
   tree new_decl = get_local_decls ();
   if (non_type)
 /* Return the TEMPLATE_PARM_INDEX, not the PARM_DECL.  */
@@ -49059,7 +49063,7 @@ synthesize_implicit_template_parm  (cp_parser *parser, tree constr)
 
   /* If the new parameter was constrained, we need to add that to the
  constraints in the template parameter list.  */
-  if (tree req = TEMPLATE_PARM_CONSTRAINTS (tree_last (new_parm)))
+  if (tree req = TEMPLATE_PARM_CONSTRAINTS (new_parm))
 {
   tree reqs = TEMPLATE_PARMS_CONSTRAINTS (current_template_parms);
   reqs = combine_constraint_expressions (reqs, req);
-- 
2.37.3



Re: [PATCH v3] xtensa: Prepare the transition from Reload to LRA

2022-10-25 Thread Jan-Benedict Glaw
Hi!

On Wed, 2022-10-19 17:16:24 +0900, Takayuki 'January June' Suwa via Gcc-patches 
 wrote:
>   * gcc/config/xtensa/xtensa.md: Add two new split patterns:
> - splits DImode immediate load into two SImode ones
> - puts out-of-constraint SImode constants into the constant pool

> --- a/gcc/config/xtensa/xtensa.md
> +++ b/gcc/config/xtensa/xtensa.md
> @@ -940,14 +940,9 @@
>because of offering further optimization opportunities.  */
>if (register_operand (operands[0], DImode))
>   {
> -   rtx lowpart, highpart;
> -
> -   if (TARGET_BIG_ENDIAN)
> - split_double (operands[1], , );
> -   else
> - split_double (operands[1], , );
> -   emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lowpart));
> -   emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), highpart));
> +   xtensa_split_DI_reg_imm (operands);
> +   emit_move_insn (operands[0], operands[1]);
> +   emit_move_insn (operands[2], operands[3]);

This results in a new warning for me:

[all 2022-10-25 16:04:19] g++  -fno-PIE -c   -g -O2   -DIN_GCC  
-DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti 
-fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic 
-Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror -fno-common 
 -DHAVE_CONFIG_H -I. -I. -I../../gcc/gcc -I../../gcc/gcc/. 
-I../../gcc/gcc/../include -I../../gcc/gcc/../libcpp/include 
-I../../gcc/gcc/../libcody  -I../../gcc/gcc/../libdecnumber 
-I../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber 
-I../../gcc/gcc/../libbacktrace   -o insn-emit.o -MT insn-emit.o -MMD -MP -MF 
./.deps/insn-emit.TPo insn-emit.cc
[all 2022-10-25 16:04:22] ../../gcc/gcc/config/xtensa/xtensa.md: In function 
'rtx_def* gen_movdi(rtx, rtx)':
[all 2022-10-25 16:04:22] ../../gcc/gcc/config/xtensa/xtensa.md:945:26: error: 
array subscript 3 is above array bounds of 'rtx_def* [2]' [-Werror=array-bounds]
[all 2022-10-25 16:04:22]   945 |   emit_move_insn (operands[2], 
operands[3]);
[all 2022-10-25 16:04:22]   |   
~~~^~
[all 2022-10-25 16:04:22] ../../gcc/gcc/config/xtensa/xtensa.md:897:9: note: 
while referencing 'operands'
[all 2022-10-25 16:04:22]   897 |(set_attr "mode" "SF")
[all 2022-10-25 16:04:22]   | ^~~~
[all 2022-10-25 16:04:22] ../../gcc/gcc/config/xtensa/xtensa.md:945:26: error: 
array subscript 2 is above array bounds of 'rtx_def* [2]' [-Werror=array-bounds]
[all 2022-10-25 16:04:22]   945 |   emit_move_insn (operands[2], 
operands[3]);
[all 2022-10-25 16:04:22]   |   
~~~^~
[all 2022-10-25 16:04:22] ../../gcc/gcc/config/xtensa/xtensa.md:897:9: note: 
while referencing 'operands'
[all 2022-10-25 16:04:22]   897 |(set_attr "mode" "SF")
[all 2022-10-25 16:04:22]   | ^~~~

I didn't yet actually check the warning, it may be bogus.

Thanks,
  Jan-Benedict

-- 


signature.asc
Description: PGP signature


Re: [committed] More infrastructure to avoid bogus RTL on H8

2022-10-25 Thread Jan-Benedict Glaw
Hi Jeff!

On Mon, 2022-10-17 17:47:16 -0600, Jeff Law via Gcc-patches 
 wrote:
> --- a/gcc/config/h8300/h8300.cc
> +++ b/gcc/config/h8300/h8300.cc
> @@ -5531,6 +5531,32 @@ h8300_ok_for_sibcall_p (tree fndecl, tree)
>  
>return 1;
>  }
> +
> +/* Return TRUE if OP is a PRE_INC or PRE_DEC
> +   instruction using REG, FALSE otherwise.  */
> +
> +bool
> +pre_incdec_with_reg (rtx op, int reg)
> +{
> +  /* OP must be a MEM.  */
> +  if (GET_CODE (op) != MEM)
> +return false;
> +
> +  /* The address must be a PRE_INC or PRE_DEC.  */
> +  op = XEXP (op, 0);
> +  if (GET_CODE (op) != PRE_DEC && GET_CODE (op) != PRE_INC)
> +return false;
> +
> +  /* It must be a register that is being incremented
> + or decremented.  */
> +  op = XEXP (op, 0);
> +  if (!REG_P (op))
> +return false;
> +
> +  /* Finally, check that the register number matches.  */
> +  return REGNO (op) == reg;

This results in a new signed-vs-unsigned warning for me:

[all 2022-10-25 00:41:11] ../../gcc/gcc/config/h8300/h8300.cc: In function 
'bool pre_incdec_with_reg(rtx, int)':
[all 2022-10-25 00:41:11] ../../gcc/gcc/config/h8300/h8300.cc:5557:21: error: 
comparison of integer expressions of different signedness: 'unsigned int' and 
'int' [-Werror=sign-compare]
[all 2022-10-25 00:41:11]  5557 |   return REGNO (op) == reg;

Thanks,
  Jan-Benedict

-- 


signature.asc
Description: PGP signature


Re: c: tree: target: C2x (...) function prototypes and va_start relaxation

2022-10-25 Thread Joseph Myers
On Tue, 25 Oct 2022, Richard Biener wrote:

> You are missing to stream the new type flag in tree-streamer-{in,out}.cc
> and checking for tree merging in lto-common.cc:compare_tree_sccs_1
> 
> Otherwise looks reasonable.  Can you add a (multi TU) runtime testcase to the
> torture exercising the feature so we can see any ABI issues?

I've made those changes.  Those in turn showed up the need for a
change in fortran/trans-types.cc (to avoid using
build_varargs_function_type_vec when what was wanted was an
unprototyped function type rather than a (...) prototype) to avoid a
failure of gfortran.dg/lto/pr40724.

I've added a two-file testcase to gcc.dg/torture/.  I've also made the
execution tests cover the case where there are named arguments but the
last named argument has a declaration that results in undefined
behavior in previous C standard versions, such as a type changed by
the default argument promotions.

Here is the revised patch version.

c: tree: target: C2x (...) function prototypes and va_start relaxation

C2x allows function prototypes to be given as (...), a prototype
meaning a variable-argument function with no named arguments.  To
allow such functions to access their arguments, requirements for
va_start calls are relaxed so it ignores all but its first argument
(i.e. subsequent arguments, if any, can be arbitrary pp-token
sequences).

Implement this feature accordingly.  The va_start relaxation in
 is itself easy: __builtin_va_start already supports a
second argument of 0 instead of a parameter name, and calls get
converted internally to the form using 0 for that argument, so
 just needs changing to use a variadic macro that passes 0
as the second argument of __builtin_va_start.  (This is done only in
C2x mode, on the expectation that users of older standard would expect
unsupported uses of va_start to be diagnosed.)

For the (...) functions, it's necessary to distinguish these from
unprototyped functions, whereas previously C++ (...) functions and
unprototyped functions both used NULL TYPE_ARG_TYPES.  A flag is added
to tree_type_common to mark the (...) functions; as discussed on gcc@,
doing things this way is likely to be safer for unchanged code in GCC
than adding a different form of representation in TYPE_ARG_TYPES, or
adding a flag that instead signals that the function is unprototyped.

There was previously an option
-fallow-parameterless-variadic-functions to enable support for (...)
prototypes.  The support was incomplete - it treated the functions as
unprototyped, and only parsed some declarations, not e.g.
"int g (int (...));".  This option is changed into a no-op ignored
option; (...) is always accepted syntactically, with a pedwarn_c11
call to given required diagnostics when appropriate.  The peculiarity
of a parameter list with __attribute__ followed by '...' being
accepted with that option is removed.

Interfaces in tree.cc that create function types are adjusted to set
this flag as appropriate.  It is of course possible that some existing
users of the functions to create variable-argument functions actually
wanted unprototyped functions in the no-named-argument case, rather
than functions with a (...) prototype; some such cases in c-common.cc
(for built-in functions and implicit function declarations) turn out
to need updating for that reason.

I didn't do anything to change how the C++ front end creates (...)
function types.  It's very likely there are unchanged places in the
compiler that in fact turn out to need changes to work properly with
(...) function prototypes.

Target setup_incoming_varargs hooks, where they used the information
passed about the last named argument, needed updating to avoid using
that information in the (...) case.  Note that apart from the x86
changes, I haven't done any testing of those target changes beyond
building cc1 to check for syntax errors.  It's possible further
target-specific fixes will be needed; target maintainers should watch
out for failures of c2x-stdarg-4.c or c2x-stdarg-split-1a.c, the
execution tests, which would indicate that this feature is not working
correctly.  Those tests also verify the case where there are named
arguments but the last named argument has a declaration that results
in undefined behavior in previous C standard versions, such as a type
changed by the default argument promotions.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

gcc/
* config/aarch64/aarch64.cc (aarch64_setup_incoming_varargs):
Check TYPE_NO_NAMED_ARGS_STDARG_P.
* config/alpha/alpha.cc (alpha_setup_incoming_varargs): Likewise.
* config/arc/arc.cc (arc_setup_incoming_varargs): Likewise.
* config/arm/arm.cc (arm_setup_incoming_varargs): Likewise.
* config/csky/csky.cc (csky_setup_incoming_varargs): Likewise.
* config/epiphany/epiphany.cc (epiphany_setup_incoming_varargs):
Likewise.
* config/fr30/fr30.cc (fr30_setup_incoming_varargs): Likewise.
* 

Re: [PATCH v2] bpf: add preserve_field_info builtin

2022-10-25 Thread Jose E. Marchesi via Gcc-patches


> Hi Jose,
>
> Thanks for your comments. I think I've addressed them all in the updated
> patch below.
>
>>>+  get_inner_reference (src, , , _off, , ,
>>>+   , );
>>
>>Since the information returned by the builtin is always constant
>>(positions, sizes) I think you will want to adjust the code for the
>>eventuality of variable positioned fields and also variable sized
>>fields.
>>
>>get_inner_reference sets var_off to a tree if the position of the field
>>is variable.  In these cases `bitpos' is relative to that position.
>>
>>Likewise, get_inner_reference sets `mode' is set to BLKmode and
>>`bitsize' will be set to -1.
>>
>>I'm not sure what the built-in is supposed to do/return in these cases.
>>I guess it makes sense to error out, but what does LLVM do?
>
> I would have thought erroring out the only option, but it seems that
> LLVM will return a value from the builtin and record a CO-RE relocation
> as normal.
>
> What value will be returned depends of course on KIND, but from what
> I can tell it seems that such fields are treated as having an offset of
> 0 bits and/or a size of 0 bits. For example FIELD_BYTE_SIZE for a
> flexible-length array will return 0. FIELD_RSHIFT_U64 will be
> calculated as 64 - 0 = 64.
>
> This sort of makes sense if you expect that any BPF loader will honor
> the CO-RE relocations and patch the return value before the program is
> run, i.e. the actual values at compile time are irrelevant.
>
> But, I'm not sure that BPF loaders in practice actually _can_ patch the
> return value correctly. The source of information for resolving the
> relocations is the BTF. But the BTF won't have more information about
> variable position/size members. A flexible-length array for example in
> BTF is represented as an array type with 0 elements. So the size
> calculation when patching the relocation (looking at the impl in
> libbpf) will be elem_size * nelems = 0, and the 'patched' values will
> be the same as the unpatched.
>
> I'm not sure whether this behavior is a known limitation or an
> oversight. In my opinion it makes more sense to error at compile time,
> becuase even after the loader patches the return value it still will
> not be correct for these cases.
>
> So for now I've set these cases to error out, but it would be just as
> simple to mimic the LLVM behavior. WDYT?

I would say it makes more sense to error out than to return invalid
data.

However, the divergence wrt LLVM is a concern.  What about keeping this
behavior in the GCC backend and simultaneously raise the issue in
bpf@vger?  If it was a design oversight and the change doesn't impact
kernel sources, they may be willing to change it.

>>If I read this properly, for something like:
>>
>>__builtin_preserve_field_info (a = foo.bar + bar.baz, KIND)
>>
>>On one side CO-RE relocations are computed for both foo.bar and bar.baz
>>(I see bpf_core_compute does that) as expected.
>>
>>But then the builtin returns information that can only apply to one
>>access.  Which one?
>
> Expressions like this should not be accepted by the builtin. I didn't
> consider this case in v1 so it led to an ICE. Clang rejects this
> outright and errors with "argument 1 is not a field access". It is
> actually very strict about the expressions that are accepted, unlike
> __builtin_preserve_access_index.
>
> I have updated this implementation to behave more like clang in that
> it will reject any expression that isn't directly a field access. That
> even includes rejecting things like:
>
>   __builtin_preserve_field_info (, KIND)
>
> Since unlike preserve_access_index this builtin does not actually
> perform the operation in EXPR, it makes sense to enforce that EXPR must
> be exactly a single field access.

Ok, thanks.

> [...]
> +@deftypefn {Built-in Function} unsigned int __builtin_preserve_field_info 
> (@var{expr}, unsigned int @var{kind})
> +BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to
> +extract information to aid in struct/union relocations.  @var{expr} is
> +an access to a field of a struct or union. Depending on @var{kind}, different
> +information is returned to the program. A CO-RE relocation for the access in
> +@var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect.
> +
> +The following values are supported for @var{kind}:
> +@table @var
> +@item FIELD_BYTE_OFFSET = 0
> +The returned value is the offset, in bytes, of the field from the
> +beginning of the containing structure.

What about bit fields?  Is this the byte offset of the containing word?

> +@item FIELD_BYTE_SIZE = 1
> +The returned value is the size, in bytes, of the field.

For bit fields,  is this the size of the containing word?

> +@item FIELD_EXISTENCE = 2
> +The returned value is 1 if the field exists, 0 otherwise. Always 1 at
> +compile time.
> +
> +@item FIELD_SIGNEDNESS = 3
> +The returned value is 1 if the field is signed, 0 otherwise.
> +
> +@item FIELD_LSHIFT_U64 = 4
> +@itemx FIELD_RSHIFT_U64 = 5
> 

[committed] wwwdocs: contribute: Remove

2022-10-25 Thread Gerald Pfeifer
The contents of this "description" meta tag really just duplicates
the title of the page and does not contribute to search engine
optimzation or otherwise, so simply drop it.

Pushed. 

Gerald
---
 htdocs/contribute.html | 1 -
 1 file changed, 1 deletion(-)

diff --git a/htdocs/contribute.html b/htdocs/contribute.html
index 342c12a8..02843580 100644
--- a/htdocs/contribute.html
+++ b/htdocs/contribute.html
@@ -3,7 +3,6 @@
 
 
 
-
 
 Contributing to GCC
-- 
2.38.0


[pushed] c++: correct fold_operand change

2022-10-25 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

Still want the conversion to bool.

gcc/cp/ChangeLog:

* constexpr.cc (find_failing_clause_r): Re-add the call to
contextual_conv_bool.
---
 gcc/cp/constexpr.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 39bb023b79c..15b4f2c4a08 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1887,7 +1887,8 @@ find_failing_clause_r (const constexpr_ctx *ctx, tree 
expr)
e = find_failing_clause_r (ctx, TREE_OPERAND (expr, 1));
   return e;
 }
-  tree e = fold_operand (expr, ctx);
+  tree e = contextual_conv_bool (expr, tf_none);
+  e = fold_operand (e, ctx);
   if (integer_zerop (e))
 /* This is the failing clause.  */
 return expr;

base-commit: e6a29aab51122103e677ffed523371c9c816ec98
-- 
2.31.1



Re: [PATCH] c++ modules: verify_type failure with typedef enum [PR106848]

2022-10-25 Thread Patrick Palka via Gcc-patches
On Mon, 24 Oct 2022, Nathan Sidwell wrote:

> On 10/21/22 09:11, Patrick Palka wrote:
> > On Fri, 21 Oct 2022, Nathan Sidwell wrote:
> 
> > > 
> > > Thanks for the explanation, it's a situation I didn;t anticipate and your
> > > fix
> > > is good.  Could you add a comment about why you need to propagate the
> > > values
> > > though?
> > 
> > Thanks a lot, will do.  Just to make sure since there are multiple
> > solutions proposed, do you prefer to go with
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603487.html
> > or
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603831.html ?
> > 
> > Both solutions fix the PR106848 issue (empty TYPE_MIN/MAX_VALUE on an
> > enum type variant), but the latter also fixes the related PR102600
> > (empty TYPE_MIN/MAX_VALUE on the main variant of an enum with no
> > enumerators).  (We could maybe even combine the two solutions: stream
> > TYPE_MIN/MAX_VALUE as part of ENUMERAL_TYPE, and also update TYPE_VALUES
> > of each variant during trees_in::read_enum_def)
> 
> 
> Oh, let's go with the latter:
> 
>   * module.cc (trees_out::core_vals): Stream TYPE_MAX_VALUE and
>   TYPE_MIN_VALUE of ENUMERAL_TYPE.
>   (trees_in::core_vals): Likewise.
>   (trees_out::write_enum_def): Don't stream them here.
>   (trees_in::read_enum_def): Likewise.
> 
> but, again, some comments -- at the new streaming point, and in the defn
> streamer were we no longer stream them.
> 
> thanks.

Thanks a lot, here's what I ended up committing:

-- >8 --

Subject: [PATCH] c++ modules: enum TYPE_MIN/MAX_VALUE streaming [PR106848]

In the frontend, the TYPE_MIN/MAX_VALUE of ENUMERAL_TYPE is the same as
that of the enum's underlying type (see start_enum).  And the underlying
type of an enum is always known, even for an opaque enum that lacks a
definition.

But currently, we stream TYPE_MIN/MAX_VALUE of an enum only as part of
its definition.  So if the enum is declared but never defined, the
ENUMERAL_TYPE we stream in will have empty TYPE_MIN/MAX_VALUE fields
despite these fields being non-empty on stream out.

And even if the enum is defined, read_enum_def updates these fields only
on the main variant of the enum type, so for other variants (that we may
have streamed in earlier) these fields remain empty.  That these fields
are unexpectedly empty for some ENUMERAL_TYPEs is ultimately the cause
of the below two PRs.

This patch fixes this by making us stream TYPE_MIN/MAX_VALUE directly
for each ENUMERAL_TYPE rather than as part of the enum's definition, so
that we naturally also stream these fields for opaque enums (and each
enum type variant).

PR c++/106848
PR c++/102600

gcc/cp/ChangeLog:

* module.cc (trees_out::core_vals): Stream TYPE_MAX_VALUE and
TYPE_MIN_VALUE of ENUMERAL_TYPE.
(trees_in::core_vals): Likewise.
(trees_out::write_enum_def): Don't stream them here.
(trees_in::read_enum_def): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/modules/enum-9_a.H: New test.
* g++.dg/modules/enum-9_b.C: New test.
* g++.dg/modules/enum-10_a.H: New test.
* g++.dg/modules/enum-10_b.C: New test.
* g++.dg/modules/enum-11_a.H: New test.
* g++.dg/modules/enum-11_b.C: New test.
---
 gcc/cp/module.cc | 39 +++-
 gcc/testsuite/g++.dg/modules/enum-10_a.H |  5 +++
 gcc/testsuite/g++.dg/modules/enum-10_b.C |  6 
 gcc/testsuite/g++.dg/modules/enum-11_a.H |  5 +++
 gcc/testsuite/g++.dg/modules/enum-11_b.C |  8 +
 gcc/testsuite/g++.dg/modules/enum-9_a.H  | 13 
 gcc/testsuite/g++.dg/modules/enum-9_b.C  |  6 
 7 files changed, 67 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/enum-10_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/enum-10_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/enum-11_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/enum-11_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/enum-9_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/enum-9_b.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 73971e7ff47..9957df510e6 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -6017,9 +6017,17 @@ trees_out::core_vals (tree t)
 
   if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON))
 {
+  if (code == ENUMERAL_TYPE)
+   {
+ /* These fields get set even for opaque enums that lack a
+definition, so we stream them directly for each ENUMERAL_TYPE.
+We stream TYPE_VALUES as part of the definition.  */
+ WT (t->type_non_common.maxval);
+ WT (t->type_non_common.minval);
+   }
   /* Records and unions hold FIELDS, VFIELD & BINFO on these
 things.  */
-  if (!RECORD_OR_UNION_CODE_P (code) && code != ENUMERAL_TYPE)
+  else if (!RECORD_OR_UNION_CODE_P (code))
{
  // FIXME: These are from tpl_parm_value's 'type' writing.
  // Perhaps it should just be 

[PATCH] tsan: fix test for machines without pthread_cond_clockwait

2022-10-25 Thread Michael de Lang via Gcc-patches
Hey,

As per J. Wakeley's comments on
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100655
make the call to pthread_cond_clockwait conditional. This allows
compilation on machines
with older glibc versions.

Cheers,
Michael


2022-10-25 Michael de Lang  

* Fix testcase for pthread_cond_clockwait on machines with old glibc


--- a/gcc/testsuite/g++.dg/tsan/pthread_cond_clockwait.C
+++ b/gcc/testsuite/g++.dg/tsan/pthread_cond_clockwait.C
@@ -4,6 +4,10 @@

 #include 

+// Include this to get the libstdc++ _GLIBCXX_USE_PTHREAD_COND_CLOCKWAIT
+// macro that indicates pthread_cond_clockwait is available.
+#include 
+
 pthread_cond_t cv;
 pthread_mutex_t mtx;

@@ -23,7 +27,9 @@ int main() {
 struct timespec ts;
 clock_gettime(CLOCK_MONOTONIC, );
 ts.tv_sec += 10;
+#ifdef _GLIBCXX_USE_PTHREAD_COND_CLOCKWAIT
 pthread_cond_clockwait(, , CLOCK_MONOTONIC, );
+#endif
 pthread_mutex_unlock();

 pthread_join(tid, NULL);


Re: [PATCH v5 0/2] IBM zSystems: Improve storing asan frame_pc

2022-10-25 Thread Jakub Jelinek via Gcc-patches
On Tue, Sep 27, 2022 at 02:23:32AM +0200, Ilya Leoshkevich wrote:
> This is a resend of v4 with slightly adjusted commit messages:
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2019-July/525016.html
> v2: https://gcc.gnu.org/pipermail/gcc-patches/2019-July/525069.html
> v3: https://gcc.gnu.org/pipermail/gcc-patches/2020-June/548338.html
> v4: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549252.html
> 
> It still survives the bootstrap and the regtest on x86_64-redhat-linux,
> s390x-redhat-linux and ppc64le-redhat-linux.  It also fixes [1].
> 
> I also tried the approach with moving .LASANPC closer to the function
> label and using FUNCTION_BOUNDARY instead of introducing
> CODE_LABEL_BOUNDARY, but the problem there is that it's hard to catch
> the moment where the function label is written.  Architectures can do
> it by calling ASM_OUTPUT_LABEL() or assemble_name() in
> ASM_DECLARE_FUNCTION_NAME(), ASM_OUTPUT_FUNCTION_LABEL() or
> TARGET_ASM_FUNCTION_PROLOGUE().  epiphany_start_function() does that
> twice, but passes the same decl to both calls.  Note that simply
> moving asan_function_start() to final_start_function_1() is not enough,
> since an architecture can write something after the function label.
> This all means that for this approach to work, all the architectures
> need to be adjusted, which looks like an overkill to me.

Sorry for the delay.
I think the right fix is to follow on s390 and other arches what
has been done for x86 in https://gcc.gnu.org/PR98776
That changed not just .LASANPC labels, but also the debug related
labels from after the patchable area to before it.
And then .LASANPC label can just get FUNCTION_BOUNDARY alignment
set in the generic code.

Jakub



[PATCH v2] bpf: add preserve_field_info builtin

2022-10-25 Thread David Faust via Gcc-patches
Hi Jose,

Thanks for your comments. I think I've addressed them all in the updated
patch below.

>>+  get_inner_reference (src, , , _off, , ,
>>+, );
>
>Since the information returned by the builtin is always constant
>(positions, sizes) I think you will want to adjust the code for the
>eventuality of variable positioned fields and also variable sized
>fields.
>
>get_inner_reference sets var_off to a tree if the position of the field
>is variable.  In these cases `bitpos' is relative to that position.
>
>Likewise, get_inner_reference sets `mode' is set to BLKmode and
>`bitsize' will be set to -1.
>
>I'm not sure what the built-in is supposed to do/return in these cases.
>I guess it makes sense to error out, but what does LLVM do?

I would have thought erroring out the only option, but it seems that
LLVM will return a value from the builtin and record a CO-RE relocation
as normal.

What value will be returned depends of course on KIND, but from what
I can tell it seems that such fields are treated as having an offset of
0 bits and/or a size of 0 bits. For example FIELD_BYTE_SIZE for a
flexible-length array will return 0. FIELD_RSHIFT_U64 will be
calculated as 64 - 0 = 64.

This sort of makes sense if you expect that any BPF loader will honor
the CO-RE relocations and patch the return value before the program is
run, i.e. the actual values at compile time are irrelevant.

But, I'm not sure that BPF loaders in practice actually _can_ patch the
return value correctly. The source of information for resolving the
relocations is the BTF. But the BTF won't have more information about
variable position/size members. A flexible-length array for example in
BTF is represented as an array type with 0 elements. So the size
calculation when patching the relocation (looking at the impl in
libbpf) will be elem_size * nelems = 0, and the 'patched' values will
be the same as the unpatched.

I'm not sure whether this behavior is a known limitation or an
oversight. In my opinion it makes more sense to error at compile time,
becuase even after the loader patches the return value it still will
not be correct for these cases.

So for now I've set these cases to error out, but it would be just as
simple to mimic the LLVM behavior. WDYT?


>If I read this properly, for something like:
>
>__builtin_preserve_field_info (a = foo.bar + bar.baz, KIND)
>
>On one side CO-RE relocations are computed for both foo.bar and bar.baz
>(I see bpf_core_compute does that) as expected.
>
>But then the builtin returns information that can only apply to one
>access.  Which one?

Expressions like this should not be accepted by the builtin. I didn't
consider this case in v1 so it led to an ICE. Clang rejects this
outright and errors with "argument 1 is not a field access". It is
actually very strict about the expressions that are accepted, unlike
__builtin_preserve_access_index.

I have updated this implementation to behave more like clang in that
it will reject any expression that isn't directly a field access. That
even includes rejecting things like:

  __builtin_preserve_field_info (, KIND)

Since unlike preserve_access_index this builtin does not actually
perform the operation in EXPR, it makes sense to enforce that EXPR must
be exactly a single field access.

---

[Changes from v1:
  - Error gracefully on variable-size or variable-offset fields if the
requested information according to KIND cannot be calculated for
an access to such a field, instead of maybe computing garbage.
  - Check for invalid expressions in EXPR and reject them rather than
ICEing later. This includes rejecting some expressions which were
accepted by previous patch but not by clang/LLVM.
  - Improve precision of location information for errors.
  - Add function-level comment describing maybe_make_core_relo ().
  - Document return values for all supported values of KIND.
  - Add tests for error conditions. ]

Add BPF __builtin_preserve_field_info. This builtin is used to extract
information to facilitate struct and union relocations performed by the
BPF loader, especially for bitfields.

The builtin has the following signature:

  unsigned int __builtin_preserve_field_info (EXPR, unsigned int KIND);

Where EXPR is an expression accessing a field of a struct or union.
Depending on KIND, different information is returned to the program. The
supported values for KIND are as follows:

  enum {
FIELD_BYTE_OFFSET = 0,
FIELD_BYTE_SIZE,
FIELD_EXISTENCE,
FIELD_SIGNEDNESS,
FIELD_LSHIFT_U64,
FIELD_RSHIFT_U64
  };

If -mco-re is in effect (explicitly or implicitly specified), a CO-RE
relocation is added for the access in EXPR recording the relevant
information according to KIND.

gcc/

* config/bpf/bpf.cc: Support __builtin_preserve_field_info.
(enum bpf_builtins): Add new builtin.
(bpf_init_builtins): Likewise.
(bpf_core_field_info): New function.
(bpf_expand_builtin): Accomodate new 

[pushed] c++: constexpr-evaluate more assumes

2022-10-25 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

The initial [[assume]] support avoided evaluating assumes with
TREE_SIDE_EFFECTS set, such as calls, because we don't want any side-effects
that change the constexpr state.  This patch allows us to evaluate
expressions with that flag set by tracking which variables the evaluation is
allowed to modify, and giving up if it tries to touch any others.

I considered allowing changes to other variables and then rolling them back,
but that seems like a rare enough situation that it doesn't seem worth
working to handle nicely at this point.

gcc/cp/ChangeLog:

* constexpr.cc (class constexpr_global_ctx): Add modifiable field,
get_value, get_value_ptr, put_value, remove_value, flush_modifiable
member functions.
(class modifiable_tracker): New.
(cxx_eval_internal_function): Use it.
(diagnose_failing_condition): Strip CLEANUP_POINT_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/attr-assume9.C: New test.
* g++.dg/cpp23/attr-assume10.C: New test.
---
 gcc/cp/constexpr.cc| 136 ++---
 gcc/testsuite/g++.dg/cpp23/attr-assume10.C |  22 
 gcc/testsuite/g++.dg/cpp23/attr-assume9.C  |  19 +++
 3 files changed, 133 insertions(+), 44 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp23/attr-assume10.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/attr-assume9.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index fc1bc53f68a..39bb023b79c 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1092,10 +1092,11 @@ enum constexpr_switch_state {
cxx_eval_outermost_constant_expr invocation.  VALUES is a map of values of
variables initialized within the expression.  */
 
-struct constexpr_global_ctx {
+class constexpr_global_ctx {
   /* Values for any temporaries or local variables within the
  constant-expression. */
   hash_map values;
+public:
   /* Number of cxx_eval_constant_expression calls (except skipped ones,
  on simple constants or location wrappers) encountered during current
  cxx_eval_outermost_constant_expr call.  */
@@ -1105,11 +1106,61 @@ struct constexpr_global_ctx {
   auto_vec heap_vars;
   /* Cleanups that need to be evaluated at the end of CLEANUP_POINT_EXPR.  */
   vec *cleanups;
+  /* If non-null, only allow modification of existing values of the variables
+ in this set.  Set by modifiable_tracker, below.  */
+  hash_set *modifiable;
   /* Number of heap VAR_DECL deallocations.  */
   unsigned heap_dealloc_count;
   /* Constructor.  */
   constexpr_global_ctx ()
-: constexpr_ops_count (0), cleanups (NULL), heap_dealloc_count (0) {}
+: constexpr_ops_count (0), cleanups (NULL), modifiable (nullptr),
+  heap_dealloc_count (0) {}
+
+ tree get_value (tree t)
+  {
+if (tree *p = values.get (t))
+  return *p;
+return NULL_TREE;
+  }
+  tree *get_value_ptr (tree t)
+  {
+if (modifiable && !modifiable->contains (t))
+  return nullptr;
+return values.get (t);
+  }
+  void put_value (tree t, tree v)
+  {
+bool already_in_map = values.put (t, v);
+if (!already_in_map && modifiable)
+  modifiable->add (t);
+  }
+  void remove_value (tree t) { values.remove (t); }
+};
+
+/* Helper class for constexpr_global_ctx.  In some cases we want to avoid
+   side-effects from evaluation of a particular subexpression of a
+   constant-expression.  In such cases we use modifiable_tracker to prevent
+   modification of variables created outside of that subexpression.
+
+   ??? We could change the hash_set to a hash_map, allow and track external
+   modifications, and roll them back in the destructor.  It's not clear to me
+   that this would be worthwhile.  */
+
+class modifiable_tracker
+{
+  hash_set set;
+  constexpr_global_ctx *global;
+public:
+  modifiable_tracker (constexpr_global_ctx *g): global(g)
+  {
+global->modifiable = 
+  }
+  ~modifiable_tracker ()
+  {
+for (tree t: set)
+  global->remove_value (t);
+global->modifiable = nullptr;
+  }
 };
 
 /* The constexpr expansion context.  CALL is the current function
@@ -1653,7 +1704,7 @@ addr_of_non_const_var (tree *tp, int *walk_subtrees, void 
*data)
return var;
 
  constexpr_global_ctx *global = (constexpr_global_ctx *) data;
- if (global->values.get (var))
+ if (global->get_value (var))
return var;
}
   if (TYPE_P (*tp))
@@ -1865,6 +1916,8 @@ diagnose_failing_condition (tree bad, location_t cloc, 
bool show_expr_p,
 {
   /* Nobody wants to see the artificial (bool) cast.  */
   bad = tree_strip_nop_conversions (bad);
+  if (TREE_CODE (bad) == CLEANUP_POINT_EXPR)
+bad = TREE_OPERAND (bad, 0);
 
   /* Actually explain the failure if this is a concept check or a
  requires-expression.  */
@@ -1902,18 +1955,14 @@ cxx_eval_internal_function (const constexpr_ctx *ctx, 
tree t,
   return void_node;
 
 case IFN_ASSUME:
-  /* For now, 

[pushed] c++: improve failed constexpr assume diagnostic

2022-10-25 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

I noticed that we were printing "the comparison reduces to (x == 42)" when
we should be able to give the value of x.  Fixed by doing the same
evaluation in diagnose_failing_condition that we already do in
find_failing_clause.

gcc/cp/ChangeLog:

* constexpr.cc (fold_operand): New function.
(find_failing_clause_r): Add const.
(find_failing_clause): Add const.
(diagnose_failing_condition): Add ctx parameter.
(cxx_eval_internal_function): Pass it.
* semantics.cc (diagnose_failing_condition): Move to constexpr.cc.
* cp-tree.h: Adjust.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/attr-assume2.C: Expect constant values.
---
 gcc/cp/cp-tree.h  |  5 +-
 gcc/cp/constexpr.cc   | 63 ++-
 gcc/cp/semantics.cc   | 27 --
 gcc/testsuite/g++.dg/cpp23/attr-assume2.C |  4 +-
 4 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 2cca20be6c1..cec376d90af 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7758,7 +7758,6 @@ extern tree build_transaction_expr
(location_t, tree, int, tree);
 extern bool cxx_omp_create_clause_info (tree, tree, bool, bool,
 bool, bool);
 extern tree baselink_for_fns(tree);
-extern void diagnose_failing_condition (tree, location_t, bool);
 extern void finish_static_assert(tree, tree, location_t,
 bool, bool);
 extern tree finish_decltype_type(tree, bool, tsubst_flags_t);
@@ -8497,7 +8496,9 @@ extern void clear_cv_and_fold_caches  (void);
 extern tree unshare_constructor(tree 
CXX_MEM_STAT_INFO);
 extern bool decl_implicit_constexpr_p  (tree);
 struct constexpr_ctx;
-extern tree find_failing_clause(constexpr_ctx *ctx, 
tree);
+extern tree find_failing_clause(const constexpr_ctx 
*ctx, tree);
+extern void diagnose_failing_condition (tree, location_t, bool,
+const constexpr_ctx * = 
nullptr);
 extern bool replace_decl   (tree *, tree, tree);
 
 /* An RAII sentinel used to restrict constexpr evaluation so that it
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 94b54fc71dc..fc1bc53f68a 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1799,6 +1799,24 @@ cx_error_context (void)
   return r;
 }
 
+/* E is an operand of a failed assertion, fold it either with or without
+   constexpr context.  */
+
+static tree
+fold_operand (tree e, const constexpr_ctx *ctx)
+{
+  if (ctx)
+{
+  bool new_non_constant_p = false, new_overflow_p = false;
+  e = cxx_eval_constant_expression (ctx, e, vc_prvalue,
+   _non_constant_p,
+   _overflow_p);
+}
+  else
+e = fold_non_dependent_expr (e, tf_none, /*manifestly_const_eval=*/true);
+  return e;
+}
+
 /* If we have a condition in conjunctive normal form (CNF), find the first
failing clause.  In other words, given an expression like
 
@@ -1807,7 +1825,7 @@ cx_error_context (void)
return the first 'false'.  EXPR is the expression.  */
 
 static tree
-find_failing_clause_r (constexpr_ctx *ctx, tree expr)
+find_failing_clause_r (const constexpr_ctx *ctx, tree expr)
 {
   if (TREE_CODE (expr) == TRUTH_ANDIF_EXPR)
 {
@@ -1818,16 +1836,7 @@ find_failing_clause_r (constexpr_ctx *ctx, tree expr)
e = find_failing_clause_r (ctx, TREE_OPERAND (expr, 1));
   return e;
 }
-  tree e = contextual_conv_bool (expr, tf_none);
-  if (ctx)
-{
-  bool new_non_constant_p = false, new_overflow_p = false;
-  e = cxx_eval_constant_expression (ctx, e, vc_prvalue,
-   _non_constant_p,
-   _overflow_p);
-}
-  else
-e = fold_non_dependent_expr (e, tf_none, /*manifestly_const_eval=*/true);
+  tree e = fold_operand (expr, ctx);
   if (integer_zerop (e))
 /* This is the failing clause.  */
 return expr;
@@ -1837,7 +1846,7 @@ find_failing_clause_r (constexpr_ctx *ctx, tree expr)
 /* Wrapper for find_failing_clause_r.  */
 
 tree
-find_failing_clause (constexpr_ctx *ctx, tree expr)
+find_failing_clause (const constexpr_ctx *ctx, tree expr)
 {
   if (TREE_CODE (expr) == TRUTH_ANDIF_EXPR)
 if (tree e = find_failing_clause_r (ctx, expr))
@@ -1845,6 +1854,34 @@ find_failing_clause (constexpr_ctx *ctx, tree expr)
   return expr;
 }
 
+/* Emit additional diagnostics for failing condition BAD.
+   Used by finish_static_assert and IFN_ASSUME constexpr diagnostics.
+   If SHOW_EXPR_P is true, print the condition (because it was
+   instantiation-dependent).  */
+

Re: [PATCH] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Marek Polacek via Gcc-patches
On Tue, Oct 25, 2022 at 11:39:31AM -0400, Jason Merrill wrote:
> On 10/25/22 09:14, Marek Polacek wrote:
> > On Tue, Oct 25, 2022 at 12:34:50PM +0100, Jonathan Wakely wrote:
> > > On Mon, 24 Oct 2022 at 18:30, Jason Merrill wrote:
> > > > 
> > > > On 10/21/22 19:28, Marek Polacek wrote:
> > > > > When testing a previous version of the patch, there were many FAILs in
> > > > > libstdc++'s 22_locale/; all of them because the warning triggered on
> > > > > 
> > > > > const test_type& obj = std::use_facet(std::locale());
> > > > > 
> > > > > but this code looks valid -- std::use_facet doesn't return a reference
> > > > > to its parameter.  Therefore I added code to suppress the warning when
> > > > > the call is std::use_facet.  Now 22_locale/* pass even with the 
> > > > > warning
> > > > > on.  We could exclude more std:: functions like this if desirable.
> > > > 
> > > > Instead of adding special cases in the compiler, let's disable the
> > > > warning around the definition of use_facet (and adjust the compiler as
> > > > needed so that avoids the warning).
> > > 
> > > I assume you mean using #pragma here. If we disable it around the
> > > definition of use_facet, will that disable it for callers of
> > > use_facet, or only within the definition of use_facet itself?
> > 
> > Right, a #pragma will not help, it would only disable the warning
> > within the definition of use_facet itself.
> 
> That's why I mentioned adjusting the compiler, i.e. check warning_enabled_at
> (DECL_SOURCE_LOCATION (fn)

Ah, like with -Wdeprecated-copy.  I think I fixed this:

--- a/libstdc++-v3/include/bits/locale_classes.tcc
+++ b/libstdc++-v3/include/bits/locale_classes.tcc
@@ -127,6 +127,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  @return  Reference to facet of type Facet.
*  @throw  std::bad_cast if @p __loc doesn't contain a facet of type _Facet.
   */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdangling-reference"
   template
 const _Facet&
 use_facet(const locale& __loc)
@@ -141,6 +143,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return static_cast(*__facets[__i]);
 #endif
 }
+#pragma GCC diagnostic pop


   // Generic version does nothing.

and then just check warning_enabled_at.  Nice!

Marek



[PATCH] rs6000: Add CCANY; replace signed by

2022-10-25 Thread Segher Boessenkool
This is in preparation for adding CCFP, and maybe CCEQ, and whatever
other CC mode we may want later.  CCANY is used for CC mode consumers
that actually can take any of the four CR field bits.

Tested on p7 and p9; committing,


Segher


2022-10-25  Segher Boessenkool  

* config/rs6000/rs6000.md (CCEITHER): Delete.
(CCANY): New.
(un): Delete.
(isel_signed_): Rename to...
(isel__): ... this.  Adjust.
(*isel_reversed_signed_): Rename to...
(*isel_reversed__): ... this.  Adjust.
(setbc_signed_): Rename to...
(setbc__C): ... this.  Adjust."
(*setbcr_signed_): Rename to ...
(*setbcr__): ... this.  Adjust.
(*setnbc_signed_): Rename to ...
(*setnbc__): ... this.  Adjust.
(*setnbcr_signed_): Rename to ...
(*setnbcr__): ... this.  Adjust.
(eq3 for GPR): Adjust.
(ne3 for GPR): Adjust.
* config/rs6000/rs6000-string.cc (do_isel): Adjust.
* config/rs6000/rs6000.cc (rs6000_emit_int_cmove): Adjust.

---
 gcc/config/rs6000/rs6000-string.cc |  4 ++--
 gcc/config/rs6000/rs6000.cc|  4 ++--
 gcc/config/rs6000/rs6000.md| 31 +++
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 59d901a..cd8ee8c 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -414,9 +414,9 @@ static void
 do_isel (rtx dest, rtx cmp, rtx src_t, rtx src_f, rtx cr)
 {
   if (GET_MODE (dest) == DImode)
-emit_insn (gen_isel_signed_di (dest, cmp, src_t, src_f, cr));
+emit_insn (gen_isel_cc_di (dest, cmp, src_t, src_f, cr));
   else
-emit_insn (gen_isel_signed_si (dest, cmp, src_t, src_f, cr));
+emit_insn (gen_isel_cc_si (dest, cmp, src_t, src_f, cr));
 }
 
 /* Emit a subtract of the proper mode for DEST.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index d2743f7..01e5bbc 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -16341,8 +16341,8 @@ rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, 
rtx false_cond)
   signedp = GET_MODE (cr) == CCmode;
 
   isel_func = (mode == SImode
-  ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
-  : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
+  ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
+  : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
 
   switch (cond_code)
 {
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index ad5a4cf..3bae303 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5428,13 +5428,12 @@ (define_expand "movcc"
 ;; leave out the mode in operand 4 and use one pattern, but reload can
 ;; change the mode underneath our feet and then gets confused trying
 ;; to reload the value.
-(define_mode_iterator CCEITHER [CC CCUNS])
-(define_mode_attr un [(CC "") (CCUNS "un")])
-(define_insn "isel_signed_"
+(define_mode_iterator CCANY [CC CCUNS])
+(define_insn "isel__"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
(if_then_else:GPR
 (match_operator 1 "scc_comparison_operator"
-[(match_operand:CCEITHER 4 "cc_reg_operand" "y,y")
+[(match_operand:CCANY 4 "cc_reg_operand" "y,y")
  (const_int 0)])
 (match_operand:GPR 2 "reg_or_zero_operand" "O,b")
 (match_operand:GPR 3 "gpc_reg_operand" "r,r")))]
@@ -5446,11 +5445,11 @@ (define_insn "isel_signed_"
 ;; isel can handle reversed comparisons so long as the operands are
 ;; registers.
 
-(define_insn "*isel_reversed_signed_"
+(define_insn "*isel_reversed__"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
(if_then_else:GPR
 (match_operator 1 "scc_rev_comparison_operator"
-[(match_operand:CCEITHER 4 "cc_reg_operand" "y,y")
+[(match_operand:CCANY 4 "cc_reg_operand" "y,y")
  (const_int 0)])
 (match_operand:GPR 2 "gpc_reg_operand" "r,r")
 (match_operand:GPR 3 "reg_or_zero_operand" "O,b")))]
@@ -5462,38 +5461,38 @@ (define_insn "*isel_reversed_signed_"
   [(set_attr "type" "isel")])
 
 ; Set Boolean Condition (Reverse)
-(define_insn "setbc_signed_"
+(define_insn "setbc__"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(match_operator:GPR 1 "scc_comparison_operator"
-   [(match_operand:CCEITHER 2 "cc_reg_operand" "y")
+   [(match_operand:CCANY 2 "cc_reg_operand" "y")
 (const_int 0)]))]
   "TARGET_POWER10"
   "setbc %0,%j1"
   [(set_attr "type" "isel")])
 
-(define_insn "*setbcr_signed_"
+(define_insn "*setbcr__"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(match_operator:GPR 1 "scc_rev_comparison_operator"
-   

Re: [PATCH v2] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Jason Merrill via Gcc-patches

On 10/25/22 11:21, Marek Polacek wrote:

On Mon, Oct 24, 2022 at 01:30:42PM -0400, Jason Merrill wrote:

On 10/21/22 19:28, Marek Polacek wrote:

This patch implements a new experimental warning (enabled by -Wextra) to
detect references bound to temporaries whose lifetime has ended.  The


Great!


primary motivation is the Note in
:

Capturing the result of std::max by reference produces a dangling reference
if one of the parameters is a temporary and that parameter is returned:

int n = 1;
const int& r = std::max(n-1, n+1); // r is dangling

That's because both temporaries for n-1 and n+1 are destroyed at the end
of the full expression.  With this warning enabled, you'll get:

g.C:3:12: warning: possibly dangling reference to a temporary 
[-Wdangling-reference]
  3 | const int& r = std::max(n-1, n+1);
|^
g.C:3:24: note: the temporary was destroyed at the end of the full expression 
'std::max((n - 1), (n + 1))'
  3 | const int& r = std::max(n-1, n+1);
|^~

The warning works by checking if a reference is initialized with a function
that returns a reference, and at least one parameter of the function is
a reference that is bound to a temporary.  It assumes that such a function
actually returns one of its arguments!  (I added code to check_return_expr
to suppress the warning when we've seen the definition of the function
and we can say that it can return something other than its parameter.)


Hmm, that misses returning a reference to a subobject or container element
that will also go away when the object is destroyed.


Yes :-(.  Fixed, tests added.  I'm just checking TREE_STATIC now.


Does it also avoid a lot of false positives?


Not at all, I just thought it may be worth it.
  

It doesn't warn when the function in question is a member function, otherwise
it'd emit loads of warnings for valid code like obj.emplace({0}, 0).


We had discussed warning if the object argument is a temporary (and for the
above check, the function returns *this)?


Presumably you mean detecting something like this:

struct S {
   const S& self () { return *this; }
};
const S& s = S().self();


Yes.  Or

struct S {
  int ar[4];
  int& operator[](int i) { return ar[i]; }
};
const int  = S()[2];


I don't currently have a way to detect it, can I steal a METHOD_TYPE flag
that says "this member function returns *this"?  Alternatively, walk its
DECL_SAVED_TREE and look for RETURN_EXPR?


Like you limited the above check to TREE_STATIC, let's also forget about 
checking for return *this.



It warns in member initializer lists as well:

const int& f(const int& i) { return i; }
struct S {
  const int  // { dg-warning "dangling reference" }
  S() : r(f(10)) { } // { dg-message "destroyed" }
};

I've run the testsuite/bootstrap with the warning enabled by default.
There were just a few FAILs:
* g++.dg/warn/Wdangling-pointer-2.C
* 20_util/any/misc/any_cast.cc
* 20_util/forward/c_neg.cc
* 20_util/forward/f_neg.cc
* experimental/any/misc/any_cast.cc
all of these look like genuine bugs.  A bootstrap with the warning
enabled by default passed.

When testing a previous version of the patch, there were many FAILs in
libstdc++'s 22_locale/; all of them because the warning triggered on

const test_type& obj = std::use_facet(std::locale());

but this code looks valid -- std::use_facet doesn't return a reference
to its parameter.  Therefore I added code to suppress the warning when
the call is std::use_facet.  Now 22_locale/* pass even with the warning
on.  We could exclude more std:: functions like this if desirable.


Instead of adding special cases in the compiler, let's disable the warning
around the definition of use_facet (and adjust the compiler as needed so
that avoids the warning).


As I said in

I don't think it's possible without inventing an attribute (?).


Replied.


I was remembering range adaptors being a stated motivation for Nico's P2012,
but looking back at the paper I now see that this problem was avoided for
them by disallowing rvalue arguments to range composition.


Aha, and if you can't pass a temporary, you will not have this problem.


gcc/testsuite/ChangeLog:

* g++.dg/cpp23/elision4.C: Use -Wdangling-reference, add dg-warning.
* g++.dg/cpp23/elision7.C: Likewise.
* g++.dg/warn/Wdangling-reference1.C: New test.
* g++.dg/warn/Wdangling-reference2.C: New test.


Could use a test with a virtual base.


Added (fns yum/lox in Wdangling-reference1.C).
  

+static tree
+find_initializing_call_expr (tree expr)
+{
+  STRIP_NOPS (expr);
+  switch (TREE_CODE (expr))
+{
+case CALL_EXPR:
+  return expr;
+case COMPOUND_EXPR:
+  return find_initializing_call_expr (TREE_OPERAND (expr, 1));
+case COND_EXPR:
+  if (tree t = find_initializing_call_expr 

Re: [PATCH] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Jason Merrill via Gcc-patches

On 10/25/22 09:14, Marek Polacek wrote:

On Tue, Oct 25, 2022 at 12:34:50PM +0100, Jonathan Wakely wrote:

On Mon, 24 Oct 2022 at 18:30, Jason Merrill wrote:


On 10/21/22 19:28, Marek Polacek wrote:

When testing a previous version of the patch, there were many FAILs in
libstdc++'s 22_locale/; all of them because the warning triggered on

const test_type& obj = std::use_facet(std::locale());

but this code looks valid -- std::use_facet doesn't return a reference
to its parameter.  Therefore I added code to suppress the warning when
the call is std::use_facet.  Now 22_locale/* pass even with the warning
on.  We could exclude more std:: functions like this if desirable.


Instead of adding special cases in the compiler, let's disable the
warning around the definition of use_facet (and adjust the compiler as
needed so that avoids the warning).


I assume you mean using #pragma here. If we disable it around the
definition of use_facet, will that disable it for callers of
use_facet, or only within the definition of use_facet itself?


Right, a #pragma will not help, it would only disable the warning
within the definition of use_facet itself.


That's why I mentioned adjusting the compiler, i.e. check 
warning_enabled_at (DECL_SOURCE_LOCATION (fn)


Jason



Re: [PATCH] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Marek Polacek via Gcc-patches
On Tue, Oct 25, 2022 at 12:50:36PM +0100, Jonathan Wakely wrote:
> On Sat, 22 Oct 2022 at 00:28, Marek Polacek wrote:
> > I've run the testsuite/bootstrap with the warning enabled by default.
> > There were just a few FAILs:
> > * g++.dg/warn/Wdangling-pointer-2.C
> > * 20_util/any/misc/any_cast.cc
> > * 20_util/forward/c_neg.cc
> > * 20_util/forward/f_neg.cc
> 
> These two are XFAIL tests, they're checking for a required static
> assert due to misusing std::forward, so I don't think we want to
> change them to remove a "bug" (they're meant to be buggy).
> I think they should use -Wno-dangling-reference

I think we don't have to do anything: -Wdangling-reference is only 
enabled by -Wextra and that isn't used in these tests.  I was just
curious about the warning's impact.

> > * experimental/any/misc/any_cast.cc
> > all of these look like genuine bugs.  A bootstrap with the warning
> > enabled by default passed.
> 
> 
> I've attached a patch for the two any_cast.cc tests that avoids
> creating dangling references, without changing the intended behaviour
> of the tests.

Thanks a lot!

Marek



Re: [PATCH zero-call-used-regs] Add leafy mode for zero-call-used-regs

2022-10-25 Thread Qing Zhao via Gcc-patches
Hi, Alexandre,

> On Oct 24, 2022, at 10:48 PM, Alexandre Oliva  wrote:
> 
> Hello, Qing,
> 
> It was a pleasure to meet you at the Cauldron.

Me too!
> 
> 
> On Oct 21, 2022, Qing Zhao  wrote:
> 
>> Hi, Alexandre,
>> Could you please explain a little bit on the motivation of this patch first?
> 
> It was a suggestion I got after the Cauldron presentation.
> It made sense to me, and was easy enough to implement.
> 
> 'all' for leaf functions is likely wasteful.  If no other functions are
> called, one can determine exactly which registers might carry
> information out and thus need zeroing, and 'used' is thus likely enough,
> depending on the purpose of register scrubbing.  (In some scenarios, it
> might make sense to want scrubbing of all registers, even unused ones
> that carry incoming values)

Under what kinds of situations, we should clear the un-used registers for leaf 
functions?


> 
> Though some functions are coded as leaf functions, others may become
> leaf functions because of inlining or other optimizations.  It's hard
> for users to predict, so it makes sense to have a mode that tells the
> compiler to figure it out.

Yes, make sense.

Now I am wondering whether we should make “leafy” mode by default then?


Another thing is, do you have any information on how much this new mode can 
save the 
code size and run-time compared to mode “all”?


> 
> 
> There's room for a follow-up improvement, to save on a little more
> potentially-wasteful anti-leaking scrubbing even in non-leaf functions:
> for this purpose, they need not scrub registers that they don't use
> themselves, if all potential callees are known to have scrubbed them.

Yes, looks like another potential improvement we might add. 
> 
> I have not (yet?) implemented this variant; I haven't even found a name
> I'm happy with for it.  (seal?  plug?  cork?  another leak antonym?)

For this improvement, I am still thinking no need to add a new mode, just add 
it by default?

Qing
> 
> I'm not entirely happy with leafy either, FWIW.  Bikeshedding anyone? :-)
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604083.html
> 
> -- 
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>   Free Software Activist   GNU Toolchain Engineer
> Disinformation flourishes because many people care deeply about injustice
> but very few check the facts.  Ask me about 



[PATCH v2] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Marek Polacek via Gcc-patches
On Mon, Oct 24, 2022 at 01:30:42PM -0400, Jason Merrill wrote:
> On 10/21/22 19:28, Marek Polacek wrote:
> > This patch implements a new experimental warning (enabled by -Wextra) to
> > detect references bound to temporaries whose lifetime has ended.  The
> 
> Great!
> 
> > primary motivation is the Note in
> > :
> > 
> >Capturing the result of std::max by reference produces a dangling 
> > reference
> >if one of the parameters is a temporary and that parameter is returned:
> > 
> >int n = 1;
> >const int& r = std::max(n-1, n+1); // r is dangling
> > 
> > That's because both temporaries for n-1 and n+1 are destroyed at the end
> > of the full expression.  With this warning enabled, you'll get:
> > 
> > g.C:3:12: warning: possibly dangling reference to a temporary 
> > [-Wdangling-reference]
> >  3 | const int& r = std::max(n-1, n+1);
> >|^
> > g.C:3:24: note: the temporary was destroyed at the end of the full 
> > expression 'std::max((n - 1), (n + 1))'
> >  3 | const int& r = std::max(n-1, n+1);
> >|^~
> > 
> > The warning works by checking if a reference is initialized with a function
> > that returns a reference, and at least one parameter of the function is
> > a reference that is bound to a temporary.  It assumes that such a function
> > actually returns one of its arguments!  (I added code to check_return_expr
> > to suppress the warning when we've seen the definition of the function
> > and we can say that it can return something other than its parameter.)
> 
> Hmm, that misses returning a reference to a subobject or container element
> that will also go away when the object is destroyed.

Yes :-(.  Fixed, tests added.  I'm just checking TREE_STATIC now.

> Does it also avoid a lot of false positives?

Not at all, I just thought it may be worth it.
 
> > It doesn't warn when the function in question is a member function, 
> > otherwise
> > it'd emit loads of warnings for valid code like obj.emplace({0}, 0).
> 
> We had discussed warning if the object argument is a temporary (and for the
> above check, the function returns *this)?

Presumably you mean detecting something like this:

struct S {
  const S& self () { return *this; }
};
const S& s = S().self();

I don't currently have a way to detect it, can I steal a METHOD_TYPE flag
that says "this member function returns *this"?  Alternatively, walk its
DECL_SAVED_TREE and look for RETURN_EXPR?
 
> > It warns in member initializer lists as well:
> > 
> >const int& f(const int& i) { return i; }
> >struct S {
> >  const int  // { dg-warning "dangling reference" }
> >  S() : r(f(10)) { } // { dg-message "destroyed" }
> >};
> > 
> > I've run the testsuite/bootstrap with the warning enabled by default.
> > There were just a few FAILs:
> > * g++.dg/warn/Wdangling-pointer-2.C
> > * 20_util/any/misc/any_cast.cc
> > * 20_util/forward/c_neg.cc
> > * 20_util/forward/f_neg.cc
> > * experimental/any/misc/any_cast.cc
> > all of these look like genuine bugs.  A bootstrap with the warning
> > enabled by default passed.
> > 
> > When testing a previous version of the patch, there were many FAILs in
> > libstdc++'s 22_locale/; all of them because the warning triggered on
> > 
> >const test_type& obj = std::use_facet(std::locale());
> > 
> > but this code looks valid -- std::use_facet doesn't return a reference
> > to its parameter.  Therefore I added code to suppress the warning when
> > the call is std::use_facet.  Now 22_locale/* pass even with the warning
> > on.  We could exclude more std:: functions like this if desirable.
> 
> Instead of adding special cases in the compiler, let's disable the warning
> around the definition of use_facet (and adjust the compiler as needed so
> that avoids the warning).

As I said in

I don't think it's possible without inventing an attribute (?).
 
> I was remembering range adaptors being a stated motivation for Nico's P2012,
> but looking back at the paper I now see that this problem was avoided for
> them by disallowing rvalue arguments to range composition.

Aha, and if you can't pass a temporary, you will not have this problem.

> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp23/elision4.C: Use -Wdangling-reference, add dg-warning.
> > * g++.dg/cpp23/elision7.C: Likewise.
> > * g++.dg/warn/Wdangling-reference1.C: New test.
> > * g++.dg/warn/Wdangling-reference2.C: New test.
> 
> Could use a test with a virtual base.

Added (fns yum/lox in Wdangling-reference1.C).
 
> > +static tree
> > +find_initializing_call_expr (tree expr)
> > +{
> > +  STRIP_NOPS (expr);
> > +  switch (TREE_CODE (expr))
> > +{
> > +case CALL_EXPR:
> > +  return expr;
> > +case COMPOUND_EXPR:
> > +  return find_initializing_call_expr (TREE_OPERAND (expr, 1));
> > +case COND_EXPR:
> > +  if 

[PATCH] testsuite: Windows paths use \ and not /

2022-10-25 Thread Torbjörn SVENSSON via Gcc-patches
Without this patch, the following error is reported on Windows:

In file included from t:\build\arm-none-eabi\include\c++\11.3.1\string:54,
  from 
t:\build\arm-none-eabi\include\c++\11.3.1\bits\locale_classes.h:40,
  from 
t:\build\arm-none-eabi\include\c++\11.3.1\bits\ios_base.h:41,
  from t:\build\arm-none-eabi\include\c++\11.3.1\ios:42,
  from t:\build\arm-none-eabi\include\c++\11.3.1\ostream:38,
  from t:\build\arm-none-eabi\include\c++\11.3.1\iostream:39:
t:\build\arm-none-eabi\include\c++\11.3.1\bits\range_access.h:36:10: note: 
include 't:\build\arm-none-eabi\include\c++\11.3.1\initializer_list' translated 
to import
arm-none-eabi-g++.exe: warning: .../gcc/testsuite/g++.dg/modules/pr99023_b.X: 
linker input file unused because linking not done
FAIL: g++.dg/modules/pr99023_b.X -std=c++2a  dg-regexp 6 not found: "[^\n]*: 
note: include '[^\n]*/initializer_list' translated to import\n"

gcc/testsuite/ChangeLog:

* g++.dg/modules/pr99023_b.X: Match Windows paths too.

Co-Authored-By: Yvan ROUX 
Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/g++.dg/modules/pr99023_b.X | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/modules/pr99023_b.X 
b/gcc/testsuite/g++.dg/modules/pr99023_b.X
index 3d82f34868b..ca5f32e5bcc 100644
--- a/gcc/testsuite/g++.dg/modules/pr99023_b.X
+++ b/gcc/testsuite/g++.dg/modules/pr99023_b.X
@@ -3,5 +3,5 @@
 
 // { dg-prune-output {linker input file unused} }
 
-// { dg-regexp {[^\n]*: note: include '[^\n]*/initializer_list' translated to 
import\n} }
+// { dg-regexp {[^\n]*: note: include '[^\n]*[/\\]initializer_list' translated 
to import\n} }
 NO DO NOT COMPILE
-- 
2.25.1



PING^3 [PATCH] testsuite: Verify that module-mapper is available

2022-10-25 Thread Torbjorn SVENSSON via Gcc-patches

Hi,

Ping, https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603544.html

Kind regards,
Torbjörn

On 2022-10-14 09:42, Torbjorn SVENSSON wrote:

Hi,

Ping, https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602843.html

Kind regards,
Torbjörn

On 2022-10-05 11:17, Torbjorn SVENSSON wrote:

Hi,

Ping, 
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/602111.html


Kind regards,
Torbjörn

On 2022-09-23 14:03, Torbjörn SVENSSON wrote:

For some test cases, it's required that the optional module mapper
"g++-mapper-server" is built. As the server is not required, the
test cases will fail if it can't be found.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp (check_is_prog_name_available):
New.
* lib/target-supports-dg.exp
(dg-require-prog-name-available): New.
* g++.dg/modules/modules.exp: Verify avilability of module
mapper.

Signed-off-by: Torbjörn SVENSSON  
---
  gcc/testsuite/g++.dg/modules/modules.exp | 31 
  gcc/testsuite/lib/target-supports-dg.exp | 15 
  gcc/testsuite/lib/target-supports.exp    | 15 
  3 files changed, 61 insertions(+)

diff --git a/gcc/testsuite/g++.dg/modules/modules.exp 
b/gcc/testsuite/g++.dg/modules/modules.exp

index afb323d0efd..4784803742a 100644
--- a/gcc/testsuite/g++.dg/modules/modules.exp
+++ b/gcc/testsuite/g++.dg/modules/modules.exp
@@ -279,6 +279,29 @@ proc module-init { src } {
  return $option_list
  }
+# Return 1 if requirements are met
+proc module-check-requirements { tests } {
+    foreach test $tests {
+    set tmp [dg-get-options $test]
+    foreach op $tmp {
+    switch [lindex $op 0] {
+    "dg-additional-options" {
+    # Example strings to match:
+    # -fmodules-ts -fmodule-mapper=|@g++-mapper-server\\ 
-t\\ [srcdir]/inc-xlate-1.map

+    # -fmodules-ts -fmodule-mapper=|@g++-mapper-server
+    if [regexp -- {(^| )-fmodule-mapper=\|@([^\\ ]*)} 
[lindex $op 2] dummy dummy2 prog] {

+    verbose "Checking that mapper exist: $prog"
+    if { ![ check_is_prog_name_available $prog ] } {
+    return 0
+    }
+    }
+    }
+    }
+    }
+    }
+    return 1
+}
+
  # cleanup any detritus from previous run
  cleanup_module_files [find $DEFAULT_REPO *.gcm]
@@ -307,6 +330,14 @@ foreach src [lsort [find $srcdir/$subdir 
{*_a.[CHX}]] {

  set tests [lsort [find [file dirname $src] \
    [regsub {_a.[CHX]$} [file tail $src] 
{_[a-z].[CHX]}]]]

+    if { ![module-check-requirements $tests] } {
+    set testcase [regsub {_a.[CH]} $src {}]
+    set testcase \
+    [string range $testcase [string length "$srcdir/"] end]
+    unsupported $testcase
+    continue
+    }
+
  set std_list [module-init $src]
  foreach std $std_list {
  set mod_files {}
diff --git a/gcc/testsuite/lib/target-supports-dg.exp 
b/gcc/testsuite/lib/target-supports-dg.exp

index aa2164bc789..6ce3b2b1a1b 100644
--- a/gcc/testsuite/lib/target-supports-dg.exp
+++ b/gcc/testsuite/lib/target-supports-dg.exp
@@ -683,3 +683,18 @@ proc dg-require-symver { args } {
  set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"]
  }
  }
+
+# If this target does not provide prog named "$args", skip this test.
+
+proc dg-require-prog-name-available { args } {
+    # The args are within another list; pull them out.
+    set args [lindex $args 0]
+
+    set prog [lindex $args 1]
+
+    if { ![ check_is_prog_name_available $prog ] } {
+    upvar dg-do-what dg-do-what
+    set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"]
+    }
+}
+
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp

index 703aba412a6..c3b7a6c17b3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11928,3 +11928,18 @@ main:
  .byte 0
    } ""]
  }
+
+# Return 1 if this target has prog named "$prog", 0 otherwise.
+
+proc check_is_prog_name_available { prog } {
+    global tool
+
+    set options [list "additional_flags=-print-prog-name=$prog"]
+    set output [lindex [${tool}_target_compile "" "" "none" 
$options] 0]

+
+    if { $output == $prog } {
+    return 0
+    }
+
+    return 1
+}


[PATCH] c++: Use in-process client when networking is disabled

2022-10-25 Thread Torbjörn SVENSSON via Gcc-patches
Without the patch, the output for bad-mapper-3.C would be:

/src/gcc/gcc/testsuite/g++.dg/modules/bad-mapper-3.C:2:1: error: unknown 
Compiled Module Interface: no such module

As this line is unexpected, the test case would fail.
The same problem can also be seen for g++.dg/modules/bad-mapper-2.C.

gcc/cp/ChangeLog:

* mapper-client.cc: Use in-process client when networking is
disabled.

Co-Authored-By: Yvan ROUX 
Signed-off-by: Torbjörn SVENSSON 
---
 gcc/cp/mapper-client.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/cp/mapper-client.cc b/gcc/cp/mapper-client.cc
index fe9544b5ba4..4dcb3a03660 100644
--- a/gcc/cp/mapper-client.cc
+++ b/gcc/cp/mapper-client.cc
@@ -227,6 +227,8 @@ module_client::open_module_client (location_t loc, const 
char *o,
int fd = -1;
 #if CODY_NETWORKING
fd = Cody::OpenLocal (, name.c_str () + 1);
+#else
+   errmsg = "CODY_NETWORKING disabled";
 #endif
if (fd >= 0)
  c = new module_client (fd, fd);
@@ -254,6 +256,8 @@ module_client::open_module_client (location_t loc, const 
char *o,
int fd = -1;
 #if CODY_NETWORKING
fd = Cody::OpenInet6 (, name.c_str (), port);
+#else
+   errmsg = "CODY_NETWORKING disabled";
 #endif
name[colon] = ':';
 
-- 
2.25.1



[PATCH] RISC-V: Fix a mistake in previous patch.

2022-10-25 Thread juzhe . zhong
From: Ju-Zhe Zhong 

 I noticed that I have made a mistake in previous patch:
 
https://patchwork.sourceware.org/project/gcc/patch/20220817071950.271762-1-juzhe.zh...@rivai.ai/
 
 The previous statement before this patch:
 bool need_barrier_p = (get_frame_size () + 
cfun->machine->frame.arg_pointer_offset) != 0;
 
 However, I changed it in the previous patch:
 bool need_barrier_p = known_ne (get_frame_size (), 
cfun->machine->frame.arg_pointer_offset);
 This is incorrect.
 
 Now, I correct this statement in this patch.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_expand_epilogue): Fix statement.

---
 gcc/config/riscv/riscv.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 08354a19c05..50ef38438a2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5028,8 +5028,8 @@ riscv_expand_epilogue (int style)
   rtx insn;
 
   /* We need to add memory barrier to prevent read from deallocated stack.  */
-  bool need_barrier_p
-= known_ne (get_frame_size (), cfun->machine->frame.arg_pointer_offset);
+  bool need_barrier_p = known_ne (get_frame_size ()
+  + cfun->machine->frame.arg_pointer_offset, 
0);
 
   if (cfun->machine->naked_p)
 {
-- 
2.36.1



Re: [PATCH] RISC-V: Recognized Svinval and Svnapot extensions

2022-10-25 Thread Bernhard Reutner-Fischer via Gcc-patches
On 25 October 2022 08:17:33 CEST, Monk Chiang  wrote:
>gcc/ChangeLog:
>

>diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
>index 55e0bc0a0e9..63ac56a8ca0 100644
>--- a/gcc/config/riscv/riscv-opts.h
>+++ b/gcc/config/riscv/riscv-opts.h
>@@ -162,6 +162,12 @@ enum stack_protector_guard {
> #define MASK_ZMMUL  (1 << 0)
> #define TARGET_ZMMUL((riscv_zm_subext & MASK_ZMMUL) != 0)
> 
>+#define MASK_SVINVAL (1 << 0)
>+#define MASK_SVNAPOT (1 << 1)
>+
>+#define TARGET_SVINVAL ((riscv_sv_subext & MASK_SVINVAL) != 0)
>+#define TARGET_SVNAPOT ((riscv_sv_subext & MASK_SVNAPOT) != 0)
>+
> /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
>set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use
>popcount to caclulate the minimal VLEN.  */

Preexisting, but the above is hard to parse. contintuly, caclulate, will set, 
has set ?

thanks,


Re: [PATCH] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Marek Polacek via Gcc-patches
On Tue, Oct 25, 2022 at 12:34:50PM +0100, Jonathan Wakely wrote:
> On Mon, 24 Oct 2022 at 18:30, Jason Merrill wrote:
> >
> > On 10/21/22 19:28, Marek Polacek wrote:
> > > When testing a previous version of the patch, there were many FAILs in
> > > libstdc++'s 22_locale/; all of them because the warning triggered on
> > >
> > >const test_type& obj = std::use_facet(std::locale());
> > >
> > > but this code looks valid -- std::use_facet doesn't return a reference
> > > to its parameter.  Therefore I added code to suppress the warning when
> > > the call is std::use_facet.  Now 22_locale/* pass even with the warning
> > > on.  We could exclude more std:: functions like this if desirable.
> >
> > Instead of adding special cases in the compiler, let's disable the
> > warning around the definition of use_facet (and adjust the compiler as
> > needed so that avoids the warning).
> 
> I assume you mean using #pragma here. If we disable it around the
> definition of use_facet, will that disable it for callers of
> use_facet, or only within the definition of use_facet itself?

Right, a #pragma will not help, it would only disable the warning
within the definition of use_facet itself.

Another way would be to use some kind of attribute on use_facet but
I didn't find any that would be relevant in this scenario (so I guess
we'd have to add one).

Marek



[PATCH] unswitch most profitable condition first

2022-10-25 Thread Richard Biener via Gcc-patches
When doing the loop unswitching re-org we promised to followup
with improvements on the cost modeling.  The following makes sure we
try to unswitch on the most profitable condition first.  As most profitable
we pick the condition leading to the edge with the highest profile count.

Note the profile is only applied when picking the first unswitching
opportunity since the profile counts are not updated with earlier
unswitchings in mind.  Further opportunities are picked in DFS order.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Any comments?

Thanks,
Richard.

* tree-ssa-loop-unswitch.cc (unswitch_predicate::count): New.
(unswitch_predicate::unswitch_predicate): Initialize count.
(init_loop_unswitch_info): First collect candidates and
determine the outermost loop to unswitch.
(tree_ssa_unswitch_loops): First perform all guard hoisting,
then perform unswitching on innermost loop predicates.
(find_unswitching_predicates_for_bb): Keep track of the
most profitable predicate to unswitch on.
(tree_unswitch_single_loop): Unswitch given predicate if
not NULL.
---
 gcc/tree-ssa-loop-unswitch.cc | 66 ---
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-ssa-loop-unswitch.cc b/gcc/tree-ssa-loop-unswitch.cc
index 7d6781d1505..dfe75f52f12 100644
--- a/gcc/tree-ssa-loop-unswitch.cc
+++ b/gcc/tree-ssa-loop-unswitch.cc
@@ -118,6 +118,7 @@ struct unswitch_predicate
 if (!false_range.varying_p ()
&& !false_range.undefined_p ())
   false_range.invert ();
+count = e->count ();
 num = predicates->length ();
 predicates->safe_push (this);
   }
@@ -126,7 +127,8 @@ struct unswitch_predicate
   unswitch_predicate (gcond *stmt)
 : switch_p (false)
   {
-if (EDGE_SUCC (gimple_bb (stmt), 0)->flags & EDGE_TRUE_VALUE)
+basic_block bb = gimple_bb (stmt);
+if (EDGE_SUCC (bb, 0)->flags & EDGE_TRUE_VALUE)
   edge_index = 0;
 else
   edge_index = 1;
@@ -134,6 +136,7 @@ struct unswitch_predicate
 tree rhs = gimple_cond_rhs (stmt);
 enum tree_code code = gimple_cond_code (stmt);
 condition = build2 (code, boolean_type_node, lhs, rhs);
+count = EDGE_SUCC (bb, 0)->count ().max (EDGE_SUCC (bb, 1)->count ());
 if (irange::supports_p (TREE_TYPE (lhs)))
   {
auto range_op = range_op_handler (code, TREE_TYPE (lhs));
@@ -180,6 +183,9 @@ struct unswitch_predicate
   /* Index of the edge the predicate belongs to in the successor vector.  */
   int edge_index;
 
+  /* The profile count of this predicate.  */
+  profile_count count;
+
   /* Whether the predicate was created from a switch statement.  */
   bool switch_p;
 
@@ -206,10 +212,14 @@ static class loop *tree_unswitch_loop (class loop *, 
edge, tree);
 static bool tree_unswitch_single_loop (class loop *, dump_user_location_t,
   predicate_vector _path,
   unsigned loop_size, unsigned ,
-  int ignored_edge_flag, bitmap);
+  int ignored_edge_flag, bitmap,
+  unswitch_predicate * = NULL,
+  basic_block = NULL);
 static void
 find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
-   vec );
+   vec ,
+   unswitch_predicate *,
+   basic_block _bb);
 static bool tree_unswitch_outer_loop (class loop *);
 static edge find_loop_guard (class loop *, vec&);
 static bool empty_bb_without_guard_p (class loop *, basic_block,
@@ -242,7 +252,8 @@ set_predicates_for_bb (basic_block bb, 
vec predicates)
Return total number of instructions in the loop.  */
 
 static unsigned
-init_loop_unswitch_info (class loop *loop)
+init_loop_unswitch_info (class loop *loop, unswitch_predicate *,
+basic_block _bb)
 {
   unsigned total_insns = 0;
 
@@ -259,13 +270,16 @@ init_loop_unswitch_info (class loop *loop)
   total_insns += insns;
 }
 
+  hottest = NULL;
+  hottest_bb = NULL;
   /* Find all unswitching candidates.  */
   for (unsigned i = 0; i != loop->num_nodes; i++)
 {
   /* Find a bb to unswitch on.  */
   vec candidates;
   candidates.create (1);
-  find_unswitching_predicates_for_bb (bbs[i], loop, candidates);
+  find_unswitching_predicates_for_bb (bbs[i], loop, candidates,
+ hottest, hottest_bb);
   if (!candidates.is_empty ())
set_predicates_for_bb (bbs[i], candidates);
   else
@@ -329,7 +343,10 @@ tree_ssa_unswitch_loops (function *fun)
  unswitch_predicate::predicates = new vec ();
 
  /* Unswitch innermost loop.  */
- unsigned int loop_size = init_loop_unswitch_info (loop);
+ 

Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.

2022-10-25 Thread Dimitrije Milosevic
Hi Richard,

> I don't follow how only having BASE + OFFSET addressing prevents
> calculation of complexity for other addressing modes?  Can you explain?

It's the valid_mem_ref_p target hook that prevents complexity calculation
for other addressing modes (for Mips and RISC-V).
Here's the snippet of the algorithm (after f9f69dd) for the complexity 
calculation, which is located at the beginning of the get_address_cost
function in tree-ssa-loop-ivopts.cc:

  if (!aff_combination_const_p (aff_inv))
{
  parts.index = integer_one_node;
  /* Addressing mode "base + index".  */
  ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, );
  if (ratio != 1)
{
  parts.step = wide_int_to_tree (type, ratio);
  /* Addressing mode "base + index << scale".  */
  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, );
  if (!ok_with_ratio_p)
parts.step = NULL_TREE;
}
  ...

The algorithm "builds up" the actual addressing mode step-by-step,
starting from BASE + INDEX. However, if valid_mem_ref_p returns
negative, parts.* is set to NULL_TREE and we bail out. For Mips (or 
RISC-V), it always returns negative (given we are building the addressing
mode up from BASE + INDEX), since Mips allows BASE + OFFSET only
(see the case PLUS in mips_classify_address in config/mips/mips.cc).
The result is that all addressing modes besides BASE + OFFSET, for Mips
(or RISC-V) have complexities of 0. f9f69dd introduced calls to valid_mem_ref_p
target hook, which were not there before, and I'm not sure why exactly.

> Do you have a testcase that shows how both changes improve IV selection
> for MIPS?

Certainly, consider the following test case:

void daxpy(float *vector1, float *vector2, int n, float fp_const)
{
for (int i = 0; i < n; ++i)
vector1[i] += fp_const * vector2[i];
}

void dgefa(float *vector, int m, int n, int l)
{
for (int i = 0; i < n - 1; ++i)
{
for (int j = i + 1; j < n; ++j)
{
float t = vector[m * j + l];
daxpy([m * i + i + 1], [m * j + i + 1], n 
- (i + 1), t);
}
}
}

At the third inner loop (which gets inlined from daxpy), an unoptimal candidate
selection takes place.
Worth noting is that f9f69dd doesn't change the costs (they are, however, 
multiplied by
a factor, but what was lesser/greater before is lesser/greater after). Here's 
how complexities
stand:

= Before f9f69dd =
Group 1:
  cand  costcompl.  inv.expr.   inv.vars
  1 13  1   3;  NIL;
  2 13  2   4;  NIL;
  4 9   1   5;  NIL;
  5 1   0   NIL;NIL;
  7 9   1   3;  NIL;
= Before f9f69dd =
= After f9f69dd =
Group 1:
  cand  costcompl.  inv.expr.   inv.vars
  1 10  0   4;  NIL;
  2 10  0   5;  NIL;
  4 6   0   6;  NIL;
  5 1   0   NIL;NIL;
  7 6   0   4;  NIL;
= After f9f69dd =

Notice how all complexities are zero, even though the candidates have
different addressing modes.

For this particular example, this leads to a different candidate selection:

= Before f9f69dd =
Selected IV set for loop 3 at fp_foo.c:3, 10 avg niters, 2 IVs:
Candidate 4:
  Var befor: ivtmp.17_52
  Var after: ivtmp.17_103
  Incr POS: before exit test
  IV struct:
Type:   unsigned long
Base:   (unsigned long) (vector_27(D) + _10)
Step:   4
Object: (void *) vector_27(D)
Biv:N
Overflowness wrto loop niter:   Overflow
Candidate 5:
  Var befor: ivtmp.18_99
  Var after: ivtmp.18_98
  Incr POS: before exit test
  IV struct:
Type:   unsigned long
Base:   (unsigned long) (vector_27(D) + _14)
Step:   4
Object: (void *) vector_27(D)
Biv:N
Overflowness wrto loop niter:   Overflow
= Before f9f69dd =
= After f9f69dd =
Selected IV set for loop 3 at fp_foo.c:3, 10 avg niters, 1 IVs:
Candidate 4:
  Var befor: ivtmp.17_52
  Var after: ivtmp.17_103
  Incr POS: before exit test
  IV struct:
Type:   unsigned long
Base:   (unsigned long) (vector_27(D) + _10)
Step:   4
Object: (void *) vector_27(D)
Biv:N
Overflowness wrto loop niter:   Overflow
= After f9f69dd =

which, in turn, leads to the following assembly sequence:

= Before f9f69dd =
.L83:
lwc1$f5,0($3)
lwc1$f8,0($2)
lwc1$f7,4($2)
lwc1$f6,8($2)
lwc1$f9,12($2)
lwc1$f10,16($2)
maddf.s $f8,$f0,$f5
lwc1$f11,20($2)
lwc1$f12,24($2)
lwc1$f13,28($2)
ld  $12,72($sp)
swc1$f8,0($2)
lwc1$f14,4($3)
maddf.s $f7,$f0,$f14
swc1$f7,4($2)
lwc1$f15,8($3)
maddf.s $f6,$f0,$f15

Re: [PATCH 2/2] ivopts: Consider number of invariants when calculating register pressure.

2022-10-25 Thread Dimitrije Milosevic
Hi Richard,

> don't you add n_invs twice now given
>
>  unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
>  unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
>
> ?

If you are referring to the "If we have enough registers." case, correct. After 
c18101f,
for that case, the returned cost is equal to 2 * n_invs + n_cands. Before 
c18101f, for 
that case, the returned cost is equal to n_invs + n_cands. Another solution 
would be
to just return n_invs + n_cands if we have enough registers.

Regards,
Dimitrije


From: Richard Biener 
Sent: Tuesday, October 25, 2022 1:07 PM
To: Dimitrije Milosevic 
Cc: gcc-patches@gcc.gnu.org ; Djordje Todorovic 

Subject: Re: [PATCH 2/2] ivopts: Consider number of invariants when calculating 
register pressure. 
 
On Fri, Oct 21, 2022 at 3:57 PM Dimitrije Milosevic
 wrote:
>
> From: Dimitrije Milošević 
>
> This patch slightly modifies register pressure model function to consider
> both the number of invariants and the number of candidates, rather than
> just the number of candidates. This used to be the case before c18101f.

don't you add n_invs twice now given

  unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
  unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;

?

> gcc/ChangeLog:
>
> * tree-ssa-loop-ivopts.cc (ivopts_estimate_reg_pressure): Adjust.
>
> Signed-off-by: Dimitrije Milosevic 
> ---
>  gcc/tree-ssa-loop-ivopts.cc | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index d53ba05a4f6..9d0b669d671 100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -6409,9 +6409,9 @@ ivopts_estimate_reg_pressure (struct ivopts_data *data, 
> unsigned n_invs,
>    + target_spill_cost [speed] * (n_cands - available_regs) * 2
>    + target_spill_cost [speed] * (regs_needed - n_cands);
>
> -  /* Finally, add the number of candidates, so that we prefer eliminating
> - induction variables if possible.  */
> -  return cost + n_cands;
> +  /* Finally, add the number of invariants and the number of candidates,
> + so that we prefer eliminating induction variables if possible.  */
> +  return cost + n_invs + n_cands;
>  }
>
>  /* For each size of the induction variable set determine the penalty.  */
> --
> 2.25.1
>

Re: [PATCH] ix86: Suggest unroll factor for loop vectorization

2022-10-25 Thread Richard Biener via Gcc-patches
On Tue, Oct 25, 2022 at 7:46 AM Hongtao Liu  wrote:
>
> Any comments?

+@item x86-vect-unroll-min-ldst-threshold
+The vectorizer will check with target information to determine whether unroll
+it. This parameter is used to limit the mininum of loads and stores in the main
+loop.

It's odd to "limit" the minimum number of something.  I think this
warrants clarification
that for some (unknow to me ;)) reason we think that when we have many loads
and (or?) stores it is beneficial to unroll to get even more loads and
stores in a
single iteration.  Btw, does the parameter limit the number of loads and stores
_after_ unrolling or before?

+@item x86-vect-unroll-max-loop-size
+The vectorizer will check with target information to determine whether unroll
+it. This threshold is used to limit the max size of loop body after unrolling.
+The default value is 200.

it should probably say not "size" but "number of instructions".  Note that 200
is quite large given we are talking about vector instructions here which have
larger encodings than scalar instructions.  Optimistically assuming
4 byte encoding (quite optimistic give we're looking at loops with many
loads/stores) that would be an 800 byte loop body which would be 25 cache lines.
ISTR that at least the loop discovery is limited to a lot smaller cases (but
we are likely not targeting that).  The limit probably still works to
fit the loop
body in the u-op caches though.

That said, the heuristic made me think "what the heck".  Can we explain
in u-arch terms why the unrolling is beneficial instead of just defering to
SPEC CPU 2017 fotonik?

> On Mon, Oct 24, 2022 at 10:46 AM Cui,Lili via Gcc-patches
>  wrote:
> >
> > Hi Hongtao,
> >
> > This patch introduces function finish_cost and
> > determine_suggested_unroll_factor for x86 backend, to make it be
> > able to suggest the unroll factor for a given loop being vectorized.
> > Referring to aarch64, RS6000 backends and basing on the analysis on
> > SPEC2017 performance evaluation results.
> >
> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> >
> > OK for trunk?
> >
> >
> >
> > With this patch, SPEC2017 performance evaluation results on
> > ICX/CLX/ADL/Znver3 are listed below:
> >
> > For single copy:
> >   - ICX: 549.fotonik3d_r +6.2%, the others are neutral
> >   - CLX: 549.fotonik3d_r +1.9%, the others are neutral
> >   - ADL: 549.fotonik3d_r +4.5%, the others are neutral
> >   - Znver3: 549.fotonik3d_r +4.8%, the others are neutral
> >
> > For multi-copy:
> >   - ADL: 549.fotonik3d_r +2.7%, the others are neutral
> >
> > gcc/ChangeLog:
> >
> > * config/i386/i386.cc (class ix86_vector_costs): Add new members
> >  m_nstmts, m_nloads m_nstores and determine_suggested_unroll_factor.
> > (ix86_vector_costs::add_stmt_cost): Update for m_nstores, m_nloads
> > and m_nstores.
> > (ix86_vector_costs::determine_suggested_unroll_factor): New 
> > function.
> > (ix86_vector_costs::finish_cost): Diito.
> > * config/i386/i386.opt:(x86-vect-unroll-limit): New parameter.
> > (x86-vect-unroll-min-ldst-threshold): Likewise.
> > (x86-vect-unroll-max-loop-size): Likewise.
> > * doc/invoke.texi: Document new parameter.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/cond_op_maxmin_b-1.c: Add -fno-unroll-loops.
> > * gcc.target/i386/cond_op_maxmin_ub-1.c: Ditto.
> > * gcc.target/i386/vect-alignment-peeling-1.c: Ditto.
> > * gcc.target/i386/vect-alignment-peeling-2.c: Ditto.
> > * gcc.target/i386/vect-reduc-1.c: Ditto.
> > ---
> >  gcc/config/i386/i386.cc   | 106 ++
> >  gcc/config/i386/i386.opt  |  15 +++
> >  gcc/doc/invoke.texi   |  17 +++
> >  .../gcc.target/i386/cond_op_maxmin_b-1.c  |   2 +-
> >  .../gcc.target/i386/cond_op_maxmin_ub-1.c |   2 +-
> >  .../i386/vect-alignment-peeling-1.c   |   2 +-
> >  .../i386/vect-alignment-peeling-2.c   |   2 +-
> >  gcc/testsuite/gcc.target/i386/vect-reduc-1.c  |   2 +-
> >  8 files changed, 143 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index aeea26ef4be..a939354e55e 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -23336,6 +23336,17 @@ class ix86_vector_costs : public vector_costs
> >   stmt_vec_info stmt_info, slp_tree node,
> >   tree vectype, int misalign,
> >   vect_cost_model_location where) override;
> > +
> > +  unsigned int determine_suggested_unroll_factor (loop_vec_info);
> > +
> > +  void finish_cost (const vector_costs *) override;
> > +
> > +  /* Total number of vectorized stmts (loop only).  */
> > +  unsigned m_nstmts = 0;
> > +  /* Total number of loads (loop only).  */
> > +  unsigned m_nloads = 0;
> > +  /* Total number of stores (loop only).  */
> > 

Re: [PATCH] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Jonathan Wakely via Gcc-patches
On Sat, 22 Oct 2022 at 00:28, Marek Polacek wrote:
> I've run the testsuite/bootstrap with the warning enabled by default.
> There were just a few FAILs:
> * g++.dg/warn/Wdangling-pointer-2.C
> * 20_util/any/misc/any_cast.cc
> * 20_util/forward/c_neg.cc
> * 20_util/forward/f_neg.cc

These two are XFAIL tests, they're checking for a required static
assert due to misusing std::forward, so I don't think we want to
change them to remove a "bug" (they're meant to be buggy).
I think they should use -Wno-dangling-reference

> * experimental/any/misc/any_cast.cc
> all of these look like genuine bugs.  A bootstrap with the warning
> enabled by default passed.


I've attached a patch for the two any_cast.cc tests that avoids
creating dangling references, without changing the intended behaviour
of the tests.
diff --git a/libstdc++-v3/testsuite/20_util/any/misc/any_cast.cc 
b/libstdc++-v3/testsuite/20_util/any/misc/any_cast.cc
index 8d63dfbba9b..43f90c336c9 100644
--- a/libstdc++-v3/testsuite/20_util/any/misc/any_cast.cc
+++ b/libstdc++-v3/testsuite/20_util/any/misc/any_cast.cc
@@ -93,7 +93,8 @@ void test03()
   MoveEnabled m;
   MoveEnabled m2 = any_cast(any(m));
   VERIFY(move_count == 1);
-  MoveEnabled&& m3 = any_cast(any(m));
+  auto rvalue = [](MoveEnabled&&) { };
+  rvalue(any_cast(any(m))); // No move construction, just a 
cast.
   VERIFY(move_count == 1);
 }
 
diff --git a/libstdc++-v3/testsuite/experimental/any/misc/any_cast.cc 
b/libstdc++-v3/testsuite/experimental/any/misc/any_cast.cc
index f3ce83ad702..98347f6d37d 100644
--- a/libstdc++-v3/testsuite/experimental/any/misc/any_cast.cc
+++ b/libstdc++-v3/testsuite/experimental/any/misc/any_cast.cc
@@ -92,7 +92,8 @@ void test03()
   MoveEnabled m;
   MoveEnabled m2 = any_cast(any(m));
   VERIFY(move_count == 1);
-  MoveEnabled&& m3 = any_cast(any(m));
+  auto rvalue = [](MoveEnabled&&) { }; // Can only be called with an rvalue.
+  rvalue(any_cast(any(m))); // Doesn't move, just casts to rval.
   VERIFY(move_count == 1);
   struct MoveDeleted
   {
@@ -101,8 +102,9 @@ void test03()
 MoveDeleted(const MoveDeleted&) = default;
   };
   MoveDeleted md;
-  MoveDeleted&& md2 = any_cast(any(std::move(md)));
-  MoveDeleted&& md3 = any_cast(any(std::move(md)));
+  auto rvalue2 = [](MoveDeleted&&) { }; // Can only be called with an rvalue.
+  rvalue2(any_cast(any(std::move(md;
+  rvalue2(any_cast(any(std::move(md;
 }
 
 void test05()


Re: [PATCH] diagnostics: Allow FEs to keep customizations for middle end [PR101551, PR106274]

2022-10-25 Thread Richard Biener via Gcc-patches
On Thu, Oct 20, 2022 at 1:09 AM Lewis Hyatt via Gcc-patches
 wrote:
>
> Currently, the ipa-free-lang-data pass resets most of the frontend's
> diagnostic customizations, such as the diagnostic_finalizer that prints macro
> expansion information, which is the subject of the two PRs. In most cases,
> however, there is no need to reset these customizations; they still work just
> fine after the language-specific data has been freed. (Macro tracking
> information, for instance, only depends on the line_maps instance and does not
> use the tree data structures at all.)
>
> Add an interface whereby frontends can convey which of their customizations
> should be preserved by ipa-free-lang-data. Only the macro tracking behavior is
> changed for now.  Subsequent patches will add further configurations for each
> frontend.

One point of the resetting of the hooks is to avoid crashes due to us zapping
many of the lang specific data structures.  If the hooks were more resilent
that wouldn't be an issue.

Now - as for macro tracking, how difficult is it to replicate that in the
default hook implementation?  Basically we have similar issues for
late diagnostics of the LTO compile step where only the LTO (aka default)
variant of the hooks are present - it would be nice to improve that as well.

Note free_lang_data exists to "simplify" the LTO bytecode output - things
freed do not need to be output.  Of course the "freeing" logic could be
wired into the LTO bytecode output machinery directly - simply do not
output what we'd free.  That way all info would prevail for the non-LTO
compile and the hooks could continue to work as they do without any
LTO streaming done.

Richard.

>
> gcc/ChangeLog:
>
> PR lto/106274
> PR middle-end/101551
> * diagnostic.h (struct diagnostic_context): Add new customization
> point diagnostic_context::preserve_on_reset.
> * diagnostic.cc (diagnostic_initialize): Initialize it.
> * tree-diagnostic.h (tree_diagnostics_defaults): Add new optional
> argument enable_preserve.
> * tree-diagnostic.cc (tree_diagnostics_defaults): Implement it.
> * ipa-free-lang-data.cc (free_lang_data): Use it.
>
> gcc/c-family/ChangeLog:
>
> PR lto/106274
> PR middle-end/101551
> * c-opts.cc (c_common_diagnostics_set_defaults): Preserve
> diagnostic finalizer for the middle end.
>
> libgomp/ChangeLog:
>
> PR lto/106274
> PR middle-end/101551
> * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Remove
> now-unnecessary workaround.
> * testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: Likewise.
>
> gcc/testsuite/ChangeLog:
>
> PR lto/106274
> PR middle-end/101551
> * c-c++-common/diag-after-fld-1.c: New test.
> ---
>
> Notes:
> Hello-
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101551
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106274
>
> PR101551 complains that when compiling with offloading enabled, 
> diagnostics
> output changes in several different ways. PR106274 notes the same thing 
> (which
> has the same cause) when compiling with -flto, for the specific case of 
> macro
> expansion tracking, which is no longer output for middle-end diagnostics 
> when
> -flto is present. Restoring the macro tracking information, at least, can 
> be
> done simply, which is the attached patch. This is straightforward because 
> the
> printing of macro tracking information is not really reliant on the sort 
> of
> language-specific data structures that are freed by ipa-free-lang-data... 
> it
> just needs the line-maps API, which is common to all languages and which 
> is
> not impacted by the work done by ipa-free-lang-data.
>
> To be clear, this is not about diagnostics issued *by* the lto frontend. 
> This
> is just about diagnostics issued by the language frontends, in case they 
> were
> asked to stream the IL for later use by the lto frontend. I think from the
> user's perspective, compiling with or without -flto should not change the
> quality of diagnostics, since on the face of it, -flto seems to be just a
> request for the compiler to do something extra (write out the IL in 
> addition
> to all the other stuff it does), and it's not clear why this should 
> change the
> way diagnostics look. (I understand there must be some good reasons, why 
> it's
> not the case.)  So anyway I was focusing on how to keep the diagnostics as
> close as possible to their normal form after ipa-free-lang-data is done.
>
> The approach I took was to add a new variable "preserve_on_reset" in the
> diagnostic_context, allowing a frontend to specify whether its diagnostic
> context customizations are safe to leave in place for the middle end. 
> There
> are currently four customizations for which preservation can be enabled:
>
> * diagnostic starter
>  

Re: [PATCH] c++: Implement -Wdangling-reference [PR106393]

2022-10-25 Thread Jonathan Wakely via Gcc-patches
On Mon, 24 Oct 2022 at 18:30, Jason Merrill wrote:
>
> On 10/21/22 19:28, Marek Polacek wrote:
> > When testing a previous version of the patch, there were many FAILs in
> > libstdc++'s 22_locale/; all of them because the warning triggered on
> >
> >const test_type& obj = std::use_facet(std::locale());
> >
> > but this code looks valid -- std::use_facet doesn't return a reference
> > to its parameter.  Therefore I added code to suppress the warning when
> > the call is std::use_facet.  Now 22_locale/* pass even with the warning
> > on.  We could exclude more std:: functions like this if desirable.
>
> Instead of adding special cases in the compiler, let's disable the
> warning around the definition of use_facet (and adjust the compiler as
> needed so that avoids the warning).

I assume you mean using #pragma here. If we disable it around the
definition of use_facet, will that disable it for callers of
use_facet, or only within the definition of use_facet itself?



Re: c: tree: target: C2x (...) function prototypes and va_start relaxation

2022-10-25 Thread Richard Biener via Gcc-patches
On Sat, Oct 22, 2022 at 1:03 AM Joseph Myers  wrote:
>
> C2x allows function prototypes to be given as (...), a prototype
> meaning a variable-argument function with no named arguments.  To
> allow such functions to access their arguments, requirements for
> va_start calls are relaxed so it ignores all but its first argument
> (i.e. subsequent arguments, if any, can be arbitrary pp-token
> sequences).
>
> Implement this feature accordingly.  The va_start relaxation in
>  is itself easy: __builtin_va_start already supports a
> second argument of 0 instead of a parameter name, and calls get
> converted internally to the form using 0 for that argument, so
>  just needs changing to use a variadic macro that passes 0
> as the second argument of __builtin_va_start.  (This is done only in
> C2x mode, on the expectation that users of older standard would expect
> unsupported uses of va_start to be diagnosed.)
>
> For the (...) functions, it's necessary to distinguish these from
> unprototyped functions, whereas previously C++ (...) functions and
> unprototyped functions both used NULL TYPE_ARG_TYPES.  A flag is added
> to tree_type_common to mark the (...) functions; as discussed on gcc@,
> doing things this way is likely to be safer for unchanged code in GCC
> than adding a different form of representation in TYPE_ARG_TYPES, or
> adding a flag that instead signals that the function is unprototyped.
>
> There was previously an option
> -fallow-parameterless-variadic-functions to enable support for (...)
> prototypes.  The support was incomplete - it treated the functions as
> unprototyped, and only parsed some declarations, not e.g.
> "int g (int (...));".  This option is changed into a no-op ignored
> option; (...) is always accepted syntactically, with a pedwarn_c11
> call to given required diagnostics when appropriate.  The peculiarity
> of a parameter list with __attribute__ followed by '...' being
> accepted with that option is removed.
>
> Interfaces in tree.cc that create function types are adjusted to set
> this flag as appropriate.  It is of course possible that some existing
> users of the functions to create variable-argument functions actually
> wanted unprototyped functions in the no-named-argument case, rather
> than functions with a (...) prototype; some such cases in c-common.cc
> (for built-in functions and implicit function declarations) turn out
> to need updating for that reason.
>
> I didn't do anything to change how the C++ front end creates (...)
> function types.  It's very likely there are unchanged places in the
> compiler that in fact turn out to need changes to work properly with
> (...) function prototypes.
>
> Target setup_incoming_varargs hooks, where they used the information
> passed about the last named argument, needed updating to avoid using
> that information in the (...) case.  Note that apart from the x86
> changes, I haven't done any testing of those target changes beyond
> building cc1 to check for syntax errors.  It's possible further
> target-specific fixes will be needed; target maintainers should watch
> out for failures of c2x-stdarg-4.c, the execution test, which would
> indicate that this feature is not working correctly.
>
> Bootstrapped with no regressions for x86_64-pc-linux-gnu.  OK to commit?

You are missing to stream the new type flag in tree-streamer-{in,out}.cc
and checking for tree merging in lto-common.cc:compare_tree_sccs_1

Otherwise looks reasonable.  Can you add a (multi TU) runtime testcase to the
torture exercising the feature so we can see any ABI issues?

Thanks,
Richard.

> gcc/
> * config/aarch64/aarch64.cc (aarch64_setup_incoming_varargs):
> Check TYPE_NO_NAMED_ARGS_STDARG_P.
> * config/alpha/alpha.cc (alpha_setup_incoming_varargs): Likewise.
> * config/arc/arc.cc (arc_setup_incoming_varargs): Likewise.
> * config/arm/arm.cc (arm_setup_incoming_varargs): Likewise.
> * config/csky/csky.cc (csky_setup_incoming_varargs): Likewise.
> * config/epiphany/epiphany.cc (epiphany_setup_incoming_varargs):
> Likewise.
> * config/fr30/fr30.cc (fr30_setup_incoming_varargs): Likewise.
> * config/frv/frv.cc (frv_setup_incoming_varargs): Likewise.
> * config/ft32/ft32.cc (ft32_setup_incoming_varargs): Likewise.
> * config/i386/i386.cc (ix86_setup_incoming_varargs): Likewise.
> * config/ia64/ia64.cc (ia64_setup_incoming_varargs): Likewise.
> * config/loongarch/loongarch.cc
> (loongarch_setup_incoming_varargs): Likewise.
> * config/m32r/m32r.cc (m32r_setup_incoming_varargs): Likewise.
> * config/mcore/mcore.cc (mcore_setup_incoming_varargs): Likewise.
> * config/mips/mips.cc (mips_setup_incoming_varargs): Likewise.
> * config/mmix/mmix.cc (mmix_setup_incoming_varargs): Likewise.
> * config/nds32/nds32.cc (nds32_setup_incoming_varargs): Likewise.
> * config/nios2/nios2.cc 

Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.

2022-10-25 Thread Richard Biener via Gcc-patches
On Fri, Oct 21, 2022 at 3:56 PM Dimitrije Milosevic
 wrote:
>
> From: Dimitrije Milošević 
>
> This patch reverts the computation of address cost complexity
> to the legacy one. After f9f69dd, complexity is calculated
> using the valid_mem_ref_p target hook. Architectures like
> Mips only allow BASE + OFFSET addressing modes, which in turn
> prevents the calculation of complexity for other addressing
> modes, resulting in non-optimal candidate selection.

I don't follow how only having BASE + OFFSET addressing prevents
calculation of complexity for other addressing modes?  Can you explain?

Do you have a testcase that shows how both changes improve IV selection
for MIPS?

>
> gcc/ChangeLog:
>
> * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
> to non-static.
> * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
> * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): 
> Reintroduce.
> (compute_min_and_max_offset): Likewise.
> (get_address_cost): Revert
> complexity calculation.
>
> Signed-off-by: Dimitrije Milosevic 
> ---
>  gcc/tree-ssa-address.cc |   2 +-
>  gcc/tree-ssa-address.h  |   2 +
>  gcc/tree-ssa-loop-ivopts.cc | 214 ++--
>  3 files changed, 207 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/tree-ssa-address.cc b/gcc/tree-ssa-address.cc
> index ba7b7c93162..442f54f0165 100644
> --- a/gcc/tree-ssa-address.cc
> +++ b/gcc/tree-ssa-address.cc
> @@ -561,7 +561,7 @@ add_to_parts (struct mem_address *parts, tree elt)
> validity for a memory reference accessing memory of mode MODE in address
> space AS.  */
>
> -static bool
> +bool
>  multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
>  addr_space_t as)
>  {
> diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h
> index 95143a099b9..09f36ee2f19 100644
> --- a/gcc/tree-ssa-address.h
> +++ b/gcc/tree-ssa-address.h
> @@ -38,6 +38,8 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
>  class aff_tree *, tree, tree, tree, bool);
>  extern void copy_ref_info (tree, tree);
>  tree maybe_fold_tmr (tree);
> +bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
> +addr_space_t as);
>
>  extern unsigned int preferred_mem_scale_factor (tree base,
> machine_mode mem_mode,
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index a6f926a68ef..d53ba05a4f6 100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -4774,6 +4774,135 @@ get_address_cost_ainc (poly_int64 ainc_step, 
> poly_int64 ainc_offset,
>return infinite_cost;
>  }
>
> +static void
> +compute_symbol_and_var_present (tree e1, tree e2,
> +   bool *symbol_present, bool *var_present)
> +{
> +  poly_uint64_pod off1, off2;
> +
> +  e1 = strip_offset (e1, );
> +  e2 = strip_offset (e2, );
> +
> +  STRIP_NOPS (e1);
> +  STRIP_NOPS (e2);
> +
> +  if (TREE_CODE (e1) == ADDR_EXPR)
> +{
> +  poly_int64_pod diff;
> +  if (ptr_difference_const (e1, e2, ))
> +  {
> +*symbol_present = false;
> +*var_present = false;
> +return;
> +  }
> +
> +  if (integer_zerop (e2))
> +  {
> +tree core;
> +poly_int64_pod bitsize;
> +poly_int64_pod bitpos;
> +widest_int mul;
> +tree toffset;
> +machine_mode mode;
> +int unsignedp, reversep, volatilep;
> +
> +core = get_inner_reference (TREE_OPERAND (e1, 0), , ,
> +  , , , , );
> +
> +if (toffset != 0
> +|| !constant_multiple_p (bitpos, BITS_PER_UNIT, )
> +|| reversep
> +|| !VAR_P (core))
> +  {
> +*symbol_present = false;
> +*var_present = true;
> +return;
> +  }
> +
> +if (TREE_STATIC (core)
> +|| DECL_EXTERNAL (core))
> +  {
> +*symbol_present = true;
> +*var_present = false;
> +return;
> +  }
> +
> +*symbol_present = false;
> +*var_present = true;
> +return;
> +  }
> +
> +  *symbol_present = false;
> +  *var_present = true;
> +}
> +  *symbol_present = false;
> +
> +  if (operand_equal_p (e1, e2, 0))
> +{
> +  *var_present = false;
> +  return;
> +}
> +
> +  *var_present = true;
> +}
> +
> +static void
> +compute_min_and_max_offset (addr_space_t as,
> +   machine_mode mem_mode, poly_int64_pod *min_offset,
> +   poly_int64_pod *max_offset)
> +{
> +  machine_mode address_mode = targetm.addr_space.address_mode (as);
> +  HOST_WIDE_INT i;
> +  poly_int64_pod off, width;
> +  rtx addr;
> +  rtx reg1;
> +
> +  reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
> +
> +  width = GET_MODE_BITSIZE (address_mode) - 1;
> +  if (known_gt (width, HOST_BITS_PER_WIDE_INT - 1))
> + width = HOST_BITS_PER_WIDE_INT - 1;
> +  gcc_assert (width.is_constant ());
> +  addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
> +

Re: [PATCH 2/2] ivopts: Consider number of invariants when calculating register pressure.

2022-10-25 Thread Richard Biener via Gcc-patches
On Fri, Oct 21, 2022 at 3:57 PM Dimitrije Milosevic
 wrote:
>
> From: Dimitrije Milošević 
>
> This patch slightly modifies register pressure model function to consider
> both the number of invariants and the number of candidates, rather than
> just the number of candidates. This used to be the case before c18101f.

don't you add n_invs twice now given

  unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
  unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;

?

> gcc/ChangeLog:
>
> * tree-ssa-loop-ivopts.cc (ivopts_estimate_reg_pressure): Adjust.
>
> Signed-off-by: Dimitrije Milosevic 
> ---
>  gcc/tree-ssa-loop-ivopts.cc | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index d53ba05a4f6..9d0b669d671 100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -6409,9 +6409,9 @@ ivopts_estimate_reg_pressure (struct ivopts_data *data, 
> unsigned n_invs,
>+ target_spill_cost [speed] * (n_cands - available_regs) * 2
>+ target_spill_cost [speed] * (regs_needed - n_cands);
>
> -  /* Finally, add the number of candidates, so that we prefer eliminating
> - induction variables if possible.  */
> -  return cost + n_cands;
> +  /* Finally, add the number of invariants and the number of candidates,
> + so that we prefer eliminating induction variables if possible.  */
> +  return cost + n_invs + n_cands;
>  }
>
>  /* For each size of the induction variable set determine the penalty.  */
> --
> 2.25.1
>


[PATCH] tree-optimization/107176 - SCEV analysis association issue

2022-10-25 Thread Richard Biener via Gcc-patches
The following fixes a wrong-code issue caused by SCEV analysis
associating an addition due trying to use tail-recursion in
follow_ssa_edge_expr.  That causes us to apply a conversion at
the wrong point and thus miscompute the scalar evolution of
an induction variable.  This reverts the PR66375 fix and
revisits the PR42512 fix by keeping the evolution symbolic
up to the point we process the first linear function when
we then can check for the supported cases and substitute the
whole symbolic expression with the built chrec substituting
the proper initial value.

To simplify passing around things and to clarify scoping of
the involved functions this change wraps the SCEV DFS walking
code into a class.

Bootstrapped and tested on x86_64-unknown-linux-gnu, I'm doing
final testing on the following which includes some API
streamlining over the tested prototype.

PR tree-optimization/107176
PR tree-optimization/66375
PR tree-optimization/42512
* tree-scalar-evolution.cc (follow_ssa_edge_expr): Revert
the PR66375 fix, do not not associate PLUS_EXPR to be able
to use tail-recursion.
(follow_ssa_edge_binary): Likewise.
(interpret_loop_phi): Revert PR42512 fix, do not throw
away analyze_evolution_in_loop result after the fact.
(follow_ssa_edge_expr): When reaching halting_phi initalize
the evolution to the symbolic value of the PHI result.
(add_to_evolution_1): When adding the first evolution verify
we can handle the expression wrapping the symbolic evolution
and replace that in full using the initial condition.
(class scev_dfs): New, contains ...
(follow_ssa_edge_expr, follow_ssa_edge_binary,
follow_ssa_edge_in_condition_phi_branch,
follow_ssa_edge_in_condition_phi,
follow_ssa_edge_inner_loop_phi,
add_to_evolution, add_to_evolution_1): ... these with
loop and halting_phi arguments in class data.
(scev_dfs::get_ev): New toplevel DFS entry, start with
a chrec_dont_know evolution.
(analyze_evolution_in_loop): Use scev_dfs.

* gcc.dg/torture/pr107176.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr107176.c |  22 ++
 gcc/tree-scalar-evolution.cc| 320 
 2 files changed, 185 insertions(+), 157 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr107176.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr107176.c 
b/gcc/testsuite/gcc.dg/torture/pr107176.c
new file mode 100644
index 000..c4f7b6d7336
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr107176.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+
+__INT32_TYPE__ a;
+__INT64_TYPE__ b;
+static inline __INT64_TYPE__ c(__UINT32_TYPE__ d)
+{
+  return d;
+}
+static inline void e(__INT32_TYPE__ d)
+{
+  a = d;
+}
+int main()
+{
+  b = 0;
+  for (; b < 1; b = c(b - 90) + 90 + 1)
+;
+  e(b >> 2);
+  if (a != 1073741824)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index 8b927094324..7e2a3e98661 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -577,6 +577,51 @@ get_scalar_evolution (basic_block instantiated_below, tree 
scalar)
   return res;
 }
 
+
+/* Depth first search algorithm.  */
+
+enum t_bool {
+  t_false,
+  t_true,
+  t_dont_know
+};
+
+class scev_dfs
+{
+public:
+  scev_dfs (class loop *loop_, gphi *phi_, tree init_cond_)
+  : loop (loop_), loop_phi_node (phi_), init_cond (init_cond_) {}
+  t_bool get_ev (tree *, tree);
+
+private:
+  t_bool follow_ssa_edge_expr (gimple *, tree, tree *, int);
+  t_bool follow_ssa_edge_binary (gimple *at_stmt,
+tree type, tree rhs0, enum tree_code code,
+tree rhs1, tree *evolution_of_loop, int limit);
+  t_bool follow_ssa_edge_in_condition_phi_branch (int i,
+ gphi *condition_phi,
+ tree *evolution_of_branch,
+ tree init_cond, int limit);
+  t_bool follow_ssa_edge_in_condition_phi (gphi *condition_phi,
+  tree *evolution_of_loop, int limit);
+  t_bool follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node,
+tree *evolution_of_loop, int limit);
+  tree add_to_evolution (tree chrec_before, enum tree_code code,
+tree to_add, gimple *at_stmt);
+  tree add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt);
+
+  class loop *loop;
+  gphi *loop_phi_node;
+  tree init_cond;
+};
+
+t_bool
+scev_dfs::get_ev (tree *ev_fn, tree arg)
+{
+  *ev_fn = chrec_dont_know;
+  return follow_ssa_edge_expr (loop_phi_node, arg, ev_fn, 0);
+}
+
 /* Helper function for add_to_evolution.  Returns the evolution
function for an assignment of the form "a = b + c", where "a" and
"b" are 

[PATCH] IRA: Make sure array is big enough

2022-10-25 Thread Torbjörn SVENSSON via Gcc-patches
In commit 081c96621da, the call to resize_reg_info() was moved before
the call to remove_scratches() and the latter one can increase the
number of regs and that would cause an out of bounds usage on the
reg_renumber global array.

Without this patch, the following testcase randomly fails with:
during RTL pass: ira
In file included from 
/src/gcc/testsuite/gcc.dg/compat//struct-by-value-5b_y.c:13:
/src/gcc/testsuite/gcc.dg/compat//struct-by-value-5b_y.c: In function 
'checkgSf13':
/src/gcc/testsuite/gcc.dg/compat//fp-struct-test-by-value-y.h:28:1: internal 
compiler error: Segmentation fault
/src/gcc/testsuite/gcc.dg/compat//struct-by-value-5b_y.c:22:1: note: in 
expansion of macro 'TEST'

gcc/ChangeLog:

* ira.c: Resize array after reg number increased.

Co-Authored-By: Yvan ROUX 
Signed-off-by: Torbjörn SVENSSON 
---
 gcc/ira.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/ira.cc b/gcc/ira.cc
index 42c9cead9f8..d28a67b2546 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -5718,6 +5718,7 @@ ira (FILE *f)
 regstat_free_ri ();
 regstat_init_n_sets_and_refs ();
 regstat_compute_ri ();
+resize_reg_info ();
   };
 
   int max_regno_before_rm = max_reg_num ();
-- 
2.25.1



RE: [PATCH] [X86_64]: Enable support for next generation AMD Zen4 CPU

2022-10-25 Thread Joshi, Tejas Sanjay via Gcc-patches
[Public]

Hi,

On Mon, Oct 24, 2022 at 4:26 PM Alexander Monakov  
wrote:
> > > This grew insn-automata.cc from 201502 lines to 639968 lines and the
> > > build of the automata (genautomata) to several minutes in my dev tree.
> >
> > Yeah, in my unoptimized non-bootstrapped development tree genautomata
> > now takes over 12 minutes on a fast box, that is simply not acceptable.
> 
> Thank you for notifying us.
> 
> mailto:tejassanjay.jo...@amd.com has posted a patch for review to fix this 
> (as per Honza's comments).
> Ref: 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgcc.gnu.org%2Fpipermail%2Fgcc-patches%2F2022-October%2F604144.html=05%7C01%7CTejasSanjay.Joshi%40amd.com%7C10a544bb98214654ee7808dab5cdafe5%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638022192267092598%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=3ATGZwSwJZWlJ1EU%2BijPEYTuVFb38gTkAvSWVQNF3AQ%3D=0

> This patch is OK 
We have pushed this patch which reverts the scheduler descriptions for znver4.
Now, on my machine, the build time and insn-automata.cc size is matching with 
previous gcc trunk state.

Thanks and Regards,
Tejas


[PATCH] Move NOP stripping in SCEV analysis

2022-10-25 Thread Richard Biener via Gcc-patches
The following moves a pair of STRIP_USELESS_TYPE_CONVERSIONS to
where it belongs and adds a comment on why we handle GENERIC
there at all.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-scalar-evolution.cc (follow_ssa_edge_expr): Move
STRIP_USELESS_TYPE_CONVERSIONS to where it matters.
---
 gcc/tree-scalar-evolution.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index 9f30f78cb5d..8b927094324 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -1214,6 +1214,8 @@ tail_recurse:
 {
   code = TREE_CODE (expr);
   type = TREE_TYPE (expr);
+  /* Via follow_ssa_edge_inner_loop_phi we arrive here with the
+GENERIC scalar evolution of the inner loop.  */
   switch (code)
{
CASE_CONVERT:
@@ -1224,6 +1226,8 @@ tail_recurse:
case MINUS_EXPR:
  rhs0 = TREE_OPERAND (expr, 0);
  rhs1 = TREE_OPERAND (expr, 1);
+ STRIP_USELESS_TYPE_CONVERSION (rhs0);
+ STRIP_USELESS_TYPE_CONVERSION (rhs1);
  break;
default:
  rhs0 = expr;
@@ -1260,8 +1264,6 @@ tail_recurse:
 case PLUS_EXPR:
 case MINUS_EXPR:
   /* This case is under the form "rhs0 +- rhs1".  */
-  STRIP_USELESS_TYPE_CONVERSION (rhs0);
-  STRIP_USELESS_TYPE_CONVERSION (rhs1);
   if (TREE_CODE (rhs0) == SSA_NAME
  && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR))
{
-- 
2.35.3


Patch ping

2022-10-25 Thread Jakub Jelinek via Gcc-patches
Hi!

On Mon, Oct 24, 2022 at 10:28:34AM -0600, Jeff Law wrote:
> On 10/21/22 09:42, Jakub Jelinek wrote:
> > On top of the pending
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603665.html
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604080.html
> > the following patch adds some complex builtins which have libm
> > implementation in glibc 2.26 and later on various arches.
> > It is needed for libstdc++ _Float128 support when long double is not
> > IEEE quad.
> 
> OK

Thanks a lot.

Can I ping the
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603665.html
patch these 2 approved patches (and the libstdc++ changes too) depend on?

Thanks.

Jakub



[committed] gimplify: Fix comment typos

2022-10-25 Thread Jakub Jelinek via Gcc-patches
Hi!

While looking at gimple_boolify for PR107368, I've noticed 2 comment
typos.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2022-10-25  Jakub Jelinek  

* gimplify.cc (gimple_boolify): Fix comment typos, prduce -> produce
and There -> These.

--- gcc/gimplify.cc.jj  2022-10-24 11:01:54.121339779 +0200
+++ gcc/gimplify.cc 2022-10-24 11:21:44.215124376 +0200
@@ -4272,7 +4272,7 @@ gimple_boolify (tree expr)
 default:
   if (COMPARISON_CLASS_P (expr))
{
- /* There expressions always prduce boolean results.  */
+ /* These expressions always produce boolean results.  */
  if (TREE_CODE (type) != BOOLEAN_TYPE)
TREE_TYPE (expr) = boolean_type_node;
  return expr;

Jakub



[committed] gimplify: Call gimple_boolify on IFN_ASSUME argument [PR107368]

2022-10-25 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase ICEs in C, because assume attribute condition
has int type rather than bool and the gimplification into GIMPLE_ASSUME
assigns it into a bool variable.

Fixed by calling gimple_boolify.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2022-10-25  Jakub Jelinek  

PR tree-optimization/107368
* gimplify.cc (gimplify_call_expr): For complex IFN_ASSUME
conditions call gimple_boolify on the condition.

* gcc.dg/attr-assume-5.c: New test.

--- gcc/gimplify.cc.jj  2022-10-24 10:22:08.590902730 +0200
+++ gcc/gimplify.cc 2022-10-24 11:01:54.121339779 +0200
@@ -3586,7 +3586,7 @@ gimplify_call_expr (tree *expr_p, gimple
 a separate function easily.  */
  tree guard = create_tmp_var (boolean_type_node);
  *expr_p = build2 (MODIFY_EXPR, void_type_node, guard,
-   CALL_EXPR_ARG (*expr_p, 0));
+   gimple_boolify (CALL_EXPR_ARG (*expr_p, 0)));
  *expr_p = build3 (BIND_EXPR, void_type_node, NULL, *expr_p, NULL);
  push_gimplify_context ();
  gimple_seq body = NULL;
--- gcc/testsuite/gcc.dg/attr-assume-5.c.jj 2022-10-24 11:13:47.169624469 
+0200
+++ gcc/testsuite/gcc.dg/attr-assume-5.c2022-10-24 11:16:11.137662896 
+0200
@@ -0,0 +1,10 @@
+/* PR tree-optimization/107368 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+double
+f4 (double x)
+{
+  [[gnu::assume (x && x > 0.0)]];
+  return x;
+}

Jakub



[committed] gimplify: Don't add GIMPLE_ASSUME if errors were seen [PR107369]

2022-10-25 Thread Jakub Jelinek via Gcc-patches
Hi!

The FEs emit errors about jumps into assume attribute conditions,
but when we add GIMPLE_ASSUME for the condition which is reachable
through those jumps, we can run into cfg verification diagnostics.

Fixed by throwing the IFN_ASSUME away during gimplification if
seen_error () - like we already do for -O0.  GIMPLE_ASSUME in the middle-end
is a pure optimization thing and if errors were reported, the optimizations
will not be beneficial for anything.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2022-10-25  Jakub Jelinek  

PR tree-optimization/107369
* gimplify.cc (gimplify_call_expr): If seen_error, handle complex
IFN_ASSUME the same as for -O0.

* gcc.dg/attr-assume-4.c: New test.
* g++.dg/cpp23/attr-assume8.C: New test.

--- gcc/gimplify.cc.jj  2022-10-18 10:38:48.146406234 +0200
+++ gcc/gimplify.cc 2022-10-24 10:22:08.590902730 +0200
@@ -3570,7 +3570,7 @@ gimplify_call_expr (tree *expr_p, gimple
  return GS_OK;
}
  /* If not optimizing, ignore the assumptions.  */
- if (!optimize)
+ if (!optimize || seen_error ())
{
  *expr_p = NULL_TREE;
  return GS_ALL_DONE;
--- gcc/testsuite/gcc.dg/attr-assume-4.c.jj 2022-10-24 10:31:30.654197800 
+0200
+++ gcc/testsuite/gcc.dg/attr-assume-4.c2022-10-24 10:33:21.214682782 
+0200
@@ -0,0 +1,12 @@
+/* PR tree-optimization/107369 */
+/* { dg-do compile } */
+/* { dg-options "-std=c2x -O1" } */
+
+void
+foo (int x)
+{
+  if (x == 1)
+goto l1;   /* { dg-error "jump into statement 
expression" } */
+
+  [[gnu::assume (({ l1:; 1; }))]]; /* { dg-message "label 'l1' defined 
here" } */
+}
--- gcc/testsuite/g++.dg/cpp23/attr-assume8.C.jj2022-10-24 
10:48:35.550198403 +0200
+++ gcc/testsuite/g++.dg/cpp23/attr-assume8.C   2022-10-24 10:35:52.025616208 
+0200
@@ -0,0 +1,12 @@
+// PR tree-optimization/107369
+// { dg-do compile { target c++11 } }
+// { dg-options "-O1" }
+
+void
+foo (int x)
+{
+  if (x == 1)
+goto l1;   // { dg-message "from here" }
+
+  [[assume (({ l1:; 1; }))]];  // { dg-error "jump to label 'l1'" }
+}  // { dg-message "enters statement expression" 
"" { target *-*-* } .-1 }

Jakub



Re: [PATCH] Add -gcodeview option

2022-10-25 Thread Martin Storsjö

On Tue, 25 Oct 2022, Mark Harmstone wrote:


On 24/10/22 12:08, Martin Storsjö wrote:
Hmm, what does this end up passing to the linker in the end - does it just 
pass "-pdb="? (What does the "*" parameter do here?) If that's the case - 
that sounds reasonable - assuming that if a user passes an extra 
-Wl,--pdb,myspecificname.pdb, that would take precedence (i.e. be passed 
after the compiler's default one). 


That's right. The "*" means "all languages".


Ok, great.

Btw, stylistically, should we strive towards using double dashes for the 
pdb option? I think that's the most canonical way for such getopt options 
(even though it doesn't make any practical difference). I've started 
trying to update various users to prefer that form (together with 
preferring -Wl,--pdb= over -Wl,--pdb,) and probably will send 
a few more.


// Martin


[PATCH] tree-optimization/100756 - niter analysis and folding

2022-10-25 Thread Richard Biener via Gcc-patches
niter analysis, specifically the part trying to simplify the computed
maybe_zero condition against the loop header copying condition, is
confused by us now simplifying

  _15 = n_8(D) * 4;
  if (_15 > 0)

to

  _15 = n_8(D) * 4;
  if (n_8(D) > 0)

which is perfectly sound at the point we do this transform.  One
solution might be to involve ranger in this simplification, another
is to be more aggressive when expanding expressions - the condition
we try to simplify is _15 > 0, so all we need is expanding that
to n_8(D) * 4 > 0.

The following does just that.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/100756
* tree-ssa-loop-niter.cc (expand_simple_operations): Also
expand multiplications by invariants.

* gcc.dg/vect/pr100756.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr100756.c | 15 +++
 gcc/tree-ssa-loop-niter.cc   |  1 +
 2 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr100756.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr100756.c 
b/gcc/testsuite/gcc.dg/vect/pr100756.c
new file mode 100644
index 000..c1362f29ebe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr100756.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int
+foo (int * restrict a, int n)
+{
+  int i, result = 0;
+
+  a = __builtin_assume_aligned (a, __BIGGEST_ALIGNMENT__);
+  for (i = 0; i < n * 4; i++)
+result += a[i];
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 1e0f609d8b6..4ffcef4f4ff 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2216,6 +2216,7 @@ expand_simple_operations (tree expr, tree stop, 
hash_map )
 
 case PLUS_EXPR:
 case MINUS_EXPR:
+case MULT_EXPR:
   if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr))
  && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr)))
return expr;
-- 
2.35.3


Ping [PATCH] Add condition coverage profiling

2022-10-25 Thread Jørgen Kvalsvik via Gcc-patches
On 12/10/2022 12:16, Jørgen Kvalsvik wrote:
> This patch adds support in gcc+gcov for modified condition/decision
> coverage (MC/DC) with the -fprofile-conditions flag. MC/DC is a type of
> test/code coverage and it is particularly important in the avation and
> automotive industries for safety-critical applications. MC/DC it is
> required for or recommended by:
> 
> * DO-178C for the most critical software (Level A) in avionics
> * IEC 61508 for SIL 4
> * ISO 26262-6 for ASIL D
> 
> From the SQLite webpage:
> 
> Two methods of measuring test coverage were described above:
> "statement" and "branch" coverage. There are many other test
> coverage metrics besides these two. Another popular metric is
> "Modified Condition/Decision Coverage" or MC/DC. Wikipedia defines
> MC/DC as follows:
> 
> * Each decision tries every possible outcome.
> * Each condition in a decision takes on every possible outcome.
> * Each entry and exit point is invoked.
> * Each condition in a decision is shown to independently affect
>   the outcome of the decision.
> 
> In the C programming language where && and || are "short-circuit"
> operators, MC/DC and branch coverage are very nearly the same thing.
> The primary difference is in boolean vector tests. One can test for
> any of several bits in bit-vector and still obtain 100% branch test
> coverage even though the second element of MC/DC - the requirement
> that each condition in a decision take on every possible outcome -
> might not be satisfied.
> 
> https://sqlite.org/testing.html#mcdc
> 
> Wahlen, Heimdahl, and De Silva "Efficient Test Coverage Measurement for
> MC/DC" describes an algorithm for adding instrumentation by carrying
> over information from the AST, but my algorithm analyses the the control
> flow graph to instrument for coverage. This has the benefit of being
> programming language independent and faithful to compiler decisions
> and transformations, although I have only tested it on constructs in C
> and C++, see testsuite/gcc.misc-tests and testsuite/g++.dg.
> 
> Like Wahlen et al this implementation records coverage in fixed-size
> bitsets which gcov knows how to interpret. This is very fast, but
> introduces a limit on the number of terms in a single boolean
> expression, the number of bits in a gcov_unsigned_type (which is
> typedef'd to uint64_t), so for most practical purposes this would be
> acceptable. This limitation is in the implementation and not the
> algorithm, so support for more conditions can be added by also
> introducing arbitrary-sized bitsets.
> 
> For space overhead, the instrumentation needs two accumulators
> (gcov_unsigned_type) per condition in the program which will be written
> to the gcov file. In addition, every function gets a pair of local
> accumulators, but these accmulators are reused between conditions in the
> same function.
> 
> For time overhead, there is a zeroing of the local accumulators for
> every condition and one or two bitwise operation on every edge taken in
> the an expression.
> 
> In action it looks pretty similar to the branch coverage. The -g short
> opt carries no significance, but was chosen because it was an available
> option with the upper-case free too.
> 
> gcov --conditions:
> 
> 3:   17:void fn (int a, int b, int c, int d) {
> 3:   18:if ((a && (b || c)) && d)
> condition outcomes covered 3/8
> condition  0 not covered (true false)
> condition  1 not covered (true)
> condition  2 not covered (true)
> condition  3 not covered (true)
> 1:   19:x = 1;
> -:   20:else
> 2:   21:x = 2;
> 3:   22:}
> 
> gcov --conditions --json-format:
> 
> "conditions": [
> {
> "not_covered_false": [
> 0
> ],
> "count": 8,
> "covered": 3,
> "not_covered_true": [
> 0,
> 1,
> 2,
> 3
> ]
> }
> ],
> 
> Some expressions, mostly those without else-blocks, are effectively
> "rewritten" in the CFG construction making the algorithm unable to
> distinguish them:
> 
> and.c:
> 
> if (a && b && c)
> x = 1;
> 
> ifs.c:
> 
> if (a)
> if (b)
> if (c)
> x = 1;
> 
> gcc will build the same graph for both these programs, and gcov will
> report boths as 3-term expressions. It is vital that it is not
> interpreted the other way around (which is consistent with the shape of
> the graph) because otherwise the masking would be wrong for the and.c
> program which is a more severe error. While surprising, users would
> probably expect some minor rewriting of semantically-identical
> expressions.
> 
> and.c.gcov:
> #:2:if (a && b && c)
> decisions covered 6/6
> #:3:x = 1;
> 
> ifs.c.gcov:
> #:2:if 

Re: [PATCH v2] Always use TYPE_MODE instead of DECL_MODE for vector field

2022-10-25 Thread Richard Biener via Gcc-patches
On Mon, Oct 24, 2022 at 10:02 PM H.J. Lu  wrote:
>
> On Mon, Oct 24, 2022 at 12:12 AM Richard Biener
>  wrote:
> >
> > On Fri, Oct 21, 2022 at 6:18 PM H.J. Lu  wrote:
> > >
> > > On Fri, Oct 21, 2022 at 2:33 AM Richard Biener
> > >  wrote:
> > > >
> > > > On Thu, Oct 20, 2022 at 6:58 PM H.J. Lu via Gcc-patches
> > > >  wrote:
> > > > >
> > > > > commit e034c5c895722e0092d2239cd8c2991db77d6d39
> > > > > Author: Jakub Jelinek 
> > > > > Date:   Sat Dec 2 08:54:47 2017 +0100
> > > > >
> > > > > PR target/78643
> > > > > PR target/80583
> > > > > * expr.c (get_inner_reference): If DECL_MODE of a non-bitfield
> > > > > is BLKmode for vector field with vector raw mode, use 
> > > > > TYPE_MODE
> > > > > instead of DECL_MODE.
> > > > >
> > > > > fixed the case where DECL_MODE of a vector field is BLKmode and its
> > > > > TYPE_MODE is a vector mode because of target attribute.  Remove the
> > > > > BLKmode check for the case where DECL_MODE of a vector field is a 
> > > > > vector
> > > > > mode and its TYPE_MODE is BLKmode because of target attribute.
> > > > >
> > > > > gcc/
> > > > >
> > > > > PR target/107304
> > > > > * expr.c (get_inner_reference): Always use TYPE_MODE for 
> > > > > vector
> > > > > field with vector raw mode.
> > > > >
> > > > > gcc/testsuite/
> > > > >
> > > > > PR target/107304
> > > > > * gcc.target/i386/pr107304.c: New test.
> > > > > ---
> > > > >  gcc/expr.cc  |  3 +-
> > > > >  gcc/testsuite/gcc.target/i386/pr107304.c | 39 
> > > > > 
> > > > >  2 files changed, 40 insertions(+), 2 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr107304.c
> > > > >
> > > > > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > > > > index efe387e6173..9145193c2c1 100644
> > > > > --- a/gcc/expr.cc
> > > > > +++ b/gcc/expr.cc
> > > > > @@ -7905,8 +7905,7 @@ get_inner_reference (tree exp, poly_int64_pod 
> > > > > *pbitsize,
> > > > >   /* For vector fields re-check the target flags, as DECL_MODE
> > > > >  could have been set with different target flags than
> > > > >  the current function has.  */
> > > > > - if (mode == BLKmode
> > > > > - && VECTOR_TYPE_P (TREE_TYPE (field))
> > > > > + if (VECTOR_TYPE_P (TREE_TYPE (field))
> > > > >   && VECTOR_MODE_P (TYPE_MODE_RAW (TREE_TYPE (field
> > > >
> > > > Isn't the check on TYPE_MODE_RAW also wrong then?  Btw, the mode could
> > >
> > > TYPE_MODE_RAW is always set to a vector mode for a vector type:
> > >
> > >/* Find an appropriate mode for the vector type.  */
> > > if (TYPE_MODE (type) == VOIDmode)
> > >   SET_TYPE_MODE (type,
> > >  mode_for_vector (SCALAR_TYPE_MODE (innertype),
> > >   nunits).else_blk ());
> >
> > But mode_for_vector can return a MODE_INT!
>
> You are right.
>
> >   /* For integers, try mapping it to a same-sized scalar mode.  */
> >   if (GET_MODE_CLASS (innermode) == MODE_INT)
> > {
> >   poly_uint64 nbits = nunits * GET_MODE_BITSIZE (innermode);
> >   if (int_mode_for_size (nbits, 0).exists ()
> >   && have_regs_of_mode[mode])
> > return mode;
> >
> > > But TYPE_MODE returns BLKmode if the vector mode is unsupported.
> > >
> > > > also be an integer mode.
> > >
> > > For a vector field, mode is either BLK mode or the vector mode.  Jakub,
> > > can you comment on it?
> >
> > I think that for
> >
> > typedef int v2si __attribute__((vector_size(8)));
> >
> > struct X { int i; v2si j; };
> >
> > v2si should get DImode with -mno-sse?
> >
>
> Currently GCC generates
>
> (insn 31 32 33 (set (subreg:DI (reg:V2SI 105) 0)
> (reg:DI 84 [ _3 ])) "y2.c":12:11 -1
>  (nil))
>
> With my patch, v2si gets DImode directly without SUBREG.
>
> Here is the v2 patch with the update commit message:
>
> Remove the BLKmode check for the case where DECL_MODE
> of a vector field is a vector mode and its TYPE_MODE isn't a
> vector mode because of target attribute.
>
> OK for master?

OK.

Thanks,
Richard.

> Thanks.
>
> --
> H.J.


[PATCH] RISC-V: Recognized Svinval and Svnapot extensions

2022-10-25 Thread Monk Chiang
gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_ext_version_table):
Add svinval and svnapot extension.
(riscv_ext_flag_table): Ditto.
* config/riscv/riscv-opts.h (MASK_SVINVAL): New.
(MASK_SVNAPOT): Ditto.
(TARGET_SVINVAL): Ditto.
(TARGET_SVNAPOT): Ditto.
* config/riscv/riscv.opt (riscv_sv_subext): New.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/predef-23.c:New.
* gcc.target/riscv/predef-24.c:Ditto.
---
 gcc/common/config/riscv/riscv-common.cc|  6 +++
 gcc/config/riscv/riscv-opts.h  |  6 +++
 gcc/config/riscv/riscv.opt |  3 ++
 gcc/testsuite/gcc.target/riscv/predef-23.c | 47 ++
 gcc/testsuite/gcc.target/riscv/predef-24.c | 47 ++
 5 files changed, 109 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-23.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-24.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index dead3802f83..a5fe782bb61 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -202,6 +202,9 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
 
   {"zmmul", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"svinval", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"svnapot", ISA_SPEC_CLASS_NONE, 1, 0},
+
   /* Terminate the list.  */
   {NULL, ISA_SPEC_CLASS_NONE, 0, 0}
 };
@@ -1226,6 +1229,9 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
 
   {"zmmul", _options::x_riscv_zm_subext, MASK_ZMMUL},
 
+  {"svinval", _options::x_riscv_sv_subext, MASK_SVINVAL},
+  {"svnapot", _options::x_riscv_sv_subext, MASK_SVNAPOT},
+
   {NULL, NULL, 0}
 };
 
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 55e0bc0a0e9..63ac56a8ca0 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -162,6 +162,12 @@ enum stack_protector_guard {
 #define MASK_ZMMUL  (1 << 0)
 #define TARGET_ZMMUL((riscv_zm_subext & MASK_ZMMUL) != 0)
 
+#define MASK_SVINVAL (1 << 0)
+#define MASK_SVNAPOT (1 << 1)
+
+#define TARGET_SVINVAL ((riscv_sv_subext & MASK_SVINVAL) != 0)
+#define TARGET_SVNAPOT ((riscv_sv_subext & MASK_SVNAPOT) != 0)
+
 /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use
popcount to caclulate the minimal VLEN.  */
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 8923a11a97d..949311775c1 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -224,6 +224,9 @@ int riscv_zf_subext
 TargetVariable
 int riscv_zm_subext
 
+TargetVariable
+int riscv_sv_subext
+
 Enum
 Name(isa_spec_class) Type(enum riscv_isa_spec_class)
 Supported ISA specs (for use with the -misa-spec= option):
diff --git a/gcc/testsuite/gcc.target/riscv/predef-23.c 
b/gcc/testsuite/gcc.target/riscv/predef-23.c
new file mode 100644
index 000..64bde17efa9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-23.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_svinval -mabi=lp64 -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 64
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000 + 1 * 1000))
+#error "__riscv_i"
+#endif
+
+#if !defined(__riscv_c) || (__riscv_c != (2 * 1000 * 1000))
+#error "__riscv_c"
+#endif
+
+#if defined(__riscv_e)
+#error "__riscv_e"
+#endif
+
+#if !defined(__riscv_a) || (__riscv_a != (2 * 1000 * 1000 + 1 * 1000))
+#error "__riscv_a"
+#endif
+
+#if !defined(__riscv_m) || (__riscv_m != (2 * 1000 * 1000))
+#error "__riscv_m"
+#endif
+
+#if !defined(__riscv_f) || (__riscv_f != (2 * 1000 * 1000 + 2 * 1000))
+#error "__riscv_f"
+#endif
+
+#if !defined(__riscv_d) || (__riscv_d != (2 * 1000 * 1000 + 2 * 1000))
+#error "__riscv_d"
+#endif
+
+#if !defined(__riscv_svinval)
+#error "__riscv_svinval"
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/predef-24.c 
b/gcc/testsuite/gcc.target/riscv/predef-24.c
new file mode 100644
index 000..2b51a19eacd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-24.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_svnapot -mabi=lp64 -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 64
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000 + 1 * 1000))
+#error "__riscv_i"
+#endif
+
+#if !defined(__riscv_c) || (__riscv_c != (2 * 1000 * 1000))
+#error "__riscv_c"
+#endif
+
+#if defined(__riscv_e)
+#error "__riscv_e"
+#endif
+
+#if !defined(__riscv_a) || (__riscv_a != (2 * 1000 * 1000 + 1 * 1000))
+#error "__riscv_a"
+#endif
+
+#if !defined(__riscv_m)