[PATCH] Extend 64-bit vector bit_op patterns with ?r alternative

2022-07-13 Thread liuhongt via Gcc-patches
And split it to GPR-version instruction after reload.

> ?r was introduced under the assumption that we want vector values
> mostly in vector registers. Currently there are no instructions with
> memory or immediate operand, so that made sense at the time. Let's
> keep ?r until logic instructions with mem/imm operands are introduced.
> So, for the patch that adds 64-bit vector logic in GPR, I would advise
> to first introduce only register operands. mem/imm operands should be
Update patch to add ?r to 64-bit bit_op patterns.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
No big impact on SPEC2017 (mostly the same binaries).

Ok for trunk?

gcc/ChangeLog:

PR target/106038
* config/i386/mmx.md (<code><mode>3): Expand
with (clobber (reg:CC flags_reg)) under TARGET_64BIT
(mmx_<code><mode>3): Ditto.
(*mmx_<code><mode>3_gpr): New define_insn, add post_reload
splitter after it.
(mmx_andnot<mode>3_gpr): Ditto.
(<code><mode>3): Extend following define_split from VI_16_32 to
VI_16_32_64.
(*andnot<mode>3): Ditto.
(mmxinsnmode): New mode attribute.
(VI_16_32_64): New mode iterator.
(*mov<mode>_imm): Refactor with mmxinsnmode.
* config/i386/predicates.md

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr106038-1.c: New test.
* gcc.target/i386/pr106038-2.c: New test.
* gcc.target/i386/pr106038-3.c: New test.
---
 gcc/config/i386/mmx.md | 131 +++--
 gcc/testsuite/gcc.target/i386/pr106038-1.c |  61 ++
 gcc/testsuite/gcc.target/i386/pr106038-2.c |  35 ++
 gcc/testsuite/gcc.target/i386/pr106038-3.c |  17 +++
 4 files changed, 210 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106038-3.c

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3294c1e6274..5f7e40bd7a1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -75,6 +75,11 @@ (define_mode_iterator V_16_32_64
 (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
 (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
 
+(define_mode_iterator VI_16_32_64
+   [V2QI V4QI V2HI
+(V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
+(V2SI "TARGET_64BIT")])
+
 ;; V2S* modes
 (define_mode_iterator V2FI [V2SF V2SI])
 
@@ -86,6 +91,14 @@ (define_mode_attr mmxvecsize
   [(V8QI "b") (V4QI "b") (V2QI "b")
(V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
 
+;; Mapping to same size integral mode.
+(define_mode_attr mmxinsnmode
+  [(V8QI "DI") (V4QI "SI") (V2QI "HI")
+   (V4HI "DI") (V2HI "SI")
+   (V2SI "DI")
+   (V4HF "DI") (V2HF "SI")
+   (V2SF "DI")])
+
 (define_mode_attr mmxdoublemode
   [(V8QI "V8HI") (V4HI "V4SI")])
 
@@ -350,22 +363,7 @@ (define_insn_and_split "*mov_imm"
   HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
mode);
   operands[1] = GEN_INT (val);
-  machine_mode mode;
-  switch (GET_MODE_SIZE (mode))
-{
-case 2:
-  mode = HImode;
-  break;
-case 4:
-  mode = SImode;
-  break;
-case 8:
-  mode = DImode;
-  break;
-default:
-  gcc_unreachable ();
-}
-  operands[0] = lowpart_subreg (mode, operands[0], mode);
+  operands[0] = lowpart_subreg (mode, operands[0], mode);
 })
 
 ;; For TARGET_64BIT we always round up to 8 bytes.
@@ -2878,6 +2876,31 @@ (define_insn "mmx_andnot3"
(set_attr "type" "mmxadd,sselog,sselog,sselog")
(set_attr "mode" "DI,TI,TI,TI")])
 
+(define_insn "mmx_andnot3_gpr"
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=?r,y,x,x,v")
+   (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" 
"r,0,0,x,v"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "r,ym,x,x,v")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_MMX || TARGET_SSE2)"
+  "#"
+  [(set_attr "isa" "bmi,*,sse2_noavx,avx,avx512vl")
+   (set_attr "mmx_isa" "*,native,*,*,*")
+   (set_attr "type" "alu,mmxadd,sselog,sselog,sselog")
+   (set_attr "mode" "DI,DI,TI,TI,TI")])
+
+(define_split
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_mmxmem_operand"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && (TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && !GENERAL_REGNO_P (REGNO (operands[0]))"
+  [(set (match_dup 0)
+   (and: (not: (match_dup 1)) (match_dup 2)))])
+
 (define_insn "*andnot3"
   [(set (match_operand:VI_16_32 0 "register_operand" "=?,?r,x,x,v")
 (and:VI_16_32
@@ -2892,20 +2915,20 @@ (define_insn "*andnot3"
(set_attr "mode" "SI,SI,TI,TI,TI")])
 
 (define_split
-  [(set (match_operand:VI_16_32 0 "general_reg_operand")
-(and:VI_16_32
- (not:VI_16_32 (match_operand:VI_16_32 1 

RE: [x86_64 PATCH] Improved Scalar-To-Vector (STV) support for TImode to V1TImode.

2022-07-13 Thread Roger Sayle


On Mon, Jul 11, 2022, H.J. Lu  wrote:
> On Sun, Jul 10, 2022 at 2:38 PM Roger Sayle 
> wrote:
> > Hi HJ,
> >
> > I believe this should now be handled by the post-reload (CSE) pass.
> > Consider the simple test case:
> >
> > __int128 a, b, c;
> > void foo()
> > {
> >   a = 0;
> >   b = 0;
> >   c = 0;
> > }
> >
> > Without any STV, i.e. -O2 -msse4 -mno-stv, GCC gets TI mode writes:
> > movq$0, a(%rip)
> > movq$0, a+8(%rip)
> > movq$0, b(%rip)
> > movq$0, b+8(%rip)
> > movq$0, c(%rip)
> > movq$0, c+8(%rip)
> > ret
> >
> > But with STV, i.e. -O2 -msse4, things get converted to V1TI mode:
> > pxor%xmm0, %xmm0
> > movaps  %xmm0, a(%rip)
> > movaps  %xmm0, b(%rip)
> > movaps  %xmm0, c(%rip)
> > ret
> >
> > You're quite right internally the STV actually generates the equivalent of:
> > pxor%xmm0, %xmm0
> > movaps  %xmm0, a(%rip)
> > pxor%xmm0, %xmm0
> > movaps  %xmm0, b(%rip)
> > pxor%xmm0, %xmm0
> > movaps  %xmm0, c(%rip)
> > ret
> >
> > And currently, because STV runs before cse2 and combine, the const0_rtx
> > gets CSE'd by the cse2 pass to produce the code we see.  However, if
> > you specify -fno-rerun-cse-after-loop (to disable the cse2 pass),
> > you'll see we continue to generate the same optimized code, as the
> > same const0_rtx gets CSE'd in postreload.
> >
> > I can't be certain until I try the experiment, but I believe that the
> > postreload CSE will clean-up, all of the same common subexpressions.
> > Hence, it should be safe to perform all STV at the same point (after
> > combine), which allows for a few additional optimizations.
> >
> > Does this make sense?  Do you have a test case,
> > -fno-rerun-cse-after-loop produces different/inferior code for TImode STV
> chains?
> >
> > My guess is that the RTL passes have changed so much in the last six
> > or seven years, that some of the original motivation no longer applies.
> > Certainly we now try to keep TI mode operations visible longer, and
> > then allow STV to behave like a pre-reload pass to decide which set of
> > registers to use (vector V1TI or scalar doubleword DI).  Any CSE
> > opportunities that cse2 finds with V1TI mode, could/should equally
> > well be found for TI mode (mostly).
> 
> You are probably right.  If there are no regressions in GCC testsuite, my 
> original
> motivation is no longer valid.

It was good to try the experiment, but H.J. is right, there is still some 
benefit
(as well as some disadvantages)  to running STV lowering before CSE2/combine.
A clean-up patch to perform all STV conversion as a single pass (removing a
pass from the compiler) results in just a single regression in the test suite:
FAIL: gcc.target/i386/pr70155-17.c scan-assembler-times movv1ti_internal 8
which looks like:

__int128 a, b, c, d, e, f;
void foo (void)
{
  a = 0;
  b = -1;
  c = 0;
  d = -1;
  e = 0;
  f = -1;
}

By performing STV after combine (without CSE), reload prefers to implement
this function using a single register, that then requires 12 instructions rather
than 8 (if using two registers).  Alas there's nothing that postreload CSE/GCSE
can do.  Doh!

pxor%xmm0, %xmm0
movaps  %xmm0, a(%rip)
pcmpeqd %xmm0, %xmm0
movaps  %xmm0, b(%rip)
pxor%xmm0, %xmm0
movaps  %xmm0, c(%rip)
pcmpeqd %xmm0, %xmm0
movaps  %xmm0, d(%rip)
pxor%xmm0, %xmm0
movaps  %xmm0, e(%rip)
pcmpeqd %xmm0, %xmm0
movaps  %xmm0, f(%rip)
ret

I also note that even without STV, the scalar implementation of this function 
when
compiled with -Os is also larger than it needs to be due to poor CSE (notice in 
the
following we only need a single zero register, and  an all_ones reg would be 
helpful).

xorl%eax, %eax
xorl%edx, %edx
xorl%ecx, %ecx
movq$-1, b(%rip)
movq%rax, a(%rip)
movq%rax, a+8(%rip)
movq$-1, b+8(%rip)
movq%rdx, c(%rip)
movq%rdx, c+8(%rip)
movq$-1, d(%rip)
movq$-1, d+8(%rip)
movq%rcx, e(%rip)
movq%rcx, e+8(%rip)
movq$-1, f(%rip)
movq$-1, f+8(%rip)
ret

I need to give the problem some more thought.  It would be good to 
clean-up/unify
the STV passes, but I/we need to solve/CSE HJ's last test case before we do.  
Perhaps
by forbidding "(set (mem:ti) (const_int 0))" in movti_internal, would force the 
zero
register to become visible, and CSE'd, benefiting both vector code and scalar 
-Os code,
then use postreload/peephole2 to fix up the remaining scalar cases.  It's 
tricky.

Cheers,
Roger
--




Re: [PATCH v3] RISC-V/testsuite: constraint some of tests to hard_float

2022-07-13 Thread Vineet Gupta

On 5/29/22 20:50, Kito Cheng via Gcc-patches wrote:

Committed, thanks!


Can this be backported to gcc-12 please.

Thx,
-Vineet



On Fri, May 27, 2022 at 10:37 AM Vineet Gupta  wrote:


Commit 9ddd44b58649d1d ("RISC-V: Provide `fmin'/`fmax' RTL pattern") added
tests which check for hard-float instructions, which obviously fail on
soft-float ABI builds.

And my recent commit b646d7d279ae ("RISC-V: Inhibit FP <--> int register
moves via tune param") is guilty of the same crime.

So constrain them with "dg-require-effective-target hard_float".

This reduces a bunch of new RV failures.

|   = Summary of gcc testsuite =
|| # of unexpected case / # of unique unexpected 
case
||  gcc |  g++ | gfortran |
|   rv64imac/   lp64/ medlow |  134 /22 |0 / 0 |- |  
BEFORE
|   rv64imac/   lp64/ medlow |   22 / 9 |0 / 0 |- |  
AFTER
|

gcc/testsuite/Changelog:
 * gcc.target/riscv/fmax.c: Add dg-require-effective-target hard_float.
 * gcc.target/riscv/fmaxf.c: Ditto.
 * gcc.target/riscv/fmin.c: Ditto.
 * gcc.target/riscv/fminf.c: Ditto.
 * gcc.target/riscv/smax-ieee.c: Ditto.
 * gcc.target/riscv/smax.c: Ditto.
 * gcc.target/riscv/smaxf-ieee.c: Ditto.
 * gcc.target/riscv/smaxf.c: Ditto.
 * gcc.target/riscv/smin-ieee.c: Ditto.
 * gcc.target/riscv/smin.c: Ditto.
 * gcc.target/riscv/sminf-ieee.c: Ditto.
 * gcc.target/riscv/sminf.c: Ditto.
 * gcc.target/riscv/pr105666.c: Ditto.

Signed-off-by: Vineet Gupta 
---
v3:
 Added fix to pr105666.c as well.
v2:
 Fixed the SoB snafu in v1
---
  gcc/testsuite/gcc.target/riscv/fmax.c   | 1 +
  gcc/testsuite/gcc.target/riscv/fmaxf.c  | 1 +
  gcc/testsuite/gcc.target/riscv/fmin.c   | 1 +
  gcc/testsuite/gcc.target/riscv/fminf.c  | 1 +
  gcc/testsuite/gcc.target/riscv/pr105666.c   | 1 +
  gcc/testsuite/gcc.target/riscv/smax-ieee.c  | 1 +
  gcc/testsuite/gcc.target/riscv/smax.c   | 1 +
  gcc/testsuite/gcc.target/riscv/smaxf-ieee.c | 1 +
  gcc/testsuite/gcc.target/riscv/smaxf.c  | 1 +
  gcc/testsuite/gcc.target/riscv/smin-ieee.c  | 1 +
  gcc/testsuite/gcc.target/riscv/smin.c   | 1 +
  gcc/testsuite/gcc.target/riscv/sminf-ieee.c | 1 +
  gcc/testsuite/gcc.target/riscv/sminf.c  | 1 +
  13 files changed, 13 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/fmax.c 
b/gcc/testsuite/gcc.target/riscv/fmax.c
index c71d35c9f9dc..e1b7fa8f918c 100644
--- a/gcc/testsuite/gcc.target/riscv/fmax.c
+++ b/gcc/testsuite/gcc.target/riscv/fmax.c
@@ -1,4 +1,5 @@
  /* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
  /* { dg-options "-fno-finite-math-only -fsigned-zeros -fno-signaling-nans 
-dp" } */

  double
diff --git a/gcc/testsuite/gcc.target/riscv/fmaxf.c 
b/gcc/testsuite/gcc.target/riscv/fmaxf.c
index f9980166887a..8da0513dc8f6 100644
--- a/gcc/testsuite/gcc.target/riscv/fmaxf.c
+++ b/gcc/testsuite/gcc.target/riscv/fmaxf.c
@@ -1,4 +1,5 @@
  /* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
  /* { dg-options "-fno-finite-math-only -fsigned-zeros -fno-signaling-nans 
-dp" } */

  float
diff --git a/gcc/testsuite/gcc.target/riscv/fmin.c 
b/gcc/testsuite/gcc.target/riscv/fmin.c
index 9634abd19af8..01993d49bc21 100644
--- a/gcc/testsuite/gcc.target/riscv/fmin.c
+++ b/gcc/testsuite/gcc.target/riscv/fmin.c
@@ -1,4 +1,5 @@
  /* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
  /* { dg-options "-fno-finite-math-only -fsigned-zeros -fno-signaling-nans 
-dp" } */

  double
diff --git a/gcc/testsuite/gcc.target/riscv/fminf.c 
b/gcc/testsuite/gcc.target/riscv/fminf.c
index 9a3687be3092..32ce363e10d8 100644
--- a/gcc/testsuite/gcc.target/riscv/fminf.c
+++ b/gcc/testsuite/gcc.target/riscv/fminf.c
@@ -1,4 +1,5 @@
  /* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
  /* { dg-options "-fno-finite-math-only -fsigned-zeros -fno-signaling-nans 
-dp" } */

  float
diff --git a/gcc/testsuite/gcc.target/riscv/pr105666.c 
b/gcc/testsuite/gcc.target/riscv/pr105666.c
index 904f3bc0763f..dd996eec8efc 100644
--- a/gcc/testsuite/gcc.target/riscv/pr105666.c
+++ b/gcc/testsuite/gcc.target/riscv/pr105666.c
@@ -6,6 +6,7 @@
 spilling to stack.  */

  /* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
  /* { dg-options "-march=rv64g -ffast-math" } */

  #define NITER 4
diff --git a/gcc/testsuite/gcc.target/riscv/smax-ieee.c 
b/gcc/testsuite/gcc.target/riscv/smax-ieee.c
index 3a98aeb45add..2dbccefe2f4d 100644
--- a/gcc/testsuite/gcc.target/riscv/smax-ieee.c
+++ b/gcc/testsuite/gcc.target/riscv/smax-ieee.c
@@ -1,4 +1,5 @@
  /* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
  /* { dg-options "-ffinite-math-only -fsigned-zeros -dp" } */

  double
diff --git a/gcc/testsuite/gcc.target/riscv/smax.c 

Re: [PATCH] testsuite: constraint some of fp tests to hard_float

2022-07-13 Thread Vineet Gupta

Hi Jeff,

On 6/26/22 12:05, Jeff Law via Gcc-patches wrote:



On 5/29/2022 9:53 PM, Vineet Gupta wrote:

These tests validate fp conversions with various rounding modes which
would not work on soft-float ABIs.

On -march=rv64imac/-mabi=lp64 this reduces 5 unique failures (overall 35
due to multi flag combination builds)

gcc/testsuite/Changelog:
* gcc.dg/torture/fp-double-convert-float-1.c: Add
dg-require-effective-target hard_float.
* gcc.dg/torture/fp-int-convert-timode-3.c: Ditto.
* gcc.dg/torture/fp-int-convert-timode-4.c: Ditto.
* gcc.dg/torture/fp-uint64-convert-double-1.c: Ditto.
* gcc.dg/torture/fp-uint64-convert-double-2.c: Ditto.



Thanks.  I've pushed this to the trunk.


Can this be backported to gcc-12 please.

Thx,
-Vineet


Re: [PATCH, rs6000] Additional cleanup of rs6000_builtin_mask

2022-07-13 Thread Kewen.Lin via Gcc-patches
Hi Will,

Thanks for the cleanup!  Some comments are inlined.

on 2022/7/14 05:39, will schmidt wrote:
> [PATCH, rs6000] Additional cleanup of rs6000_builtin_mask
> 
> Hi,
>   Post the rs6000 builtins rewrite, some of the leftover builtin
> code is redundant and can be removed.
>   This replaces the remaining usage of bu_mask in
> rs6000_target_modify_macros() with checks against the rs6000_cpu directly.
> Thusly the bu_mask variable can be removed.  After that variable
> is eliminated there are no other uses of rs6000_builtin_mask_calculate(),
> so that function can also be safely removed.
> 

The TargetVariable rs6000_builtin_mask in rs6000.opt is useless, it seems
it can be removed together?

> I have tested this on current systems (P8,P9,P10) without regressions.
> 
> OK for trunk?
> 
> 
> Thanks,
> -Will
> 
> gcc/
>   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Remove
>   bu_mask references.  (rs6000_define_or_undefine_macro): Replace
>   bu_mask reference with a rs6000_cpu value check.
>   (rs6000_cpu_cpp_builtins): Remove rs6000_builtin_mask_calculate()
>   parameter from call to rs6000_target_modify_macros.
>   * config/rs6000/rs6000-protos.h (rs6000_target_modify_macros,
>   rs6000_target_modify_macros_ptr): Remove parameter from extern
>   for the prototype.
>   * config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): Remove
>   parameter from prototype, update calls to this function.
>   (rs6000_print_builtin_options): Remove prototype, call and function.
>   (rs6000_builtin_mask_calculate): Remove function.
>   (rs6000_debug_reg_global): Remove call to rs6000_print_builtin_options.
>   (rs6000_option_override_internal): Remove rs6000_builtin_mask var
>   and builtin_mask debug output.
>   (rs6000_pragma_target_parse): Update calls to
>   rs6000_target_modify_ptr.
> 
> 
> diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
> index 0d13645040ff..4d051b906582 100644
> --- a/gcc/config/rs6000/rs6000-c.cc
> +++ b/gcc/config/rs6000/rs6000-c.cc
> @@ -333,24 +333,20 @@ rs6000_define_or_undefine_macro (bool define_p, const 
> char *name)
>else
>  cpp_undef (parse_in, name);
>  }
>  
>  /* Define or undefine macros based on the current target.  If the user does
> -   #pragma GCC target, we need to adjust the macros dynamically.  Note, some 
> of
> -   the options needed for builtins have been moved to separate variables, so
> -   have both the target flags and the builtin flags as arguments.  */
> +   #pragma GCC target, we need to adjust the macros dynamically.  */
>  
>  void
> -rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
> -  HOST_WIDE_INT bu_mask)
> +rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
>  {
>if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
>  fprintf (stderr,
> -  "rs6000_target_modify_macros (%s, " HOST_WIDE_INT_PRINT_HEX
> -  ", " HOST_WIDE_INT_PRINT_HEX ")\n",
> +  "rs6000_target_modify_macros (%s, " HOST_WIDE_INT_PRINT_HEX ")\n",
>(define_p) ? "define" : "undef",
> -  flags, bu_mask);
> +  flags);
>  
>/* Each of the flags mentioned below controls whether certain
>   preprocessor macros will be automatically defined when
>   preprocessing source files for compilation by this compiler.
>   While most of these flags can be enabled or disabled
> @@ -593,14 +589,12 @@ rs6000_target_modify_macros (bool define_p, 
> HOST_WIDE_INT flags,
>/* OPTION_MASK_FLOAT128_HARDWARE can be turned on if -mcpu=power9 is used 
> or
>   via the target attribute/pragma.  */
>if ((flags & OPTION_MASK_FLOAT128_HW) != 0)
>  rs6000_define_or_undefine_macro (define_p, "__FLOAT128_HARDWARE__");
>  
> -  /* options from the builtin masks.  */
> -  /* Note that OPTION_MASK_FPRND is enabled only if
> - (rs6000_cpu == PROCESSOR_CELL) (e.g. -mcpu=cell).  */
> -  if ((bu_mask & OPTION_MASK_FPRND) != 0)
> +  /* Tell the user if we are targeting CELL.  */
> +  if (rs6000_cpu == PROCESSOR_CELL)
>  rs6000_define_or_undefine_macro (define_p, "__PPU__");
>  
>/* Tell the user if we support the MMA instructions.  */
>if ((flags & OPTION_MASK_MMA) != 0)
>  rs6000_define_or_undefine_macro (define_p, "__MMA__");
> @@ -614,12 +608,11 @@ rs6000_target_modify_macros (bool define_p, 
> HOST_WIDE_INT flags,
>  
>  void
>  rs6000_cpu_cpp_builtins (cpp_reader *pfile)
>  {
>/* Define all of the common macros.  */
> -  rs6000_target_modify_macros (true, rs6000_isa_flags,
> -rs6000_builtin_mask_calculate ());
> +  rs6000_target_modify_macros (true, rs6000_isa_flags);
>  
>if (TARGET_FRE)
>  builtin_define ("__RECIP__");
>if (TARGET_FRES)
>  builtin_define ("__RECIPF__");
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 3ea010236090..b3c16e7448d8 

Re: [PATCH] HIGH part of symbol ref is invalid for constant pool

2022-07-13 Thread Kewen.Lin via Gcc-patches
Hi Jeff,

Thanks for the patch, one question is inlined below.

on 2022/7/4 14:58, Jiufu Guo wrote:
> The high part of the symbol address is invalid for the constant pool. In
> function rs6000_cannot_force_const_mem, we already return true for
> "HIGH with UNSPEC" rtx. While debugging GCC, I found that
> rs6000_cannot_force_const_mem is called for some other HIGH code rtx
> expressions which also indicate the high part of a symbol_ref.
> For example:
> (high:DI (const:DI (plus:DI (symbol_ref:DI ("xx") (const_int 12 [0xc])
> (high:DI (symbol_ref:DI ("var_1")..)))
> 
> In the case below, this kind of rtx could occur in the middle of an
> optimization pass but was not dumped to a file. So, no test case is
> attached to this patch.
> 

Could you expand a bit more on how it affects some tree-optimization pass?
I guess some tree-opt will expand a gimple expression to rtx, evaluate the cost
or similar and make some decision based on it.  If that is the case, you
probably can construct one test case to show that: without this patch, the
evaluated cost or similar looks off and the optimization decision is
sub-optimal; with this patch, the optimization result is as expected.

BR,
Kewen


> extern const unsigned int __decPOWERS[10];
> void
> decSetCoeff (int *residue, const unsigned int *up)
> {
>  unsigned int half = (unsigned int) __decPOWERS1[3] >> 1;
> 
>  if (*up >= half)
>   *residue = 7;
> 
>  return;
> }
> 
> This patch updates rs6000_cannot_force_const_mem to return true for
> rtx with HIGH code.
> 
> 
> Bootstrapped and regtested on ppc64le and ppc64.
> Is it ok for trunk?
> 
> BR,
> Jiufu Guo
> 
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000.cc (rs6000_cannot_force_const_mem):
>   Return true for HIGH code rtx.
> 
> ---
>  gcc/config/rs6000/rs6000.cc | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 3ff16b8ae04..c2b10669627 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -9707,8 +9707,11 @@ rs6000_init_stack_protect_guard (void)
>  static bool
>  rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
>  {
> -  if (GET_CODE (x) == HIGH
> -  && GET_CODE (XEXP (x, 0)) == UNSPEC)
> +  /* High part of a symbol ref/address can not be put into constant pool. 
> e.g.
> + (high:DI (symbol_ref:DI ("var")..)) or
> + (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
> + (high:DI (const:DI (plus:DI (symbol_ref:DI ("xx")) (const_int 12.  
> */
> +  if (GET_CODE (x) == HIGH)
>  return true;
> 
>/* A TLS symbol in the TOC cannot contain a sum.  */


Re: XFAIL 'offloading_enabled' diagnostics issue in 'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551] (was: Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases)

2022-07-13 Thread Lewis Hyatt via Gcc-patches
On Tue, Jul 12, 2022 at 9:10 AM Tobias Burnus  wrote:
> On 12.07.22 13:50, Lewis Hyatt via Gcc-patches wrote:
> > On Tue, Jul 12, 2022 at 2:33 AM Thomas Schwinge  
> > wrote:
> >> On 2022-07-11T11:27:12+0200, I wrote:
> >>> Oh my, PR101551 "[offloading] Differences in diagnostics etc."
> >>> strikes again...  The latter two 'note' diagnostics are currently
> >>> only emitted in non-offloading configurations.  I've now pushed to
> >>> master branch commit 3723aedaad20a129741c2f6f3c22b3dd1220a3fc
> >>> "XFAIL 'offloading_enabled' diagnostics issue in
> >>> 'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551]", see attached.
> > Would you mind please confirming how I need to run configure in order
> > to get this configuration? Then I can look into why the difference in
> > location information there. Thanks.
>
> I think the simplest to replicate it without much effort is to run:
>
> cd ${GCC-SRC}/gcc
> sed -e 's/ENABLE_OFFLOADING/true/' *.cc */*.cc
>
> I think that covers all cases which do not need the target lto1.
> If they do - then it becomes more difficult as you need an
> offloading compiler. (But that is rather about: diagnostic or
> no diagnostic and not about having a different diagnostic.)
>
> I think the different diagnostic has the reason stated in
> commit r12-135-gbd7ebe9da745a62184052dd1b15f4dd10fbdc9f4
>
> Namely:
> cut---
>  It turned out that a compiler built without offloading support
>  and one with can produce slightly different diagnostic.
>
>  Offloading support implies ENABLE_OFFLOAD which implies that
>  g->have_offload is set when offloading is actually needed.
>  In cgraphunit.c, the latter causes flag_generate_offload = 1,
>  which in turn affects tree.c's free_lang_data.
>
>  The result is that the front-end specific diagnostic gets reset
>  ('tree_diagnostics_defaults (global_dc)'), which affects in this
>  case 'Warning' vs. 'warning' via the Fortran frontend.
>
>  Result: 'Warning:' vs. 'warning:'.
>  Side note: Other FE also override the diagnostic, leading to
>  similar differences, e.g. the C++ FE outputs mangled function
>  names differently
> cut--
>
> If the message is from the offload-device's lto1 compiler, it
> becomes more difficult to configure+build GCC. See
> https://gcc.gnu.org/wiki/Offloading under
> "How to build an offloading-enabled GCC"
>
> I hope it helps.

Yes, very much, thank you. I am trying something that should improve
it, and also a similar issue that happens with -flto, I made this PR
about the latter: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106274


[PATCH, rs6000] Additional cleanup of rs6000_builtin_mask

2022-07-13 Thread will schmidt via Gcc-patches
[PATCH, rs6000] Additional cleanup of rs6000_builtin_mask

Hi,
  Post the rs6000 builtins rewrite, some of the leftover builtin
code is redundant and can be removed.
  This replaces the remaining usage of bu_mask in
rs6000_target_modify_macros() with checks against the rs6000_cpu directly.
Thusly the bu_mask variable can be removed.  After that variable
is eliminated there are no other uses of rs6000_builtin_mask_calculate(),
so that function can also be safely removed.

I have tested this on current systems (P8,P9,P10) without regressions.

OK for trunk?


Thanks,
-Will

gcc/
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Remove
bu_mask references.  (rs6000_define_or_undefine_macro): Replace
bu_mask reference with a rs6000_cpu value check.
(rs6000_cpu_cpp_builtins): Remove rs6000_builtin_mask_calculate()
parameter from call to rs6000_target_modify_macros.
* config/rs6000/rs6000-protos.h (rs6000_target_modify_macros,
rs6000_target_modify_macros_ptr): Remove parameter from extern
for the prototype.
* config/rs6000/rs6000.cc (rs6000_target_modify_macros_ptr): Remove
parameter from prototype, update calls to this function.
(rs6000_print_builtin_options): Remove prototype, call and function.
(rs6000_builtin_mask_calculate): Remove function.
(rs6000_debug_reg_global): Remove call to rs6000_print_builtin_options.
(rs6000_option_override_internal): Remove rs6000_builtin_mask var
and builtin_mask debug output.
(rs6000_pragma_target_parse): Update calls to
rs6000_target_modify_ptr.


diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 0d13645040ff..4d051b906582 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -333,24 +333,20 @@ rs6000_define_or_undefine_macro (bool define_p, const 
char *name)
   else
 cpp_undef (parse_in, name);
 }
 
 /* Define or undefine macros based on the current target.  If the user does
-   #pragma GCC target, we need to adjust the macros dynamically.  Note, some of
-   the options needed for builtins have been moved to separate variables, so
-   have both the target flags and the builtin flags as arguments.  */
+   #pragma GCC target, we need to adjust the macros dynamically.  */
 
 void
-rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
-HOST_WIDE_INT bu_mask)
+rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
 {
   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
 fprintf (stderr,
-"rs6000_target_modify_macros (%s, " HOST_WIDE_INT_PRINT_HEX
-", " HOST_WIDE_INT_PRINT_HEX ")\n",
+"rs6000_target_modify_macros (%s, " HOST_WIDE_INT_PRINT_HEX ")\n",
 (define_p) ? "define" : "undef",
-flags, bu_mask);
+flags);
 
   /* Each of the flags mentioned below controls whether certain
  preprocessor macros will be automatically defined when
  preprocessing source files for compilation by this compiler.
  While most of these flags can be enabled or disabled
@@ -593,14 +589,12 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
   /* OPTION_MASK_FLOAT128_HARDWARE can be turned on if -mcpu=power9 is used or
  via the target attribute/pragma.  */
   if ((flags & OPTION_MASK_FLOAT128_HW) != 0)
 rs6000_define_or_undefine_macro (define_p, "__FLOAT128_HARDWARE__");
 
-  /* options from the builtin masks.  */
-  /* Note that OPTION_MASK_FPRND is enabled only if
- (rs6000_cpu == PROCESSOR_CELL) (e.g. -mcpu=cell).  */
-  if ((bu_mask & OPTION_MASK_FPRND) != 0)
+  /* Tell the user if we are targeting CELL.  */
+  if (rs6000_cpu == PROCESSOR_CELL)
 rs6000_define_or_undefine_macro (define_p, "__PPU__");
 
   /* Tell the user if we support the MMA instructions.  */
   if ((flags & OPTION_MASK_MMA) != 0)
 rs6000_define_or_undefine_macro (define_p, "__MMA__");
@@ -614,12 +608,11 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
 
 void
 rs6000_cpu_cpp_builtins (cpp_reader *pfile)
 {
   /* Define all of the common macros.  */
-  rs6000_target_modify_macros (true, rs6000_isa_flags,
-  rs6000_builtin_mask_calculate ());
+  rs6000_target_modify_macros (true, rs6000_isa_flags);
 
   if (TARGET_FRE)
 builtin_define ("__RECIP__");
   if (TARGET_FRES)
 builtin_define ("__RECIPF__");
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 3ea010236090..b3c16e7448d8 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -318,13 +318,12 @@ extern void rs6000_pragma_longcall (struct cpp_reader *);
 extern void rs6000_cpu_cpp_builtins (struct cpp_reader *);
 #ifdef TREE_CODE
 extern bool rs6000_pragma_target_parse (tree, tree);
 #endif
 extern void rs6000_activate_target_options (tree new_tree);
-extern void 

Re: [PATCH, rs6000] Cleanup some vstrir define_expand naming inconsistencies

2022-07-13 Thread Segher Boessenkool
On Wed, Jul 13, 2022 at 04:14:11PM -0500, will schmidt wrote:
> On Wed, 2022-07-13 at 14:39 -0500, Segher Boessenkool wrote:
> > I'm not a fan of "internal" either, it doesn't say anything.  At
> > least
> > put it at the very end of the names please?
> I'm easily convinced. ;-)  I wonder if I should just drop "_internal"
> entirely and go with "vstrir_".  Otherwise I'll rework to be
> "vstrir__internal".

The define_expand already uses that name.  Some other patterns in
altivec.md use *_direct, maybe that is nicer?

> At a glance I see we do have some other existing define_insn entries
> with _internal at the tail and a few others embedded in the middle. 
> I'll leave a note and perhaps review those after.  :-)

Thanks :-)


Segher


Re: [PATCH, rs6000] Cleanup some vstrir define_expand naming inconsistencies

2022-07-13 Thread will schmidt via Gcc-patches
On Wed, 2022-07-13 at 14:39 -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Jul 13, 2022 at 01:18:29PM -0500, will schmidt wrote:
> >   This cleans up some of the naming around the vstrir and vstril
> > instruction definitions, with some cosmetic changes for
> > consistency.
> > gcc/
> > * config/rs6000/altivec.md (vstrir_code_): Rename
> > to vstrir_internal_.
> > (vstrir_p_code_): Rename to vstrir_p_internal_.
> > (vstril_code_): Rename to vstril_internal_.
> > (vstril_p_code_): Rename to vstril_p_internal_.
> 
> It doesn't show the new names on the lhs this way.  One way to do
> better
> is to write e.g.
>   (vstril_code_): Rename to...
>   (vstril_internal_): ... this.

Ok.

> 
> It often is a good idea to say "... for VIshort" and similar
> btw.

Ok. 

> 
> I'm not a fan of "internal" either, it doesn't say anything.  At
> least
> put it at the very end of the names please?
I'm easily convinced. ;-)  I wonder if I should just drop "_internal"
entirely and go with "vstrir_".  Otherwise I'll rework to be
"vstrir__internal".
At a glance I see we do have some other existing define_insn entries
with _internal at the tail and a few others embedded in the middle. 
I'll leave a note and perhaps review those after.  :-)

Thanks,
-Will

> 
> Okay for trunk with that changed.  Thanks!
> 
> 
> Segher



[PATCH] ipa-cp: Fix assert triggering with -fno-toplevel-reorder (PR 106260)

2022-07-13 Thread Martin Jambor
Hi,

with -fno-toplevel-reorder (and -fwhole-program), there apparently can
be local functions without any callers.  This is something that IPA-CP
does not like because its propagation verifier checks that local
functions do not end up with TOP in their lattices.  Therefore there
is an assert checking that all call-less unreachable functions have
been removed, which triggers in PR 106260 with these two options.

This patch detects the situation and marks the lattices as variable,
thus avoiding both the assert trigger and the verification failure.

Bootstrapped and tested on x86_64-linux.  OK for master and then all
active release branches?

Thanks,

Martin


gcc/ChangeLog:

2022-07-13  Martin Jambor  

PR ipa/106260
* ipa-cp.cc (initialize_node_lattices): Replace assert that there are
callers with handling that situation when -fno-toplevel-reorder.

gcc/testsuite/ChangeLog:

2022-07-13  Martin Jambor  

PR ipa/106260
* g++.dg/ipa/pr106260.C: New test.
---
 gcc/ipa-cp.cc   |  6 ++-
 gcc/testsuite/g++.dg/ipa/pr106260.C | 64 +
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr106260.C

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 543a9334e2c..f699a8dadc0 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -1286,10 +1286,14 @@ initialize_node_lattices (struct cgraph_node *node)
   int caller_count = 0;
   node->call_for_symbol_thunks_and_aliases (count_callers, _count,
true);
-  gcc_checking_assert (caller_count > 0);
   if (caller_count == 1)
node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
  NULL, true);
+  else if (caller_count == 0)
+   {
+ gcc_checking_assert (!opt_for_fn (node->decl, flag_toplevel_reorder));
+ variable = true;
+   }
 }
   else
 {
diff --git a/gcc/testsuite/g++.dg/ipa/pr106260.C 
b/gcc/testsuite/g++.dg/ipa/pr106260.C
new file mode 100644
index 000..bd3b6e0af79
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr106260.C
@@ -0,0 +1,64 @@
+// { dg-do compile }
+// { dg-options "-O2 -std=gnu++14 -fwhole-program -fno-unit-at-a-time" }
+
+struct A;
+template 
+struct Q { Q (T); };
+template
+struct U {
+  ~U () { m1 (nullptr); }
+  D m2 ();
+  T *u;
+  void m1 (T *) { m2 () (u); }
+};
+struct F { F (int *); };
+template 
+using W = Q;
+int a, b;
+void fn1 (void *);
+template 
+void
+fn2 (T *x)
+{
+  if (x)
+x->~T();
+  fn1 (x);
+}
+template 
+struct C {
+  void operator() (T *x) { fn2 (x); }
+};
+struct D;
+template  >
+using V = U;
+struct A {
+  A (int *);
+};
+struct S;
+struct G {
+  V m3 ();
+};
+struct S {
+  int e;
+  virtual ~S () {}
+};
+template
+struct H {
+  H (int, T x, int) : h(x) {}
+  G g;
+  void m4 () { g.m3 (); }
+  T h;
+};
+struct I {
+  I(A, W);
+};
+void
+test ()
+{
+  A c ();
+  W d ();
+  I e (c, d);
+  H f (0, e, a);
+  f.m4 ();
+}
+
-- 
2.36.1



Re: [PATCH, rs6000] Cleanup some vstrir define_expand naming inconsistencies

2022-07-13 Thread Segher Boessenkool
Hi!

On Wed, Jul 13, 2022 at 01:18:29PM -0500, will schmidt wrote:
>   This cleans up some of the naming around the vstrir and vstril
> instruction definitions, with some cosmetic changes for consistency.

> gcc/
>   * config/rs6000/altivec.md (vstrir_code_): Rename
>   to vstrir_internal_.
>   (vstrir_p_code_): Rename to vstrir_p_internal_.
>   (vstril_code_): Rename to vstril_internal_.
>   (vstril_p_code_): Rename to vstril_p_internal_.

It doesn't show the new names on the lhs this way.  One way to do
better
is to write e.g.
(vstril_code_): Rename to...
(vstril_internal_): ... this.

It often is a good idea to say "... for VIshort" and similar btw.

I'm not a fan of "internal" either, it doesn't say anything.  At least
put it at the very end of the names please?

Okay for trunk with that changed.  Thanks!


Segher


Re: [PATCH] match.pd: Add new abs pattern [PR94290]

2022-07-13 Thread Andrew Pinski via Gcc-patches
On Wed, Jul 13, 2022 at 12:26 PM Sam Feifer via Gcc-patches
 wrote:
>
> This patch is intended to fix a missed optimization in match.pd. It optimizes 
> (x >= 0 ? x : 0) + (x <= 0 ? -x : 0) to just abs(x). I had to write a second 
> simplification in match.pd to handle the commutative property as the match 
> was not occurring otherwise. Additionally, the pattern (x <= 0 ? -x : 0) now 
> gets optimized to max(-x, 0), which helps with the other simplification rule.

You could use :c for the commutative property instead and that should
simplify things.
That is:

(simplify
  (plus:c (max @0 integer_zerop) (max (negate @0) integer_zerop))
  (abs @0))

Also since integer_zerop works on vectors, it seems like you should
add a testcase or two for the vector case.
Also would be useful if you write a testcase that uses different
statements rather than one big one so it gets exercised in the
forwprop case.
Note also if either of the max are used more than just in this
simplification, it could increase the lifetime of @0, maybe you need
to add :s to the max expressions.

Thanks,
Andrew

>
> Tests are also included to be added to the testsuite.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>
> PR tree-optimization/94290
>
> gcc/ChangeLog:
>
> * match.pd (x >= 0 ? x : 0) + (x <= 0 ? -x : 0): New simplification.
> * match.pd (x <= 0 ? -x : 0): New Simplification.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.c-torture/execute/pr94290-1.c: New test.
> * gcc.dg/pr94290-2.c: New test.
> * gcc.dg/pr94290.c: New test.
> ---
>  gcc/match.pd  | 15 ++
>  .../gcc.c-torture/execute/pr94290-1.c | 16 +++
>  gcc/testsuite/gcc.dg/pr94290-2.c  | 15 ++
>  gcc/testsuite/gcc.dg/pr94290.c| 46 +++
>  4 files changed, 92 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr94290-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94290-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94290.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 45aefd96688..55ca79d7ac9 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -7848,3 +7848,18 @@ and,
>(if (TYPE_UNSIGNED (TREE_TYPE (@0)))
>  (bit_and @0 @1)
>(cond (le @0 @1) @0 (bit_and @0 @1))
> +
> +/* (x >= 0 ? x : 0) + (x <= 0 ? -x : 0) -> abs x.  */
> +(simplify
> +  (plus (max @0 integer_zerop) (max (negate @0) integer_zerop))
> +  (abs @0))
> +
> +/* (x <= 0 ? -x : 0) + (x >= 0 ? x : 0) -> abs x.  */
> +(simplify
> +  (plus (max (negate @0) integer_zerop) (max @0 integer_zerop) )
> +  (abs @0))
> +
> +/* (x <= 0 ? -x : 0) -> max(-x, 0).  */
> +(simplify
> + (cond (le @0 integer_zerop@1) (negate @0) integer_zerop@1)
> + (max (negate @0) @1))
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr94290-1.c 
> b/gcc/testsuite/gcc.c-torture/execute/pr94290-1.c
> new file mode 100644
> index 000..93b80d569aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr94290-1.c
> @@ -0,0 +1,16 @@
> +/* PR tree-optimization/94290 */
> +
> +#include "../../gcc.dg/pr94290.c"
> +
> +int main() {
> +
> +if (foo(0) != 0
> +|| foo(-42) != 42
> +|| foo(42) != 42
> +|| baz(-10) != 10
> +|| baz(-10) != 10) {
> +__builtin_abort();
> +}
> +
> +return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/pr94290-2.c 
> b/gcc/testsuite/gcc.dg/pr94290-2.c
> new file mode 100644
> index 000..ea6e55755f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr94290-2.c
> @@ -0,0 +1,15 @@
> +/* PR tree-optimization/94290 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +/* Form from PR.  */
> +__attribute__((noipa)) unsigned int foo(int x) {
> +return x <= 0 ? -x : 0;
> +}
> +
> +/* Changed order.  */
> +__attribute__((noipa)) unsigned int bar(int x) {
> +return 0 >= x ? -x : 0;
> +}
> +
> +/* { dg-final {scan-tree-dump-times " MAX_EXPR " 2 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr94290.c b/gcc/testsuite/gcc.dg/pr94290.c
> new file mode 100644
> index 000..47617c36c02
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr94290.c
> @@ -0,0 +1,46 @@
> +/* PR tree-optimization/94290 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +
> +/* Same form as PR.  */
> +__attribute__((noipa)) unsigned int foo(int x) {
> +return (x >= 0 ? x : 0) + (x <= 0 ? -x : 0);
> +}
> +
> +/* Signed function.  */
> +__attribute__((noipa)) int bar(int x) {
> +return (x >= 0 ? x : 0) + (x <= 0 ? -x : 0);
> +}
> +
> +/* Commutative property.  */
> +__attribute__((noipa)) unsigned int baz(int x) {
> +return (x <= 0 ? -x : 0) + (x >= 0 ? x : 0);
> +}
> +
> +/* Flipped order for max expressions.  */
> +__attribute__((noipa)) unsigned int quux(int x) {
> +return (0 <= x ? x : 0) + (0 >= x ? -x : 0);
> +}
> +
> +/* Not zero so should not optimize.  */
> 

C++: add -std={c,gnu}++{current,future}

2022-07-13 Thread Nathan Sidwell via Gcc-patches

Inspired by a user question.  Jason, thoughts?

Since C++ is such a moving target, Microsoft have /std:c++latest
(AFAICT clang does not), to select the currently implemented version
of the working paper.  But the use of 'std:latest' is somewhat
ambiguous -- the current std is C++20 -- that's the latest std, the
next std will more than likely but not necessarily be C++23.  So this
adds:

  -std=c++current -- the current std (c++20)
  -std=c++future -- the working paper (c++2b)

also adds gnu++current and gnu++future to select the gnu-extended
variants.

nathan

--
Nathan Sidwell
From 9671f4d5e7efa130280b6d50fb4e9e8492d5b587 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Wed, 13 Jul 2022 12:11:40 -0700
Subject: [PATCH] C++: add -std={c,gnu}++{current,future}

Since C++ is such a moving target, Microsoft have /std:c++latest
(AFAICT clang does not), to select the currently implemented version
of the working paper.  But the use of 'std:latest' is somewhat
ambiguous -- the current std is C++20 -- that's the latest std, the
next std will more than likely but not necessarily be C++23.  So this
adds:

  -std=c++current -- the current std (c++20)
  -std=c++future -- the working paper (c++2b)

also adds gnu++current and gnu++future to select the gnu-extended
variants.

	gcc/
	* doc/invoke.texi (-std=): Document new c++ current & future
	options.
	gcc/c-family/
	* c.opt (-std={c,gnu}++{current,future}): New alias options.
	gcc/testsuite/
	* g++.dg/gnu-current.C: New.
	* g++.dg/gnu-future.C: New.
	* g++.dg/std-current.C: New.
	* g++.dg/std-future.C: New.
---
 gcc/c-family/c.opt | 16 
 gcc/doc/invoke.texi| 23 +++
 gcc/testsuite/g++.dg/gnu-current.C |  7 +++
 gcc/testsuite/g++.dg/gnu-future.C  |  7 +++
 gcc/testsuite/g++.dg/std-current.C | 11 +++
 gcc/testsuite/g++.dg/std-future.C  |  8 
 6 files changed, 72 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/gnu-current.C
 create mode 100644 gcc/testsuite/g++.dg/gnu-future.C
 create mode 100644 gcc/testsuite/g++.dg/std-current.C
 create mode 100644 gcc/testsuite/g++.dg/std-future.C

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 44e1a60ce24..9292029a967 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2321,6 +2321,14 @@ std=c++23
 C++ ObjC++ Undocumented
 Conform to the ISO 2023 C++ draft standard (experimental and incomplete support).
 
+std=c++current
+C++ ObjC++ Alias(std=c++20) Undocumented
+Conform to the current ISO C++ standard (C++20).
+
+std=c++future
+C++ ObjC++ Alias(std=c++23) Undocumented
+Conform to a future ISO C++ standard (C++2b, experimental and incomplete support).
+
 std=c11
 C ObjC
 Conform to the ISO 2011 C standard.
@@ -2407,6 +2415,14 @@ std=gnu++23
 C++ ObjC++ Undocumented
 Conform to the ISO 2023 C++ draft standard with GNU extensions (experimental and incomplete support).
 
+std=gnu++current
+C++ ObjC++ Alias(std=gnu++20) Undocumented
+Conform to the current ISO C++ standard with GNU extensions (C++20).
+
+std=gnu++future
+C++ ObjC++ Alias(std=gnu++23) Undocumented
+Conform to a future ISO C++ standard with GNU extensions (C++2b, experimental and incomplete support).
+
 std=gnu11
 C ObjC
 Conform to the ISO 2011 C standard with GNU extensions.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d5ff1018372..1c0edb9df68 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2462,6 +2462,17 @@ GNU dialect of @option{-std=c++17}.
 This is the default for C++ code.
 The name @samp{gnu++1z} is deprecated.
 
+@item gnu++current
+@itemx gnu++current
+GNU dialect of the current C++ standard, currently @option{-std=gnu++20}.
+The C++ version selected by this option is a moving target.
+
+@item gnu++future
+@itemx gnu++future
+GNU dialect of the next C++ standard, currently @option{-std=gnu++2b}.
+The C++ version selected by this option is a moving target (as are the
+semantics of that proposed version).
+
 @item c++20
 @itemx c++2a
 The 2020 ISO C++ standard plus amendments.
@@ -2487,6 +2498,18 @@ change in incompatible ways in future releases.
 GNU dialect of @option{-std=c++2b}.  Support is highly experimental,
 and will almost certainly change in incompatible ways in future
 releases.
+
+@item c++current
+@itemx c++current
+The current C++ standard, currently @option{-std=c++20}.
+The C++ version selected by this option is a moving target.
+
+@item c++future
+@itemx c++future
+The next C++ standard, currently @option{-std=c++2b}.
+The C++ version selected by this option is a moving target (as are the
+semantics of that proposed version).
+
 @end table
 
 @item -aux-info @var{filename}
diff --git a/gcc/testsuite/g++.dg/gnu-current.C b/gcc/testsuite/g++.dg/gnu-current.C
new file mode 100644
index 000..c95c56d3ad8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gnu-current.C
@@ -0,0 +1,7 @@
+// { dg-do compile }
+// { dg-options -std=gnu++current }
+
+static_assert (__cplusplus == 202002L, "time has moved 

[PATCH] match.pd: Add new abs pattern [PR94290]

2022-07-13 Thread Sam Feifer via Gcc-patches
This patch is intended to fix a missed optimization in match.pd. It optimizes 
(x >= 0 ? x : 0) + (x <= 0 ? -x : 0) to just abs(x). I had to write a second 
simplification in match.pd to handle the commutative property as the match was 
not ocurring otherwise. Additionally, the pattern (x <= 0 ? -x : 0) now gets 
optimized to max(-x, 0), which helps with the other simplification rule.

Tests are also included to be added to the testsuite.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR tree-optimization/94290

gcc/ChangeLog:

* match.pd (x >= 0 ? x : 0) + (x <= 0 ? -x : 0): New simplification.
* match.pd (x <= 0 ? -x : 0): New Simplification.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/pr94290-1.c: New test.
* gcc.dg/pr94290-2.c: New test.
* gcc.dg/pr94290.c: New test.
---
 gcc/match.pd  | 15 ++
 .../gcc.c-torture/execute/pr94290-1.c | 16 +++
 gcc/testsuite/gcc.dg/pr94290-2.c  | 15 ++
 gcc/testsuite/gcc.dg/pr94290.c| 46 +++
 4 files changed, 92 insertions(+)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr94290-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr94290-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr94290.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 45aefd96688..55ca79d7ac9 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7848,3 +7848,18 @@ and,
   (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
 (bit_and @0 @1)
   (cond (le @0 @1) @0 (bit_and @0 @1))
+
+/* (x >= 0 ? x : 0) + (x <= 0 ? -x : 0) -> abs x.  */
+(simplify
+  (plus (max @0 integer_zerop) (max (negate @0) integer_zerop))
+  (abs @0))
+
+/* (x <= 0 ? -x : 0) + (x >= 0 ? x : 0) -> abs x.  */
+(simplify
+  (plus (max (negate @0) integer_zerop) (max @0 integer_zerop) )
+  (abs @0))
+
+/* (x <= 0 ? -x : 0) -> max(-x, 0).  */
+(simplify
+ (cond (le @0 integer_zerop@1) (negate @0) integer_zerop@1)
+ (max (negate @0) @1))
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr94290-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr94290-1.c
new file mode 100644
index 000..93b80d569aa
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr94290-1.c
@@ -0,0 +1,16 @@
+/* PR tree-optimization/94290 */
+
+#include "../../gcc.dg/pr94290.c"
+
+int main() {
+
+if (foo(0) != 0
+|| foo(-42) != 42
+|| foo(42) != 42
+|| baz(-10) != 10
+|| baz(-10) != 10) {
+__builtin_abort();
+}
+
+return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr94290-2.c b/gcc/testsuite/gcc.dg/pr94290-2.c
new file mode 100644
index 000..ea6e55755f5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94290-2.c
@@ -0,0 +1,15 @@
+/* PR tree-optimization/94290 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* Form from PR.  */
+__attribute__((noipa)) unsigned int foo(int x) {
+return x <= 0 ? -x : 0;
+}
+
+/* Changed order.  */
+__attribute__((noipa)) unsigned int bar(int x) {
+return 0 >= x ? -x : 0;
+}
+
+/* { dg-final {scan-tree-dump-times " MAX_EXPR " 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr94290.c b/gcc/testsuite/gcc.dg/pr94290.c
new file mode 100644
index 000..47617c36c02
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94290.c
@@ -0,0 +1,46 @@
+/* PR tree-optimization/94290 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+/* Same form as PR.  */
+__attribute__((noipa)) unsigned int foo(int x) {
+return (x >= 0 ? x : 0) + (x <= 0 ? -x : 0);
+}
+
+/* Signed function.  */
+__attribute__((noipa)) int bar(int x) {
+return (x >= 0 ? x : 0) + (x <= 0 ? -x : 0);
+}
+
+/* Commutative property.  */
+__attribute__((noipa)) unsigned int baz(int x) {
+return (x <= 0 ? -x : 0) + (x >= 0 ? x : 0);
+}
+
+/* Flipped order for max expressions.  */
+__attribute__((noipa)) unsigned int quux(int x) {
+return (0 <= x ? x : 0) + (0 >= x ? -x : 0);
+}
+
+/* Not zero so should not optimize.  */
+__attribute__((noipa)) unsigned int waldo(int x) {
+return (x >= 4 ? x : 4) + (x <= 4 ? -x : 4);
+}
+
+/* Not zero so should not optimize.  */
+__attribute__((noipa)) unsigned int fred(int x) {
+return (x >= -4 ? x : -4) + (x <= -4 ? -x : -4);
+}
+
+/* Incorrect pattern.  */
+__attribute__((noipa)) unsigned int goo(int x) {
+return (x <= 0 ? x : 0) + (x >= 0 ? -x : 0);
+}
+
+/* Incorrect pattern.  */
+__attribute__((noipa)) int qux(int x) {
+return (x >= 0 ? x : 0) + (x >= 0 ? x : 0);
+}
+
+/* { dg-final {scan-tree-dump-times " ABS_EXPR " 4 "optimized" } } */

base-commit: 6af530f914801f5e561057da55c41480f28751f7
-- 
2.31.1



[pushed] libcpp: Avoid pessimizing std::move [PR106272]

2022-07-13 Thread Marek Polacek via Gcc-patches
std::move in a return statement can prevent the NRVO:


PR106272 reports that we have two such cases in class label_text's
member functions.  We have -Wpessimizing-move that's supposed to detect
problematic std::move uses, but in this case it didn't trigger.  I've filed
PR106276 to track that.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR preprocessor/106272

libcpp/ChangeLog:

* include/line-map.h (class label_text): Don't std::move in a return
statement.
---
 libcpp/include/line-map.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index c6379ce25b8..c434a246b13 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -1873,13 +1873,13 @@ public:
  longer-lived owner.  */
   static label_text borrow (const char *buffer)
   {
-return std::move (label_text (const_cast  (buffer), false));
+return label_text (const_cast  (buffer), false);
   }
 
   /* Create a label_text instance that takes ownership of BUFFER.  */
   static label_text take (char *buffer)
   {
-return std::move (label_text (buffer, true));
+return label_text (buffer, true);
   }
 
   /* Take ownership of the buffer, copying if necessary.  */

base-commit: f70c18524221dcefa6cd26cee7b55503181bd912
-- 
2.36.1



[PATCH, rs6000] Cleanup some vstrir define_expand naming inconsistencies

2022-07-13 Thread will schmidt via Gcc-patches
[PATCH, rs6000] Cleanup some vstrir define_expand naming inconsistencies

Hi,
  This cleans up some of the naming around the vstrir and vstril
instruction definitions, with some cosmetic changes for consistency.
No functional changes.
Regtested just in case, no regressions.  :-)
OK for trunk?

Thanks,

gcc/
* config/rs6000/altivec.md (vstrir_code_): Rename
to vstrir_internal_.
(vstrir_p_code_): Rename to vstrir_p_internal_.
(vstril_code_): Rename to vstril_internal_.
(vstril_p_code_): Rename to vstril_p_internal_.

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index efc8ae35c2e7..5aea02e9ad6e 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -884,44 +884,44 @@ (define_expand "vstrir_"
(unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")]
UNSPEC_VSTRIR))]
   "TARGET_POWER10"
 {
   if (BYTES_BIG_ENDIAN)
-emit_insn (gen_vstrir_code_ (operands[0], operands[1]));
+emit_insn (gen_vstrir_internal_ (operands[0], operands[1]));
   else
-emit_insn (gen_vstril_code_ (operands[0], operands[1]));
+emit_insn (gen_vstril_internal_ (operands[0], operands[1]));
   DONE;
 })
 
-(define_insn "vstrir_code_"
+(define_insn "vstrir_internal_"
   [(set (match_operand:VIshort 0 "altivec_register_operand" "=v")
(unspec:VIshort
   [(match_operand:VIshort 1 "altivec_register_operand" "v")]
   UNSPEC_VSTRIR))]
   "TARGET_POWER10"
   "vstrir %0,%1"
   [(set_attr "type" "vecsimple")])
 
-;; This expands into same code as vstrir_ followed by condition logic
+;; This expands into same code as vstrir followed by condition logic
 ;; so that a single vstribr. or vstrihr. or vstribl. or vstrihl. instruction
 ;; can, for example, satisfy the needs of a vec_strir () function paired
 ;; with a vec_strir_p () function if both take the same incoming arguments.
 (define_expand "vstrir_p_"
   [(match_operand:SI 0 "gpc_reg_operand")
(match_operand:VIshort 1 "altivec_register_operand")]
   "TARGET_POWER10"
 {
   rtx scratch = gen_reg_rtx (mode);
   if (BYTES_BIG_ENDIAN)
-emit_insn (gen_vstrir_p_code_ (scratch, operands[1]));
+emit_insn (gen_vstrir_p_internal_ (scratch, operands[1]));
   else
-emit_insn (gen_vstril_p_code_ (scratch, operands[1]));
+emit_insn (gen_vstril_p_internal_ (scratch, operands[1]));
   emit_insn (gen_cr6_test_for_zero (operands[0]));
   DONE;
 })
 
-(define_insn "vstrir_p_code_"
+(define_insn "vstrir_p_internal_"
   [(set (match_operand:VIshort 0 "altivec_register_operand" "=v")
(unspec:VIshort
   [(match_operand:VIshort 1 "altivec_register_operand" "v")]
   UNSPEC_VSTRIR))
(set (reg:CC CR6_REGNO)
@@ -936,17 +936,17 @@ (define_expand "vstril_"
(unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")]
UNSPEC_VSTRIR))]
   "TARGET_POWER10"
 {
   if (BYTES_BIG_ENDIAN)
-emit_insn (gen_vstril_code_ (operands[0], operands[1]));
+emit_insn (gen_vstril_internal_ (operands[0], operands[1]));
   else
-emit_insn (gen_vstrir_code_ (operands[0], operands[1]));
+emit_insn (gen_vstrir_internal_ (operands[0], operands[1]));
   DONE;
 })
 
-(define_insn "vstril_code_"
+(define_insn "vstril_internal_"
   [(set (match_operand:VIshort 0 "altivec_register_operand" "=v")
(unspec:VIshort
   [(match_operand:VIshort 1 "altivec_register_operand" "v")]
   UNSPEC_VSTRIL))]
   "TARGET_POWER10"
@@ -962,18 +962,18 @@ (define_expand "vstril_p_"
(match_operand:VIshort 1 "altivec_register_operand")]
   "TARGET_POWER10"
 {
   rtx scratch = gen_reg_rtx (mode);
   if (BYTES_BIG_ENDIAN)
-emit_insn (gen_vstril_p_code_ (scratch, operands[1]));
+emit_insn (gen_vstril_p_internal_ (scratch, operands[1]));
   else
-emit_insn (gen_vstrir_p_code_ (scratch, operands[1]));
+emit_insn (gen_vstrir_p_internal_ (scratch, operands[1]));
   emit_insn (gen_cr6_test_for_zero (operands[0]));
   DONE;
 })
 
-(define_insn "vstril_p_code_"
+(define_insn "vstril_p_internal_"
   [(set (match_operand:VIshort 0 "altivec_register_operand" "=v")
(unspec:VIshort
   [(match_operand:VIshort 1 "altivec_register_operand" "v")]
   UNSPEC_VSTRIL))
(set (reg:CC CR6_REGNO)



[PATCH] Add _GLIBCXX_DEBUG backtrace generation

2022-07-13 Thread François Dumont via Gcc-patches

libstdc++: [_GLIBCXX_DEBUG] Add backtrace generation on demand

  Add _GLIBCXX_DEBUG_BACKTRACE macro to activate backtrace generation 
on _GLIBCXX_DEBUG assertions. Prerequisite is to have configured the lib 
with:


  --enable-libstdcxx-backtrace=yes

  libstdc++-v3/ChangeLog:

  * include/debug/formatter.h
  [_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_state): Declare.
  [_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_create_state): Declare.
  [_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_full_callback): Define.
  [_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_error_callback): Define.
  [_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_full_func): Define.
  [_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_full): Declare.
  [_GLIBCXX_HAVE_STACKTRACE](_Error_formatter::_M_backtrace_state): New.
  [_GLIBCXX_HAVE_STACKTRACE](_Error_formatter::_M_backtrace_full): New.
  * src/c++11/debug.cc (pretty_print): Rename into...
  (print_function): ...that.
  [_GLIBCXX_HAVE_STACKTRACE](print_backtrace): New.
  (_Error_formatter::_M_error()): Adapt.
  * src/libbacktrace/Makefile.am: Add backtrace.c.
  * src/libbacktrace/Makefile.in: Regenerate.
  * src/libbacktrace/backtrace-rename.h (backtrace_full): New.
  * testsuite/23_containers/vector/debug/assign4_neg.cc: Add backtrace
    generation.
  * doc/xml/manual/debug_mode.xml: Document _GLIBCXX_DEBUG_BACKTRACE.
  * doc/xml/manual/using.xml: Likewise.

Tested under Linux x86_64 normal and _GLIBCXX_DEBUG modes.

Ok to commit ?

François
diff --git a/libstdc++-v3/doc/xml/manual/debug_mode.xml b/libstdc++-v3/doc/xml/manual/debug_mode.xml
index 988c4a93601..dadc0cd1bb4 100644
--- a/libstdc++-v3/doc/xml/manual/debug_mode.xml
+++ b/libstdc++-v3/doc/xml/manual/debug_mode.xml
@@ -161,6 +161,12 @@ which always works correctly.
   GLIBCXX_DEBUG_MESSAGE_LENGTH can be used to request a
   different length.
 
+Note that libstdc++ is able to produce backtraces on error.
+  It requires that you configure libstdc++ build with
+  --enable-libstdcxx-backtrace=yes.
+  Use -D_GLIBCXX_DEBUG_BACKTRACE to activate it.
+  You'll then have to link with libstdc++_libbacktrace static library
+  (-lstdc++_libbacktrace) to build your application.
 
 
 Using a Specific Debug Container
diff --git a/libstdc++-v3/doc/xml/manual/using.xml b/libstdc++-v3/doc/xml/manual/using.xml
index 36b86702d22..26f14fae194 100644
--- a/libstdc++-v3/doc/xml/manual/using.xml
+++ b/libstdc++-v3/doc/xml/manual/using.xml
@@ -1129,6 +1129,15 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 hello.cc -o test.exe
 	extensions and libstdc++-specific behavior into errors.
   
 
+_GLIBCXX_DEBUG_BACKTRACE
+
+  
+	Undefined by default. Considered only if libstdc++ has been configured with
+	--enable-libstdcxx-backtrace=yes and if _GLIBCXX_DEBUG
+	is defined. When defined, displays backtraces on
+	debug mode assertions.
+  
+
 _GLIBCXX_PARALLEL
 
   Undefined by default. When defined, compiles user code
@@ -1635,6 +1644,7 @@ A quick read of the relevant part of the GCC
   header will remain compatible between different GCC releases.
 
 
+
   
 
   Concurrency
diff --git a/libstdc++-v3/include/debug/formatter.h b/libstdc++-v3/include/debug/formatter.h
index 80e8ba46d1e..f31b44d184a 100644
--- a/libstdc++-v3/include/debug/formatter.h
+++ b/libstdc++-v3/include/debug/formatter.h
@@ -31,6 +31,37 @@
 
 #include 
 
+#if _GLIBCXX_HAVE_STACKTRACE
+struct __glibcxx_backtrace_state;
+
+extern "C"
+{
+  __glibcxx_backtrace_state*
+  __glibcxx_backtrace_create_state(const char*, int,
+   void(*)(void*, const char*, int),
+   void*);
+
+  typedef int (*__glibcxx_backtrace_full_callback) (
+void*, __UINTPTR_TYPE__, const char *, int, const char*);
+
+  typedef void (*__glibcxx_backtrace_error_callback) (
+void*, const char*, int);
+
+  typedef int (*__glibcxx_backtrace_full_func) (
+__glibcxx_backtrace_state*, int,
+__glibcxx_backtrace_full_callback,
+__glibcxx_backtrace_error_callback,
+void*);
+
+  int
+  __glibcxx_backtrace_full(
+__glibcxx_backtrace_state*, int,
+__glibcxx_backtrace_full_callback,
+__glibcxx_backtrace_error_callback,
+void*);
+}
+#endif
+
 #if __cpp_rtti
 # include 
 # define _GLIBCXX_TYPEID(_Type) (_Type)
@@ -565,6 +596,15 @@ namespace __gnu_debug
 		 const char* __function)
 : _M_file(__file), _M_line(__line), _M_num_parameters(0), _M_text(0)
 , _M_function(__function)
+#if _GLIBCXX_HAVE_STACKTRACE
+# ifdef _GLIBCXX_DEBUG_BACKTRACE
+, _M_backtrace_state(
+  __glibcxx_backtrace_create_state(nullptr, 0, nullptr, nullptr))
+, _M_backtrace_full(&__glibcxx_backtrace_full)
+# else
+, _M_backtrace_state()
+# endif
+#endif
 { }
 
 #if !_GLIBCXX_INLINE_VERSION
@@ -580,6 +620,10 @@ namespace __gnu_debug
 unsigned int	_M_num_parameters;
 const char*		_M_text;
 const char*		_M_function;
+#if _GLIBCXX_HAVE_STACKTRACE
+__glibcxx_backtrace_state*		_M_backtrace_state;
+

Re: [PATCH v2] Simplify memchr with small constant strings

2022-07-13 Thread H.J. Lu via Gcc-patches
On Wed, Jul 13, 2022 at 5:35 AM Richard Biener
 wrote:
>
> On Tue, Jul 12, 2022 at 6:59 PM H.J. Lu  wrote:
> >
> > On Fri, Jul 8, 2022 at 5:54 AM Richard Biener
> >  wrote:
> > >
> > > On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu  wrote:
> > > >
> > > > When memchr is applied on a constant string of no more than the bytes of
> > > > a word, simplify memchr by checking each byte in the constant string.
> > > >
> > > > int f (int a)
> > > > {
> > > >return  __builtin_memchr ("AE", a, 2) != 0;
> > > > }
> > > >
> > > > is simplified to
> > > >
> > > > int f (int a)
> > > > {
> > > >   return ((char) a == 'A' || (char) a == 'E') != 0;
> > > > }
> > > >
> > > > gcc/
> > > >
> > > > PR tree-optimization/103798
> > > > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h".
> > > > (simplify_builtin_call): Inline memchr with constant strings of
> > > > no more than the bytes of a word.
> > > > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global.
> > > > * tree-ssa-strlen.h (use_in_zero_equality): New.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR tree-optimization/103798
> > > > * c-c++-common/pr103798-1.c: New test.
> > > > * c-c++-common/pr103798-2.c: Likewise.
> > > > * c-c++-common/pr103798-3.c: Likewise.
> > > > * c-c++-common/pr103798-4.c: Likewise.
> > > > * c-c++-common/pr103798-5.c: Likewise.
> > > > * c-c++-common/pr103798-6.c: Likewise.
> > > > * c-c++-common/pr103798-7.c: Likewise.
> > > > * c-c++-common/pr103798-8.c: Likewise.
> > > > ---
> > > >  gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++
> > > >  gcc/testsuite/c-c++-common/pr103798-2.c | 30 
> > > >  gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++
> > > >  gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++
> > > >  gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++
> > > >  gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++
> > > >  gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++
> > > >  gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++
> > > >  gcc/tree-ssa-forwprop.cc| 64 +
> > > >  gcc/tree-ssa-strlen.cc  |  4 +-
> > > >  gcc/tree-ssa-strlen.h   |  2 +
> > > >  11 files changed, 289 insertions(+), 2 deletions(-)
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c
> > > >
> > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c 
> > > > b/gcc/testsuite/c-c++-common/pr103798-1.c
> > > > new file mode 100644
> > > > index 000..cd3edf569fc
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c
> > > > @@ -0,0 +1,28 @@
> > > > +/* { dg-do run } */
> > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
> > > > +
> > > > +__attribute__ ((weak))
> > > > +int
> > > > +f (char a)
> > > > +{
> > > > +   return  __builtin_memchr ("a", a, 1) == 0;
> > > > +}
> > > > +
> > > > +__attribute__ ((weak))
> > > > +int
> > > > +g (char a)
> > > > +{
> > > > +  return a != 'a';
> > > > +}
> > > > +
> > > > +int
> > > > +main ()
> > > > +{
> > > > + for (int i = 0; i < 255; i++)
> > > > +   if (f (i) != g (i))
> > > > + __builtin_abort ();
> > > > +
> > > > + return 0;
> > > > +}
> > > > +
> > > > +/* { dg-final { scan-assembler-not "memchr" } } */
> > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c 
> > > > b/gcc/testsuite/c-c++-common/pr103798-2.c
> > > > new file mode 100644
> > > > index 000..e7e99c3679e
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c
> > > > @@ -0,0 +1,30 @@
> > > > +/* { dg-do run } */
> > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
> > > > +
> > > > +#include 
> > > > +
> > > > +__attribute__ ((weak))
> > > > +int
> > > > +f (int a)
> > > > +{
> > > > +   return memchr ("aE", a, 2) != NULL;
> > > > +}
> > > > +
> > > > +__attribute__ ((weak))
> > > > +int
> > > > +g (char a)
> > > > +{
> > > > +  return a == 'a' || a == 'E';
> > > > +}
> > > > +
> > > > +int
> > > > +main ()
> > > > +{
> > > > + for (int i = 0; i < 255; i++)
> > > > +   if (f (i + 256) != g (i + 256))
> > > > + __builtin_abort ();
> > > > +
> > > > + return 0;
> > > > +}
> > > > +
> > > > +/* { dg-final { scan-assembler-not "memchr" } } */
> > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c 
> > > > b/gcc/testsuite/c-c++-common/pr103798-3.c
> > > > new file mode 100644
> > > > index 

[PATCH v3] Simplify memchr with small constant strings

2022-07-13 Thread H.J. Lu via Gcc-patches
When memchr is applied on a constant string of no more than the bytes of
a word, simplify memchr by checking each byte in the constant string.

int f (int a)
{
   return  __builtin_memchr ("AE", a, 2) != 0;
}

is simplified to

int f (int a)
{
  return ((char) a == 'A' || (char) a == 'E') != 0;
}

gcc/

PR tree-optimization/103798
* tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h".
(simplify_builtin_call): Inline memchr with constant strings of
no more than the bytes of a word.
* tree-ssa-strlen.cc (use_in_zero_equality): Make it global.
* tree-ssa-strlen.h (use_in_zero_equality): New.

gcc/testsuite/

PR tree-optimization/103798
* c-c++-common/pr103798-1.c: New test.
* c-c++-common/pr103798-2.c: Likewise.
* c-c++-common/pr103798-3.c: Likewise.
* c-c++-common/pr103798-4.c: Likewise.
* c-c++-common/pr103798-5.c: Likewise.
* c-c++-common/pr103798-6.c: Likewise.
* c-c++-common/pr103798-7.c: Likewise.
* c-c++-common/pr103798-8.c: Likewise.
* c-c++-common/pr103798-9.c: Likewise.
* c-c++-common/pr103798-10.c: Likewise.
---
 gcc/testsuite/c-c++-common/pr103798-1.c  | 28 +
 gcc/testsuite/c-c++-common/pr103798-10.c | 10 
 gcc/testsuite/c-c++-common/pr103798-2.c  | 30 ++
 gcc/testsuite/c-c++-common/pr103798-3.c  | 28 +
 gcc/testsuite/c-c++-common/pr103798-4.c  | 28 +
 gcc/testsuite/c-c++-common/pr103798-5.c  | 26 +
 gcc/testsuite/c-c++-common/pr103798-6.c  | 27 +
 gcc/testsuite/c-c++-common/pr103798-7.c  | 27 +
 gcc/testsuite/c-c++-common/pr103798-8.c  | 27 +
 gcc/testsuite/c-c++-common/pr103798-9.c  | 10 
 gcc/tree-ssa-forwprop.cc | 73 
 gcc/tree-ssa-strlen.cc   |  4 +-
 gcc/tree-ssa-strlen.h|  2 +
 13 files changed, 318 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-10.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c
 create mode 100644 gcc/testsuite/c-c++-common/pr103798-9.c

diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c 
b/gcc/testsuite/c-c++-common/pr103798-1.c
new file mode 100644
index 000..cd3edf569fc
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr103798-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
+
+__attribute__ ((weak))
+int
+f (char a)
+{
+   return  __builtin_memchr ("a", a, 1) == 0;
+}
+
+__attribute__ ((weak))
+int
+g (char a)
+{
+  return a != 'a';
+}
+
+int
+main ()
+{
+ for (int i = 0; i < 255; i++)
+   if (f (i) != g (i))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "memchr" } } */
diff --git a/gcc/testsuite/c-c++-common/pr103798-10.c 
b/gcc/testsuite/c-c++-common/pr103798-10.c
new file mode 100644
index 000..4677e9539fa
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr103798-10.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-optimized -save-temps" } */
+
+int
+f (char a)
+{
+  return  __builtin_memchr ("ac", a, 1) == 0;
+}
+
+/* { dg-final { scan-assembler "memchr" } } */
diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c 
b/gcc/testsuite/c-c++-common/pr103798-2.c
new file mode 100644
index 000..e7e99c3679e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr103798-2.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
+
+#include <string.h>
+
+__attribute__ ((weak))
+int
+f (int a)
+{
+   return memchr ("aE", a, 2) != NULL;
+}
+
+__attribute__ ((weak))
+int
+g (char a)
+{
+  return a == 'a' || a == 'E';
+}
+
+int
+main ()
+{
+ for (int i = 0; i < 255; i++)
+   if (f (i + 256) != g (i + 256))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "memchr" } } */
diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c 
b/gcc/testsuite/c-c++-common/pr103798-3.c
new file mode 100644
index 000..ddcedc7e238
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr103798-3.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
+
+__attribute__ ((weak))
+int
+f (char a)
+{
+   return  __builtin_memchr ("aEgZ", a, 3) == 0;
+}
+
+__attribute__ ((weak))
+int
+g (char a)
+{
+  return a != 'a' && a != 'E' && a != 'g';
+}
+
+int
+main ()
+{
+ for (int i = 0; i < 255; i++)
+   if (f (i) != g (i))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "memchr" } } */
diff --git 

Re: [PATCH v2 4/4] aarch64: Move vreinterpret definitions into the compiler

2022-07-13 Thread Richard Sandiford via Gcc-patches
Andrew Carlotti  writes:
> This removes a significant number of intrinsic definitions from the arm_neon.h
> header file, and reduces the amount of code duplication. The new macros and
> data structures are intended to also facilitate moving other intrinsic
> definitions out of the header file in future.
>
> There is a slight change in the behaviour of the bf16 vreinterpret 
> intrinsics
> when compiling without bf16 support. Expressions like:
>
> b = vreinterpretq_s32_bf16(vreinterpretq_bf16_s64(a))
>
> are now compiled successfully, instead of causing a 'target specific option
> mismatch' during inlining.
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-builtins.cc
>   (MODE_d_bf16, MODE_d_f16, MODE_d_f32, MODE_d_f64, MODE_d_s8)
>   (MODE_d_s16, MODE_d_s32, MODE_d_s64, MODE_d_u8, MODE_d_u16)
>   (MODE_d_u32, MODE_d_u64, MODE_d_p8, MODE_d_p16, MODE_d_p64)
>   (MODE_q_bf16, MODE_q_f16, MODE_q_f32, MODE_q_f64, MODE_q_s8)
>   (MODE_q_s16, MODE_q_s32, MODE_q_s64, MODE_q_u8, MODE_q_u16)
>   (MODE_q_u32, MODE_q_u64, MODE_q_p8, MODE_q_p16, MODE_q_p64)
>   (MODE_q_p128): Define macro to map to corresponding mode name.
>   (QUAL_bf16, QUAL_f16, QUAL_f32, QUAL_f64, QUAL_s8, QUAL_s16)
>   (QUAL_s32, QUAL_s64, QUAL_u8, QUAL_u16, QUAL_u32, QUAL_u64)
>   (QUAL_p8, QUAL_p16, QUAL_p64, QUAL_p128): Define macro to map to
>   corresponding qualifier name.
>   (LENGTH_d, LENGTH_q): Define macro to map to "" or "q" suffix.
>   (SIMD_INTR_MODE, SIMD_INTR_QUAL, SIMD_INTR_LENGTH_CHAR): Macro
>   functions for the above mappings
>   (VREINTERPRET_BUILTIN2, VREINTERPRET_BUILTINS1, VREINTERPRET_BUILTINS)
>   (VREINTERPRETQ_BUILTIN2, VREINTERPRETQ_BUILTINS1)
>   (VREINTERPRETQ_BUILTINS, VREINTERPRET_BUILTIN)
>   (AARCH64_SIMD_VREINTERPRET_BUILTINS): New macros to create definitions
>   for all vreinterpret intrinsics
>   (enum aarch64_builtins): Add vreinterpret function codes
>   (aarch64_init_simd_intrinsics): New
>   (handle_arm_neon_h): Improved comment.
>   (aarch64_general_fold_builtin): Fold vreinterpret calls
>   * config/aarch64/arm_neon.h
>   (vreinterpret_p8_f16, vreinterpret_p8_f64, vreinterpret_p8_s8)
>   (vreinterpret_p8_s16, vreinterpret_p8_s32, vreinterpret_p8_s64)
>   (vreinterpret_p8_f32, vreinterpret_p8_u8, vreinterpret_p8_u16)
>   (vreinterpret_p8_u32, vreinterpret_p8_u64, vreinterpret_p8_p16)
>   (vreinterpret_p8_p64, vreinterpretq_p8_f64, vreinterpretq_p8_s8)
>   (vreinterpretq_p8_s16, vreinterpretq_p8_s32, vreinterpretq_p8_s64)
>   (vreinterpretq_p8_f16, vreinterpretq_p8_f32, vreinterpretq_p8_u8)
>   (vreinterpretq_p8_u16, vreinterpretq_p8_u32, vreinterpretq_p8_u64)
>   (vreinterpretq_p8_p16, vreinterpretq_p8_p64, vreinterpretq_p8_p128)
>   (vreinterpret_p16_f16, vreinterpret_p16_f64, vreinterpret_p16_s8)
>   (vreinterpret_p16_s16, vreinterpret_p16_s32, vreinterpret_p16_s64)
>   (vreinterpret_p16_f32, vreinterpret_p16_u8, vreinterpret_p16_u16)
>   (vreinterpret_p16_u32, vreinterpret_p16_u64, vreinterpret_p16_p8)
>   (vreinterpret_p16_p64, vreinterpretq_p16_f64, vreinterpretq_p16_s8)
>   (vreinterpretq_p16_s16, vreinterpretq_p16_s32, vreinterpretq_p16_s64)
>   (vreinterpretq_p16_f16, vreinterpretq_p16_f32, vreinterpretq_p16_u8)
>   (vreinterpretq_p16_u16, vreinterpretq_p16_u32, vreinterpretq_p16_u64)
>   (vreinterpretq_p16_p8, vreinterpretq_p16_p64, vreinterpretq_p16_p128)
>   (vreinterpret_p64_f16, vreinterpret_p64_f64, vreinterpret_p64_s8)
>   (vreinterpret_p64_s16, vreinterpret_p64_s32, vreinterpret_p64_s64)
>   (vreinterpret_p64_f32, vreinterpret_p64_u8, vreinterpret_p64_u16)
>   (vreinterpret_p64_u32, vreinterpret_p64_u64, vreinterpret_p64_p8)
>   (vreinterpret_p64_p16, vreinterpretq_p64_f64, vreinterpretq_p64_s8)
>   (vreinterpretq_p64_s16, vreinterpretq_p64_s32, vreinterpretq_p64_s64)
>   (vreinterpretq_p64_f16, vreinterpretq_p64_f32, vreinterpretq_p64_p128)
>   (vreinterpretq_p64_u8, vreinterpretq_p64_u16, vreinterpretq_p64_p16)
>   (vreinterpretq_p64_u32, vreinterpretq_p64_u64, vreinterpretq_p64_p8)
>   (vreinterpretq_p128_p8, vreinterpretq_p128_p16, vreinterpretq_p128_f16)
>   (vreinterpretq_p128_f32, vreinterpretq_p128_p64, vreinterpretq_p128_s64)
>   (vreinterpretq_p128_u64, vreinterpretq_p128_s8, vreinterpretq_p128_s16)
>   (vreinterpretq_p128_s32, vreinterpretq_p128_u8, vreinterpretq_p128_u16)
>   (vreinterpretq_p128_u32, vreinterpret_f16_f64, vreinterpret_f16_s8)
>   (vreinterpret_f16_s16): (vreinterpret_f16_s32): (vreinterpret_f16_s64):
>   (vreinterpret_f16_f32, vreinterpret_f16_u8, vreinterpret_f16_u16)
>   (vreinterpret_f16_u32, vreinterpret_f16_u64, vreinterpret_f16_p8)
>   (vreinterpret_f16_p16, vreinterpret_f16_p64, vreinterpretq_f16_f64)
>   (vreinterpretq_f16_s8, vreinterpretq_f16_s16, vreinterpretq_f16_s32)
>   (vreinterpretq_f16_s64, 

Re: [PATCH v2 3/4] aarch64: Consolidate simd type lookup functions

2022-07-13 Thread Richard Sandiford via Gcc-patches
Andrew Carlotti  writes:
> There were several similarly-named functions, which each built or looked up a
> type using a different subset of valid modes or qualifiers.
>
> This change combines these all into a single function, which can additionally
> handle const and pointer qualifiers.

I like the part about getting rid of:

static tree
aarch64_simd_builtin_type (machine_mode mode,
   bool unsigned_p, bool poly_p)

and the flow of the new function.  However, I think it's still
slightly more readable if we keep the switch and lookup routines
separate, partly to keep down the size of the main routine and
partly to avoid the goto.

So how about:

- aarch64_simd_builtin_std_type becomes aarch64_int_or_fp_element_type
  but otherwise stays as-is

- aarch64_lookup_simd_builtin_type becomes aarch64_lookup_simd_type_in_table,
  without the:

  /* Non-poly scalar modes map to standard types not in the table.  */
  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
return aarch64_simd_builtin_std_type (mode, q);

  that your new routine handles instead.

- The new routine is called aarch64_simd_builtin_type rather than
  aarch64_build_simd_builtin_type (since the latter implies creating
  a new type).  It uses the routines:

  if ((qualifiers & qualifier_poly) || VECTOR_MODE_P (mode))
type = aarch64_lookup_simd_type_in_table (mode, q);
  else
type = aarch64_int_or_fp_element_type (mode, q);
  gcc_assert (type);

?

I realise this is all eye of the beholder stuff though.

Thanks,
Richard

> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-builtins.cc
>   (aarch64_simd_builtin_std_type, aarch64_lookup_simd_builtin_type)
>   (aarch64_simd_builtin_type): Combine and replace with...
>   (aarch64_build_simd_builtin_type): ...this new function.
>   (aarch64_init_fcmla_laneq_builtins): Update to call new function.
>   (aarch64_init_simd_builtin_functions): Ditto.
>   (aarch64_init_crc32_builtins): Ditto.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> 55ad2e8b6831d6cc2b039270c8656d429347092d..6b413a36a09c7a4ac41b0fe7c414a3247580f222
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -789,79 +789,101 @@ aarch64_general_mangle_builtin_type (const_tree type)
>  }
>  
>  static tree
> -aarch64_simd_builtin_std_type (machine_mode mode,
> -enum aarch64_type_qualifiers q)
> -{
> -#define QUAL_TYPE(M)  \
> -  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
> -  switch (mode)
> -{
> -case E_QImode:
> -  return QUAL_TYPE (QI);
> -case E_HImode:
> -  return QUAL_TYPE (HI);
> -case E_SImode:
> -  return QUAL_TYPE (SI);
> -case E_DImode:
> -  return QUAL_TYPE (DI);
> -case E_TImode:
> -  return QUAL_TYPE (TI);
> -case E_OImode:
> -  return aarch64_simd_intOI_type_node;
> -case E_CImode:
> -  return aarch64_simd_intCI_type_node;
> -case E_XImode:
> -  return aarch64_simd_intXI_type_node;
> -case E_HFmode:
> -  return aarch64_fp16_type_node;
> -case E_SFmode:
> -  return float_type_node;
> -case E_DFmode:
> -  return double_type_node;
> -case E_BFmode:
> -  return aarch64_bf16_type_node;
> -default:
> -  gcc_unreachable ();
> -}
> -#undef QUAL_TYPE
> -}
> -
> -static tree
> -aarch64_lookup_simd_builtin_type (machine_mode mode,
> -   enum aarch64_type_qualifiers q)
> +aarch64_build_simd_builtin_type (machine_mode mode,
> +  enum aarch64_type_qualifiers qualifiers)
>  {
> +  tree type = NULL_TREE;
>int i;
>int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
>  
> -  /* Non-poly scalar modes map to standard types not in the table.  */
> -  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
> -return aarch64_simd_builtin_std_type (mode, q);
> +  /* For pointers, we want a pointer to the basic type of the vector.  */
> +  if ((qualifiers & qualifier_pointer) && VECTOR_MODE_P (mode))
> +mode = GET_MODE_INNER (mode);
>  
> -  for (i = 0; i < nelts; i++)
> +  if ((qualifiers & qualifier_poly) || VECTOR_MODE_P (mode))
>  {
> -  if (aarch64_simd_types[i].mode == mode
> -   && aarch64_simd_types[i].q == q)
> - return aarch64_simd_types[i].itype;
> -  if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
> - for (int j = 0; j < 3; j++)
> -   if (aarch64_simd_tuple_modes[i][j] == mode
> +  int q = qualifiers & (qualifier_poly | qualifier_unsigned);
> +  /* Poly or vector modes map to types in the table.  */
> +  for (i = 0; i < nelts; i++)
> + {
> +   if (aarch64_simd_types[i].mode == mode
> && aarch64_simd_types[i].q == q)
> - return aarch64_simd_tuple_types[i][j];
> + {
> +   type = aarch64_simd_types[i].itype;
> +   

Re: [PATCH v2 2/4] aarch64: Remove qualifier_internal

2022-07-13 Thread Richard Sandiford via Gcc-patches
Andrew Carlotti  writes:
> This has been unused since 2014, so there's no reason to retain it.
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-builtins.cc
>   (enum aarch64_type_qualifiers): Remove qualifier_internal.
>   (aarch64_init_simd_builtin_functions): Remove qualifier_internal check.

OK, thanks.

Richard

> ---
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> 52d27c6978990ca3e6c523654fe1cdc952e77ad7..55ad2e8b6831d6cc2b039270c8656d429347092d
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -145,9 +145,7 @@ enum aarch64_type_qualifiers
>qualifier_maybe_immediate = 0x10, /* 1 << 4  */
>/* void foo (...).  */
>qualifier_void = 0x20, /* 1 << 5  */
> -  /* Some patterns may have internal operands, this qualifier is an
> - instruction to the initialisation code to skip this operand.  */
> -  qualifier_internal = 0x40, /* 1 << 6  */
> +  /* 1 << 6 is now unused */
>/* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
>   rather than using the type of the operand.  */
>qualifier_map_mode = 0x80, /* 1 << 7  */
> @@ -1207,10 +1205,6 @@ aarch64_init_simd_builtin_functions (bool 
> called_from_pragma)
> else
>   type_signature[op_num] = 's';
>  
> -   /* Skip an internal operand for vget_{low, high}.  */
> -   if (qualifiers & qualifier_internal)
> - continue;
> -
> /* Some builtins have different user-facing types
>for certain arguments, encoded in d->mode.  */
> if (qualifiers & qualifier_map_mode)


Re: [PATCH v2 1/4] aarch64: Add V1DI mode

2022-07-13 Thread Richard Sandiford via Gcc-patches
Andrew Carlotti  writes:
> We already have a V1DF mode, so this makes the vector modes more consistent.
>
> Additionally, this allows us to recognise uint64x1_t and int64x1_t types given
> only the mode and type qualifiers (e.g. in aarch64_lookup_simd_builtin_type).
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-builtins.cc
>   (v1di_UP): Add V1DI mode to _UP macros.
>   * config/aarch64/aarch64-modes.def (VECTOR_MODE): Add V1DI mode
>   * config/aarch64/aarch64-simd-builtin-types.def: Use V1DI mode
>   * config/aarch64/aarch64-simd.md
>   (vec_extractv2dfv1df): Replace with...
>   (vec_extract): ...this.
>   * config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Add V1DI 
> mode
>   * config/aarch64/iterators.md
>   (VQ_2E, V1HALF, V1half): New.
>   (nunits): Add V1DI mode.

OK, thanks.

Please follow the instructions on https://gcc.gnu.org/gitwrite.html
to get write access, listing me as sponsor.

Richard

> ---
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> c21476d7ae963450b12efa24418ce4004a3c74bf..52d27c6978990ca3e6c523654fe1cdc952e77ad7
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -55,6 +55,7 @@
>  #define v2si_UP  E_V2SImode
>  #define v2sf_UP  E_V2SFmode
>  #define v1df_UP  E_V1DFmode
> +#define v1di_UP  E_V1DImode
> #define di_UP    E_DImode
> #define df_UP    E_DFmode
>  #define v16qi_UP E_V16QImode
> diff --git a/gcc/config/aarch64/aarch64-modes.def 
> b/gcc/config/aarch64/aarch64-modes.def
> index 
> 8f399225a8048d93108e33e9d49c736aeb5612ce..d3c9b74434cd2c0d0cb1a2fd26af8c0bf38a4cfa
>  100644
> --- a/gcc/config/aarch64/aarch64-modes.def
> +++ b/gcc/config/aarch64/aarch64-modes.def
> @@ -70,6 +70,7 @@ VECTOR_MODES (INT, 8);/*   V8QI V4HI V2SI.  */
>  VECTOR_MODES (INT, 16);   /* V16QI V8HI V4SI V2DI.  */
>  VECTOR_MODES (FLOAT, 8);  /* V2SF.  */
>  VECTOR_MODES (FLOAT, 16); /*V4SF V2DF.  */
> +VECTOR_MODE (INT, DI, 1); /* V1DI.  */
>  VECTOR_MODE (FLOAT, DF, 1);   /* V1DF.  */
>  VECTOR_MODE (FLOAT, HF, 2);   /* V2HF.  */
>  
> diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def 
> b/gcc/config/aarch64/aarch64-simd-builtin-types.def
> index 
> 248e51e96549fb640817d79c099a3f5e62c71317..40545581408e2ee2be84f08abb5801058c4ea42e
>  100644
> --- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
> @@ -24,7 +24,7 @@
>ENTRY (Int16x8_t, V8HI, none, 11)
>ENTRY (Int32x2_t, V2SI, none, 11)
>ENTRY (Int32x4_t, V4SI, none, 11)
> -  ENTRY (Int64x1_t, DI, none, 11)
> +  ENTRY (Int64x1_t, V1DI, none, 11)
>ENTRY (Int64x2_t, V2DI, none, 11)
>ENTRY (Uint8x8_t, V8QI, unsigned, 11)
>ENTRY (Uint8x16_t, V16QI, unsigned, 12)
> @@ -32,7 +32,7 @@
>ENTRY (Uint16x8_t, V8HI, unsigned, 12)
>ENTRY (Uint32x2_t, V2SI, unsigned, 12)
>ENTRY (Uint32x4_t, V4SI, unsigned, 12)
> -  ENTRY (Uint64x1_t, DI, unsigned, 12)
> +  ENTRY (Uint64x1_t, V1DI, unsigned, 12)
>ENTRY (Uint64x2_t, V2DI, unsigned, 12)
>ENTRY (Poly8_t, QI, poly, 9)
>ENTRY (Poly16_t, HI, poly, 10)
> @@ -42,7 +42,7 @@
>ENTRY (Poly8x16_t, V16QI, poly, 12)
>ENTRY (Poly16x4_t, V4HI, poly, 12)
>ENTRY (Poly16x8_t, V8HI, poly, 12)
> -  ENTRY (Poly64x1_t, DI, poly, 12)
> +  ENTRY (Poly64x1_t, V1DI, poly, 12)
>ENTRY (Poly64x2_t, V2DI, poly, 12)
>ENTRY (Float16x4_t, V4HF, none, 13)
>ENTRY (Float16x8_t, V8HF, none, 13)
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> a00e1c6ef8d6b43d8b1a0fe4701e6b8c1f0f622f..587a45d77721e1b39accbad7dbeca4d741eccb10
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -8026,16 +8026,16 @@
>  })
>  
>  ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
> -(define_expand "vec_extractv2dfv1df"
> -  [(match_operand:V1DF 0 "register_operand")
> -   (match_operand:V2DF 1 "register_operand")
> +(define_expand "vec_extract"
> +  [(match_operand: 0 "register_operand")
> +   (match_operand:VQ_2E 1 "register_operand")
> (match_operand 2 "immediate_operand")]
>"TARGET_SIMD"
>  {
> -  /* V1DF is rarely used by other patterns, so it should be better to hide
> - it in a subreg destination of a normal DF op.  */
> -  rtx scalar0 = gen_lowpart (DFmode, operands[0]);
> -  emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
> +  /* V1DI and V1DF are rarely used by other patterns, so it should be better
> + to hide it in a subreg destination of a normal DI or DF op.  */
> +  rtx scalar0 = gen_lowpart (mode, operands[0]);
> +  emit_insn (gen_vec_extract (scalar0, operands[1], 
> operands[2]));
>DONE;
>  })
>  
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> 

libgo patch committed: Don't include <linux/fs.h> in sysinfo.c

2022-07-13 Thread Ian Lance Taylor via Gcc-patches
This libgo patch stops including <linux/fs.h> when building
gen-sysinfo.go.  Removing this doesn't change anything at least with
glibc 2.33.  The include was added in https://go.dev/cl/6100049 but
it's not clear why.  This should fix GCC PR 106266.  Bootstrapped and
ran Go testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
9b487dbc298242fdde127e7827e728545c308aac
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 7c5c45672d7..5ea0406cd8e 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-d295a0a2c96c0f7c3abd94fea3aa4e2303bf2af2
+ff68b1a147eb60082fd60c198db0ef5477ade938
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/sysinfo.c b/libgo/sysinfo.c
index a4259c02ded..fc0210992fa 100644
--- a/libgo/sysinfo.c
+++ b/libgo/sysinfo.c
@@ -158,9 +158,6 @@
 #if defined(HAVE_LINUX_ETHER_H)
 #include <linux/ether.h>
 #endif
-#if defined(HAVE_LINUX_FS_H)
-#include <linux/fs.h>
-#endif
 #if defined(HAVE_LINUX_REBOOT_H)
 #include <linux/reboot.h>
 #endif


Re: [PATCH 2/3] tree-cfg: do not duplicate returns_twice calls

2022-07-13 Thread Jeff Law via Gcc-patches




On 7/13/2022 1:13 AM, Richard Biener via Gcc-patches wrote:

On Tue, Jul 12, 2022 at 10:10 PM Alexander Monakov  wrote:


Apologies for the prolonged silence Richard, it is a bit of an obscure topic,
and I was unsure I'd be able to handle any complications in a timely manner.
I'm ready to revisit it now, please see below.

On Mon, 17 Jan 2022, Richard Biener wrote:


On Fri, Jan 14, 2022 at 7:21 PM Alexander Monakov  wrote:

A returns_twice call may have associated abnormal edges that correspond
to the "second return" from the call. If the call is duplicated, the
copies of those edges also need to be abnormal, but e.g. tracer does not
enforce that. Just prohibit the (unlikely to be useful) duplication.

The general CFG copying routines properly duplicate those edges, no?

No (in fact you say so in the next paragraph). In general I think they cannot,
abnormal edges are a special case, so it should be the responsibility of the
caller.


Tracer uses duplicate_block so it should also get copies of all successor
edges of that block.  It also only traces along normal edges.  What it might
miss is abnormal incoming edges - is that what you are referring to?

Yes (I think its entire point is to build a "trace" of duplicated blocks that
does not have incoming edges in the middle, abnormal or not).


That would be a thing we don't handle in duplicate_block on its own but
that callers are expected to do (though I don't see copy_bbs doing that
either).  I wonder if we can trigger this issue for some testcase?

Oh yes (in fact my desire to find a testcase delayed this quite a bit).
When compiling the following testcase with -O2 -ftracer:

__attribute__((returns_twice))
int rtwice_a(int), rtwice_b(int);

int f(int *x)
{
 volatile unsigned k, i = (*x);

 for (k = 1; (i = rtwice_a(i)) * k; k = 2);

 for (; (i = rtwice_b(i)) * k; k = 4);

 return k;
}

tracer manages to eliminate the ABNORMAL_DISPATCHER block completely, so
the possibility of transferring control back to rtwice_a from rtwice_b
is no longer modeled in the IR. I could spend some time "upgrading" this
to an end-to-end miscompilation, but I hope you agree this is quite broken
already.


The thing to check would be incoming abnormal edges in
can_duplicate_block_p, not (only) returns twice functions?

Unfortunately not, abnormal edges are also used for computed gotos, which are
less magic than returns_twice edges and should not block tracer I think.

I think computed gotos should use regular edges, only non-local goto should
use abnormals...

I suppose asm goto also uses abnormal edges?

Btw, I don't see how they in general are "less magic".  Sure, we have an
explicit receiver (the destination label), but we can only do edge inserts
if we have a single computed goto edge into a block (we can "move" the
label to the block created when splitting the edge).
I suspect treating them like abnormals probably came from the inability 
to reliably split them way back when we introduced RTL GCSE and the like.


Jeff



Re: [PATCH 2/3] tree-cfg: do not duplicate returns_twice calls

2022-07-13 Thread Alexander Monakov via Gcc-patches
On Wed, 13 Jul 2022, Richard Biener wrote:

> > > The thing to check would be incoming abnormal edges in
> > > can_duplicate_block_p, not (only) returns twice functions?
> >
> > Unfortunately not, abnormal edges are also used for computed gotos, which 
> > are
> > less magic than returns_twice edges and should not block tracer I think.
> 
> I think computed gotos should use regular edges, only non-local goto should
> use abnormals...

Yeah, afaict it's not documented what "abnormal" is supposed to mean :/

> I suppose asm goto also uses abnormal edges?

Heh, no, asm goto appears to use normal edges, but there's an old gap in
their specification: can you use them like computed gotos, i.e. can asm-goto
jump to a computed target? Or must they be similar to plain gotos where the
jump label is redirectable (because it's substitutable in the asm template)?

If you take a restrictive interpretation (asm goto may not jump to a computed
label) then using regular edges looks fine.

> Btw, I don't see how they in general are "less magic".  Sure, we have an
> explicit receiver (the destination label), but we can only do edge inserts
> if we have a single computed goto edge into a block (we can "move" the
> label to the block created when splitting the edge).

Sure, they are a bit magic, but returns_twice edges are even more magic: their
destination looks tied to a label in the IR, but in reality their destination
is inside a call that returns twice (hence GCC must be careful not to insert
anything between the label and the call, like in patch 1/3).

> > This implies patch 1/3 [1] unnecessarily blocks sinking to computed goto 
> > targets.
> > [1] https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588498.html
> >
> > How would you like to proceed here? Is my initial patch ok?
> 
> Hmm, so for returns twice calls duplicate_block correctly copies the call
> and redirects the provided incoming edge to it.  The API does not
> handle adding any further incoming edges - the caller would be responsible
> for this.  So I still somewhat fail to see the point here.  If tracer does not
> handle extra incoming edges properly then we need to fix tracer?

I think abnormal edges corresponding to computed gotos are fine: we are
attempting to create a chain of blocks with no incoming edges in the middle,
right? Destinations of computed gotos remain at labels of original blocks.

Agreed about correcting this in the tracer.

> This also includes non-local goto (we seem to copy non-local labels just
> fine - wasn't there a bugreport about this!?).

Sorry, no idea about this.

> So I think can_duplicate_block_p is the wrong place to fix (the RTL side
> would need a similar fix anyhow?)

Right. I'm happy to leave both RTL and GIMPLE can_duplicate_block_p as is,
and instead constrain just the tracer. Alternative patch below:

* tracer.cc (analyze_bb): Disallow duplication of returns_twice calls.

diff --git a/gcc/tracer.cc b/gcc/tracer.cc
index 64517846d..422e2b6a7 100644
--- a/gcc/tracer.cc
+++ b/gcc/tracer.cc
@@ -132,14 +132,19 @@ analyze_bb (basic_block bb, int *count)
   gimple *stmt;
   int n = 0;

+  bool can_dup = can_duplicate_block_p (CONST_CAST_BB (bb));
+
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 {
   stmt = gsi_stmt (gsi);
   n += estimate_num_insns (stmt, &eni_size_weights);
+  if (can_dup && cfun->calls_setjmp && gimple_code (stmt) == GIMPLE_CALL
+ && gimple_call_flags (stmt) & ECF_RETURNS_TWICE)
+   can_dup = false;
 }
   *count = n;

-  cache_can_duplicate_bb_p (bb, can_duplicate_block_p (CONST_CAST_BB (bb)));
+  cache_can_duplicate_bb_p (bb, can_dup);
 }

 /* Return true if E1 is more frequent than E2.  */



[COMMITTED] Use nonzero bits in range-ops to determine if < 0 is false.

2022-07-13 Thread Aldy Hernandez via Gcc-patches
For a signed integer, x < 0 is false if the sign bit in the nonzero
bits of X is clear.

Both CCP and ipa-cp can set the global nonzero bits in a range, which
means we can now use some of that information in evrp and subsequent
passes.  I've adjusted two tests which now fold things earlier because
of this optimization.

Tested on x86-64 Linux.

gcc/ChangeLog:

* range-op.cc (operator_lt::fold_range): Use nonzero bits.

gcc/testsuite/ChangeLog:

* g++.dg/ipa/pure-const-3.C: Adjust.
* gcc.dg/pr102983.c: Adjust.
---
 gcc/range-op.cc | 3 +++
 gcc/testsuite/g++.dg/ipa/pure-const-3.C | 2 +-
 gcc/testsuite/gcc.dg/pr102983.c | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 0e16408027c..e184129f9af 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -803,6 +803,9 @@ operator_lt::fold_range (irange &r, tree type,
 r = range_true (type);
   else if (!wi::lt_p (op1.lower_bound (), op2.upper_bound (), sign))
 r = range_false (type);
+  // Use nonzero bits to determine if < 0 is false.
+  else if (op2.zero_p () && !wi::neg_p (op1.get_nonzero_bits (), sign))
+r = range_false (type);
   else
 r = range_true_and_false (type);
   return true;
diff --git a/gcc/testsuite/g++.dg/ipa/pure-const-3.C 
b/gcc/testsuite/g++.dg/ipa/pure-const-3.C
index 172a36bedb5..b4a4673e86e 100644
--- a/gcc/testsuite/g++.dg/ipa/pure-const-3.C
+++ b/gcc/testsuite/g++.dg/ipa/pure-const-3.C
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-ipa-vrp -fdump-tree-optimized -fno-tree-ccp"  } */
+/* { dg-options "-O2 -fno-ipa-vrp -fdump-tree-optimized -fno-tree-ccp 
-fdisable-tree-evrp"  } */
 int *ptr;
 static int barvar;
 static int b(int a);
diff --git a/gcc/testsuite/gcc.dg/pr102983.c b/gcc/testsuite/gcc.dg/pr102983.c
index ef58af6def0..e1bd24b2e39 100644
--- a/gcc/testsuite/gcc.dg/pr102983.c
+++ b/gcc/testsuite/gcc.dg/pr102983.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-evrp" } */
+/* { dg-options "-O2 -fdump-tree-evrp -fno-tree-ccp" } */
 void foo(void);
 
 static int a = 1;
-- 
2.36.1



[COMMITTED] Clear nonzero mask when inverting ranges.

2022-07-13 Thread Aldy Hernandez via Gcc-patches
Every time we set a range we should take into account the nonzero
mask.  This happens automatically for the set() methods, plus all the
other assignment, intersect, and union methods.  Unfortunately I
forgot about the invert code.

Also, for good measure I audited the rest of the setters in
value_range.cc and plugged the legacy code to pessimize the masks to
-1 for union/intersect, since we don't support the masks on them (or
rather, we don't keep very good track of them).

Tested on x86-64 Linux.

gcc/ChangeLog:

* value-range.cc (irange::copy_to_legacy): Set nonzero mask.
(irange::legacy_intersect): Clear nonzero mask.
(irange::legacy_union): Same.
(irange::invert): Same.
---
 gcc/value-range.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 2aa973b2af2..528ed547ef3 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -331,6 +331,7 @@ irange::copy_to_legacy (const irange )
   m_base[0] = src.m_base[0];
   m_base[1] = src.m_base[1];
   m_kind = src.m_kind;
+  m_nonzero_mask = src.m_nonzero_mask;
   return;
 }
   // Copy multi-range to legacy.
@@ -1336,6 +1337,9 @@ irange::legacy_intersect (irange *vr0, const irange *vr1)
   intersect_ranges (, , ,
vr1->kind (), vr1->min (), vr1->max ());
 
+  // Pessimize nonzero masks, as we don't support them.
+  m_nonzero_mask = NULL;
+
   /* Make sure to canonicalize the result though as the inversion of a
  VR_RANGE can still be a VR_RANGE.  */
   if (vr0kind == VR_UNDEFINED)
@@ -1657,6 +1661,9 @@ irange::legacy_union (irange *vr0, const irange *vr1)
   union_ranges (, , ,
vr1->kind (), vr1->min (), vr1->max ());
 
+  // Pessimize nonzero masks, as we don't support them.
+  m_nonzero_mask = NULL;
+
   if (vr0kind == VR_UNDEFINED)
 vr0->set_undefined ();
   else if (vr0kind == VR_VARYING)
@@ -2253,6 +2260,7 @@ irange::invert ()
 }
 
   gcc_checking_assert (!undefined_p () && !varying_p ());
+  m_nonzero_mask = NULL;
 
   // We always need one more set of bounds to represent an inverse, so
   // if we're at the limit, we can't properly represent things.
-- 
2.36.1



[PATCH v2 4/4] aarch64: Move vreinterpret definitions into the compiler

2022-07-13 Thread Andrew Carlotti via Gcc-patches
This removes a significant number of intrinsic definitions from the arm_neon.h
header file, and reduces the amount of code duplication. The new macros and
data structures are intended to also facilitate moving other intrinsic
definitions out of the header file in future.

There is a slight change in the behaviour of the bf16 vreinterpret intrinsics
when compiling without bf16 support. Expressions like:

b = vreinterpretq_s32_bf16(vreinterpretq_bf16_s64(a))

are now compiled successfully, instead of causing a 'target specific option
mismatch' during inlining.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(MODE_d_bf16, MODE_d_f16, MODE_d_f32, MODE_d_f64, MODE_d_s8)
(MODE_d_s16, MODE_d_s32, MODE_d_s64, MODE_d_u8, MODE_d_u16)
(MODE_d_u32, MODE_d_u64, MODE_d_p8, MODE_d_p16, MODE_d_p64)
(MODE_q_bf16, MODE_q_f16, MODE_q_f32, MODE_q_f64, MODE_q_s8)
(MODE_q_s16, MODE_q_s32, MODE_q_s64, MODE_q_u8, MODE_q_u16)
(MODE_q_u32, MODE_q_u64, MODE_q_p8, MODE_q_p16, MODE_q_p64)
(MODE_q_p128): Define macro to map to corresponding mode name.
(QUAL_bf16, QUAL_f16, QUAL_f32, QUAL_f64, QUAL_s8, QUAL_s16)
(QUAL_s32, QUAL_s64, QUAL_u8, QUAL_u16, QUAL_u32, QUAL_u64)
(QUAL_p8, QUAL_p16, QUAL_p64, QUAL_p128): Define macro to map to
corresponding qualifier name.
(LENGTH_d, LENGTH_q): Define macro to map to "" or "q" suffix.
(SIMD_INTR_MODE, SIMD_INTR_QUAL, SIMD_INTR_LENGTH_CHAR): Macro
functions for the above mappings
(VREINTERPRET_BUILTIN2, VREINTERPRET_BUILTINS1, VREINTERPRET_BUILTINS)
(VREINTERPRETQ_BUILTIN2, VREINTERPRETQ_BUILTINS1)
(VREINTERPRETQ_BUILTINS, VREINTERPRET_BUILTIN)
(AARCH64_SIMD_VREINTERPRET_BUILTINS): New macros to create definitions
for all vreinterpret intrinsics
(enum aarch64_builtins): Add vreinterpret function codes
(aarch64_init_simd_intrinsics): New
(handle_arm_neon_h): Improved comment.
(aarch64_general_fold_builtin): Fold vreinterpret calls
* config/aarch64/arm_neon.h
(vreinterpret_p8_f16, vreinterpret_p8_f64, vreinterpret_p8_s8)
(vreinterpret_p8_s16, vreinterpret_p8_s32, vreinterpret_p8_s64)
(vreinterpret_p8_f32, vreinterpret_p8_u8, vreinterpret_p8_u16)
(vreinterpret_p8_u32, vreinterpret_p8_u64, vreinterpret_p8_p16)
(vreinterpret_p8_p64, vreinterpretq_p8_f64, vreinterpretq_p8_s8)
(vreinterpretq_p8_s16, vreinterpretq_p8_s32, vreinterpretq_p8_s64)
(vreinterpretq_p8_f16, vreinterpretq_p8_f32, vreinterpretq_p8_u8)
(vreinterpretq_p8_u16, vreinterpretq_p8_u32, vreinterpretq_p8_u64)
(vreinterpretq_p8_p16, vreinterpretq_p8_p64, vreinterpretq_p8_p128)
(vreinterpret_p16_f16, vreinterpret_p16_f64, vreinterpret_p16_s8)
(vreinterpret_p16_s16, vreinterpret_p16_s32, vreinterpret_p16_s64)
(vreinterpret_p16_f32, vreinterpret_p16_u8, vreinterpret_p16_u16)
(vreinterpret_p16_u32, vreinterpret_p16_u64, vreinterpret_p16_p8)
(vreinterpret_p16_p64, vreinterpretq_p16_f64, vreinterpretq_p16_s8)
(vreinterpretq_p16_s16, vreinterpretq_p16_s32, vreinterpretq_p16_s64)
(vreinterpretq_p16_f16, vreinterpretq_p16_f32, vreinterpretq_p16_u8)
(vreinterpretq_p16_u16, vreinterpretq_p16_u32, vreinterpretq_p16_u64)
(vreinterpretq_p16_p8, vreinterpretq_p16_p64, vreinterpretq_p16_p128)
(vreinterpret_p64_f16, vreinterpret_p64_f64, vreinterpret_p64_s8)
(vreinterpret_p64_s16, vreinterpret_p64_s32, vreinterpret_p64_s64)
(vreinterpret_p64_f32, vreinterpret_p64_u8, vreinterpret_p64_u16)
(vreinterpret_p64_u32, vreinterpret_p64_u64, vreinterpret_p64_p8)
(vreinterpret_p64_p16, vreinterpretq_p64_f64, vreinterpretq_p64_s8)
(vreinterpretq_p64_s16, vreinterpretq_p64_s32, vreinterpretq_p64_s64)
(vreinterpretq_p64_f16, vreinterpretq_p64_f32, vreinterpretq_p64_p128)
(vreinterpretq_p64_u8, vreinterpretq_p64_u16, vreinterpretq_p64_p16)
(vreinterpretq_p64_u32, vreinterpretq_p64_u64, vreinterpretq_p64_p8)
(vreinterpretq_p128_p8, vreinterpretq_p128_p16, vreinterpretq_p128_f16)
(vreinterpretq_p128_f32, vreinterpretq_p128_p64, vreinterpretq_p128_s64)
(vreinterpretq_p128_u64, vreinterpretq_p128_s8, vreinterpretq_p128_s16)
(vreinterpretq_p128_s32, vreinterpretq_p128_u8, vreinterpretq_p128_u16)
(vreinterpretq_p128_u32, vreinterpret_f16_f64, vreinterpret_f16_s8)
(vreinterpret_f16_s16): (vreinterpret_f16_s32): (vreinterpret_f16_s64):
(vreinterpret_f16_f32, vreinterpret_f16_u8, vreinterpret_f16_u16)
(vreinterpret_f16_u32, vreinterpret_f16_u64, vreinterpret_f16_p8)
(vreinterpret_f16_p16, vreinterpret_f16_p64, vreinterpretq_f16_f64)
(vreinterpretq_f16_s8, vreinterpretq_f16_s16, vreinterpretq_f16_s32)
(vreinterpretq_f16_s64, vreinterpretq_f16_f32, vreinterpretq_f16_u8)

[PATCH v2 2/4] aarch64: Remove qualifier_internal

2022-07-13 Thread Andrew Carlotti via Gcc-patches
This has been unused since 2014, so there's no reason to retain it.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(enum aarch64_type_qualifiers): Remove qualifier_internal.
(aarch64_init_simd_builtin_functions): Remove qualifier_internal check.

---

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
52d27c6978990ca3e6c523654fe1cdc952e77ad7..55ad2e8b6831d6cc2b039270c8656d429347092d
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -145,9 +145,7 @@ enum aarch64_type_qualifiers
   qualifier_maybe_immediate = 0x10, /* 1 << 4  */
   /* void foo (...).  */
   qualifier_void = 0x20, /* 1 << 5  */
-  /* Some patterns may have internal operands, this qualifier is an
- instruction to the initialisation code to skip this operand.  */
-  qualifier_internal = 0x40, /* 1 << 6  */
+  /* 1 << 6 is now unused */
   /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
  rather than using the type of the operand.  */
   qualifier_map_mode = 0x80, /* 1 << 7  */
@@ -1207,10 +1205,6 @@ aarch64_init_simd_builtin_functions (bool 
called_from_pragma)
  else
type_signature[op_num] = 's';
 
- /* Skip an internal operand for vget_{low, high}.  */
- if (qualifiers & qualifier_internal)
-   continue;
-
  /* Some builtins have different user-facing types
 for certain arguments, encoded in d->mode.  */
  if (qualifiers & qualifier_map_mode)


[PATCH v2 3/4] aarch64: Consolidate simd type lookup functions

2022-07-13 Thread Andrew Carlotti via Gcc-patches
There were several similarly-named functions, which each built or looked up a
type using a different subset of valid modes or qualifiers.

This change combines these all into a single function, which can additionally
handle const and pointer qualifiers.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(aarch64_simd_builtin_std_type, aarch64_lookup_simd_builtin_type)
(aarch64_simd_builtin_type): Combine and replace with...
(aarch64_build_simd_builtin_type): ...this new function.
(aarch64_init_fcmla_laneq_builtins): Update to call new function.
(aarch64_init_simd_builtin_functions): Ditto.
(aarch64_init_crc32_builtins): Ditto.

---

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
55ad2e8b6831d6cc2b039270c8656d429347092d..6b413a36a09c7a4ac41b0fe7c414a3247580f222
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -789,79 +789,101 @@ aarch64_general_mangle_builtin_type (const_tree type)
 }
 
 static tree
-aarch64_simd_builtin_std_type (machine_mode mode,
-  enum aarch64_type_qualifiers q)
-{
-#define QUAL_TYPE(M)  \
-  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
-  switch (mode)
-{
-case E_QImode:
-  return QUAL_TYPE (QI);
-case E_HImode:
-  return QUAL_TYPE (HI);
-case E_SImode:
-  return QUAL_TYPE (SI);
-case E_DImode:
-  return QUAL_TYPE (DI);
-case E_TImode:
-  return QUAL_TYPE (TI);
-case E_OImode:
-  return aarch64_simd_intOI_type_node;
-case E_CImode:
-  return aarch64_simd_intCI_type_node;
-case E_XImode:
-  return aarch64_simd_intXI_type_node;
-case E_HFmode:
-  return aarch64_fp16_type_node;
-case E_SFmode:
-  return float_type_node;
-case E_DFmode:
-  return double_type_node;
-case E_BFmode:
-  return aarch64_bf16_type_node;
-default:
-  gcc_unreachable ();
-}
-#undef QUAL_TYPE
-}
-
-static tree
-aarch64_lookup_simd_builtin_type (machine_mode mode,
- enum aarch64_type_qualifiers q)
+aarch64_build_simd_builtin_type (machine_mode mode,
+enum aarch64_type_qualifiers qualifiers)
 {
+  tree type = NULL_TREE;
   int i;
   int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
 
-  /* Non-poly scalar modes map to standard types not in the table.  */
-  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
-return aarch64_simd_builtin_std_type (mode, q);
+  /* For pointers, we want a pointer to the basic type of the vector.  */
+  if ((qualifiers & qualifier_pointer) && VECTOR_MODE_P (mode))
+mode = GET_MODE_INNER (mode);
 
-  for (i = 0; i < nelts; i++)
+  if ((qualifiers & qualifier_poly) || VECTOR_MODE_P (mode))
 {
-  if (aarch64_simd_types[i].mode == mode
- && aarch64_simd_types[i].q == q)
-   return aarch64_simd_types[i].itype;
-  if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
-   for (int j = 0; j < 3; j++)
- if (aarch64_simd_tuple_modes[i][j] == mode
+  int q = qualifiers & (qualifier_poly | qualifier_unsigned);
+  /* Poly or vector modes map to types in the table.  */
+  for (i = 0; i < nelts; i++)
+   {
+ if (aarch64_simd_types[i].mode == mode
  && aarch64_simd_types[i].q == q)
-   return aarch64_simd_tuple_types[i][j];
+   {
+ type = aarch64_simd_types[i].itype;
+ goto finished_type_lookup;
+   }
+ if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
+   {
+ for (int j = 0; j < 3; j++)
+   {
+ if (aarch64_simd_tuple_modes[i][j] == mode
+   && aarch64_simd_types[i].q == q)
+   {
+ type = aarch64_simd_tuple_types[i][j];
+ goto finished_type_lookup;
+   }
+   }
+   }
+   }
 }
+  else
+{
+  /* Non-poly scalar modes map to standard types.  */
+#define QUAL_TYPE(M) ((qualifiers & qualifier_unsigned) \
+  ? unsigned_int##M##_type_node : int##M##_type_node);
+  switch (mode)
+   {
+   case E_QImode:
+ type = QUAL_TYPE (QI);
+ break;
+   case E_HImode:
+ type = QUAL_TYPE (HI);
+ break;
+   case E_SImode:
+ type = QUAL_TYPE (SI);
+ break;
+   case E_DImode:
+ type = QUAL_TYPE (DI);
+ break;
+   case E_TImode:
+ type = QUAL_TYPE (TI);
+ break;
+   case E_OImode:
+ type = aarch64_simd_intOI_type_node;
+ break;
+   case E_CImode:
+ type = aarch64_simd_intCI_type_node;
+ break;
+   case E_XImode:
+ type = aarch64_simd_intXI_type_node;
+ break;
+   case E_HFmode:
+ type = aarch64_fp16_type_node;
+ 

[PATCH v2 1/4] aarch64: Add V1DI mode

2022-07-13 Thread Andrew Carlotti via Gcc-patches
We already have a V1DF mode, so this makes the vector modes more consistent.

Additionally, this allows us to recognise uint64x1_t and int64x1_t types given
only the mode and type qualifiers (e.g. in aarch64_lookup_simd_builtin_type).

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(v1di_UP): Add V1DI mode to _UP macros.
* config/aarch64/aarch64-modes.def (VECTOR_MODE): Add V1DI mode
* config/aarch64/aarch64-simd-builtin-types.def: Use V1DI mode
* config/aarch64/aarch64-simd.md
(vec_extractv2dfv1df): Replace with...
(vec_extract): ...this.
* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Add V1DI 
mode
* config/aarch64/iterators.md
(VQ_2E, V1HALF, V1half): New.
(nunits): Add V1DI mode.

---

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
c21476d7ae963450b12efa24418ce4004a3c74bf..52d27c6978990ca3e6c523654fe1cdc952e77ad7
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -55,6 +55,7 @@
 #define v2si_UP  E_V2SImode
 #define v2sf_UP  E_V2SFmode
 #define v1df_UP  E_V1DFmode
+#define v1di_UP  E_V1DImode
 #define di_UPE_DImode
 #define df_UPE_DFmode
 #define v16qi_UP E_V16QImode
diff --git a/gcc/config/aarch64/aarch64-modes.def 
b/gcc/config/aarch64/aarch64-modes.def
index 
8f399225a8048d93108e33e9d49c736aeb5612ce..d3c9b74434cd2c0d0cb1a2fd26af8c0bf38a4cfa
 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -70,6 +70,7 @@ VECTOR_MODES (INT, 8);/*   V8QI V4HI V2SI.  */
 VECTOR_MODES (INT, 16);   /* V16QI V8HI V4SI V2DI.  */
 VECTOR_MODES (FLOAT, 8);  /* V2SF.  */
 VECTOR_MODES (FLOAT, 16); /*V4SF V2DF.  */
+VECTOR_MODE (INT, DI, 1); /* V1DI.  */
 VECTOR_MODE (FLOAT, DF, 1);   /* V1DF.  */
 VECTOR_MODE (FLOAT, HF, 2);   /* V2HF.  */
 
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def 
b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index 
248e51e96549fb640817d79c099a3f5e62c71317..40545581408e2ee2be84f08abb5801058c4ea42e
 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -24,7 +24,7 @@
   ENTRY (Int16x8_t, V8HI, none, 11)
   ENTRY (Int32x2_t, V2SI, none, 11)
   ENTRY (Int32x4_t, V4SI, none, 11)
-  ENTRY (Int64x1_t, DI, none, 11)
+  ENTRY (Int64x1_t, V1DI, none, 11)
   ENTRY (Int64x2_t, V2DI, none, 11)
   ENTRY (Uint8x8_t, V8QI, unsigned, 11)
   ENTRY (Uint8x16_t, V16QI, unsigned, 12)
@@ -32,7 +32,7 @@
   ENTRY (Uint16x8_t, V8HI, unsigned, 12)
   ENTRY (Uint32x2_t, V2SI, unsigned, 12)
   ENTRY (Uint32x4_t, V4SI, unsigned, 12)
-  ENTRY (Uint64x1_t, DI, unsigned, 12)
+  ENTRY (Uint64x1_t, V1DI, unsigned, 12)
   ENTRY (Uint64x2_t, V2DI, unsigned, 12)
   ENTRY (Poly8_t, QI, poly, 9)
   ENTRY (Poly16_t, HI, poly, 10)
@@ -42,7 +42,7 @@
   ENTRY (Poly8x16_t, V16QI, poly, 12)
   ENTRY (Poly16x4_t, V4HI, poly, 12)
   ENTRY (Poly16x8_t, V8HI, poly, 12)
-  ENTRY (Poly64x1_t, DI, poly, 12)
+  ENTRY (Poly64x1_t, V1DI, poly, 12)
   ENTRY (Poly64x2_t, V2DI, poly, 12)
   ENTRY (Float16x4_t, V4HF, none, 13)
   ENTRY (Float16x8_t, V8HF, none, 13)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
a00e1c6ef8d6b43d8b1a0fe4701e6b8c1f0f622f..587a45d77721e1b39accbad7dbeca4d741eccb10
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -8026,16 +8026,16 @@
 })
 
 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
-(define_expand "vec_extractv2dfv1df"
-  [(match_operand:V1DF 0 "register_operand")
-   (match_operand:V2DF 1 "register_operand")
+(define_expand "vec_extract"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VQ_2E 1 "register_operand")
(match_operand 2 "immediate_operand")]
   "TARGET_SIMD"
 {
-  /* V1DF is rarely used by other patterns, so it should be better to hide
- it in a subreg destination of a normal DF op.  */
-  rtx scalar0 = gen_lowpart (DFmode, operands[0]);
-  emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
+  /* V1DI and V1DF are rarely used by other patterns, so it should be better
+ to hide it in a subreg destination of a normal DI or DF op.  */
+  rtx scalar0 = gen_lowpart (mode, operands[0]);
+  emit_insn (gen_vec_extract (scalar0, operands[1], operands[2]));
   DONE;
 })
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
f650abbc4ce49cf0947049931f86bad1130c3428..278910af0a38c0203a962d34c6792191f0fe9e31
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3568,7 +3568,7 @@ aarch64_classify_vector_mode (machine_mode mode)
 case E_V8QImode:
 case E_V4HImode:
 case E_V2SImode:
-/* ...E_V1DImode doesn't exist.  */
+case E_V1DImode:
 

[PATCH] Speed up DOM record_temporary_equivalences

2022-07-13 Thread Richard Biener via Gcc-patches
The following gets away without computing a dominance bitmap when
fast queries are not available and we are doing
back_propagate_equivalences.  The computed bitmap can be
cheaply kept up-to-date during the domwalk since it is
simply the set of blocks on the domwalk stack.

Abstraction of the threading makes this somewhat awkward
but it also fulfills the fixme comment in only considering
equivalences in already (domwalk) visited blocks, even when
querying from the outgoing block of a forward thread.  Maybe
that's not what is intended but at least we have no testsuite
coverage of such missed equivalences.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-dom.h (record_temporary_equivalences): Remove.
* tree-ssa-dom.cc (dom_jt_state::m_blocks_on_stack): New.
(dom_jt_state::get_blocks_on_stack): Likewise.
(dom_opt_dom_walker::dom_opt_dom_walker): Take dom_jt_state.
(back_propagate_equivalences): Remove dominator bitmap
compute and instead use passed in m_blocks_on_stack.
(record_temporary_equivalences): Likewise.
(record_equivalences_from_incoming_edge): Likewise.
(dom_opt_dom_walker::before_dom_children): Maintain and
pass down blocks on stack.
(dom_opt_dom_walker::after_dom_children): Likewise.
---
 gcc/tree-ssa-dom.cc | 67 +
 gcc/tree-ssa-dom.h  |  3 --
 2 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/gcc/tree-ssa-dom.cc b/gcc/tree-ssa-dom.cc
index 43acc756c96..f5e8f574997 100644
--- a/gcc/tree-ssa-dom.cc
+++ b/gcc/tree-ssa-dom.cc
@@ -112,7 +112,8 @@ static void record_equality (tree, tree, class 
const_and_copies *);
 static void record_equivalences_from_phis (basic_block);
 static void record_equivalences_from_incoming_edge (basic_block,
class const_and_copies *,
-   class avail_exprs_stack *);
+   class avail_exprs_stack *,
+   bitmap blocks_on_stack);
 static void eliminate_redundant_computations (gimple_stmt_iterator *,
  class const_and_copies *,
  class avail_exprs_stack *);
@@ -120,6 +121,8 @@ static void record_equivalences_from_stmt (gimple *, int,
   class avail_exprs_stack *);
 static void dump_dominator_optimization_stats (FILE *file,
   hash_table *);
+static void record_temporary_equivalences (edge, class const_and_copies *,
+  class avail_exprs_stack *, bitmap);
 
 /* Constructor for EDGE_INFO.  An EDGE_INFO instance is always
associated with an edge E.  */
@@ -591,6 +594,7 @@ public:
   dom_jt_state (const_and_copies *copies, avail_exprs_stack *avails)
 : m_copies (copies), m_avails (avails)
   {
+bitmap_tree_view (m_blocks_on_stack);
   }
   void push (edge e) override
   {
@@ -606,12 +610,16 @@ public:
   }
   void register_equivs_edge (edge e) override
   {
-record_temporary_equivalences (e, m_copies, m_avails);
+record_temporary_equivalences (e, m_copies, m_avails, m_blocks_on_stack);
   }
   void register_equiv (tree dest, tree src, bool update) override;
+  bitmap get_blocks_on_stack () { return m_blocks_on_stack; }
 private:
   const_and_copies *m_copies;
   avail_exprs_stack *m_avails;
+  /* Set of blocks on the stack, to be used for medium-fast
+ dominance queries in back_propagate_equivalences.  */
+  auto_bitmap m_blocks_on_stack;
 };
 
 void
@@ -653,7 +661,7 @@ class dom_opt_dom_walker : public dom_walker
 public:
   dom_opt_dom_walker (cdi_direction direction,
  jump_threader *threader,
- jt_state *state,
+ dom_jt_state *state,
  gimple_ranger *ranger,
  const_and_copies *const_and_copies,
  avail_exprs_stack *avail_exprs_stack)
@@ -693,7 +701,7 @@ private:
 
   jump_threader *m_threader;
   gimple_ranger *m_ranger;
-  jt_state *m_state;
+  dom_jt_state *m_state;
 };
 
 /* Jump threading, redundancy elimination and const/copy propagation.
@@ -962,7 +970,7 @@ dom_valueize (tree t)
 static void
 back_propagate_equivalences (tree lhs, edge e,
 class const_and_copies *const_and_copies,
-bitmap *domby)
+bitmap domby)
 {
   use_operand_p use_p;
   imm_use_iterator iter;
@@ -997,29 +1005,12 @@ back_propagate_equivalences (tree lhs, edge e,
}
   else
{
- /* Profiling has shown the domination tests here can be fairly
-expensive when the fast indexes are not computed.
-We get significant improvements by building the
-set of blocks that 

Re: [PATCH] c++: coroutines - Overlap variables in frame [PR105989]

2022-07-13 Thread Michal Jankovic via Gcc-patches
Hi Iain,

thanks for the info. I have some follow-up questions.

On Jul 12 2022, at 7:11 pm, Iain Sandoe  wrote:

> Hi Michal,
>  
>> On 12 Jul 2022, at 16:14, Michal Jankovič
>>  wrote:
>  
>> One other related thing I would like to investigate is reducing the
>> number of compiler generated variables in the frame, particularly
>> _Coro_destroy_fn and _Coro_self_handle.   
>>  
>> As I understand it, _Coro_destroy_fn just sets a flag in
>> _Coro_resume_index and calls _Coro_resume_fn; it should be possible to
>> move this logic to __builtin_coro_destroy, so that only _Coro_resume_fn
>> is stored in the frame;
>  
> That is a particular point about GCC’s implementation … (it is not
> necessarily, or even
> likely to be the same for other implementations) - see below.
>  
> I was intending to do experiment with making the ramp/resume/destroy
> value a parameter
> to the actor function so that we would have something like -
>  
> ramp calls  actor(frame, 0)
> resume calls  actor(frame, 1)
> destroy calls  actor(frame, 2)  
> - the token values are illustrative, not intended to be a final version.
>  
> I think that should allow for more inlining opportunities and possibly
> a way forward to
> frame elision (a.k.a halo).
>  
>> this would however change the coroutine ABI - I don't know if that's
>> a problem.
>  
> The external ABI for the coroutine is the  
> resume,
> destroy pointers  
> and the promise  
> and that one can find each of these from the frame pointer.
>  
> This was agreed between the interested “vendors” so that one compiler
> could invoke
> coroutines built by another.  So I do not think this is so much a
> useful area to explore.
>  

I understand. I still want to try to implement a more light-weight frame
layout with just one function pointer; would it be possible to merge
such a change if it was made opt-in via a compiler flag, eg
`-fsmall-coroutine-frame`? My use-case for this is embedded environments
with very limited memory, and I do not care about interoperability with
other compilers there.  

> Also the intent is that an indirect call through the frame pointer is
> the most frequent
> operation so should be the most efficient.   
>  resume() might be called many times,  
>  destroy() just once thus it is a cold code path  
>  - space can be important too - but interoperability was the goal here.
>  
>> The _Coro_self_handle should be constructible on-demand from the
>> frame address.
>  
> Yes, and in the header the relevant items are all constexpr - so that
> should happen in the
> user’s code.  I elected to have that value in the frame to avoid
> recreating it each time - I
> suppose that is a trade-off of one optimisation c.f. another …  

If the handle construction cannot be optimized out, and it's thus  
a tradeoff between frame size and number of instructions, then this
could also be enabled by a hypothetical `-fsmall-coroutine-frame`.

Coming back to this:

>>> (the other related optimisation is to eliminate frame entries for
>>> scopes without any suspend
>>> points - which has the potential to save even more space for code with
>>> sparse use of co_)

This would be nice; although it could be encompassed by a more general  
optimization - eliminate frame entries for all variables which are not  
accessed (directly or via pointer / reference) beyond a suspend point.
To be fair, I do not know how to get started on such an optimization,
or if it is even possible to do on the frontend. This would however be
immensely useful for reducing the frame size taken-up by complicated
co_await expressions (among other things), for example, if I have a
composed operation:

co_await when_either(get_leaf_awaitable_1(), get_leaf_awaitable_2());

Right now, this creates space in the frame for the temporary 'leaf'  
awaitables, which were already moved into the composed awaitable.
If the awaitable has an operator co_await that returns the real awaiter,
the original awaitable is also stored in the frame, even if it  
is not referenced by the awaiter; another unused object gets stored if  
the .await_transform() customization point was used.

What are your thoughts on the feasibility / difficulty of implementing
such an optimization?

Michal

>>  
>> Do you have any advice / opinions on this before I try to implement it?
>  
> Hopefully, the notes above help.
>  
> I will rebase my latest code changes as soon as I have a chance and
> put them somewhere
> for you to look at - basically, these are to try and address the
> correctness issues we face,
>  
> Iain
>  
>  
>>  
>> Michal
>>  
>> On Jul 12 2022, at 4:08 pm, Iain Sandoe  wrote:
>>  
>>> Hi Michal,
>>>  
 On 12 Jul 2022, at 14:35, Michal Jankovič via Gcc-patches
  wrote:
  
 Currently, coroutine frames store all variables of a coroutine separately,
 even if their lifetime does not overlap (they are in distinct
 scopes). This
 patch implements overlapping distinct variable scopes in the
 coroutine 

Re: [PATCH v2] Simplify memchr with small constant strings

2022-07-13 Thread Richard Biener via Gcc-patches
On Tue, Jul 12, 2022 at 6:59 PM H.J. Lu  wrote:
>
> On Fri, Jul 8, 2022 at 5:54 AM Richard Biener
>  wrote:
> >
> > On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu  wrote:
> > >
> > > When memchr is applied on a constant string of no more than the bytes of
> > > a word, simplify memchr by checking each byte in the constant string.
> > >
> > > int f (int a)
> > > {
> > >return  __builtin_memchr ("AE", a, 2) != 0;
> > > }
> > >
> > > is simplified to
> > >
> > > int f (int a)
> > > {
> > >   return ((char) a == 'A' || (char) a == 'E') != 0;
> > > }
> > >
> > > gcc/
> > >
> > > PR tree-optimization/103798
> > > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h".
> > > (simplify_builtin_call): Inline memchr with constant strings of
> > > no more than the bytes of a word.
> > > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global.
> > > * tree-ssa-strlen.h (use_in_zero_equality): New.
> > >
> > > gcc/testsuite/
> > >
> > > PR tree-optimization/103798
> > > * c-c++-common/pr103798-1.c: New test.
> > > * c-c++-common/pr103798-2.c: Likewise.
> > > * c-c++-common/pr103798-3.c: Likewise.
> > > * c-c++-common/pr103798-4.c: Likewise.
> > > * c-c++-common/pr103798-5.c: Likewise.
> > > * c-c++-common/pr103798-6.c: Likewise.
> > > * c-c++-common/pr103798-7.c: Likewise.
> > > * c-c++-common/pr103798-8.c: Likewise.
> > > ---
> > >  gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++
> > >  gcc/testsuite/c-c++-common/pr103798-2.c | 30 
> > >  gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++
> > >  gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++
> > >  gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++
> > >  gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++
> > >  gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++
> > >  gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++
> > >  gcc/tree-ssa-forwprop.cc| 64 +
> > >  gcc/tree-ssa-strlen.cc  |  4 +-
> > >  gcc/tree-ssa-strlen.h   |  2 +
> > >  11 files changed, 289 insertions(+), 2 deletions(-)
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c
> > >  create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c
> > >
> > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c 
> > > b/gcc/testsuite/c-c++-common/pr103798-1.c
> > > new file mode 100644
> > > index 000..cd3edf569fc
> > > --- /dev/null
> > > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
> > > +
> > > +__attribute__ ((weak))
> > > +int
> > > +f (char a)
> > > +{
> > > +   return  __builtin_memchr ("a", a, 1) == 0;
> > > +}
> > > +
> > > +__attribute__ ((weak))
> > > +int
> > > +g (char a)
> > > +{
> > > +  return a != 'a';
> > > +}
> > > +
> > > +int
> > > +main ()
> > > +{
> > > + for (int i = 0; i < 255; i++)
> > > +   if (f (i) != g (i))
> > > + __builtin_abort ();
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-not "memchr" } } */
> > > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c 
> > > b/gcc/testsuite/c-c++-common/pr103798-2.c
> > > new file mode 100644
> > > index 000..e7e99c3679e
> > > --- /dev/null
> > > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c
> > > @@ -0,0 +1,30 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
> > > +
> > > +#include 
> > > +
> > > +__attribute__ ((weak))
> > > +int
> > > +f (int a)
> > > +{
> > > +   return memchr ("aE", a, 2) != NULL;
> > > +}
> > > +
> > > +__attribute__ ((weak))
> > > +int
> > > +g (char a)
> > > +{
> > > +  return a == 'a' || a == 'E';
> > > +}
> > > +
> > > +int
> > > +main ()
> > > +{
> > > + for (int i = 0; i < 255; i++)
> > > +   if (f (i + 256) != g (i + 256))
> > > + __builtin_abort ();
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-not "memchr" } } */
> > > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c 
> > > b/gcc/testsuite/c-c++-common/pr103798-3.c
> > > new file mode 100644
> > > index 000..ddcedc7e238
> > > --- /dev/null
> > > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */
> > > +
> > > +__attribute__ ((weak))
> > > +int
> > > +f (char a)
> > > +{
> > > +   return  __builtin_memchr ("aEgZ", a, 3) == 0;
> > > +}

Re: [PATCH v2 1/2] aarch64: Don't return invalid GIMPLE assign statements

2022-07-13 Thread Richard Biener via Gcc-patches
On Wed, Jul 13, 2022 at 12:50 PM Andrew Carlotti
 wrote:
>
> On Wed, Jul 13, 2022 at 09:10:25AM +0100, Richard Sandiford wrote:
> > Richard Biener via Gcc-patches  writes:
> > > On Tue, Jul 12, 2022 at 4:38 PM Andrew Carlotti  
> > > wrote:
> > >>
> > >> aarch64_general_gimple_fold_builtin doesn't check whether the LHS of a
> > >> function call is null before converting it to an assign statement. To 
> > >> avoid
> > >> returning an invalid GIMPLE statement in this case, we instead assign the
> > >> expression result to a new (unused) variable.
> > >>
> > >> This change only affects code that:
> > >> 1) Calls an intrinsic function that has no side effects;
> > >> 2) Does not use or store the value returned by the intrinsic;
> > >> 3) Uses parameters that prevent the front-end eliminating the call prior 
> > >> to
> > >> gimplification.
> > >>
> > >> The ICE is unlikely to have occurred in the wild, as it relies on the 
> > >> presence
> > >> of a redundant intrinsic call.
> > >
> > > Other targets usually simply refrain from folding intrinsic calls with no 
> > > LHS.
> > > Another option is to just drop it on the floor if it does not have any
> > > side-effects which for the gimple_fold_builtin hook means folding it to
> > > a GIMPLE_NOP (gimple_build_nop ()).
> >
> > Sorry, I just pushed the patch before seeing this.
> >
> > I guess the problem with refraining from folding calls with no lhs
> > is that it has to be done on a per-function basis.  (E.g. stores
> > should still be folded.)  It then becomes something that we need
> > to remember for each individual call.  E.g. ix86_gimple_fold_builtin
> > seems to have three different pieces of code for handling null lhses,
> > even with its heavy use of gotos.
> >
> > So a nice thing about the current patch is that it handles all this
> > in one place only.

True, I don't much like the x86 way but then who cares about
intrinsic uses without a LHS ...

> > Thanks,
> > Richard
>
> I specifically wanted to avoid not folding the call, because always
> folding means that the builtin doesn't need to be implemented anywhere
> else (which isn't relevant here, but may become relevant when folding
> newly defined builtins in the future).
>
> I considered dropping the statement, but I wasn't sure at the time that
> I could do it safely. I could send a patch to instead replace new_stmt
> with a GIMPLE_NOP.

If you can be sure there's no side-effect on the RHS then I think
I'd prefer that over allocating an SSA name for something that's
going to be DCEd anyway.

Richard.

> > >> gcc/ChangeLog:
> > >>
> > >>  * config/aarch64/aarch64-builtins.cc
> > >>  (aarch64_general_gimple_fold_builtin): Add fixup for invalid GIMPLE.
> > >>
> > >> gcc/testsuite/ChangeLog:
> > >>
> > >>  * gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c: New test.
> > >>
> > >> ---
> > >>
> > >> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> > >> b/gcc/config/aarch64/aarch64-builtins.cc
> > >> index 
> > >> e0a741ac663188713e21f457affa57217d074783..5753988a9964967c27a03aca5fddb9025fd8ed6e
> > >>  100644
> > >> --- a/gcc/config/aarch64/aarch64-builtins.cc
> > >> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> > >> @@ -3022,6 +3022,16 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> > >> fcode, gcall *stmt,
> > >>  default:
> > >>break;
> > >>  }
> > >> +
> > >> +  /* GIMPLE assign statements (unlike calls) require a non-null lhs. If 
> > >> we
> > >> + created an assign statement with a null lhs, then fix this by 
> > >> assigning
> > >> + to a new (and subsequently unused) variable. */
> > >> +  if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs 
> > >> (new_stmt))
> > >> +{
> > >> +  tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
> > >> +  gimple_assign_set_lhs (new_stmt, new_lhs);
> > >> +}
> > >> +
> > >>return new_stmt;
> > >>  }
> > >>
> > >> diff --git 
> > >> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c 
> > >> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
> > >> new file mode 100644
> > >> index 
> > >> ..345307456b175307f5cb22de5e59cfc6254f2737
> > >> --- /dev/null
> > >> +++ 
> > >> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
> > >> @@ -0,0 +1,9 @@
> > >> +/* { dg-do compile { target { aarch64*-*-* } } } */
> > >> +
> > >> +#include 
> > >> +
> > >> +int8_t *bar();
> > >> +
> > >> +void foo() {
> > >> +  __builtin_aarch64_ld1v16qi(bar());
> > >> +}


Re: [PATCH] lto-plugin: use -pthread only for detected targets

2022-07-13 Thread Richard Biener via Gcc-patches
On Wed, Jul 13, 2022 at 10:24 AM Martin Liška  wrote:
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

Didn't we have it that way and not work?  IIRC LDFLAGS is only
used during configure link tests and _not_ substituted?

> Thanks,
> Martin
>
> Use -pthread only if we are going to use pthread functionality.
>
> PR bootstrap/106156
>
> lto-plugin/ChangeLog:
>
> * configure: Regenerate.
> * configure.ac: Use -pthread only when use_locking == true.
> * Makefile.in: Rely on ac_lto_plugin_ldflags.
> ---
>  lto-plugin/Makefile.in  | 2 +-
>  lto-plugin/configure| 6 --
>  lto-plugin/configure.ac | 2 ++
>  3 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/lto-plugin/Makefile.in b/lto-plugin/Makefile.in
> index 9453bc7d607..6b161c01683 100644
> --- a/lto-plugin/Makefile.in
> +++ b/lto-plugin/Makefile.in
> @@ -345,7 +345,7 @@ libexecsubdir := 
> $(libexecdir)/gcc/$(real_target_noncanonical)/$(gcc_version)$(a
>  AM_CPPFLAGS = -I$(top_srcdir)/../include $(DEFS)
>  AM_CFLAGS = @ac_lto_plugin_warn_cflags@ $(CET_HOST_FLAGS) 
> -DBASE_VERSION='"$(gcc_version)"'
>  # The plug-in depends on pthreads.
> -AM_LDFLAGS = -pthread @ac_lto_plugin_ldflags@
> +AM_LDFLAGS = @ac_lto_plugin_ldflags@
>  AM_LIBTOOLFLAGS = --tag=disable-static
>  libexecsub_LTLIBRARIES = liblto_plugin.la
>  in_gcc_libs = $(foreach lib, $(libexecsub_LTLIBRARIES), 
> $(gcc_build_dir)/$(lib))
> diff --git a/lto-plugin/configure b/lto-plugin/configure
> index 870e49b2e62..0967ba1c798 100755
> --- a/lto-plugin/configure
> +++ b/lto-plugin/configure
> @@ -6023,6 +6023,8 @@ case $target in
>  esac
>
>  if test x$use_locking = xyes; then
> +  LDFLAGS="$LDFLAGS -pthread"
> +
>ac_fn_c_check_header_mongrel "$LINENO" "pthread.h" 
> "ac_cv_header_pthread_h" "$ac_includes_default"
>  if test "x$ac_cv_header_pthread_h" = xyes; then :
>
> @@ -12104,7 +12106,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 12107 "configure"
> +#line 12109 "configure"
>  #include "confdefs.h"
>
>  #if HAVE_DLFCN_H
> @@ -12210,7 +12212,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 12213 "configure"
> +#line 12215 "configure"
>  #include "confdefs.h"
>
>  #if HAVE_DLFCN_H
> diff --git a/lto-plugin/configure.ac b/lto-plugin/configure.ac
> index 18eb4f60b0a..a350aa56a8b 100644
> --- a/lto-plugin/configure.ac
> +++ b/lto-plugin/configure.ac
> @@ -100,6 +100,8 @@ case $target in
>  esac
>
>  if test x$use_locking = xyes; then
> +  LDFLAGS="$LDFLAGS -pthread"
> +
>AC_CHECK_HEADER(pthread.h,
>  [AC_DEFINE(HAVE_PTHREAD_LOCKING, 1, [Define if the system provides 
> pthread locking mechanism.])])
>  fi
> --
> 2.37.0
>


[PATCH] xtensa: Minor fix for FP constant synthesis

2022-07-13 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch fixes a non-fatal issue about negative constant values derived
from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'.

And also replaces the dedicated code in FP constant synthesis split
pattern with the appropriate existing function call.

gcc/ChangeLog:

* config/xtensa/xtensa.md:
In FP constant synthesis split pattern, subcontract to
avoid_constant_pool_reference() as in the case of integer,
because it can handle well too.  And cast to int32_t before
calling xtensa_constantsynth() in order to ignore upper 32-bit.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/constsynth_double.c:
Modify in order to catch the issue.
---
 gcc/config/xtensa/xtensa.md   | 35 +--
 .../gcc.target/xtensa/constsynth_double.c |  2 +-
 2 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 9d998589631..6a58d3e2776 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1244,35 +1244,16 @@
   "! optimize_debug && reload_completed"
   [(const_int 0)]
 {
-  int i = 0;
-  rtx x = XEXP (operands[1], 0);
-  long l[2];
-  if (SYMBOL_REF_P (x)
-  && CONSTANT_POOL_ADDRESS_P (x))
-x = get_pool_constant (x);
-  else if (GET_CODE (x) == CONST)
-{
-  x = XEXP (x, 0);
-  gcc_assert (GET_CODE (x) == PLUS
- && SYMBOL_REF_P (XEXP (x, 0))
- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
- && CONST_INT_P (XEXP (x, 1)));
-  i = INTVAL (XEXP (x, 1));
-  gcc_assert (i == 0 || i == 4);
-  i /= 4;
-  x = get_pool_constant (XEXP (x, 0));
-}
-  else
-gcc_unreachable ();
-  if (GET_MODE (x) == SFmode)
-REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]);
-  else if (GET_MODE (x) == DFmode)
-REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
-  else
+  rtx x = avoid_constant_pool_reference (operands[1]);
+  long l;
+  HOST_WIDE_INT value;
+  if (! CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode)
 FAIL;
+  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   x = gen_rtx_REG (SImode, REGNO (operands[0]));
-  if (! xtensa_constantsynth (x, l[i]))
-emit_move_insn (x, GEN_INT (l[i]));
+  value = (int32_t)l;
+  if (! xtensa_constantsynth (x, value))
+emit_move_insn (x, GEN_INT (value));
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
index 890ca504780..5fba6a98650 100644
--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
@@ -5,7 +5,7 @@ void test(unsigned int count, double array[])
 {
   unsigned int i;
   for (i = 0; i < count; ++i)
-array[i] = 1.0;
+array[i] = 8.988474246316506e+307;
 }
 
 /* { dg-final { scan-assembler-not "l32r" } } */
-- 
2.20.1


Re: [PATCH v2 1/2] aarch64: Don't return invalid GIMPLE assign statements

2022-07-13 Thread Andrew Carlotti via Gcc-patches
On Wed, Jul 13, 2022 at 09:10:25AM +0100, Richard Sandiford wrote:
> Richard Biener via Gcc-patches  writes:
> > On Tue, Jul 12, 2022 at 4:38 PM Andrew Carlotti  
> > wrote:
> >>
> >> aarch64_general_gimple_fold_builtin doesn't check whether the LHS of a
> >> function call is null before converting it to an assign statement. To avoid
> >> returning an invalid GIMPLE statement in this case, we instead assign the
> >> expression result to a new (unused) variable.
> >>
> >> This change only affects code that:
> >> 1) Calls an intrinsic function that has no side effects;
> >> 2) Does not use or store the value returned by the intrinsic;
> >> 3) Uses parameters that prevent the front-end eliminating the call prior to
> >> gimplification.
> >>
> >> The ICE is unlikely to have occurred in the wild, as it relies on the 
> >> presence
> >> of a redundant intrinsic call.
> >
> > Other targets usually simply refrain from folding intrinsic calls with no 
> > LHS.
> > Another option is to just drop it on the floor if it does not have any
> > side-effects which for the gimple_fold_builtin hook means folding it to
> > a GIMPLE_NOP (gimple_build_nop ()).
> 
> Sorry, I just pushed the patch before seeing this.
> 
> I guess the problem with refraining from folding calls with no lhs
> is that it has to be done on a per-function basis.  (E.g. stores
> should still be folded.)  It then becomes something that we need
> to remember for each individual call.  E.g. ix86_gimple_fold_builtin
> seems to have three different pieces of code for handling null lhses,
> even with its heavy use of gotos.
> 
> So a nice thing about the current patch is that it handles all this
> in one place only.
> 
> Thanks,
> Richard

I specifically wanted to avoid not folding the call, because always
folding means that the builtin doesn't need to be implemented anywhere
else (which isn't relevant here, but may become relevant when folding
newly defined builtins in the future).

I considered dropping the statement, but I wasn't sure at the time that
I could do it safely. I could send a patch to instead replace new_stmt
with a GIMPLE_NOP.

> >> gcc/ChangeLog:
> >>
> >>  * config/aarch64/aarch64-builtins.cc
> >>  (aarch64_general_gimple_fold_builtin): Add fixup for invalid GIMPLE.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>  * gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c: New test.
> >>
> >> ---
> >>
> >> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> >> b/gcc/config/aarch64/aarch64-builtins.cc
> >> index 
> >> e0a741ac663188713e21f457affa57217d074783..5753988a9964967c27a03aca5fddb9025fd8ed6e
> >>  100644
> >> --- a/gcc/config/aarch64/aarch64-builtins.cc
> >> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> >> @@ -3022,6 +3022,16 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> >> fcode, gcall *stmt,
> >>  default:
> >>break;
> >>  }
> >> +
> >> +  /* GIMPLE assign statements (unlike calls) require a non-null lhs. If we
> >> + created an assign statement with a null lhs, then fix this by 
> >> assigning
> >> + to a new (and subsequently unused) variable. */
> >> +  if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs 
> >> (new_stmt))
> >> +{
> >> +  tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
> >> +  gimple_assign_set_lhs (new_stmt, new_lhs);
> >> +}
> >> +
> >>return new_stmt;
> >>  }
> >>
> >> diff --git 
> >> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c 
> >> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
> >> new file mode 100644
> >> index 
> >> ..345307456b175307f5cb22de5e59cfc6254f2737
> >> --- /dev/null
> >> +++ 
> >> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
> >> @@ -0,0 +1,9 @@
> >> +/* { dg-do compile { target { aarch64*-*-* } } } */
> >> +
> >> +#include 
> >> +
> >> +int8_t *bar();
> >> +
> >> +void foo() {
> >> +  __builtin_aarch64_ld1v16qi(bar());
> >> +}


[Ada] Use actual types instead of formal types consistently in debug info

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
This makes sure that the objects present in instantiations always have the
actual type instead of a local variant of the formal type in the debugging
information generated by the compiler (this was already the case when the
actual type is a record, a protected or a task type).

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/decl.cc (Gigi_Cloned_Subtype): New function.
(gnat_to_gnu_entity) : Call it to get the
cloned subtype, if any.
: Likewise.
: Likewise.
: Likewise.
: Likewise.
Deal with all cloned subtypes on the main path.

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -212,6 +212,7 @@ static tree gnat_to_gnu_subprog_type (Entity_Id, bool, bool, tree *);
 static int adjust_packed (tree, tree, int);
 static tree gnat_to_gnu_field (Entity_Id, tree, int, bool, bool);
 static enum inline_status_t inline_status_for_subprog (Entity_Id);
+static Entity_Id Gigi_Cloned_Subtype (Entity_Id);
 static tree gnu_ext_name_for_subprog (Entity_Id, tree);
 static void set_nonaliased_component_on_array_type (tree);
 static void set_reverse_storage_order_on_array_type (tree);
@@ -301,8 +302,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
   const bool foreign = Has_Foreign_Convention (gnat_entity);
   /* For a type, contains the equivalent GNAT node to be used in gigi.  */
   Entity_Id gnat_equiv_type = Empty;
-  /* For a type, contains the GNAT node to be used for back-annotation.  */
-  Entity_Id gnat_annotate_type = Empty;
+  /* For a subtype, contains the GNAT node to be used  as cloned subtype.  */
+  Entity_Id gnat_cloned_subtype = Empty;
   /* Temporary used to walk the GNAT tree.  */
   Entity_Id gnat_temp;
   /* Contains the GCC DECL node which is equivalent to the input GNAT node.
@@ -1807,6 +1808,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 case E_Modular_Integer_Subtype:
 case E_Ordinary_Fixed_Point_Subtype:
 case E_Decimal_Fixed_Point_Subtype:
+  gnat_cloned_subtype = Gigi_Cloned_Subtype (gnat_entity);
+  if (Present (gnat_cloned_subtype))
+	break;
 
   /* For integral subtypes, we make a new INTEGER_TYPE.  Note that we do
 	 not want to call create_range_type since we would like each subtype
@@ -2035,6 +2039,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
   break;
 
 case E_Floating_Point_Subtype:
+  gnat_cloned_subtype = Gigi_Cloned_Subtype (gnat_entity);
+  if (Present (gnat_cloned_subtype))
+	break;
+
   /* See the E_Signed_Integer_Subtype case for the rationale.  */
   if (!definition
 	  && Present (Ancestor_Subtype (gnat_entity))
@@ -2446,6 +2454,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
   break;
 
 case E_Array_Subtype:
+  gnat_cloned_subtype = Gigi_Cloned_Subtype (gnat_entity);
+  if (Present (gnat_cloned_subtype))
+	break;
 
   /* This is the actual data type for array variables.  Multidimensional
 	 arrays are implemented as arrays of arrays.  Note that arrays which
@@ -3443,18 +3454,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
   /* ... fall through ... */
 
 case E_Record_Subtype:
-  /* If Cloned_Subtype is Present it means this record subtype has
-	 identical layout to that type or subtype and we should use
-	 that GCC type for this one.  The front-end guarantees that
-	 the component list is shared.  */
-  if (Present (Cloned_Subtype (gnat_entity)))
-	{
-	  gnu_decl = gnat_to_gnu_entity (Cloned_Subtype (gnat_entity),
-	 NULL_TREE, false);
-	  gnat_annotate_type = Cloned_Subtype (gnat_entity);
-	  maybe_present = true;
-	  break;
-	}
+  gnat_cloned_subtype = Gigi_Cloned_Subtype (gnat_entity);
+  if (Present (gnat_cloned_subtype))
+	break;
 
   /* Otherwise, first ensure the base type is elaborated.  Then, if we are
 	 changing the type, make a new type with each field having the type of
@@ -3865,6 +3867,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
   break;
 
 case E_Access_Subtype:
+  gnat_cloned_subtype = Gigi_Cloned_Subtype (gnat_entity);
+  if (Present (gnat_cloned_subtype))
+	break;
+
   /* We treat this as identical to its base type; any constraint is
 	 meaningful only to the front-end.  */
   gnu_type = gnat_to_gnu_type (gnat_equiv_type);
@@ -4277,6 +4283,27 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
   gcc_unreachable ();
 }
 
+  /* If this is the clone of a subtype, just reuse the cloned subtype; another
+ approach would be to set the cloned subtype as the DECL_ORIGINAL_TYPE of
+ the entity, which would generate a DW_TAG_typedef in the debug info, but
+ at the cost of the duplication of the GCC type and, more 

[Ada] Fix internal error on instance of Ada.Task_Attributes at -O

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
This happens when there is a size mismatch, but this must be accepted.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/utils.cc (unchecked_convert): Also pad in most cases
if the source is not a scalar type but the destination is.

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -5503,8 +5503,8 @@ unchecked_convert (tree type, tree expr, bool notrunc_p)
   expr = unchecked_convert (type, expr, notrunc_p);
 }
 
-  /* If we are converting from a scalar type to a type with a different size,
- we need to pad to have the same size on both sides.
+  /* If we are converting between fixed-size types with different sizes, we
+ need to pad to have the same size on both sides.
 
  ??? We cannot do it unconditionally because unchecked conversions are
  used liberally by the front-end to implement interface thunks:
@@ -5515,8 +5515,10 @@ unchecked_convert (tree type, tree expr, bool notrunc_p)
 
  so we need to skip dereferences.  */
   else if (!INDIRECT_REF_P (expr)
-	   && !AGGREGATE_TYPE_P (etype)
+	   && TREE_CODE (expr) != STRING_CST
+	   && !(AGGREGATE_TYPE_P (etype) && AGGREGATE_TYPE_P (type))
 	   && ecode != UNCONSTRAINED_ARRAY_TYPE
+	   && TREE_CONSTANT (TYPE_SIZE (etype))
 	   && TREE_CONSTANT (TYPE_SIZE (type))
 	   && (c = tree_int_cst_compare (TYPE_SIZE (etype), TYPE_SIZE (type
 {
@@ -5532,15 +5534,18 @@ unchecked_convert (tree type, tree expr, bool notrunc_p)
 	  tree rec_type = maybe_pad_type (type, TYPE_SIZE (etype), 0, Empty,
 	  false, false, true);
 	  expr = unchecked_convert (rec_type, expr, notrunc_p);
-	  expr = build_component_ref (expr, TYPE_FIELDS (rec_type), false);
+	  expr = build3 (COMPONENT_REF, type, expr, TYPE_FIELDS (rec_type),
+			 NULL_TREE);
 	}
 }
 
-  /* Likewise if we are converting from a scalar type to a type with self-
+  /* Likewise if we are converting from a fixed-szie type to a type with self-
  referential size.  We use the max size to do the padding in this case.  */
   else if (!INDIRECT_REF_P (expr)
-	   && !AGGREGATE_TYPE_P (etype)
+	   && TREE_CODE (expr) != STRING_CST
+	   && !(AGGREGATE_TYPE_P (etype) && AGGREGATE_TYPE_P (type))
 	   && ecode != UNCONSTRAINED_ARRAY_TYPE
+	   && TREE_CONSTANT (TYPE_SIZE (etype))
 	   && CONTAINS_PLACEHOLDER_P (TYPE_SIZE (type)))
 {
   tree new_size = max_size (TYPE_SIZE (type), true);
@@ -5557,7 +5562,8 @@ unchecked_convert (tree type, tree expr, bool notrunc_p)
 	  tree rec_type = maybe_pad_type (type, TYPE_SIZE (etype), 0, Empty,
 	  false, false, true);
 	  expr = unchecked_convert (rec_type, expr, notrunc_p);
-	  expr = build_component_ref (expr, TYPE_FIELDS (rec_type), false);
+	  expr = build3 (COMPONENT_REF, type, expr, TYPE_FIELDS (rec_type),
+			 NULL_TREE);
 	}
 }
 




[Ada] Handle bodies-to-inline just like generic templates

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
Originally bodies-to-inline created for the frontend inlining were
analyzed with expansion disabled. Then, to facilitate inlining in
GNATprove mode, the analysis was changed to preanalysis.

However, preanalysis in this context works badly for calls in prefix
notation, because preanalysis assigns entities and types to nodes but
doesn't convert calls from prefix to ordinary notation. When the
body-to-inline is actually inlined, the (re)analysis of calls in prefix
notation fails.

The proper solution is rather to handle bodies-to-inline just like
generic templates.

From the user's point of view, this patch fixes spurious errors both in
GNATprove (which uses frontend inlining by default) and in GNAT (where
frontend inlining is typically explicitly requested with -gnatN and
pragma Inline_Always).

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* inline.adb (Build_Body_To_Inline): Instead of manipulating the
Full_Analysis flag, use the Inside_A_Generic flag (which is
conveniently manipulated by Start_Generic/End_Generic, together
with Expander_Active).
* sem_attr.adb (Analyze_Attribute_Old_Result): Adapt comment and
assertion to different flag that is set while building
body-to-inline.

diff --git a/gcc/ada/inline.adb b/gcc/ada/inline.adb
--- a/gcc/ada/inline.adb
+++ b/gcc/ada/inline.adb
@@ -32,7 +32,6 @@ with Einfo.Entities; use Einfo.Entities;
 with Einfo.Utils;use Einfo.Utils;
 with Elists; use Elists;
 with Errout; use Errout;
-with Expander;   use Expander;
 with Exp_Ch6;use Exp_Ch6;
 with Exp_Ch7;use Exp_Ch7;
 with Exp_Tss;use Exp_Tss;
@@ -1107,7 +1106,6 @@ package body Inline is
 
procedure Build_Body_To_Inline (N : Node_Id; Spec_Id : Entity_Id) is
   Decl: constant Node_Id := Unit_Declaration_Node (Spec_Id);
-  Analysis_Status : constant Boolean := Full_Analysis;
   Original_Body   : Node_Id;
   Body_To_Analyze : Node_Id;
   Max_Size: constant := 10;
@@ -1419,12 +1417,7 @@ package body Inline is
  Append (Body_To_Analyze, Declarations (N));
   end if;
 
-  --  The body to inline is preanalyzed. In GNATprove mode we must disable
-  --  full analysis as well so that light expansion does not take place
-  --  either, and name resolution is unaffected.
-
-  Expander_Mode_Save_And_Set (False);
-  Full_Analysis := False;
+  Start_Generic;
 
   Analyze (Body_To_Analyze);
   Push_Scope (Defining_Entity (Body_To_Analyze));
@@ -1432,8 +1425,7 @@ package body Inline is
   End_Scope;
   Remove (Body_To_Analyze);
 
-  Expander_Mode_Restore;
-  Full_Analysis := Analysis_Status;
+  End_Generic;
 
   --  Restore environment if previously saved
 


diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -1508,9 +1508,9 @@ package body Sem_Attr is
and then Chars (Spec_Id) = Name_uParent
and then Chars (Scope (Spec_Id)) = Name_uPostconditions
  then
---  This situation occurs only when preanalyzing the inlined body
+--  This situation occurs only when analyzing the body-to-inline
 
-pragma Assert (not Full_Analysis);
+pragma Assert (Inside_A_Generic);
 
 Spec_Id := Scope (Spec_Id);
 pragma Assert (Is_Inlined (Spec_Id));




[Ada] Extend No_Dependence restriction to code generation

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
This reports violations for 4 units from gigi.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/trans.cc (gigi): Report a violation of No_Dependence
on System.Stack_Checking if Stack_Check_Probes_On_Target is not set
and -fstack-check is specified.
(build_binary_op_trapv): Report violations of No_Dependence on both
System.Arith_64 and System.Arith_128.
(add_decl_expr): If an initialized variable, report a violation of
No_Dependence on System.Memory_Copy for large aggregate types.
(gnat_to_gnu) : Report a violation
of No_Dependence on System.Memory_Compare for large aggregate types.
! Report a violation of No_Dependence on
System.Memory_Set, System.Memory_Move or else System.Memory_Copy for
large aggregate types.
* gcc-interface/utils2.cc (maybe_wrap_malloc): Report a violation of
No_Dependence on System.Memory.
(maybe_wrap_free): Add GNAT_NODE parameter and report a violation of
No_Dependence on System.Memory.
(build_call_alloc_dealloc): Adjust call to maybe_wrap_free.

diff --git a/gcc/ada/gcc-interface/trans.cc b/gcc/ada/gcc-interface/trans.cc
--- a/gcc/ada/gcc-interface/trans.cc
+++ b/gcc/ada/gcc-interface/trans.cc
@@ -364,7 +364,12 @@ gigi (Node_Id gnat_root,
 
   /* Enable GNAT stack checking method if needed */
   if (!Stack_Check_Probes_On_Target)
-set_stack_check_libfunc ("__gnat_stack_check");
+{
+  set_stack_check_libfunc ("__gnat_stack_check");
+  if (flag_stack_check != NO_STACK_CHECK)
+	Check_Restriction_No_Dependence_On_System (Name_Stack_Checking,
+		   gnat_root);
+}
 
   /* Retrieve alignment settings.  */
   double_float_alignment = get_target_double_float_alignment ();
@@ -6933,9 +6938,18 @@ gnat_to_gnu (Node_Id gnat_node)
 	  = convert (TREE_TYPE (gnu_rhs), TYPE_SIZE (gnu_type));
 	  }
 
+	/* If this is a comparison between (potentially) large aggregates, then
+	   declare the dependence on the memcmp routine.  */
+	else if ((kind == N_Op_Eq || kind == N_Op_Ne)
+		 && AGGREGATE_TYPE_P (TREE_TYPE (gnu_lhs))
+		 && (!TREE_CONSTANT (TYPE_SIZE (TREE_TYPE (gnu_lhs)))
+		 || compare_tree_int (TYPE_SIZE (TREE_TYPE (gnu_lhs)),
+	  2 * BITS_PER_WORD) > 0))
+	  Check_Restriction_No_Dependence_On_System (Name_Memory_Compare,
+		 gnat_node);
+
 	/* Pending generic support for efficient vector logical operations in
-	   GCC, convert vectors to their representative array type view and
-	   fallthrough.  */
+	   GCC, convert vectors to their representative array type view.  */
 	gnu_lhs = maybe_vector_array (gnu_lhs);
 	gnu_rhs = maybe_vector_array (gnu_rhs);
 
@@ -7254,6 +7268,8 @@ gnat_to_gnu (Node_Id gnat_node)
 		  value = int_const_binop (BIT_AND_EXPR, value, mask);
 		}
 	  gnu_result = build_call_expr (t, 3, dest, value, size);
+	  Check_Restriction_No_Dependence_On_System (Name_Memory_Set,
+			 gnat_node);
 	}
 
 	  /* Otherwise build a regular assignment.  */
@@ -7278,7 +7294,18 @@ gnat_to_gnu (Node_Id gnat_node)
 	  tree from_ptr = build_fold_addr_expr (from);
 	  tree t = builtin_decl_explicit (BUILT_IN_MEMMOVE);
 	  gnu_result = build_call_expr (t, 3, to_ptr, from_ptr, size);
+	  Check_Restriction_No_Dependence_On_System (Name_Memory_Move,
+			 gnat_node);
 	   }
+
+	  /* If this is an assignment between (potentially) large aggregates,
+	 then declare the dependence on the memcpy routine.  */
+	  else if (AGGREGATE_TYPE_P (TREE_TYPE (gnu_lhs))
+		   && (!TREE_CONSTANT (TYPE_SIZE (TREE_TYPE (gnu_lhs)))
+		   || compare_tree_int (TYPE_SIZE (TREE_TYPE (gnu_lhs)),
+	2 * BITS_PER_WORD) > 0))
+	Check_Restriction_No_Dependence_On_System (Name_Memory_Copy,
+		   gnat_node);
 	}
   break;
 
@@ -8437,27 +8464,37 @@ add_decl_expr (tree gnu_decl, Node_Id gnat_node)
   && !TYPE_FAT_POINTER_P (type))
 MARK_VISITED (TYPE_ADA_SIZE (type));
 
-  /* If this is a variable and an initializer is attached to it, it must be
- valid for the context.  Similar to init_const in create_var_decl.  */
-  if (TREE_CODE (gnu_decl) == VAR_DECL
-  && (gnu_init = DECL_INITIAL (gnu_decl))
-  && (!gnat_types_compatible_p (type, TREE_TYPE (gnu_init))
+  if (TREE_CODE (gnu_decl) == VAR_DECL && (gnu_init = DECL_INITIAL (gnu_decl)))
+{
+  /* If this is a variable and an initializer is attached to it, it must be
+	 valid for the context.  Similar to init_const in create_var_decl.  */
+  if (!gnat_types_compatible_p (type, TREE_TYPE (gnu_init))
 	  || (TREE_STATIC (gnu_decl)
 	  && !initializer_constant_valid_p (gnu_init,
-		TREE_TYPE (gnu_init)
-{
-  DECL_INITIAL (gnu_decl) = NULL_TREE;
-  if (TREE_READONLY (gnu_decl))
+		TREE_TYPE (gnu_init
 	{
-	  TREE_READONLY (gnu_decl) = 0;
-	  DECL_READONLY_ONCE_ELAB (gnu_decl) = 1;
-	}
+	  DECL_INITIAL (gnu_decl) = NULL_TREE;
+	  if (TREE_READONLY 

[Ada] Fix internal error on comparison with access function parameter

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
It comes from an overzealous assertion.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/utils2.cc (build_binary_op) : Also accept
pointer-to-function types that are not variant of each other.

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -1134,12 +1134,17 @@ build_binary_op (enum tree_code op_code, tree result_type,
 	  else if (POINTER_TYPE_P (left_base_type)
 		   && POINTER_TYPE_P (right_base_type))
 	{
+	  tree left_ref_type = TREE_TYPE (left_base_type);
+	  tree right_ref_type = TREE_TYPE (right_base_type);
+
 	  /* Anonymous access types in Ada 2005 can point to different
-		 members of a tagged type hierarchy.  */
-	  gcc_assert (TYPE_MAIN_VARIANT (TREE_TYPE (left_base_type))
-			  == TYPE_MAIN_VARIANT (TREE_TYPE (right_base_type))
-			  || (TYPE_ALIGN_OK (TREE_TYPE (left_base_type))
-			  && TYPE_ALIGN_OK (TREE_TYPE (right_base_type;
+		 members of a tagged hierarchy or different function types.  */
+	  gcc_assert (TYPE_MAIN_VARIANT (left_ref_type)
+			  == TYPE_MAIN_VARIANT (right_ref_type)
+			  || (TYPE_ALIGN_OK (left_ref_type)
+			  && TYPE_ALIGN_OK (right_ref_type))
+			  || (TREE_CODE (left_ref_type) == FUNCTION_TYPE
+			  && TREE_CODE (right_ref_type) == FUNCTION_TYPE));
 	  best_type = left_base_type;
 	}
 




[Ada] Fix wrong access check with access-to-unconstrained-array

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
The current implementation may create dangling references from a superset
of the alias set of the dummy pointer-to-array type when it exists.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) : Save
and restore the alias set of the dummy pointer-to-array type.

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -2102,15 +2102,15 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	const bool convention_fortran_p
 	  = (Convention (gnat_entity) == Convention_Fortran);
 	const int ndim = Number_Dimensions (gnat_entity);
-	tree gnu_template_type;
-	tree gnu_ptr_template;
-	tree gnu_template_reference, gnu_template_fields, gnu_fat_type;
+	tree gnu_fat_type, gnu_template_type, gnu_ptr_template;
+	tree gnu_template_reference, gnu_template_fields;
 	tree *gnu_index_types = XALLOCAVEC (tree, ndim);
 	tree *gnu_temp_fields = XALLOCAVEC (tree, ndim);
-	tree gnu_max_size = size_one_node, tem, obj;
+	tree gnu_max_size = size_one_node;
+	tree comp_type, tem, obj;
 	Entity_Id gnat_index;
+	alias_set_type ptr_set = -1;
 	int index;
-	tree comp_type;
 
 	/* Create the type for the component now, as it simplifies breaking
 	   type reference loops.  */
@@ -2181,6 +2181,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	if (COMPLETE_TYPE_P (gnu_fat_type))
 	  {
 	tem = TYPE_FIELDS (gnu_fat_type);
+	if (TYPE_ALIAS_SET_KNOWN_P (TREE_TYPE (tem)))
+	  ptr_set = TYPE_ALIAS_SET (TREE_TYPE (tem));
 	TREE_TYPE (tem) = ptr_type_node;
 	TREE_TYPE (DECL_CHAIN (tem)) = gnu_ptr_template;
 	TYPE_DECL_SUPPRESS_DEBUG (TYPE_STUB_DECL (gnu_fat_type)) = 0;
@@ -2389,7 +2391,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	  tem = change_qualified_type (tem, TYPE_QUAL_VOLATILE);
 
 	/* Adjust the type of the pointer-to-array field of the fat pointer
-	   and record the aliasing relationships if necessary.  If this is
+	   and preserve its existing alias set, if any.  Note that calling
+	   again record_component_aliases on the fat pointer is not enough
+	   because this may leave dangling references to the existing alias
+	   set from types containing a fat pointer component.  If this is
 	   a packed type implemented specially, then use a ref-all pointer
 	   type since the implementation type may vary between constrained
 	   subtypes and unconstrained base type.  */
@@ -2398,8 +2403,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	= build_pointer_type_for_mode (tem, ptr_mode, true);
 	else
 	  TREE_TYPE (TYPE_FIELDS (gnu_fat_type)) = build_pointer_type (tem);
-	if (TYPE_ALIAS_SET_KNOWN_P (gnu_fat_type))
-	  record_component_aliases (gnu_fat_type);
+	if (ptr_set != -1)
+	  TYPE_ALIAS_SET (TREE_TYPE (TYPE_FIELDS (gnu_fat_type))) = ptr_set;
 
 	/* If the maximum size doesn't overflow, use it.  */
 	if (gnu_max_size




[Ada] Generate debug info entry for user-defined access subtype

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
This is consistent with the other kinds of subtypes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) <E_Access_Subtype>: Do
not reuse the TYPE_DECL of the base type.

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -3867,7 +3867,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 case E_Access_Subtype:
   /* We treat this as identical to its base type; any constraint is
 	 meaningful only to the front-end.  */
-  gnu_decl = gnat_to_gnu_entity (gnat_equiv_type, NULL_TREE, false);
+  gnu_type = gnat_to_gnu_type (gnat_equiv_type);
   maybe_present = true;
 
   /* The designated subtype must be elaborated as well, if it does
@@ -3877,11 +3877,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	  && Is_Frozen (Directly_Designated_Type (gnat_entity))
 	  && No (Freeze_Node (Directly_Designated_Type (gnat_entity
 	{
-	  tree gnu_base_type = TREE_TYPE (gnu_decl);
-	  tree gnu_desig_base_type
-	= TYPE_IS_FAT_POINTER_P (gnu_base_type)
-	  ? TREE_TYPE (TREE_TYPE (TYPE_FIELDS (gnu_base_type)))
-	  : TREE_TYPE (gnu_base_type);
+	  tree gnu_desig_type
+	= TYPE_IS_FAT_POINTER_P (gnu_type)
+	  ? TREE_TYPE (TREE_TYPE (TYPE_FIELDS (gnu_type)))
+	  : TREE_TYPE (gnu_type);
 
 	  /* If we are to defer elaborating incomplete types, make a dummy
 	 type node and elaborate it later.  */
@@ -3898,7 +3897,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 
 	  /* Otherwise elaborate the designated subtype only if its base type
 	 has already been elaborated.  */
-	  else if (!TYPE_IS_DUMMY_P (gnu_desig_base_type))
+	  else if (!TYPE_IS_DUMMY_P (gnu_desig_type))
 	gnat_to_gnu_entity (Directly_Designated_Type (gnat_entity),
 NULL_TREE, false);
 	}




[Ada] Revert recent change in debug info for vector array types

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
It lost too much useful information.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity): Do not set the debug
type for vector types.

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -4785,14 +4785,6 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
   else
 	gnu_decl = create_type_decl (gnu_entity_name, gnu_type, artificial_p,
  debug_info_p, gnat_entity);
-
-  /* For vector types, make the representative array the debug type.  */
-  if (VECTOR_TYPE_P (gnu_type))
-	{
-	  tree rep = TYPE_REPRESENTATIVE_ARRAY (gnu_type);
-	  TYPE_NAME (rep) = DECL_NAME (gnu_decl);
-	  SET_TYPE_DEBUG_TYPE (gnu_type, rep);
-	}
 }
 
   /* If we haven't already, associate the ..._DECL node that we just made with




[Ada] Fix for visibility of aspect expressions inside generic units

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
When a generic unit contains references to global entities (i.e.
entities declared outside of this generic unit), those references are
saved: from the analyzed copy of a generic unit (which is then
discarded) into a generic template (which is then instantiated, possibly
many times). To save those references we maintain an association from
nodes in the generic template to nodes in the analyzed copy. However,
this association breaks when analysis of the generic copy calls
Relocate_Node, which conceptually only moves the node, while in fact it
creates a copy with a new Node_Id.

In particular, this association was broken by calls to Relocate_Node
that happen when transforming various aspects into corresponding pragmas
or attribute definition clauses. For the most common Pre and Post aspects
this was fixed years ago by not using Relocate_Node and simply sharing
the tree.  This patch extends this fix to other aspects, in particular
those that allow non-static expressions.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch13.adb (Relocate_Expression): New routine with code that
previously was only applied to Pre and Post aspects.
(Analyze_Aspect_Specifications): Apply the above routine to
other aspects, in particular to aspects Address, Attach_Handler,
Predicate and Interrupt_Priority.

diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -1650,6 +1650,18 @@ package body Sem_Ch13 is
   --  pragma of the same kind. Flag Is_Generic should be set when the
   --  context denotes a generic instance.
 
+  function Relocate_Expression (Source : Node_Id) return Node_Id;
+  --  Outside of a generic this function is equivalent to Relocate_Node.
+  --  Inside a generic it is an identity function, because Relocate_Node
+  --  would create a new node that is not associated with the generic
+  --  template. This association is needed to save references to entities
+  --  that are global to the generic (and might be not visible from where
+  --  the generic is instantiated).
+  --
+  --  Inside a generic the original tree is shared between aspect and
+  --  a corresponding pragma (or an attribute definition clause). This
+  --  parallels what is done in sem_prag.adb (see Get_Argument).
+
   --------------
   -- Decorate --
   --------------
@@ -1835,6 +1847,19 @@ package body Sem_Ch13 is
  end if;
   end Insert_Pragma;
 
+  -------------------------
+  -- Relocate_Expression --
+  -------------------------
+
+  function Relocate_Expression (Source : Node_Id) return Node_Id is
+  begin
+ if Inside_A_Generic then
+return Source;
+ else
+return Atree.Relocate_Node (Source);
+ end if;
+  end Relocate_Expression;
+
   --  Local variables
 
   Aspect : Node_Id;
@@ -3229,7 +3254,7 @@ package body Sem_Ch13 is
 Make_Attribute_Definition_Clause (Loc,
   Name   => Ent,
   Chars  => Nam,
-  Expression => Relocate_Node (Expr));
+  Expression => Relocate_Expression (Expr));
 
   --  If the address is specified, then we treat the entity as
   --  referenced, to avoid spurious warnings. This is analogous
@@ -3293,7 +3318,7 @@ package body Sem_Ch13 is
Make_Pragma_Argument_Association (Sloc (Ent),
  Expression => Ent),
Make_Pragma_Argument_Association (Sloc (Expr),
- Expression => Relocate_Node (Expr))),
+ Expression => Relocate_Expression (Expr))),
  Pragma_Name  => Name_Attach_Handler);
 
   --  We need to insert this pragma into the tree to get proper
@@ -3335,7 +3360,7 @@ package body Sem_Ch13 is
Make_Pragma_Argument_Association (Sloc (Ent),
  Expression => Ent),
Make_Pragma_Argument_Association (Sloc (Expr),
- Expression => Relocate_Node (Expr))),
+ Expression => Relocate_Expression (Expr))),
  Pragma_Name => Name_Predicate);
 
   --  Mark type has predicates, and remember what kind of
@@ -3580,7 +3605,7 @@ package body Sem_Ch13 is
Make_Attribute_Definition_Clause (Loc,
  Name   => Ent,
  Chars  => Nam,
- Expression => Relocate_Node (Expr));
+ Expression => Relocate_Expression (Expr));
   end if;
 
--  Suppress/Unsuppress
@@ -4599,32 +4624,12 @@ package body Sem_Ch13 is
 
   --  Build the precondition/postcondition pragma
 
-  

[Ada] Fix incorrect handling of Ghost aspect

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
When a formal generic type is marked as Ghost, the instantiation of that
generic will contain a generic subtype for the actual with the Ghost
pragma. Recognize this case.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_prag.adb (Analyze_Pragma): Recognize a generated subtype
with Ghost pragma for generic instantiations.

diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -16999,6 +16999,16 @@ package body Sem_Prag is
   then
  Id := Defining_Entity (Stmt);
  exit;
+
+  --  When pragma Ghost applies to a generic formal type, the
+  --  type declaration in the instantiation is a generated
+  --  subtype declaration.
+
+  elsif Nkind (Stmt) = N_Subtype_Declaration
+and then Present (Generic_Parent_Type (Stmt))
+  then
+ Id := Defining_Entity (Stmt);
+ exit;
   end if;
 
--  The pragma applies to a legal construct, stop the traversal




[Ada] Add support for defaulted Storage_Model_Type aspect and subaspects

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
The compiler currently rejects a Storage_Model_Type aspect that is not
specified with an aggregate, or that has an aggregate that does not
specify all defined "subaspects" (Address_Type, Null_Address, Allocate,
etc.). The RFC for this feature defines the aspect to fully default to
the native memory model when no aggregate is given, and also allows any
subaspects to be specified and others to default in the case where the
address type is the native address type (System.Address), whether that
address type is explicitly specified or defaulted. This set of changes
now supports that defaulting semantics. Note that the subaspect
retrieval functions in Sem_Util.Storage_Model_Support (which are called
by the compiler back ends) will now return Empty for any subprogram
subaspects (Allocate, Deallocate, etc.) that are defaulted in the aspect
(that is, in the native model case where the address type is
System.Address).  Also in the native case, retrieval of defaulted
subaspects Address_Type and Null_Address will return the entities for
System.Address and System.Null_Address, respectively. Additionally,
error checks for multiple associations given for the same subaspect are
now done.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* aspects.ads (Aspect_Argument): Change the association for
Aspect_Storage_Model_Type from Expression to
Optional_Expression.
* exp_util.ads (Find_Storage_Op): Update comment to indicate
that Empty can be returned in the case where a storage-model
operation is defaulted.
* exp_util.adb (Find_Storage_Op): Allow the function to return
Empty in Storage_Model_Type case rather than raising
Program_Error, so that Procedure_To_Call fields in N_Allocator
and N_Free_Statement nodes will be set to Empty in the defaulted
native storage-model case.
* sem_ch13.adb: Add with and use of System.Case_Util (and
reformat context_clause).
(Check_Aspect_At_Freeze_Point): Return with no action for a
Storage_Model_Type aspect with no expression (fully-defaulted
native memory-model case).
(Resolve_Storage_Model_Type_Argument): If an Address_Type has
not been explicitly specified, then set Addr_Type to denote type
System.Address.
(Validate_Storage_Model_Type_Aspect): Return immediately in the
case where the aspect has no Expression (fully-defaulted native
memory-model case).  No longer issue an error when Address_Type
isn't specified, and instead use type System.Address as the
default address type. When the address type is
System.Address (whether specified or defaulted), no longer issue
errors for any other "subaspects" that aren't specified, since
in that case those are allowed to default as well. Remove ???
comment about needing to check for duplicates, which is now
addressed.
(Check_And_Resolve_Storage_Model_Type_Argument): New procedure
to check that an association for a storage-model subaspect in
the aggregate has not been specified earlier in the aggregate,
and to then resolve the expression of the association and save
the resolved entity. Called by
Validate_Storage_Model_Type_Aspect.
* sem_util.ads (Storage_Model_Support): Update comments on specs
of the functions Get_Storage_Model_Type_Entity,
Storage_Model_Address_Type, and Storage_Model_Null_Address to
indicate the behavior when the address type is System.Address
(the native memory-model case).
* sem_util.adb
(Storage_Model_Support.Get_Storage_Model_Type_Entity): Suppress
the search for the given subaspect name (Nam) when the
Storage_Model_Type aspect is fully defaulted (i.e., no
Expression is present) and simply return. In cases where the
search is done, but no association that matches Nam is found,
return System.Address for the Name_Address_Type case, return
System.Null_Address for the Name_Null_Address case, and return
Empty for all other cases.

diff --git a/gcc/ada/aspects.ads b/gcc/ada/aspects.ads
--- a/gcc/ada/aspects.ads
+++ b/gcc/ada/aspects.ads
@@ -441,7 +441,7 @@ package Aspects is
   Aspect_SPARK_Mode => Optional_Name,
   Aspect_Stable_Properties  => Expression,
   Aspect_Static_Predicate   => Expression,
-  Aspect_Storage_Model_Type => Expression,
+  Aspect_Storage_Model_Type => Optional_Expression,
   Aspect_Storage_Pool   => Name,
   Aspect_Storage_Size   => Expression,
   Aspect_Stream_Size=> Expression,


diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -6406,16 +6406,7 @@ package body Exp_Util is
 
begin
   if Has_Storage_Model_Type_Aspect (Typ) then
- 

[Ada] Do not generate DW_TAG_typedef for constrained array types

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
It no longer serves any useful purpose at this point.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/utils.cc (gnat_pushdecl): Build DECL_ORIGINAL_TYPE
only for pointer types.

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -877,21 +877,18 @@ gnat_pushdecl (tree decl, Node_Id gnat_node)
 {
   tree t = TREE_TYPE (decl);
 
-  /* Array and pointer types aren't tagged types in the C sense so we need
-	 to generate a typedef in DWARF for them and make sure it is preserved,
-	 unless the type is artificial.  */
+  /* Pointer types aren't named types in the C sense so we need to generate
+ a typedef in DWARF for them and make sure it is preserved, unless the
+ type is artificial.  */
   if (!(TYPE_NAME (t) && TREE_CODE (TYPE_NAME (t)) == TYPE_DECL)
-	  && ((TREE_CODE (t) != ARRAY_TYPE && TREE_CODE (t) != POINTER_TYPE)
-	  || DECL_ARTIFICIAL (decl)))
+	  && (TREE_CODE (t) != POINTER_TYPE || DECL_ARTIFICIAL (decl)))
 	;
-  /* For array and pointer types, create the DECL_ORIGINAL_TYPE that will
-	 generate the typedef in DWARF.  Also do that for fat pointer types
-	 because, even though they are tagged types in the C sense, they are
-	 still XUP types attached to the base array type at this point.  */
+  /* For pointer types, create the DECL_ORIGINAL_TYPE that will generate
+	 the typedef in DWARF.  Also do that for fat pointer types because,
+	 even though they are named types in the C sense, they are still the
+	 XUP types created for the base array type at this point.  */
   else if (!DECL_ARTIFICIAL (decl)
-	   && (TREE_CODE (t) == ARRAY_TYPE
-		   || TREE_CODE (t) == POINTER_TYPE
-		   || TYPE_IS_FAT_POINTER_P (t)))
+	   && (TREE_CODE (t) == POINTER_TYPE || TYPE_IS_FAT_POINTER_P (t)))
 	{
 	  tree tt = build_variant_type_copy (t);
 	  TYPE_NAME (tt) = decl;
@@ -905,10 +902,6 @@ gnat_pushdecl (tree decl, Node_Id gnat_node)
 	DECL_ORIGINAL_TYPE (decl) = DECL_ORIGINAL_TYPE (TYPE_NAME (t));
 	  else
 	DECL_ORIGINAL_TYPE (decl) = t;
-	  /* Array types need to have a name so that they can be related to
-	 their GNAT encodings.  */
-	  if (TREE_CODE (t) == ARRAY_TYPE && !TYPE_NAME (t))
-	TYPE_NAME (t) = DECL_NAME (decl);
 	  /* Remark the canonical fat pointer type as artificial.  */
 	  if (TYPE_IS_FAT_POINTER_P (t))
 	TYPE_ARTIFICIAL (t) = 1;




[Ada] Plug legality loophole for equality operator of untagged record types

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
In Ada 2012, the RM 4.5.2(9.8) clause prevents an equality operator for an
untagged record type from being declared after the type is frozen.  While
the clause is implemented in GNAT, the implementation has a loophole which
lets subprogram bodies that are not the completion of a declaration pass
the check without being flagged.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch6.adb (Analyze_Subprogram_Body_Helper): Set Acts_As_Spec
earlier if the body is not the completion of a declaration.
(Check_Untagged_Equality): Deal with subprogram bodies that are
not the completion of a declaration and make sure that they are
not flagged when they cause the freezing of the type themselves.
Give a warning on the freezing point of the type in more cases.
* sem_res.adb (Resolve_Equality_Op): Revert latest change.

diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -4743,6 +4743,12 @@ package body Sem_Ch6 is
 Style.Body_With_No_Spec (N);
  end if;
 
+ --  First set Acts_As_Spec if appropriate
+
+ if Nkind (N) /= N_Subprogram_Body_Stub then
+Set_Acts_As_Spec (N);
+ end if;
+
  New_Overloaded_Entity (Body_Id);
 
  --  A subprogram body should cause freezing of its own declaration,
@@ -4767,7 +4773,6 @@ package body Sem_Ch6 is
  end if;
 
  if Nkind (N) /= N_Subprogram_Body_Stub then
-Set_Acts_As_Spec (N);
 Generate_Definition (Body_Id);
 Generate_Reference
   (Body_Id, Body_Id, 'b', Set_Ref => False, Force => True);
@@ -9525,15 +9530,85 @@ package body Sem_Ch6 is
-
 
procedure Check_Untagged_Equality (Eq_Op : Entity_Id) is
-  Typ  : constant Entity_Id := Etype (First_Formal (Eq_Op));
-  Decl : constant Node_Id   := Unit_Declaration_Node (Eq_Op);
-  Obj_Decl : Node_Id;
+  Eq_Decl : constant Node_Id   := Unit_Declaration_Node (Eq_Op);
+  Typ : constant Entity_Id := Etype (First_Formal (Eq_Op));
+
+  procedure Freezing_Point_Warning (N : Node_Id; S : String);
+  --  Output a warning about the freezing point N of Typ
+
+  function Is_Actual_Of_Instantiation
+(E: Entity_Id;
+ Inst : Node_Id) return Boolean;
+  --  Return True if E is an actual parameter of instantiation Inst
+
+  ----------------------------
+  -- Freezing_Point_Warning --
+  ----------------------------
+
+  procedure Freezing_Point_Warning (N : Node_Id; S : String) is
+  begin
+ Error_Msg_String (1 .. S'Length) := S;
+ Error_Msg_Strlen := S'Length;
+
+ if Ada_Version >= Ada_2012 then
+Error_Msg_NE ("type& is frozen by ~??", N, Typ);
+Error_Msg_N
+  ("\an equality operator cannot be declared after this point??",
+   N);
+
+ else
+Error_Msg_NE ("type& is frozen by ~ (Ada 2012)?y?", N, Typ);
+Error_Msg_N
+  ("\an equality operator cannot be declared after this point"
+   & " (Ada 2012)?y?", N);
+ end if;
+  end Freezing_Point_Warning;
+
+  --------------------------------
+  -- Is_Actual_Of_Instantiation --
+  --------------------------------
+
+  function Is_Actual_Of_Instantiation
+(E: Entity_Id;
+ Inst : Node_Id) return Boolean
+  is
+ Assoc : Node_Id;
+
+  begin
+ if Present (Generic_Associations (Inst)) then
+Assoc := First (Generic_Associations (Inst));
+
+while Present (Assoc) loop
+   if Present (Explicit_Generic_Actual_Parameter (Assoc))
+ and then
+   Is_Entity_Name (Explicit_Generic_Actual_Parameter (Assoc))
+ and then
+   Entity (Explicit_Generic_Actual_Parameter (Assoc)) = E
+   then
+  return True;
+   end if;
+
+   Next (Assoc);
+end loop;
+ end if;
+
+ return False;
+  end Is_Actual_Of_Instantiation;
+
+  --  Local variable
+
+  Decl : Node_Id;
+
+   --  Start of processing for Check_Untagged_Equality
 
begin
-  --  This check applies only if we have a subprogram declaration with an
-  --  untagged record type that is conformant to the predefined operator.
+  --  This check applies only if we have a subprogram declaration or a
+  --  subprogram body that is not a completion, for an untagged record
+  --  type, and that is conformant with the predefined operator.
 
-  if Nkind (Decl) /= N_Subprogram_Declaration
+  if (Nkind (Eq_Decl) /= N_Subprogram_Declaration
+   and then not (Nkind (Eq_Decl) = N_Subprogram_Body
+  and then Acts_As_Spec (Eq_Decl)))
 or else not Is_Record_Type (Typ)

[Ada] Fix crash on frontend inlining of functions with single returns

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
When examining the expression of the first declaration of the inlined body,
make sure that this declaration is in fact an object declaration.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* inline.adb (Has_Single_Return): Add guard for the subsequent
call to Expression.

diff --git a/gcc/ada/inline.adb b/gcc/ada/inline.adb
--- a/gcc/ada/inline.adb
+++ b/gcc/ada/inline.adb
@@ -4648,6 +4648,7 @@ package body Inline is
  return
Present (Declarations (N))
  and then Present (First (Declarations (N)))
+ and then Nkind (First (Declarations (N))) = N_Object_Declaration
  and then Entity (Expression (Return_Statement)) =
 Defining_Identifier (First (Declarations (N)));
   end if;




[Ada] Clarify hardening command-line options that require explicit choices

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
Prefixes -fzero-call-used-regs and -fstrub could be mistaken for full
command-line options with the references to them in the GNAT RM.  Make
it clearer that they require explicit choices.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* doc/gnat_rm/security_hardening_features.rst: Clarify the need
for choices after -fzero-call-used-regs and -fstrub.
* gnat_rm.texi: Regenerate.

diff --git a/gcc/ada/doc/gnat_rm/security_hardening_features.rst b/gcc/ada/doc/gnat_rm/security_hardening_features.rst
--- a/gcc/ada/doc/gnat_rm/security_hardening_features.rst
+++ b/gcc/ada/doc/gnat_rm/security_hardening_features.rst
@@ -18,9 +18,10 @@ Register Scrubbing
 GNAT can generate code to zero-out hardware registers before returning
 from a subprogram.
 
-It can be enabled with the :switch:`-fzero-call-used-regs` command-line
-option, to affect all subprograms in a compilation, and with a
-:samp:`Machine_Attribute` pragma, to affect only specific subprograms.
+It can be enabled with the :switch:`-fzero-call-used-regs={choice}`
+command-line option, to affect all subprograms in a compilation, and
+with a :samp:`Machine_Attribute` pragma, to affect only specific
+subprograms.
 
 .. code-block:: ada
 
@@ -73,11 +74,11 @@ or a variable.)
  --  scrubbing of the stack space used by that subprogram.
 
 
-There are also :switch:`-fstrub` command-line options to control
-default settings.  For usage and more details on the command-line
-option, on the ``strub`` attribute, and their use with other
-programming languages, see :title:`Using the GNU Compiler Collection
-(GCC)`.
+There are also :switch:`-fstrub={choice}` command-line options to
+control default settings.  For usage and more details on the
+command-line options, on the ``strub`` attribute, and their use with
+other programming languages, see :title:`Using the GNU Compiler
+Collection (GCC)`.
 
 Note that Ada secondary stacks are not scrubbed.  The restriction
 ``No_Secondary_Stack`` avoids their use, and thus their accidental


diff --git a/gcc/ada/gnat_rm.texi b/gcc/ada/gnat_rm.texi
--- a/gcc/ada/gnat_rm.texi
+++ b/gcc/ada/gnat_rm.texi
@@ -21,7 +21,7 @@
 
 @copying
 @quotation
-GNAT Reference Manual , Jun 24, 2022
+GNAT Reference Manual , Jul 11, 2022
 
 AdaCore
 
@@ -28922,9 +28922,10 @@ change.
 GNAT can generate code to zero-out hardware registers before returning
 from a subprogram.
 
-It can be enabled with the @code{-fzero-call-used-regs} command-line
-option, to affect all subprograms in a compilation, and with a
-@code{Machine_Attribute} pragma, to affect only specific subprograms.
+It can be enabled with the @code{-fzero-call-used-regs=@emph{choice}}
+command-line option, to affect all subprograms in a compilation, and
+with a @code{Machine_Attribute} pragma, to affect only specific
+subprograms.
 
 @example
 procedure Foo;
@@ -28975,10 +28976,10 @@ pragma Machine_Attribute (Var, "strub");
 --  scrubbing of the stack space used by that subprogram.
 @end example
 
-There are also @code{-fstrub} command-line options to control
-default settings.  For usage and more details on the command-line
-option, on the @code{strub} attribute, and their use with other
-programming languages, see @cite{Using the GNU Compiler Collection (GCC)}.
+There are also @code{-fstrub=@emph{choice}} command-line options to
+control default settings.  For usage and more details on the
+command-line options, on the @code{strub} attribute, and their use with
+other programming languages, see @cite{Using the GNU Compiler Collection (GCC)}.
 
 Note that Ada secondary stacks are not scrubbed.  The restriction
 @code{No_Secondary_Stack} avoids their use, and thus their accidental




[Ada] Fix internal error on untagged record type with equality operator

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
After the binding interpretation issued under AI12-0413, a user-defined
primitive equality operator of an untagged record type hides the predefined
equality operator in an instantiation, but this does not apply if the
instantiation appears in the same declarative region as the type and
before the declaration of this user-defined operator.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_res.adb (Resolve_Equality_Op): Make sure that the user-defined
operator of an untagged record type is declared ahead of an instance
before using it to resolve the equality operator in the instance.

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -8967,7 +8967,14 @@ package body Sem_Res is
then
   Eq := Get_User_Defined_Equality (T);
 
-  if Present (Eq) then
+  --  We need to make sure that the instance is not within the
+  --  same declarative region as the type, or else that it lies
+  --  after the declaration of the user-defined "=" operator.
+
+  if Present (Eq)
+and then (not In_Same_Extended_Unit (Eq, N)
+   or else Earlier_In_Extended_Unit (Eq, N))
+  then
  if Is_Abstract_Subprogram (Eq) then
 Nondispatching_Call_To_Abstract_Operation (N, Eq);
  else




[Ada] Fix if expression returning slice

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
The compiler incorrectly assumed the prefix for a slice returned in one
branch of an if expression has its bounds known at compile time and would
crash when this is not true.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch4.adb (Expand_N_If_Expression): Test for compile time
known bounds when handling slices.

diff --git a/gcc/ada/exp_ch4.adb b/gcc/ada/exp_ch4.adb
--- a/gcc/ada/exp_ch4.adb
+++ b/gcc/ada/exp_ch4.adb
@@ -6174,7 +6174,13 @@ package body Exp_Ch4 is
Slice_Bnd : Node_Id) return Node_Id is
 
 begin
-   if Nkind (Elsex) = N_Slice then
+   --  We need to use the special processing for slices only if
+   --  they do not have compile-time known bounds; if they do, they
+   --  can be treated like any other expressions.
+
+   if Nkind (Elsex) = N_Slice
+ and then not Compile_Time_Known_Bounds (Etype (Elsex))
+   then
   if Compile_Time_Known_Value (Slice_Bnd)
 and then Expr_Value (Slice_Bnd) = Then_Bnd
   then
@@ -6230,7 +6236,11 @@ package body Exp_Ch4 is
  begin
 Get_First_Index_Bounds (Etype (Thenx), Then_Lo, Then_Hi);
 
-if Nkind (Elsex) = N_Slice then
+--  See the rationale in Build_New_Bound
+
+if Nkind (Elsex) = N_Slice
+  and then not Compile_Time_Known_Bounds (Etype (Elsex))
+then
Slice_Lo := Low_Bound (Discrete_Range (Elsex));
Slice_Hi := High_Bound (Discrete_Range (Elsex));
Get_First_Index_Bounds
@@ -6289,7 +6299,11 @@ package body Exp_Ch4 is
 
 Set_Suppress_Assignment_Checks (Last (Then_List));
 
-if Nkind (Elsex) = N_Slice then
+--  See the rationale in Build_New_Bound
+
+if Nkind (Elsex) = N_Slice
+  and then not Compile_Time_Known_Bounds (Etype (Elsex))
+then
Else_List := New_List (
  Make_Assignment_Statement (Loc,
Name   =>




[Ada] Fix proof of runtime unit System.Arith_64

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
After changes in provers and Why3, changes are needed to recover
automatic proof of System.Arith_64. This is the first part of it.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/s-aridou.adb (Lemma_Mult_Div, Lemma_Powers): New
lemmas.
(Prove_Sign_Quotient): New local lemma.
(Prove_Signs): Expand definition of Big_R and Big_Q in the
postcondition. Add intermediate assertions.
(Double_Divide): Call new lemma.
(Lemma_Div_Eq): Provide body for proving lemma.
(Lemma_Powers_Of_2, Lemma_Shift_Without_Drop,
Prove_Dividend_Scaling, Prove_Multiplication, Prove_Z_Low): Call
lemmas, add intermediate assertions.

diff --git a/gcc/ada/libgnat/s-aridou.adb b/gcc/ada/libgnat/s-aridou.adb
--- a/gcc/ada/libgnat/s-aridou.adb
+++ b/gcc/ada/libgnat/s-aridou.adb
@@ -438,6 +438,12 @@ is
  Ghost,
  Post => X * (Y + Z) = X * Y + X * Z;
 
+   procedure Lemma_Mult_Div (A, B : Big_Integer)
+   with
+ Ghost,
+ Pre  => B /= 0,
+ Post => A * B / B = A;
+
procedure Lemma_Mult_Non_Negative (X, Y : Big_Integer)
with
  Ghost,
@@ -469,6 +475,12 @@ is
  Post => not In_Double_Int_Range (Big_2xxDouble)
and then not In_Double_Int_Range (-Big_2xxDouble);
 
+   procedure Lemma_Powers (A : Big_Natural; B, C : Natural)
+   with
+ Ghost,
+ Pre  => B <= Natural'Last - C,
+ Post => A**B * A**C = A**(B + C);
+
procedure Lemma_Powers_Of_2 (M, N : Natural)
with
  Ghost,
@@ -606,7 +618,6 @@ is
is null;
procedure Lemma_Div_Ge (X, Y, Z : Big_Integer) is null;
procedure Lemma_Div_Lt (X, Y, Z : Big_Natural) is null;
-   procedure Lemma_Div_Eq (A, B, S, R : Big_Integer) is null;
procedure Lemma_Double_Big_2xxSingle is null;
procedure Lemma_Double_Shift (X : Double_Uns; S, S1 : Double_Uns) is null;
procedure Lemma_Double_Shift (X : Single_Uns; S, S1 : Natural) is null;
@@ -629,6 +640,7 @@ is
procedure Lemma_Mult_Non_Positive (X, Y : Big_Integer) is null;
procedure Lemma_Neg_Rem (X, Y : Big_Integer) is null;
procedure Lemma_Not_In_Range_Big2xx64 is null;
+   procedure Lemma_Powers (A : Big_Natural; B, C : Natural) is null;
procedure Lemma_Rem_Commutation (X, Y : Double_Uns) is null;
procedure Lemma_Rem_Is_Ident (X, Y : Big_Integer) is null;
procedure Lemma_Rem_Sign (X, Y : Big_Integer) is null;
@@ -864,6 +876,23 @@ is
 Post => abs Big_Q = Big (Qu);
   --  Proves correctness of the rounding of the unsigned quotient
 
+  procedure Prove_Sign_Quotient
+  with
+Ghost,
+Pre  => Mult /= 0
+  and then Quot = Big (X) / (Big (Y) * Big (Z))
+  and then Big_R = Big (X) rem (Big (Y) * Big (Z))
+  and then Big_Q =
+(if Round then
+   Round_Quotient (Big (X), Big (Y) * Big (Z), Quot, Big_R)
+ else Quot),
+Post =>
+  (if X >= 0 then
+ (if Den_Pos then Big_Q >= 0 else Big_Q <= 0)
+   else
+ (if Den_Pos then Big_Q <= 0 else Big_Q >= 0));
+  --  Proves the correct sign of the signed quotient Big_Q
+
   procedure Prove_Signs
   with
 Ghost,
@@ -880,7 +909,13 @@ is
   and then
 Q = (if (X >= 0) = Den_Pos then To_Int (Qu) else To_Int (-Qu))
   and then not (X = Double_Int'First and then Big (Y) * Big (Z) = -1),
-Post => Big (R) = Big_R and then Big (Q) = Big_Q;
+Post => Big (R) = Big (X) rem (Big (Y) * Big (Z))
+  and then
+(if Round then
+   Big (Q) = Round_Quotient (Big (X), Big (Y) * Big (Z),
+ Big (X) / (Big (Y) * Big (Z)),
+ Big (R))
+ else Big (Q) = Big (X) / (Big (Y) * Big (Z)));
   --  Proves final signs match the intended result after the unsigned
   --  division is done.
 
@@ -891,6 +926,7 @@ is
   procedure Prove_Overflow_Case is null;
   procedure Prove_Quotient_Zero is null;
   procedure Prove_Round_To_One is null;
+  procedure Prove_Sign_Quotient is null;
 
   -
   -- Prove_Rounding_Case --
@@ -1056,6 +1092,8 @@ is
 pragma Assert (Big (Double_Uns (Hi (T2))) >= 1);
 pragma Assert (Big (Double_Uns (Lo (T2))) >= 0);
 pragma Assert (Big (Double_Uns (Lo (T1))) >= 0);
+pragma Assert (Big_2xxSingle * Big (Double_Uns (Lo (T2)))
+ + Big (Double_Uns (Lo (T1))) >= 0);
 pragma Assert (Mult >= Big_2xxDouble * Big (Double_Uns (Hi (T2))));
 pragma Assert (Mult >= Big_2xxDouble);
 if Hi (T2) > 1 then
@@ -1064,6 +1102,10 @@ is
  Mult > Big_2xxDouble);
 elsif Lo (T2) > 0 then
pragma Assert (Big (Double_Uns (Lo (T2))) > 0);
+   pragma Assert (Big_2xxSingle > 0);
+   pragma Assert 

[Ada] Fix CodePeer warnings in GNAT sources

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
This patch fixes various redundant constructs or uninitialized variables
identified by CodePeer in the GNAT frontend and runtime sources.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch6.adb (Expand_N_Extended_Return_Statement): Add default
initialization for Stmts.
* sem_ch12.adb (Analyze_Associations): Add default
initialization for Match.
* libgnat/a-ztenau.adb (Scan_Enum_Lit): Remove duplicated
boolean test.
* libgnat/g-spipat.adb (XMatch): Combine duplicated cases.

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -5175,7 +5175,7 @@ package body Exp_Ch6 is
   Exp : Node_Id;
   HSS : Node_Id;
   Result  : Node_Id;
-  Stmts   : List_Id;
+  Stmts   : List_Id := No_List;
 
   Return_Stmt : Node_Id := Empty;
   --  Force initialization to facilitate static analysis


diff --git a/gcc/ada/libgnat/a-ztenau.adb b/gcc/ada/libgnat/a-ztenau.adb
--- a/gcc/ada/libgnat/a-ztenau.adb
+++ b/gcc/ada/libgnat/a-ztenau.adb
@@ -303,8 +303,6 @@ package body Ada.Wide_Wide_Text_IO.Enumeration_Aux is
 
 exit when
   Is_Character (WC)
-and then
-  not Is_Letter (To_Character (WC))
 and then
   not Is_Letter (To_Character (WC))
 and then


diff --git a/gcc/ada/libgnat/g-spipat.adb b/gcc/ada/libgnat/g-spipat.adb
--- a/gcc/ada/libgnat/g-spipat.adb
+++ b/gcc/ada/libgnat/g-spipat.adb
@@ -3961,7 +3961,7 @@ package body GNAT.Spitbol.Patterns is
 
  --  Any (one character case)
 
- when PC_Any_CH =>
+ when PC_Any_CH | PC_Char =>
 if Cursor < Length
   and then Subject (Cursor + 1) = Node.Char
 then
@@ -4103,9 +4103,10 @@ package body GNAT.Spitbol.Patterns is
 Pop_Region;
 goto Succeed;
 
- --  Assign on match. This node sets up for the eventual assignment
+ --  Write/assign on match. This node sets up for the eventual write
+ --  or assignment.
 
- when PC_Assign_OnM =>
+ when PC_Assign_OnM | PC_Write_OnM =>
 Stack (Stack_Base - 1).Node := Node;
 Push (CP_Assign'Access);
 Pop_Region;
@@ -4144,9 +4145,9 @@ package body GNAT.Spitbol.Patterns is
 Push (Node);
 goto Succeed;
 
- --  Break (one character case)
+ --  Break & BreakX (one character case)
 
- when PC_Break_CH =>
+ when PC_Break_CH | PC_BreakX_CH =>
 while Cursor < Length loop
if Subject (Cursor + 1) = Node.Char then
   goto Succeed;
@@ -4157,9 +4158,9 @@ package body GNAT.Spitbol.Patterns is
 
 goto Fail;
 
- --  Break (character set case)
+ --  Break & BreakX (character set case)
 
- when PC_Break_CS =>
+ when PC_Break_CS | PC_BreakX_CS =>
 while Cursor < Length loop
if Is_In (Subject (Cursor + 1), Node.CS) then
   goto Succeed;
@@ -4170,9 +4171,9 @@ package body GNAT.Spitbol.Patterns is
 
 goto Fail;
 
- --  Break (string function case)
+ --  Break & BreakX (string function case)
 
- when PC_Break_VF => declare
+ when PC_Break_VF | PC_BreakX_VF => declare
 U : constant VString := Node.VF.all;
 S : Big_String_Access;
 L : Natural;
@@ -4191,77 +4192,9 @@ package body GNAT.Spitbol.Patterns is
 goto Fail;
  end;
 
- --  Break (string pointer case)
+ --  Break & BreakX (string pointer case)
 
- when PC_Break_VP => declare
-U : constant VString := Node.VP.all;
-S : Big_String_Access;
-L : Natural;
-
- begin
-Get_String (U, S, L);
-
-while Cursor < Length loop
-   if Is_In (Subject (Cursor + 1), S (1 .. L)) then
-  goto Succeed;
-   else
-  Cursor := Cursor + 1;
-   end if;
-end loop;
-
-goto Fail;
- end;
-
- --  BreakX (one character case)
-
- when PC_BreakX_CH =>
-while Cursor < Length loop
-   if Subject (Cursor + 1) = Node.Char then
-  goto Succeed;
-   else
-  Cursor := Cursor + 1;
-   end if;
-end loop;
-
-goto Fail;
-
- --  BreakX (character set case)
-
- when PC_BreakX_CS =>
-while Cursor < Length loop
-   if Is_In (Subject (Cursor + 1), Node.CS) then
-  goto Succeed;
-   else
-  Cursor := Cursor + 1;
-   end if;
-end loop;
-
-goto Fail;
-
- --  BreakX (string function case)
-
- when PC_BreakX_VF => 

[Ada] Refine type for checking number of pragma arguments

2022-07-13 Thread Pierre-Marie de Rodat via Gcc-patches
Code cleanup.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* par-prag.adb (Check_Arg_Count): Change parameter type from Int
to Nat, because this parameter is compared to Arg_Count variable
which is of type Nat. Also, it wouldn't make sense to check for
negative number of pragma arguments.

diff --git a/gcc/ada/par-prag.adb b/gcc/ada/par-prag.adb
--- a/gcc/ada/par-prag.adb
+++ b/gcc/ada/par-prag.adb
@@ -65,7 +65,7 @@ function Prag (Pragma_Node : Node_Id; Semi : Source_Ptr) return Node_Id is
--  the routine for the argument one past the last present argument, but
--  that is the only case in which a non-present argument can be referenced.
 
-   procedure Check_Arg_Count (Required : Int);
+   procedure Check_Arg_Count (Required : Nat);
--  Check argument count for pragma = Required. If not give error and raise
--  Error_Resync.
 
@@ -155,7 +155,7 @@ function Prag (Pragma_Node : Node_Id; Semi : Source_Ptr) return Node_Id is
-- Check_Arg_Count --
-
 
-   procedure Check_Arg_Count (Required : Int) is
+   procedure Check_Arg_Count (Required : Nat) is
begin
   if Arg_Count /= Required then
  Error_Msg_N ("wrong number of arguments for pragma%", Pragma_Node);




Re: PING^2: [PATCH] Add --enable-first-stage-cross configure option

2022-07-13 Thread Serge Belyshev via Gcc-patches
Jeff Law via Gcc-patches  writes:

> I'm not really sure we need a patch for this.  Isn't it sufficient to
> "make all-gcc && make all-target-libgcc"?  Folks have been doing that
> for decades.
>
> Jeff

Oh, I did not know that "make install-gcc install-target-libgcc" works in this 
case.

So in the end, patch simplifies quirky first stage cross tool build procedure 
from

  configure --target=$target  --enable-languages=c --disable-threads 
--disable-shared # plus --with-newlib when --with-sysroot=
  make all-gcc all-target-libgcc
  make install-gcc install-target-libgcc

to

  configure --target=$target --enable-first-stage-cross
  make
  make install

Either way is more or less okay for me, it just needs to be documented.


[PATCH 7/12 V2] arm: Emit build attributes for PACBTI target feature

2022-07-13 Thread Andrea Corallo via Gcc-patches
Richard Earnshaw  writes:

> On 28/04/2022 10:45, Andrea Corallo via Gcc-patches wrote:
>> This patch emits assembler directives for PACBTI build attributes as
>> defined by the
>> ABI.
>> 
>> gcc/ChangeLog:
>>  * config/arm/arm.c (arm_file_start): Emit EABI attributes for
>>  Tag_PAC_extension, Tag_BTI_extension, TAG_BTI_use, TAG_PACRET_use.
>
> This bit is OK.
>
>> gcc/testsuite/ChangeLog:
>>  * gcc.target/arm/acle/pacbti-m-predef-1.c: New test.
>>  * gcc.target/arm/acle/pacbti-m-predef-3: Likewise.
>>  * gcc.target/arm/acle/pacbti-m-predef-6.c: Likewise.
>>  * gcc.target/arm/acle/pacbti-m-predef-7.c: Likewise.
>
> These tests contain directives like:
>
> +/* { dg-additional-options " -mbranch-protection=pac-ret+bti
> --save-temps" } */
>
> But they don't check that the architecture permits this (it has to be
> armv8-m.main or later).

Hi Richard & all,

please find attached the updated patch.

BR

 Andrea

>From 7d66e00bf62c8624d03e0173be8b8631d7435c38 Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Mon, 6 Dec 2021 11:42:24 +0100
Subject: [PATCH] [PATCH 7/12] arm: Emit build attributes for PACBTI target
 feature

This patch emits assembler directives for PACBTI build attributes as
defined by the
ABI.



gcc/ChangeLog:

* config/arm/arm.cc (arm_file_start): Emit EABI attributes for
Tag_PAC_extension, Tag_BTI_extension, TAG_BTI_use, TAG_PACRET_use.

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-1.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-3.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-6.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-7.c: Likewise.

Co-Authored-By: Tejas Belagod  
---
 gcc/config/arm/arm.cc  | 18 ++
 .../gcc.target/arm/acle/pacbti-m-predef-1.c| 16 
 .../gcc.target/arm/acle/pacbti-m-predef-3.c| 16 
 .../gcc.target/arm/acle/pacbti-m-predef-6.c| 15 +++
 .../gcc.target/arm/acle/pacbti-m-predef-7.c| 16 
 5 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-6.c
 create mode 100644 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-7.c

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 0068817b0f2..ceec14f84b6 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -28349,6 +28349,8 @@ static void
 arm_file_start (void)
 {
   int val;
+  bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
+  bool bti = (aarch_enable_bti == 1);
 
   arm_print_asm_arch_directives
 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
@@ -28419,6 +28421,22 @@ arm_file_start (void)
arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
 (int) arm_fp16_format);
 
+  if (TARGET_HAVE_PACBTI)
+   {
+ arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
+ arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
+   }
+  else if (pac || bti)
+   {
+ arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
+ arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
+   }
+
+  if (bti)
+arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
+  if (pac)
+   arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
+
   if (arm_lang_output_object_attributes_hook)
arm_lang_output_object_attributes_hook();
 }
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c
new file mode 100644
index 000..d3ef58fcf9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.1-m.main 
-mbranch-protection=pac-ret+bti --save-temps" } */
+
+#if !defined (__ARM_FEATURE_BTI_DEFAULT)
+#error "Feature test macro __ARM_FEATURE_BTI_DEFAULT should be defined."
+#endif
+
+#if !defined (__ARM_FEATURE_PAC_DEFAULT)
+#error "Feature test macro __ARM_FEATURE_PAC_DEFAULT should be defined."
+#endif
+
+/* { dg-final { scan-assembler-not "\.arch_extension pacbti" } } */
+/* { dg-final { scan-assembler "\.eabi_attribute 50, 1" } } */
+/* { dg-final { scan-assembler "\.eabi_attribute 52, 1" } } */
+/* { dg-final { scan-assembler "\.eabi_attribute 74, 1" } } */
+/* { dg-final { scan-assembler "\.eabi_attribute 76, 1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c
new file mode 100644
index 000..c82c3a4b638
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c

[PING^2] nvptx: Allow '--with-arch' to override the default '-misa' (was: nvptx multilib setup)

2022-07-13 Thread Thomas Schwinge
Hi Tom!

Ping.


Grüße
 Thomas


On 2022-07-05T16:59:23+0200, I wrote:
> Hi Tom!
>
> Ping.
>
>
> Grüße
>  Thomas
>
>
> On 2022-06-15T23:18:10+0200, I wrote:
>> Hi Tom!
>>
>> On 2022-05-13T16:20:14+0200, I wrote:
>>> On 2022-02-04T13:09:29+0100, Tom de Vries via Gcc  wrote:
 On 2/4/22 08:21, Thomas Schwinge wrote:
> On 2022-02-03T13:35:55+, "vries at gcc dot gnu.org via Gcc-bugs" 
>  wrote:
>> I've tested this using (recommended) driver 470.94 on boards:
>>>
>> while iterating over dimensions { -mptx=3.1 , -mptx=6.3 } x { 
>> GOMP_NVPTX_JIT=-O0,  }.
>
> Do you use separate (nvptx-none offload target only?) builds for
> different '-mptx' variants (likewise: '-misa'), or have you hacked up the
> multilib configuration?

 Neither, I'm using --target_board=unix/foffload= for that.
>>>
>>> ACK, I see.  So these flags then only affect GCC/nvptx code generation
>>> for the actual user code (here: GCC libgomp test cases), but for the
>>> GCC/nvptx target libraries (such as: libc, libm, libgfortran, libgomp --
>>> the latter especially relevant for OpenMP), it uses PTX code from one of
>>> the two "pre-compiled" GCC/nvptx multilibs: default or '-mptx=3.1'.
>>>
>>> Meaning, one can't just use such a flag for "completely building code"
>>> for a specific configuration.  Random example,
>>> '-foffload-options=nvptx-none=-march=sm_75': as GCC/nvptx target
>>> libraries aren't being built for '-march=sm_75' multilib,
>>> '-foffload-options=nvptx-none=-march=sm_75' uses the default multilib,
>>> which isn't '-march=sm_75'.
>>>
>>>
   ('gcc/config/nvptx/t-nvptx:MULTILIB_OPTIONS'
> etc., I suppose?)  Should we add a few representative configurations to
> be built by default?  And/or, should we have a way to 'configure' per
> user needs (I suppose: '--with-multilib-list=[...]', as supported for a
> few other targets?)?  (I see there's also a new
> '--with-multilib-generator=[...]', haven't looked in detail.)  No matter
> which way: again, combinatorial explosion is a problem, of course...

 As far as I know, the gcc build doesn't finish when switching default to
 higher than sm_35, so there's little point to go to a multilib setup at
 this point.  But once we fix that, we could reconsider, otherwise,
 things are likely to regress again.
>>>
>>> As far as I remember, several issues have been fixed.  Still waiting for
>>> Roger's "middle-end: Support ABIs that pass FP values as wider integers"
>>> or something similar, but that PR104489 issue is being worked around by
>>> "Limit HFmode support to mexperimental", if I got that right.
>>>
>>> Now I'm not suggesting we should now enable all or any random GCC/nvptx
>>> multilibs, to get all these variants of GCC/nvptx target libraries built;
>>> especially also given that GCC/nvptx code generation currently doesn't
>>> make too much use of the new capabilities.
>>>
>>> However, we do have a specific request that a customer would like to be
>>> able to change at GCC 'configure' time the GCC/nvptx default multilib
>>> (including that being used for building corresponding GCC/nvptx target
>>> libraries).
>>>
>>> Per 'gcc/doc/install.texi', I do see that some GCC targets allow for
>>> GCC 'configure'-time '--with-multilib-list=[...]', or
>>> '--with-multilib-generator=[...]', and I suppose we could be doing
>>> something similar?  But before starting implementing, I'd like your
>>> input, as you'll be the one to approve in the end.  And/or, maybe you've
>>> already made up your own ideas about that?
>>
>> So, instead of "random GCC/nvptx multilib configuration" (last
>> paragraph), I've come up with a way to implement our customer's request
>> (second last paragraph): 'configure' GCC/nvptx '--with-arch=sm_70'.
>>
>> I think I've implemented this in a way so that "random GCC/nvptx multilib
>> configuration" may eventually be implemented on top of that.  For easy
>> review/testing I've split my changes into three commits, see attached
>> "nvptx: Make default '-misa=sm_30' explicit",
>> "nvptx: Introduce dummy multilib option for default '-misa=sm_30'",
>> "nvptx: Allow '--with-arch' to override the default '-misa'".
>>
>> To the best of my knowledge, the first two patches do not change any
>> user-visible behavior (I generally 'diff'ed target libraries, and
>> compared a good number of 'gcc -print-multi-directory [flags]'), and
>> likewise with the third patch, given implicit (default) or explicit
>> '--with-arch=sm_30', and that with '--with-arch=sm_70', for example, the
>> '-misa=sm_70' multilib variants are used for implicit (default) or
>> explicit '-misa=sm_70' or higher, and the '-misa=sm_30' multilib variants
>> are used for explicit lower '-misa'.
>>
>> What do you think, OK to push to master branch?
>>
>>
>> Grüße
>>  Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; 

[PING^3] nvptx: forward '-v' command-line option to assembler, linker

2022-07-13 Thread Thomas Schwinge
Hi Tom!

Ping.


Grüße
 Thomas


On 2022-07-05T16:58:54+0200, I wrote:
> Hi Tom!
>
> Ping.
>
>
> Grüße
>  Thomas
>
>
> On 2022-06-07T17:41:16+0200, I wrote:
>> Hi!
>>
>> On 2022-05-30T09:06:21+0200, Tobias Burnus  wrote:
>>> On 29.05.22 22:49, Thomas Schwinge wrote:
 Not sure if that's what you had in mind, but what do you think about the
 attached "nvptx: forward '-v' command-line option to assembler, linker"?
 OK to push to GCC master branch (after merging
 
 "Put '-v' verbose output onto stderr instead of stdout")?
>>>
>>> I was mainly thinking of some way to have it available — which
>>> '-foffload-options=-Wa,-v' already permits on the GCC side. (Once the
>>> nvptx-tools patch actually makes use of the '-v'.)
>>
>> (Merged a week ago.)
>>
>>> If I understand your patch correctly, this patch now causes 'gcc -v' to
>>> imply 'gcc -v -Wa,-v'. I think that's okay, since 'gcc -v' already
>>> outputs a lot of lines and those lines can be helpful to understand what
>>> happens and what not.
>>
>> ACK.
>>
>>> Tom, your thoughts on this?
>>
>> Ping.
>>
>>
>> Grüße
>>  Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 17c35607d4927299b0c4bd19dd6fd205c85c4a4b Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Sun, 29 May 2022 22:31:43 +0200
Subject: [PATCH] nvptx: forward '-v' command-line option to assembler, linker

For example, for offloading compilation with '-save-temps -v', before vs. after
word-diff then looks like:

[...]
 [...]/build-gcc-offload-nvptx-none/gcc/as {+-v -v+} -o ./a.xnvptx-none.mkoffload.o ./a.xnvptx-none.mkoffload.s
{+Verifying sm_30 code with sm_35 code generation.+}
{+ ptxas -c -o /dev/null ./a.xnvptx-none.mkoffload.o --gpu-name sm_35 -O0+}
[...]
 [...]/build-gcc-offload-nvptx-none/gcc/collect2 {+-v -v+} -o ./a.xnvptx-none.mkoffload [...] @./a.xnvptx-none.mkoffload.args.1 -lgomp -lgcc -lc -lgcc
{+collect2 version 12.0.1 20220428 (experimental)+}
{+[...]/build-gcc-offload-nvptx-none/gcc/collect-ld -v -v -o ./a.xnvptx-none.mkoffload [...] ./a.xnvptx-none.mkoffload.o -lgomp -lgcc -lc -lgcc+}
{+Linking ./a.xnvptx-none.mkoffload.o as 0+}
{+trying lib libc.a+}
{+trying lib libgcc.a+}
{+trying lib libgomp.a+}
{+Resolving abort+}
{+Resolving acc_on_device+}
{+Linking libgomp.a::oacc-init.o/ as 1+}
{+Linking libc.a::lib_a-abort.o/   as 2+}
[...]

(This depends on 
"Put '-v' verbose output onto stderr instead of stdout".)

	gcc/
	* config/nvptx/nvptx.h (ASM_SPEC, LINK_SPEC): Define.
---
 gcc/config/nvptx/nvptx.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index ed72c253191..b184f1d0150 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -27,6 +27,13 @@
 
 /* Run-time Target.  */
 
+/* Assembler supports '-v' option; handle similar to
+   '../../gcc.cc:asm_options', 'HAVE_GNU_AS'.  */
+#define ASM_SPEC "%{v}"
+
+/* Linker supports '-v' option.  */
+#define LINK_SPEC "%{v}"
+
 #define STARTFILE_SPEC "%{mmainkernel:crt0.o}"
 
 #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins ()
-- 
2.25.1



Re: [PATCH v2 2/2] aarch64: Lower vcombine to GIMPLE

2022-07-13 Thread Richard Sandiford via Gcc-patches
Andrew Carlotti  writes:
> This lowers vcombine intrinsics to a GIMPLE vector constructor, which enables
> better optimisation during GIMPLE passes.
>
> gcc/
>
>   * config/aarch64/aarch64-builtins.c
>   (aarch64_general_gimple_fold_builtin): Add combine.
>
> gcc/testsuite/
>
>   * gcc.target/aarch64/advsimd-intrinsics/combine.c:
>   New test.
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> 5753988a9964967c27a03aca5fddb9025fd8ed6e..a25756cfed5fab3a98ebf3e2ee29a5e117cbd2aa
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -2857,6 +2857,28 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> fcode, gcall *stmt,
>   gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
>   break;
>  
> + BUILTIN_VDC (BINOP, combine, 0, AUTO_FP)
> + BUILTIN_VD_I (BINOPU, combine, 0, NONE)
> + BUILTIN_VDC_P (BINOPP, combine, 0, NONE)
> + {
> +   tree first_part, second_part;
> +   if (BYTES_BIG_ENDIAN)
> + {
> +   second_part = args[0];
> +   first_part = args[1];
> + }
> +   else
> + {
> +   first_part = args[0];
> +   second_part = args[1];
> + }
> +   tree ret_type = TREE_TYPE (gimple_call_lhs (stmt));

Just repeating what we discussed off-list for the record: this needs
to be gimple_call_return_type.  LGTM with that change.

Thanks,
Richard

> +   tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part,
> + NULL_TREE, second_part);
> +   new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ctor);
> + }
> + break;
> +
>   /*lower store and load neon builtins to gimple.  */
>   BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
>   BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c 
> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c
> new file mode 100644
> index 
> ..d08faf7a4a160a1e83428ed9b270731bbf7b8c8a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
> +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
> +
> +#include <arm_neon.h>
> +
> +/*
> +** foo:
> +**   umov	w0, v1\.s\[1\]
> +**   ret
> +*/
> +
> +int32_t foo (int32x2_t a, int32x2_t b)
> +{
> +  int32x4_t c = vcombine_s32(a, b);
> +  return vgetq_lane_s32(c, 3);
> +}
> +


[PATCH] lto-plugin: use -pthread only for detected targets

2022-07-13 Thread Martin Liška
Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

Use -pthread only if we are going to use pthread functionality.

PR bootstrap/106156

lto-plugin/ChangeLog:

* configure: Regenerate.
* configure.ac: Use -pthread only when use_locking == true.
* Makefile.in: Rely on ac_lto_plugin_ldflags.
---
 lto-plugin/Makefile.in  | 2 +-
 lto-plugin/configure| 6 --
 lto-plugin/configure.ac | 2 ++
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lto-plugin/Makefile.in b/lto-plugin/Makefile.in
index 9453bc7d607..6b161c01683 100644
--- a/lto-plugin/Makefile.in
+++ b/lto-plugin/Makefile.in
@@ -345,7 +345,7 @@ libexecsubdir := 
$(libexecdir)/gcc/$(real_target_noncanonical)/$(gcc_version)$(a
 AM_CPPFLAGS = -I$(top_srcdir)/../include $(DEFS)
 AM_CFLAGS = @ac_lto_plugin_warn_cflags@ $(CET_HOST_FLAGS) 
-DBASE_VERSION='"$(gcc_version)"'
 # The plug-in depends on pthreads.
-AM_LDFLAGS = -pthread @ac_lto_plugin_ldflags@
+AM_LDFLAGS = @ac_lto_plugin_ldflags@
 AM_LIBTOOLFLAGS = --tag=disable-static
 libexecsub_LTLIBRARIES = liblto_plugin.la
 in_gcc_libs = $(foreach lib, $(libexecsub_LTLIBRARIES), 
$(gcc_build_dir)/$(lib))
diff --git a/lto-plugin/configure b/lto-plugin/configure
index 870e49b2e62..0967ba1c798 100755
--- a/lto-plugin/configure
+++ b/lto-plugin/configure
@@ -6023,6 +6023,8 @@ case $target in
 esac
 
 if test x$use_locking = xyes; then
+  LDFLAGS="$LDFLAGS -pthread"
+
   ac_fn_c_check_header_mongrel "$LINENO" "pthread.h" "ac_cv_header_pthread_h" 
"$ac_includes_default"
 if test "x$ac_cv_header_pthread_h" = xyes; then :
 
@@ -12104,7 +12106,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12107 "configure"
+#line 12109 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12210,7 +12212,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12213 "configure"
+#line 12215 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/lto-plugin/configure.ac b/lto-plugin/configure.ac
index 18eb4f60b0a..a350aa56a8b 100644
--- a/lto-plugin/configure.ac
+++ b/lto-plugin/configure.ac
@@ -100,6 +100,8 @@ case $target in
 esac
 
 if test x$use_locking = xyes; then
+  LDFLAGS="$LDFLAGS -pthread"
+
   AC_CHECK_HEADER(pthread.h,
 [AC_DEFINE(HAVE_PTHREAD_LOCKING, 1, [Define if the system provides pthread 
locking mechanism.])])
 fi
-- 
2.37.0



[PATCH] arm: Replace arm_builtin_vectorized_function [PR106253]

2022-07-13 Thread Richard Sandiford via Gcc-patches
This patch extends the fix for PR106253 to AArch32.  As with AArch64,
we were using ACLE intrinsics to vectorise scalar built-ins, even
though the two sometimes have different ECF_* flags.  (That in turn
is because the ACLE intrinsics should follow the instruction semantics
as closely as possible, whereas the scalar built-ins follow language
specs.)

The patch also removes the copysignf built-in, which only existed
for this purpose and wasn't a “real” arm_neon.h built-in.

Doing this also has the side-effect of enabling vectorisation of
rint and roundeven.  Logically that should be a separate patch,
but making it one would have meant adding a new int iterator
for the original set of instructions and then removing it again
when including new functions.

I've restricted the bswap tests to little-endian because we end
up with excessive spilling on big-endian.  E.g.:

sub     sp, sp, #8
vstr    d1, [sp]
vldr    d16, [sp]
vrev16.8        d16, d16
vstr    d16, [sp]
vldr    d0, [sp]
add     sp, sp, #8
@ sp needed
bx      lr

Similarly, the copysign tests require little-endian because on
big-endian we unnecessarily load the constant from the constant pool:

vldr.32 s15, .L3
vdup.32 d0, d7[1]
vbsl    d0, d2, d1
bx  lr
.L3:
.word   -2147483648

Tested on arm-linux-gnueabihf and armeb-eabi.  OK to install?

Richard


gcc/
* config/arm/arm-builtins.cc (arm_builtin_vectorized_function):
Delete.
* config/arm/arm-protos.h (arm_builtin_vectorized_function): Delete.
* config/arm/arm.cc (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION):
Delete.
* config/arm/arm_neon_builtins.def (copysignf): Delete.
* config/arm/iterators.md (nvrint_pattern): New attribute.
* config/arm/neon.md (2):
New pattern.
(l2):
Likewise.
(neon_copysignf): Rename to...
(copysign3): ...this.

gcc/testsuite/
* gcc.target/arm/vect_unary_1.c: New test.
* gcc.target/arm/vect_binary_1.c: Likewise.
---
 gcc/config/arm/arm-builtins.cc   | 123 --
 gcc/config/arm/arm-protos.h  |   1 -
 gcc/config/arm/arm.cc|   4 -
 gcc/config/arm/arm_neon_builtins.def |   1 -
 gcc/config/arm/iterators.md  |   7 +
 gcc/config/arm/neon.md   |  17 +-
 gcc/testsuite/gcc.target/arm/vect_binary_1.c |  50 +
 gcc/testsuite/gcc.target/arm/vect_unary_1.c  | 224 +++
 8 files changed, 297 insertions(+), 130 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/vect_binary_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/vect_unary_1.c

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index d917137e5ee..8f8155c4413 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -4026,129 +4026,6 @@ arm_expand_builtin (tree exp,
   return NULL_RTX;
 }
 
-tree
-arm_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in)
-{
-  machine_mode in_mode, out_mode;
-  int in_n, out_n;
-  bool out_unsigned_p = TYPE_UNSIGNED (type_out);
-
-  /* Can't provide any vectorized builtins when we can't use NEON.  */
-  if (!TARGET_NEON)
-return NULL_TREE;
-
-  if (TREE_CODE (type_out) != VECTOR_TYPE
-  || TREE_CODE (type_in) != VECTOR_TYPE)
-return NULL_TREE;
-
-  out_mode = TYPE_MODE (TREE_TYPE (type_out));
-  out_n = TYPE_VECTOR_SUBPARTS (type_out);
-  in_mode = TYPE_MODE (TREE_TYPE (type_in));
-  in_n = TYPE_VECTOR_SUBPARTS (type_in);
-
-/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
-   decl of the vectorized builtin for the appropriate vector mode.
-   NULL_TREE is returned if no such builtin is available.  */
-#undef ARM_CHECK_BUILTIN_MODE
-#define ARM_CHECK_BUILTIN_MODE(C)\
-  (TARGET_VFP5   \
-   && flag_unsafe_math_optimizations \
-   && ARM_CHECK_BUILTIN_MODE_1 (C))
-
-#undef ARM_CHECK_BUILTIN_MODE_1
-#define ARM_CHECK_BUILTIN_MODE_1(C) \
-  (out_mode == SFmode && out_n == C \
-   && in_mode == SFmode && in_n == C)
-
-#undef ARM_FIND_VRINT_VARIANT
-#define ARM_FIND_VRINT_VARIANT(N) \
-  (ARM_CHECK_BUILTIN_MODE (2) \
-? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
-: (ARM_CHECK_BUILTIN_MODE (4) \
-  ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
-  : NULL_TREE))
-
-  switch (fn)
-{
-CASE_CFN_FLOOR:
-  return ARM_FIND_VRINT_VARIANT (vrintm);
-CASE_CFN_CEIL:
-  return ARM_FIND_VRINT_VARIANT (vrintp);
-CASE_CFN_TRUNC:
-  return ARM_FIND_VRINT_VARIANT (vrintz);
-CASE_CFN_ROUND:
-  return ARM_FIND_VRINT_VARIANT (vrinta);
-#undef ARM_CHECK_BUILTIN_MODE_1
-#define ARM_CHECK_BUILTIN_MODE_1(C) \
-  (out_mode == SImode && out_n == C \
-   && in_mode == SFmode && in_n == C)
-
-#define ARM_FIND_VCVT_VARIANT(N) \
-  (ARM_CHECK_BUILTIN_MODE (2) \
-   ? 

Re: [PATCH v2 1/2] aarch64: Don't return invalid GIMPLE assign statements

2022-07-13 Thread Richard Sandiford via Gcc-patches
Richard Biener via Gcc-patches  writes:
> On Tue, Jul 12, 2022 at 4:38 PM Andrew Carlotti  
> wrote:
>>
>> aarch64_general_gimple_fold_builtin doesn't check whether the LHS of a
>> function call is null before converting it to an assign statement. To avoid
>> returning an invalid GIMPLE statement in this case, we instead assign the
>> expression result to a new (unused) variable.
>>
>> This change only affects code that:
>> 1) Calls an intrinsic function that has no side effects;
>> 2) Does not use or store the value returned by the intrinsic;
>> 3) Uses parameters that prevent the front-end eliminating the call prior to
>> gimplification.
>>
>> The ICE is unlikely to have occurred in the wild, as it relies on the 
>> presence
>> of a redundant intrinsic call.
>
> Other targets usually simply refrain from folding intrinsic calls with no LHS.
> Another option is to just drop it on the floor if it does not have any
> side-effects which for the gimple_fold_builtin hook means folding it to
> a GIMPLE_NOP (gimple_build_nop ()).

Sorry, I just pushed the patch before seeing this.

I guess the problem with refraining from folding calls with no lhs
is that it has to be done on a per-function basis.  (E.g. stores
should still be folded.)  It then becomes something that we need
to remember for each individual call.  E.g. ix86_gimple_fold_builtin
seems to have three different pieces of code for handling null lhses,
even with its heavy use of gotos.

So a nice thing about the current patch is that it handles all this
in one place only.

Thanks,
Richard

>> gcc/ChangeLog:
>>
>>  * config/aarch64/aarch64-builtins.cc
>>  (aarch64_general_gimple_fold_builtin): Add fixup for invalid GIMPLE.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c: New test.
>>
>> ---
>>
>> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
>> b/gcc/config/aarch64/aarch64-builtins.cc
>> index 
>> e0a741ac663188713e21f457affa57217d074783..5753988a9964967c27a03aca5fddb9025fd8ed6e
>>  100644
>> --- a/gcc/config/aarch64/aarch64-builtins.cc
>> +++ b/gcc/config/aarch64/aarch64-builtins.cc
>> @@ -3022,6 +3022,16 @@ aarch64_general_gimple_fold_builtin (unsigned int 
>> fcode, gcall *stmt,
>>  default:
>>break;
>>  }
>> +
>> +  /* GIMPLE assign statements (unlike calls) require a non-null lhs. If we
>> + created an assign statement with a null lhs, then fix this by assigning
>> + to a new (and subsequently unused) variable. */
>> +  if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs 
>> (new_stmt))
>> +{
>> +  tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
>> +  gimple_assign_set_lhs (new_stmt, new_lhs);
>> +}
>> +
>>return new_stmt;
>>  }
>>
>> diff --git 
>> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c 
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
>> new file mode 100644
>> index 
>> ..345307456b175307f5cb22de5e59cfc6254f2737
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
>> @@ -0,0 +1,9 @@
>> +/* { dg-do compile { target { aarch64*-*-* } } } */
>> +
>> +#include <arm_neon.h>
>> +
>> +int8_t *bar();
>> +
>> +void foo() {
>> +  __builtin_aarch64_ld1v16qi(bar());
>> +}


Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-07-13 Thread Rui Ueyama via Gcc-patches
On Tue, Jul 12, 2022 at 9:31 PM Martin Liška  wrote:
>
> On 7/12/22 13:50, Rui Ueyama wrote:
> > I'm fine, though I don't think I have a right to sign off.
>
> I've just pushed that.
>
> @Rui: Can you please merge the mold counter-part?

I merged the mold counter-part as
https://github.com/rui314/mold/commit/b8a4df51ca574d411da2d51fbfdac9c9f10c76fb.


Re: [PATCH v2 1/2] aarch64: Don't return invalid GIMPLE assign statements

2022-07-13 Thread Richard Biener via Gcc-patches
On Tue, Jul 12, 2022 at 4:38 PM Andrew Carlotti  wrote:
>
> aarch64_general_gimple_fold_builtin doesn't check whether the LHS of a
> function call is null before converting it to an assign statement. To avoid
> returning an invalid GIMPLE statement in this case, we instead assign the
> expression result to a new (unused) variable.
>
> This change only affects code that:
> 1) Calls an intrinsic function that has no side effects;
> 2) Does not use or store the value returned by the intrinsic;
> 3) Uses parameters that prevent the front-end eliminating the call prior to
> gimplification.
>
> The ICE is unlikely to have occurred in the wild, as it relies on the presence
> of a redundant intrinsic call.

Other targets usually simply refrain from folding intrinsic calls with no LHS.
Another option is to just drop it on the floor if it does not have any
side-effects which for the gimple_fold_builtin hook means folding it to
a GIMPLE_NOP (gimple_build_nop ()).

> gcc/ChangeLog:
>
>  * config/aarch64/aarch64-builtins.cc
>  (aarch64_general_gimple_fold_builtin): Add fixup for invalid GIMPLE.
>
> gcc/testsuite/ChangeLog:
>
>  * gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c: New test.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> e0a741ac663188713e21f457affa57217d074783..5753988a9964967c27a03aca5fddb9025fd8ed6e
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -3022,6 +3022,16 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> fcode, gcall *stmt,
>  default:
>break;
>  }
> +
> +  /* GIMPLE assign statements (unlike calls) require a non-null lhs. If we
> + created an assign statement with a null lhs, then fix this by assigning
> + to a new (and subsequently unused) variable. */
> +  if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs 
> (new_stmt))
> +{
> +  tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
> +  gimple_assign_set_lhs (new_stmt, new_lhs);
> +}
> +
>return new_stmt;
>  }
>
> diff --git 
> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c 
> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
> new file mode 100644
> index 
> ..345307456b175307f5cb22de5e59cfc6254f2737
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ignored_return_1.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +
> +#include <arm_neon.h>
> +
> +int8_t *bar();
> +
> +void foo() {
> +  __builtin_aarch64_ld1v16qi(bar());
> +}


[PATCH] tree-optimization/106249 - unroll-and-jam and LC SSA update

2022-07-13 Thread Richard Biener via Gcc-patches
When I delayed the LC SSA update in unroll-and-jam this exposed
an issue that tree_transform_and_unroll_loop does a full function
LC SSA verification when new_loop is NULL (when it doesn't need to
do versioning).  That wasn't intended.  I also took the chance
to make the versioning in tree_transform_and_unroll_loop use
TODO_update_ssa_no_phi for the loop versioning SSA update which
I somehow missed earlier.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/106249
* tree-ssa-loop-manip.cc (tree_transform_and_unroll_loop):
Only verify LC SSA of the new_loop if we created it.
Use TODO_update_ssa_no_phi for the SSA update after versioning
the loop.

* gcc.dg/pr106249.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr106249.c | 16 
 gcc/tree-ssa-loop-manip.cc  |  5 +++--
 2 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr106249.c

diff --git a/gcc/testsuite/gcc.dg/pr106249.c b/gcc/testsuite/gcc.dg/pr106249.c
new file mode 100644
index 000..f97b07fb4da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr106249.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -floop-unroll-and-jam --param unroll-jam-min-percent=0" } */
+
+void
+foo (double *arr)
+{
+  int i, j;
+
+  for (i = 0; i < 4; ++i)
+for (j = 0; j < 4; ++j)
+  arr[j] = 0;
+
+  for (i = 1; i < 4; ++i)
+for (j = 0; j < 4; ++j)
+  arr[j] = 1.0 / (i + 1);
+}
diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc
index c531f1f12fd..410a8516370 100644
--- a/gcc/tree-ssa-loop-manip.cc
+++ b/gcc/tree-ssa-loop-manip.cc
@@ -1208,7 +1208,7 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
   profile_probability::guessed_always (),
   true);
   gcc_assert (new_loop != NULL);
-  update_ssa (TODO_update_ssa);
+  update_ssa (TODO_update_ssa_no_phi);
 
   /* Prepare the cfg and update the phi nodes.  Move the loop exit to the
 loop latch (and make its condition dummy, for the moment).  */
@@ -1428,7 +1428,8 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
   checking_verify_flow_info ();
   checking_verify_loop_structure ();
   checking_verify_loop_closed_ssa (true, loop);
-  checking_verify_loop_closed_ssa (true, new_loop);
+  if (new_loop)
+checking_verify_loop_closed_ssa (true, new_loop);
 }
 
 /* Wrapper over tree_transform_and_unroll_loop for case we do not
-- 
2.35.3


Re: [PATCH] [RFC]Support vectorization for Complex type.

2022-07-13 Thread Richard Biener via Gcc-patches
On Wed, Jul 13, 2022 at 6:47 AM Hongtao Liu  wrote:
>
> On Tue, Jul 12, 2022 at 10:12 PM Richard Biener
>  wrote:
> >
> > On Tue, Jul 12, 2022 at 6:11 AM Hongtao Liu  wrote:
> > >
> > > On Mon, Jul 11, 2022 at 7:47 PM Richard Biener via Gcc-patches
> > >  wrote:
> > > >
> > > > On Mon, Jul 11, 2022 at 5:44 AM liuhongt  wrote:
> > > > >
> > > > > The patch only handles load/store(including ctor/permutation, except
> > > > > > gather/scatter) for complex type, other operations don't need to be
> > > > > handled since they will be lowered by pass cplxlower.(MASK_LOAD is not
> > > > > supported for complex type, so no need to handle either).
> > > >
> > > > (*)
> > > >
> > > > > > Instead of supporting vector(2) _Complex double, this patch takes 
> > > > > vector(4)
> > > > > double as vector type of _Complex double. Since vectorizer originally
> > > > > takes TYPE_VECTOR_SUBPARTS as nunits which is not true for complex
> > > > > type, the patch handles nunits/ncopies/vf specially for complex type.
> > > >
> > > > For the limited set above(*) can you explain what's "special" about
> > > > vector(2) _Complex
> > > > vs. vector(4) double, thus why we need to have STMT_VINFO_COMPLEX_P at 
> > > > all?
> > > Supporting a vector(2) complex  is a straightforward idea, just like
> > > supporting other scalar type in vectorizer, but it requires more
> > > efforts(in the backend and frontend), considering that most of
> > > operations of complex type will be lowered into realpart and imagpart
> > > operations, supporting a vector(2) complex does not look that
> > > necessary. Then it comes up with supporting vector(4) double(with
> > > adjustment of vf/ctor/permutation), the vectorizer only needs to
> > > handle the vectorization of the move operation of the complex type(no
> > > need to worry about wrongly mapping vector(4) double multiplication to
> > > complex type multiplication since it's already lowered before
> > > vectorizer).
> > > stmt_info does not record the scalar type, in order to avoid duplicate
> > > operation like getting a lhs type from stmt to determine whether it is
> > > a complex type, STMT_VINFO_COMPLEX_P bit is added, this bit is mainly
> > > initialized in vect_analyze_data_refs and vect_get_vector_types_for_
> > > stmt.
> > > >
> > > > I wonder to what extent your handling can be extended to support 
> > > > re-vectorizing
> > > > (with a higher VF for example) already vectorized code?  The vectorizer 
> > > > giving
> > > > up on vector(2) double looks quite obviously similar to it giving up
> > > > on _Complex double ...
> > > Yes, it can be extended to vector(2) double/float/int/ with a bit
> > > > adjustment (extracting elements by using bit_field instead of
> > > imagpart_expr/realpart_expr).
> > > > It would be a shame to not use the same underlying mechanism for 
> > > > dealing with
> > > > both, where for the vector case obviously vector(4) would be supported 
> > > > as well.
> > > >
> > > > In principle _Complex double operations should be two SLP lanes but it 
> > > > seems you
> > > > are handling them with classical interleaving as well?
> > > I'm only handling move operations, for other operations it will be
> > > lowered to realpart and imagpart and thus two SLP lanes.
> >
> > Yes, I understood that.
> >
> > Doing it more general (and IMHO better) would involve enhancing
> > how we represent dataref groups, maintaining the number of scalars
> > covered by each of the vinfos.  On the SLP representation side it
> > probably requires to rely on the representative for access and not
> > on the scalar stmts (since those do not map properly to the lanes).
> >
> > Ideally we'd be able to handle
> >
> > struct { _Complex double c; double a; double b; } a[], b[];
> >
> > void foo ()
> > {
> >for (int i = 0; i < 100; ++i)
> > {
> >   a[i].c = b[i].c;
> >   a[i].a = b[i].a;
> >   a[i].b = b[i].b;
> > }
> > }
> >
> > which I guess your patch doesn't handle with plain AVX vector
> > copies but instead uses interleaving for the _Complex and non-_Complex
> > parts?
> Indeed, it produces wrong code.

For _Complex, in case we don't get to the "true and only" solution it
might be easier to split the loads and stores when it's just memory
copies and we have vectorization enabled and a supported vector
mode that would surely re-assemble them (store-merging doesn't seem
to do that).

Btw, we seem to produce

movsd   b(%rip), %xmm0
movsd   %xmm0, a(%rip)
movsd   b+8(%rip), %xmm0
movsd   %xmm0, a+8(%rip)

for a _Complex double memory copy on x86 which means we lack
true DCmode support (pseudos get decomposed).  Not sure if we
can somehow check whether a target has DCmode load/store
support and key decomposing on that (maybe check the SET optab).

It might be possible to check

_Complex double a, b;
void bar()
{
  a = b;
}

for all targets with a cc1 cross to see whether they somehow get
loads/stores _not_ decomposed (also check _Complex float,
I 

Re: [PATCH 2/3] tree-cfg: do not duplicate returns_twice calls

2022-07-13 Thread Richard Biener via Gcc-patches
On Tue, Jul 12, 2022 at 10:10 PM Alexander Monakov  wrote:
>
>
> Apologies for the prolonged silence Richard, it is a bit of an obscure topic,
> and I was unsure I'd be able to handle any complications in a timely manner.
> I'm ready to revisit it now, please see below.
>
> On Mon, 17 Jan 2022, Richard Biener wrote:
>
> > On Fri, Jan 14, 2022 at 7:21 PM Alexander Monakov  
> > wrote:
> > >
> > > A returns_twice call may have associated abnormal edges that correspond
> > > to the "second return" from the call. If the call is duplicated, the
> > > copies of those edges also need to be abnormal, but e.g. tracer does not
> > > enforce that. Just prohibit the (unlikely to be useful) duplication.
> >
> > The general CFG copying routines properly duplicate those edges, no?
>
> No (in fact you say so in the next paragraph). In general I think they cannot,
> abnormal edges are a special case, so it should be the responsibility of the
> caller.
>
> > Tracer uses duplicate_block so it should also get copies of all successor
> > edges of that block.  It also only traces along normal edges.  What it might
> > miss is abnormal incoming edges - is that what you are referring to?
>
> Yes (I think its entire point is to build a "trace" of duplicated blocks that
> does not have incoming edges in the middle, abnormal or not).
>
> > That would be a thing we don't handle in duplicate_block on its own but
> > that callers are expected to do (though I don't see copy_bbs doing that
> > either).  I wonder if we can trigger this issue for some testcase?
>
> Oh yes (in fact my desire to find a testcase delayed this quite a bit).
> When compiling the following testcase with -O2 -ftracer:
>
> __attribute__((returns_twice))
> int rtwice_a(int), rtwice_b(int);
>
> int f(int *x)
> {
> volatile unsigned k, i = (*x);
>
> for (k = 1; (i = rtwice_a(i)) * k; k = 2);
>
> for (; (i = rtwice_b(i)) * k; k = 4);
>
> return k;
> }
>
> tracer manages to eliminate the ABNORMAL_DISPATCHER block completely, so
> the possibility of transferring control back to rtwice_a from rtwice_b
> is no longer modeled in the IR. I could spend some time "upgrading" this
> to an end-to-end miscompilation, but I hope you agree this is quite broken
> already.
>
> > The thing to check would be incoming abnormal edges in
> > can_duplicate_block_p, not (only) returns twice functions?
>
> Unfortunately not, abnormal edges are also used for computed gotos, which are
> less magic than returns_twice edges and should not block tracer I think.

I think computed gotos should use regular edges, only non-local goto should
use abnormals...

I suppose asm goto also uses abnormal edges?

Btw, I don't see how they in general are "less magic".  Sure, we have an
explicit receiver (the destination label), but we can only do edge inserts
if we have a single computed goto edge into a block (we can "move" the
label to the block created when splitting the edge).

> This implies patch 1/3 [1] unnecessarily blocks sinking to computed goto 
> targets.
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588498.html
>
> How would you like to proceed here? Is my initial patch ok?

Hmm, so for returns twice calls duplicate_block correctly copies the call
and redirects the provided incoming edge to it.  The API does not
handle adding any further incoming edges - the caller would be responsible
for this.  So I still somewhat fail to see the point here.  If tracer does not
handle extra incoming edges properly then we need to fix tracer?  This
also includes non-local goto (we seem to copy non-local labels just
fine - wasn't there a bugreport about this!?).

So I think can_duplicate_block_p is the wrong place to fix (the RTL side
would need a similar fix anyhow?)

Richard.

> Alexander
>
> >
> > Richard.
> >
> > > gcc/ChangeLog:
> > >
> > > * tree-cfg.c (gimple_can_duplicate_bb_p): Reject blocks with
> > > calls that may return twice.
> > > ---
> > >  gcc/tree-cfg.c | 7 +--
> > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> > > index b7fe313b7..a99f1acb4 100644
> > > --- a/gcc/tree-cfg.c
> > > +++ b/gcc/tree-cfg.c
> > > @@ -6304,12 +6304,15 @@ gimple_can_duplicate_bb_p (const_basic_block bb)
> > >  {
> > >gimple *g = gsi_stmt (gsi);
> > >
> > > -  /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
> > > +  /* Prohibit duplication of returns_twice calls, otherwise 
> > > associated
> > > +abnormal edges also need to be duplicated properly.
> > > +An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
> > >  duplicated as part of its group, or not at all.
> > >  The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of 
> > > such a
> > >  group, so the same holds there.  */
> > >if (is_gimple_call (g)
> > > - && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC)
> > > + && 

Re: ICE after folding svld1rq to vec_perm_expr during forwprop

2022-07-13 Thread Richard Biener via Gcc-patches
On Tue, Jul 12, 2022 at 9:12 PM Prathamesh Kulkarni via Gcc-patches
 wrote:
>
> Hi Richard,
> For the following test:
>
> svint32_t f2(int a, int b, int c, int d)
> {
>   int32x4_t v = (int32x4_t) {a, b, c, d};
>   return svld1rq_s32 (svptrue_b8 (), &v[0]);
> }
>
> The compiler emits following ICE with -O3 -mcpu=generic+sve:
> foo.c: In function ‘f2’:
> foo.c:4:11: error: non-trivial conversion in ‘view_convert_expr’
> 4 | svint32_t f2(int a, int b, int c, int d)
>   |   ^~
> svint32_t
> __Int32x4_t
> _7 = VIEW_CONVERT_EXPR<__Int32x4_t>(_8);
> during GIMPLE pass: forwprop
> dump file: foo.c.109t.forwprop2
> foo.c:4:11: internal compiler error: verify_gimple failed
> 0xfda04a verify_gimple_in_cfg(function*, bool)
> ../../gcc/gcc/tree-cfg.cc:5568
> 0xe9371f execute_function_todo
> ../../gcc/gcc/passes.cc:2091
> 0xe93ccb execute_todo
> ../../gcc/gcc/passes.cc:2145
>
> This happens because, after folding svld1rq_s32 to vec_perm_expr, we have:
>   int32x4_t v;
>   __Int32x4_t _1;
>   svint32_t _9;
>   vector(4) int _11;
>
>:
>   _1 = {a_3(D), b_4(D), c_5(D), d_6(D)};
>   v_12 = _1;
>   _11 = v_12;
>   _9 = VEC_PERM_EXPR <_11, _11, { 0, 1, 2, 3, ... }>;
>   return _9;
>
> During forwprop, simplify_permutation simplifies vec_perm_expr to
> view_convert_expr,
> and the end result becomes:
>   svint32_t _7;
>   __Int32x4_t _8;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _8 = {a_2(D), b_3(D), c_4(D), d_5(D)};
>   _7 = VIEW_CONVERT_EXPR<__Int32x4_t>(_8);
>   return _7;
> ;;succ:   EXIT
>
> which causes the error during verify_gimple since VIEW_CONVERT_EXPR
> has incompatible types (svint32_t, int32x4_t).
>
> The attached patch disables simplification of VEC_PERM_EXPR
> in simplify_permutation, if lhs and rhs have non compatible types,
> which resolves ICE, but am not sure if it's the correct approach ?

It for sure papers over the issue.  I think the error happens earlier,
the V_C_E should have been built with the type of the VEC_PERM_EXPR
which is the type of the LHS.  But then you probably run into the
different sizes ICE (VLA vs constant size).  I think for this case you
want a BIT_FIELD_REF instead of a VIEW_CONVERT_EXPR,
selecting the "low" part of the VLA vector.

>
> Alternatively, should we allow assignments from fixed-width to SVE
> vector, so the above
> VIEW_CONVERT_EXPR would result in dup ?
>
> Thanks,
> Prathamesh