Re: [PATCH] [PR rtl-optimization/97249]Simplify vec_select of paradoxical subreg.

2020-10-20 Thread Hongtao Liu via Gcc-patches
On Wed, Oct 21, 2020 at 5:07 AM Segher Boessenkool
 wrote:
>
> On Tue, Oct 20, 2020 at 11:20:48AM +0800, Hongtao Liu wrote:
> > +   unsigned HOST_WIDE_INT subreg_offset = 0;
> > +   if (GET_CODE (trueop0) == SUBREG
> > +   && GET_MODE_INNER (mode)
> > +  == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
> > +   && (GET_MODE_NUNITS (mode)).is_constant ()
> > +   && constant_multiple_p (SUBREG_BYTE (trueop0),
> > +   GET_MODE_UNIT_BITSIZE (mode),
> > +   &subreg_offset))
> > + {
> > +   gcc_assert (XVECLEN (trueop1, 0) == l1);
>
> Why?  If we want to check that, it should be in RTL checking (and maybe
> it already is!)
>

Yes, RTL checking would guarantee that and it should be removed.

> > +   bool success = true;
> > +   poly_uint64 nunits
> > + = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
> > +   for (int i = 0; i != l1; i++)
> > + {
> > +   rtx idx = XVECEXP (trueop1, 0, i);
> > +   if (!CONST_INT_P (idx)
> > +   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
>
> Can that ever happen in valid code?  This seems to just hide problems.
>

for rtx like (vec_select:v4di:(subreg:v8di (reg:v2di))
 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])),
It seems valid for rtl checking.

> > + {
> > +   success = false;
> > +   break;
> > + }
> > + }
> > +   if (success)
>
> If you have a huge piece of code like this, factor it?  Esp. if you now
> need to have all kinds of booleans where you really just want to do
> early returns.
>

I want to jump out of this if branch; since later code in this function
won't simplify VEC_SELECT further when it matches my if condition,
it's OK to use early returns.

>
> Segher

Update patch.

-- 
BR,
Hongtao
From e4e9c256efc636e994b0994c69cb0b4e7edc25a0 Mon Sep 17 00:00:00 2001
From: liuhongt 
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of a subreg of X to just a vec_select of
 X.

gcc/ChangeLog
	PR rtl-optimization/97249
	* simplify-rtx.c (simplify_binary_operation_1): Simplify
	vec_select of a subreg of X to a vec_select of X.

gcc/testsuite/ChangeLog

	* gcc.target/i386/pr97249-1.c: New test.
---
 gcc/simplify-rtx.c| 34 +++
 gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 
 2 files changed, 64 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..947a9f37241 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,40 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		return subop1;
 		}
 	}
+
+	  /* Simplify vec_select of a subreg of X to just a vec_select of X
+	 when X has same component mode as vec_select.  */
+	  unsigned HOST_WIDE_INT subreg_offset = 0;
+	  if (GET_CODE (trueop0) == SUBREG
+	  && GET_MODE_INNER (mode)
+		 == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
+	  && GET_MODE_NUNITS (mode).is_constant ()
+	  && constant_multiple_p (subreg_memory_offset (trueop0),
+  GET_MODE_UNIT_BITSIZE (mode),
+  &subreg_offset))
+	{
+	  poly_uint64 nunits
+		= GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
+	  rtx par = trueop1;
+	  for (int i = 0; i != l1; i++)
+		{
+		  rtx idx = XVECEXP (trueop1, 0, i);
+		  if (!CONST_INT_P (idx)
+		  || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
+		return 0;
+		}
+
+	  if (subreg_offset)
+		{
+		  rtvec vec = rtvec_alloc (l1);
+		  for (int i = 0; i < l1; i++)
+		RTVEC_ELT (vec, i)
+		  = GEN_INT (INTVAL (XVECEXP (trueop1, 0, i))
+ + subreg_offset);
+		  par = gen_rtx_PARALLEL (VOIDmode, vec);
+		}
+	  return gen_rtx_VEC_SELECT (mode, SUBREG_REG (trueop0), par);
+	}
 	}
 
   if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 000..4478a34a9f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249  */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxbw[ \t]+\(.*%xmm[0-9]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxwd[ \t]+\(.*%xmm[0-9]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxdq[ \t]+\(.*%xmm[0-9]} 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+for (int i = 0 ; i != 8; i++)
+ p3[i] = p1[i] + p2[i];
+ return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+for (int i = 0 ; i != 4; i++)
+ p3[i] = p1[i] + p2[i];
+ return;
+}
+
+void
+foo2 (unsigned 

Re: [PATCH 2/8] [RS6000] rs6000_rtx_costs for AND

2020-10-20 Thread Alan Modra via Gcc-patches
On Tue, Oct 20, 2020 at 01:55:56PM -0500, Segher Boessenkool wrote:
> On Thu, Oct 08, 2020 at 09:27:54AM +1030, Alan Modra wrote:
> > The existing "case AND" in this function is not sufficient for
> > optabs.c:avoid_expensive_constant usage, where the AND is passed in
> > outer_code.  We'd like to cost AND of rs6000_is_valid_and_mask
> > or rs6000_is_valid_2insn_and variety there, so that those masks aren't
> > seen as expensive (ie. better to load to a reg then AND).
> > 
> > * config/rs6000/rs6000.c (rs6000_rtx_costs): Combine CONST_INT
> > AND handling with IOR/XOR.  Move costing for AND with
> > rs6000_is_valid_and_mask or rs6000_is_valid_2insn_and to
> > CONST_INT.
> 
> Sorry this took so long to review :-(
> 
> On 64-bit BE this leads to *bigger* code, and closer observation shows
> that some common sequences degrade on all configs.  This seems to mostly
> be about "andc" (and its dot form).  It wasn't costed properly before,
> but after your patch, a single instruction is replaced by three.
> 
> Could you look into this?

~/build/gcc-alan/gcc$ for z in *.o; do if test `objdump -dr $z | grep andc | wc 
-l` != `objdump -dr ../../gcc/gcc/$z | grep andc | wc -l`; then echo $z; fi; 
done
gimplify.o
insn-emit.o
insn-opinit.o
insn-recog.o
rs6000-string.o

All of these are exactly the case I talked about in
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553919.html

"Sometimes correct insn cost leads to unexpected results.  For
example:

extern unsigned bar (void);
unsigned
f1 (unsigned a)
{
  if ((a & 0x01000200) == 0x01000200)
return bar ();
  return 0;
}

emits for a & 0x01000200
 (set (reg) (and (reg) (const_int 0x01000200)))
at expand time (two rlwinm insns) rather than the older
 (set (reg) (const_int 0x01000200))
 (set (reg) (and (reg) (reg)))
which is three insns.  However, since 0x01000200 is needed later the
older code after optimisation is smaller."

Things have changed slightly since I wrote the above, with the two
rlwinm insns being emitted at expand time, so you see
 (set (reg) (and (reg) (const_int 0xff0003ff)))
 (set (reg) (and (reg) (const_int 0x01fffe00)))
but of course that doesn't change anything regarding the cost of
"a & 0x01000200".

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] [PR rtl-optimization/97249]Simplify vec_select of paradoxical subreg.

2020-10-20 Thread Hongtao Liu via Gcc-patches
On Wed, Oct 21, 2020 at 12:42 AM Richard Sandiford
 wrote:
>
> Hongtao Liu  writes:
> >> > +   && (GET_MODE_NUNITS (mode)).is_constant ()
> >> > +   && (GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0))))
> >> > +   .is_constant ()
> >> > +   && known_le (l1, l2)
> >>
> >> I'm not sure the last two & are really the important condition.
> >> I think we should drop them for the suggestion below.
> >>
> >
> > Changed, assume gcc also support something like (vec_select:v4di
> > (reg:v2di) (parallel [ (const_int 0) (const_int 1) (const_int 1)
> > (const_int 0)]))
> > as long as the range of selection guaranteed by
> >   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
>
> Yeah, that vec_select looks OK.
>
> >>
> >> > +   if (!CONST_INT_P (idx))
> >>
> >> Here I think we should check:
> >>
> >>   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
> >>
> >> where:
> >>
> >>poly_uint64 nunits
> >>  = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
> >>
> >
> > Changed.
> >
> >> This makes sure that all indices are in range.  In particular, it's
> >> valid for the SUBREG_REG to be narrower than mode, for appropriate
> >> vec_select indices
> >>
> >
> > Yes, that's what paradoxical subreg means.
>
> But I was comparing the mode of the vec_select with the mode of the
> SUBREG_REG (rather than the mode of trueop0 with the mode of the
> SUBREG_REG, which is what matters for paradoxical subregs).
>
> > +   /* Simplify vec_select of a subreg of X to just a vec_select of X
> > +  when X has same component mode as vec_select.  */
> > +   unsigned HOST_WIDE_INT subreg_offset = 0;
> > +   if (GET_CODE (trueop0) == SUBREG
> > +   && GET_MODE_INNER (mode)
> > +  == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
> > +   && (GET_MODE_NUNITS (mode)).is_constant ()
>
> Unnecessary brackets around “GET_MODE_NUNITS (mode)”.
>

Changed.

> > +   && constant_multiple_p (SUBREG_BYTE (trueop0),
> > +   GET_MODE_UNIT_BITSIZE (mode),
> > +   &subreg_offset))
>
> Sorry, my bad, this should be:
>
>   && constant_multiple_p (subreg_memory_offset (trueop0),
>   GET_MODE_UNIT_BITSIZE (mode),
>   &subreg_offset))
>

Changed.

> > + {
> > +   gcc_assert (XVECLEN (trueop1, 0) == l1);
> > +   bool success = true;
> > +   poly_uint64 nunits
> > + = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
> > +   for (int i = 0; i != l1; i++)
> > + {
> > +   rtx idx = XVECEXP (trueop1, 0, i);
> > +   if (!CONST_INT_P (idx)
> > +   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
> > + {
> > +   success = false;
> > +   break;
> > + }
> > + }
> > +   if (success)
> > + {
> > +   rtx par = trueop1;
> > +   if (subreg_offset)
> > + {
> > +   rtvec vec = rtvec_alloc (l1);
> > +   for (int i = 0; i < l1; i++)
> > + RTVEC_ELT (vec, i)
> > +   = GEN_INT (INTVAL (XVECEXP (trueop1, 0, i)
> > +  + subreg_offset));
>
> This is applying subreg_offset to the pointer rather than the INTVAL.
> It should be:
>
>   = GEN_INT (UINTVAL (XVECEXP (trueop1, 0, i))
>  + subreg_offset);
>

oops, sorry for typo and changed.

> OK with those changes, thanks.
>
> Richard



-- 
BR,
Hongtao


Re: [PATCH 1/2] [target 87767] Refactor AVX512 broadcast patterns with speical memory constraint.

2020-10-20 Thread Hongtao Liu via Gcc-patches
On Tue, Oct 20, 2020 at 10:57 PM Vladimir Makarov  wrote:
>
>
> On 2020-10-20 1:33 a.m., Hongtao Liu wrote:
> > On Mon, Oct 19, 2020 at 11:38 PM Vladimir Makarov  
> > wrote:
> >>
> >> On 2020-10-11 8:58 p.m., Hongtao Liu wrote:
> >>> Hi:
> >>> This is done in 2 steps:
> >>> 1. Extend special memory constraint to handle non MEM_P cases, i.e.
> >>> (vec_duplicate:V4SF (mem:SF (addr)))
> >>> 2. Refactor implementation of *_bcst{_1,_2,_3} patterns. Add new
> >>> predicate bcst_mem_operand and corresponding constraint "Br" to merge
> >>> "$(pattern)_bcst{_1,_2,_3}" into "$(pattern)", also delete those
> >>> separate "*_bcst{_1,_2,_3}" patterns.
> >>>
> >>> Bootstrap is ok, regression test on i386 backend is ok.
> >>>
> >>> gcc/ChangeLog:
> >>>
> >>>   PR target/87767
> >>>   * ira-costs.c (record_operand_costs): Extract memory operand
> >>>   from recog_data.operand[i] for record_address_regs.
> >>>   (record_reg_classes): Extract memory operand from OP for
> >>>   conditional judgement MEM_P.
> >>>   * ira.c (ira_setup_alts): Ditto.
> >>>   * lra-constraints.c (extract_mem_from_operand): New function.
> >>>   (satisfies_memory_constraint_p): Extract memory operand from
> >>>   OP for decompose_mem_address, return false when there's no
> >>>   memory operand inside OP.
> >>>   (process_alt_operands): Remove MEM_P (op) since it would be
> >>>   judged in satisfies_memory_constraint_p.
> >>>   * recog.c (asm_operand_ok): Extract memory operand from OP for
> >>>   judgement of memory_operand (OP, VOIDmode).
> >>>   (constrain_operands): Don't unwrapper unary operator when
> >>>   there's memory operand inside.
> >>>   * rtl.h (extract_mem_from_operand): New decl.
> >>
> >> Thank you for working on the PR.  In general patch is ok for me. The
> >> only thing is
> >>
> >> +/* For special_memory_operand, it could be false for MEM_P (op),
> >> +   i.e. bcst_mem_operand in i386 backend.
> >> +   Extract and return real memory operand or op.  */
> >> +rtx
> >> +extract_mem_from_operand (rtx op)
> >> +{
> >> +  if (MEM_P (op))
> >> +return op;
> >> +  /* Only allow one memory_operand inside special memory operand.  */
> >>
> >> The comment contradicts to the below code which returns the first memory 
> >> operand (not the only one).
> >>
> > Yes.
> >
> >> +  subrtx_var_iterator::array_type array;
> >> +  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
> >> +{
> >> +  rtx x = *iter;
> >> +  if (MEM_P (x))
> >> +   return x;
> >> +}
> >> +
> >> +  return op;
> >> +}
> >> +
> >>
> >> I think the code should look like
> >>
> >> /* For special_memory_operand, it could be false for MEM_P (op),
> >>  i.e. bcst_mem_operand in i386 backend.
> >>  Extract and return real memory operand or op.  */
> >> rtx
> >> extract_mem_from_operand (rtx op)
> >> {
> >> if (MEM_P (op))
> >>   return op;
> >> /* Only allow one memory_operand inside special memory operand.  */
> >> subrtx_var_iterator::array_type array;
> >> rtx res = op;
> >> FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
> >>   {
> >> rtx x = *iter;
> >> if (!MEM_P (x) || res != op)
> >>   return op;
> >> res = op;
> > Assume you want to assign res with x.
> > Also in the iteration, x would first be op which would be false for
> > MEM_P, then op would be returned.
> > That's not what you mean, so i changed to
> >
> >/* Only allow one memory_operand inside special memory operand.  */
> >subrtx_var_iterator::array_type array;
> >rtx res = op;
> >FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
> >  {
> >rtx x = *iter;
> >if (!MEM_P (x))
> >  continue;
> >/* Return op when there're multiple memory operands.  */
> >if (res != op)
> >  return op;
> >else
> >  res = x;
> >  }
>
> Actually I wanted to have constraint satisfying rtx with memory covered
> by **only unary** operator(s).  Your code satisfies memory covered by
> non-unary operators (e.g. binary ones).
>
> Why do I prefer less general constraint? Because other operands of
> operator containing the memory might need reloads too and the more
> general constraint will ignore this. If this situation is impossible
> now, it might be possible in the future.
>

Got your point.

> My proposed code is wrong as I forgot that FOR_EACH_SUBRTX_VAR processes
> sub-rtx recursively.  Thank you for starting the discussion.  Now I
> think the code should look like
>
> /* For special_memory_operand, it could be false for MEM_P (op),
>  i.e. bcst_mem_operand in i386 backend.
>  Extract and return real memory operand or op.  */
> rtx
> extract_mem_from_operand (rtx op)
> {
>for (rtx x = op;; x = XEXP (x, 0)) {
>
> if (MEM_P (x))
>   return x;
> if (GET_RTX_LENGTH (GET_CODE (x)) != 1 || GET_RTX_FORMAT (GET_CODE

Re: [r11-4134 Regression] FAIL: gcc.dg/Walloca-1.c (test for warnings, line 27) on Linux/x86_64

2020-10-20 Thread Jeff Law via Gcc-patches


On 10/20/20 5:13 PM, sunil.k.pandey via Gcc-patches wrote:
> On Linux/x86_64,
>
> 495ec0b2aa808a6463b8c24288a1730cbae1cfca is the first bad commit
> commit 495ec0b2aa808a6463b8c24288a1730cbae1cfca
> Author: Aldy Hernandez 
> Date:   Mon Oct 19 16:52:51 2020 +0200
>
> Convert -Walloca pass to ranger.
>
> caused
>
> FAIL: gcc.dg/Walloca-1.c (test for excess errors)
> FAIL: gcc.dg/Walloca-1.c  (test for warnings, line 27)
>
> with GCC configured with
>
> Configured with: ../../gcc/configure 
> --prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-4134/usr
>  --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
> --enable-libmpx x86_64-linux --disable-bootstrap
>
> To reproduce:
>
> $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/Walloca-1.c 
> --target_board='unix{-m32}'"
> $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/Walloca-1.c 
> --target_board='unix{-m32\ -march=cascadelake}'"
>
> (Please do not reply to this email, for question about this report, contact 
> me at skpgkp2 at gmail dot com)

My tester is flagging this too on the embedded targets.  visium-elf for
example.


Jeff



Trivial testsuite fix for 16bit targets

2020-10-20 Thread Jeff Law via Gcc-patches

cr16-elf regressed this test:


cr16-sim: gcc.dg/Wbuiltin-declaration-mismatch-9.c (test for excess errors)



There's a dg-prune in the test which stripped out some messages, but it
wasn't sufficient to cover the 16 bit targets.  This patch adds another
string to prune.  Committed.


Jeff


gcc/testsuite
* gcc.dg/Wbuiltin-declaration-mismatch-9.c: Improve pruning of
invalid scanf call messages.

diff --git a/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-9.c 
b/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-9.c
index 56a827ab527..82db8fe33d1 100644
--- a/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-9.c
+++ b/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-9.c
@@ -12,4 +12,5 @@ void a (void)
 }
 
 /* The invalid scanf call may also trigger:
-   { dg-prune-output "accessing 4 bytes in a region of size 1" } */
+   { dg-prune-output "accessing 4 bytes in a region of size 1" }
+   { dg-prune-output "accessing 2 bytes in a region of size 1" } */


[PATCH, rs6000] Optimize pcrel access of globals

2020-10-20 Thread acsawdey--- via Gcc-patches
From: Aaron Sawdey 

This patch implements a RTL pass that looks for pc-relative loads of the
address of an external variable using the PCREL_GOT relocation and a
single load or store that uses that external address. It then uses the
PCREL_OPT relocation to convert that first load into a single pc-relative
load or store to directly access that external variable.

Produced by a cast of thousands:
 * Michael Meissner
 * Peter Bergner
 * Bill Schmidt
 * Alan Modra
 * Segher Boessenkool
 * Aaron Sawdey

Passes bootstrap/regtest on ppc64le power10. OK for trunk?

gcc/ChangeLog:

* config.gcc: Add pcrel-opt.o.
* config/rs6000/pcrel-opt.c: New file.
* config/rs6000/pcrel-opt.md: New file.
* config/rs6000/predicates.md: Add d_form_memory predicate.
* config/rs6000/rs6000-cpus.def: Add OPTION_MASK_PCREL_OPT.
* config/rs6000/rs6000-passes.def: Add pass_pcrel_opt.
* config/rs6000/rs6000-protos.h: Add reg_to_non_prefixed(),
offsettable_non_prefixed_memory(), output_pcrel_opt_reloc(),
and make_pass_pcrel_opt().
* config/rs6000/rs6000.c (reg_to_non_prefixed): Make global.
(rs6000_option_override_internal): Add pcrel-opt.
(rs6000_delegitimize_address): Support pcrel-opt.
(rs6000_opt_masks): Add pcrel-opt.
(offsettable_non_prefixed_memory): New function.
(reg_to_non_prefixed): Make global.
(rs6000_asm_output_opcode): Reset next_insn_prefixed_p.
(output_pcrel_opt_reloc): New function.
* config/rs6000/rs6000.md (loads_extern_addr): New attr.
(pcrel_extern_addr): Set loads_extern_addr.
Add include for pcrel-opt.md.
* config/rs6000/rs6000.opt: Add -mpcrel-opt.
* config/rs6000/t-rs6000: Add rules for pcrel-opt.c and
pcrel-opt.md.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pcrel-opt-inc-di.c: New test.
* gcc.target/powerpc/pcrel-opt-ld-df.c: New test.
* gcc.target/powerpc/pcrel-opt-ld-di.c: New test.
* gcc.target/powerpc/pcrel-opt-ld-hi.c: New test.
* gcc.target/powerpc/pcrel-opt-ld-qi.c: New test.
* gcc.target/powerpc/pcrel-opt-ld-sf.c: New test.
* gcc.target/powerpc/pcrel-opt-ld-si.c: New test.
* gcc.target/powerpc/pcrel-opt-ld-vector.c: New test.
* gcc.target/powerpc/pcrel-opt-st-df.c: New test.
* gcc.target/powerpc/pcrel-opt-st-di.c: New test.
* gcc.target/powerpc/pcrel-opt-st-hi.c: New test.
* gcc.target/powerpc/pcrel-opt-st-qi.c: New test.
* gcc.target/powerpc/pcrel-opt-st-sf.c: New test.
* gcc.target/powerpc/pcrel-opt-st-si.c: New test.
* gcc.target/powerpc/pcrel-opt-st-vector.c: New test.
---
 gcc/config.gcc|   6 +-
 gcc/config/rs6000/pcrel-opt.c | 887 ++
 gcc/config/rs6000/pcrel-opt.md| 386 
 gcc/config/rs6000/predicates.md   |  23 +
 gcc/config/rs6000/rs6000-cpus.def |   2 +
 gcc/config/rs6000/rs6000-passes.def   |   8 +
 gcc/config/rs6000/rs6000-protos.h |   4 +
 gcc/config/rs6000/rs6000.c| 116 ++-
 gcc/config/rs6000/rs6000.md   |   8 +-
 gcc/config/rs6000/rs6000.opt  |   4 +
 gcc/config/rs6000/t-rs6000|   7 +-
 .../gcc.target/powerpc/pcrel-opt-inc-di.c |  18 +
 .../gcc.target/powerpc/pcrel-opt-ld-df.c  |  36 +
 .../gcc.target/powerpc/pcrel-opt-ld-di.c  |  43 +
 .../gcc.target/powerpc/pcrel-opt-ld-hi.c  |  42 +
 .../gcc.target/powerpc/pcrel-opt-ld-qi.c  |  42 +
 .../gcc.target/powerpc/pcrel-opt-ld-sf.c  |  42 +
 .../gcc.target/powerpc/pcrel-opt-ld-si.c  |  41 +
 .../gcc.target/powerpc/pcrel-opt-ld-vector.c  |  36 +
 .../gcc.target/powerpc/pcrel-opt-st-df.c  |  36 +
 .../gcc.target/powerpc/pcrel-opt-st-di.c  |  37 +
 .../gcc.target/powerpc/pcrel-opt-st-hi.c  |  42 +
 .../gcc.target/powerpc/pcrel-opt-st-qi.c  |  42 +
 .../gcc.target/powerpc/pcrel-opt-st-sf.c  |  36 +
 .../gcc.target/powerpc/pcrel-opt-st-si.c  |  41 +
 .../gcc.target/powerpc/pcrel-opt-st-vector.c  |  36 +
 26 files changed, 2012 insertions(+), 9 deletions(-)
 create mode 100644 gcc/config/rs6000/pcrel-opt.c
 create mode 100644 gcc/config/rs6000/pcrel-opt.md
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c
 create mode 

[r11-4134 Regression] FAIL: gcc.dg/Walloca-1.c (test for warnings, line 27) on Linux/x86_64

2020-10-20 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

495ec0b2aa808a6463b8c24288a1730cbae1cfca is the first bad commit
commit 495ec0b2aa808a6463b8c24288a1730cbae1cfca
Author: Aldy Hernandez 
Date:   Mon Oct 19 16:52:51 2020 +0200

Convert -Walloca pass to ranger.

caused

FAIL: gcc.dg/Walloca-1.c (test for excess errors)
FAIL: gcc.dg/Walloca-1.c  (test for warnings, line 27)

with GCC configured with

Configured with: ../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-4134/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/Walloca-1.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/Walloca-1.c 
--target_board='unix{-m32\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


[PATCH] Temporarily disable trap in in extract_range_builtin check.

2020-10-20 Thread Andrew MacLeod via Gcc-patches

Not a permanent fix, so leave the PR open.

we'll bypass the comparison assertion code until we can look closer at 
how to resolve the symbolic issue..  This will keep compilations going...


bootstrapped on x86_64-pc-linux-gnu, no regressions, pushed.

Andrew
commit 292c92715b282f7c6617c94351d3e38ec027d637
Author: Andrew MacLeod 
Date:   Tue Oct 20 16:55:14 2020 -0400

Temporarily disable trap in in extract_range_builtin check.

Until we figure out how to adjust ubsan for symbolics, disable the trap.

gcc/ChangeLog:

PR tree-optimization/97505
* vr-values.c (vr_values::extract_range_basic): Trap if
vr_values version disagrees with range_of_builtin_call.

diff --git a/gcc/testsuite/gfortran.dg/pr97505.f90 b/gcc/testsuite/gfortran.dg/pr97505.f90
new file mode 100644
index 000..f0599b38517
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr97505.f90
@@ -0,0 +1,49 @@
+! { dg-do compile }
+! { dg-options "-Os -fsanitize=signed-integer-overflow" }
+!
+! Test the fix for PR35824, in which the interface assignment and
+! negation did not work correctly.
+!
+! Contributed by Rolf Roth 
+!
+module typemodule
+  type alltype
+ double precision :: a
+ double precision,allocatable :: b(:)
+  end type
+  interface assignment(=)
+module procedure at_from_at
+  end interface
+  interface operator(-)
+module procedure  neg_at
+  end interface
+contains
+  subroutine at_from_at(b,a)
+type(alltype), intent(in) :: a
+type(alltype), intent(out) :: b
+b%a=a%a
+allocate(b%b(2))
+b%b=a%b
+  end subroutine at_from_at
+  function neg_at(a) result(b)
+type(alltype), intent(in) :: a
+type(alltype) :: b
+b%a=-a%a
+allocate(b%b(2))
+b%b=-a%b
+  end function neg_at
+end module
+  use typemodule
+  type(alltype) t1,t2,t3
+  allocate(t1%b(2))
+  t1%a=0.5d0
+  t1%b(1)=1d0
+  t1%b(2)=2d0
+  t2=-t1
+  if (t2%a .ne. -0.5d0) STOP 1
+  if (any(t2%b .ne. [-1d0, -2d0])) STOP 2
+
+  t1=-t1
+  if (t1%a .ne. -0.5d0) STOP 3
+  if (any(t1%b .ne. [-1d0, -2d0])) STOP 4
+end
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 11beef82a64..67c88006f13 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -1436,7 +1436,10 @@ vr_values::extract_range_basic (value_range_equiv *vr, gimple *stmt)
   /* Assert that any ranges vr_values::extract_range_builtin gets
 	 are also handled by the ranger counterpart.  */
   gcc_assert (range_of_builtin_call (*this, tmp, as_a<gcall *> (stmt)));
+#if 0
+  /* Disable this while PR97505 is resolved.  */
   gcc_assert (tmp.equal_p (*vr, /*ignore_equivs=*/false));
+#endif
   return;
 }
   /* Handle extraction of the two results (result of arithmetics and


Re: [PATCH][middle-end][i386][version 3]Add -fzero-call-used-regs=[skip|used-gpr-arg|used-arg|all-arg|used-gpr|all-gpr|used|all]

2020-10-20 Thread Qing Zhao via Gcc-patches
Richard,

Thanks a lot for your comments.

> On Oct 20, 2020, at 1:12 PM, Richard Sandiford  
> wrote:
> 
>> 
>> +
>> +  if ((strcmp (TREE_STRING_POINTER (id), "skip") != 0)
>> +  && (strcmp (TREE_STRING_POINTER (id), "used-gpr-arg") != 0)
>> +  && (strcmp (TREE_STRING_POINTER (id), "used-arg") != 0)
>> +  && (strcmp (TREE_STRING_POINTER (id), "all-arg") != 0)
>> +  && (strcmp (TREE_STRING_POINTER (id), "used-gpr") != 0)
>> +  && (strcmp (TREE_STRING_POINTER (id), "all-gpr") != 0)
>> +  && (strcmp (TREE_STRING_POINTER (id), "used") != 0)
>> +  && (strcmp (TREE_STRING_POINTER (id), "all") != 0))
> 
> Any reason we don't support all-gpr-arg?  Seems to be the only
> “missing” combination.
Will add this one.

> 
> Would be good to have a single piece of code that parses these
> arguments into a set of flags, rather than have one list here
> and one get_call_used_regs_seq.
> 
> Maybe we could do something similar to sanitizer_opts, but that
> might not be necessary.

Okay, will do that.
> 
>> +{
>> +  error ("attribute %qE argument must be one of %qs, %qs, %qs, %qs,"
>> + "%qs, %qs, %qs, or %qs",
>> + name, "skip", "used-gpr-arg", "used-arg", "all-arg",
>> + "used-gpr", "all-gpr", "used", "all");
>> +  *no_add_attris = true;
>> +  return NULL_TREE;
>> +}
>> +
>> +  return NULL_TREE;
>> +}
>> +
>> /* Handle a "returns_nonnull" attribute; arguments as in
>>   struct attribute_spec.handler.  */
>> 
>> diff --git a/gcc/coretypes.h b/gcc/coretypes.h
>> index 6b6cfcd..0ce5eb4 100644
>> --- a/gcc/coretypes.h
>> +++ b/gcc/coretypes.h
>> @@ -418,6 +418,19 @@ enum symbol_visibility
>>  VISIBILITY_INTERNAL
>> };
>> 
>> +/* Zero call-used registers type.  */
>> +enum zero_call_used_regs {
>> +  zero_call_used_regs_unset = 0,
>> +  zero_call_used_regs_skip,
>> +  zero_call_used_regs_used_gpr_arg,
>> +  zero_call_used_regs_used_arg,
>> +  zero_call_used_regs_all_arg,
>> +  zero_call_used_regs_used_gpr,
>> +  zero_call_used_regs_all_gpr,
>> +  zero_call_used_regs_used,
>> +  zero_call_used_regs_all
>> +};
> 
> I think a bitmask would be easier to use:
> 
>  SKIP
>  ONLY_USED
>  ONLY_GPR
>  ONLY_ARG
> 
> Should probably be a class enum given that we're C++11.

Good suggestion.

> 
>> +pass parameters. @samp{used-arg} zeros used call-used registers that
>> +pass parameters. @samp{arg} zeros all call-used registers that pass
>> +parameters.  These 3 choices are used for ROP mitigation.
>> +
>> +@samp{used-gpr} zeros call-used general purpose registers
>> +which are used in function.  @samp{all-gpr} zeros all
>> +call-used registers.  @samp{used} zeros call-used registers which
>> +are used in function.  @samp{all} zeros all call-used registers.
>> +These 4 choices are used for preventing information leak through
>> +registers.
> 
> The description for all-gpr doesn't look right.
Oops. Will fix it.

>  I think it would
> be easier to describe (and hopefully to follow) if we start with
> the three basic choices: “skip”, “used” and “all”.  Then describe
> how “used” and “all” can be modified by adding “-gpr” to limit the
> clearing to general-purpose registers and “-arg” to limit the
> clearing to argument registers.
> 
> We need to say what “call-used” and “used” mean in this context.
> In particular, “call-used” is also known as “call-clobbered”,
> “caller-saved“ and “volatile”, so it would be good to list those
> as alternatives.  We need to say what “used” registers are.

Okay.

>> 
>> +@item -fzero-call-used-regs=@var{choice}
>> +@opindex fzero-call-used-regs
>> +Zero call-used registers at function return to increase the program
>> +security by either mitigating Return-Oriented Programming (ROP) or
>> +preventing information leak through registers.
>> +
>> +@samp{skip}, which is the default, doesn't zero call-used registers.
>> +
>> +@samp{used-gpr-arg} zeros used call-used general purpose registers that
>> +pass parameters. @samp{used-arg} zeros used call-used registers that
>> +pass parameters. @samp{all-arg} zeros all call-used registers that pass
>> +parameters.  These 3 choices are used for ROP mitigation.
>> +
>> +@samp{used-gpr} zeros call-used general purpose registers
>> +which are used in function.  @samp{all-gpr} zeros all
>> +call-used registers.  @samp{used} zeros call-used registers which
>> +are used in function.  @samp{all} zeros all call-used registers.
>> +These 4 choices are used for preventing information leak through
>> +registers.
> 
> Same comment here.

Okay.

> 
>> @@ -310,6 +310,9 @@ struct GTY(()) rtl_data {
>> sets them.  */
>>  HARD_REG_SET asm_clobbers;
>> 
>> +  /* All hard registers that are zeroed at the return of the routine.  */
>> +  HARD_REG_SET zeroed_reg_set;
> 
> How about “must_be_zero_on_return“?  “zeroed_reg_set” isn't very
> specific about where the zeroing happens or is needed.  E.g. we also
> zero uninitialised registers.
okay.

> 
>> +{
>> +  basic_block bb = BLOCK_FOR_INSN (ret);
>> +  auto_bitmap 

[PATCH 2/2, rs6000, V2] VSX load/store rightmost element operations

2020-10-20 Thread will schmidt via Gcc-patches
[PATCH 2/2, rs6000, v2] VSX load/store rightmost element operations

Hi,
This adds support for the VSX load/store rightmost element operations.
This includes the instructions lxvrbx, lxvrhx, lxvrwx, lxvrdx,
stxvrbx, stxvrhx, stxvrwx, stxvrdx; And the builtins
vec_xl_sext() /* vector load sign extend */
vec_xl_zext() /* vector load zero extend */
vec_xst_trunc() /* vector store truncate */.

Testcase results show that the instructions added with this patch show
up at low/no optimization (-O0), with a number of those being replaced
with other load and store instructions at higher optimization levels.
I've kept those tests at -O0 to confirm these newly added instructions
are generated.

[v2] Refreshed per review comments.
Comments cleaned up, indentation corrected, dg-* stanzas updated.

Regtested OK for Linux on power8,power9 targets.  Sniff-regtested OK on
power10 simulator.
OK for trunk?

Thanks,
-Will

gcc/ChangeLog:
* config/rs6000/altivec.h (vec_xl_zext, vec_xl_sext, vec_xst_trunc): New
defines.
* config/rs6000/rs6000-builtin.def (BU_P10V_OVERLOAD_X): New builtin macro.
(BU_P10V_AV_X): New builtin macro.
(se_lxvrhbx, se_lxrbhx, se_lxvrwx, se_lxvrdx): Define internal names for
load and sign extend vector element.
(ze_lxvrbx, ze_lxvrhx, ze_lxvrwx, ze_lxvrdx): Define internal names for
load and zero extend vector element.
(tr_stxvrbx, tr_stxvrhx, tr_stxvrwx, tr_stxvrdx): Define internal names
for truncate and store vector element.
(se_lxvrx, ze_lxvrx, tr_stxvrx): Define internal names for overloaded
load/store rightmost element.
* config/rs6000/rs6000-call.c (altivec_builtin_types): Define the internal
monomorphs P10_BUILTIN_SE_LXVRBX, P10_BUILTIN_SE_LXVRHX,
P10_BUILTIN_SE_LXVRWX, P10_BUILTIN_SE_LXVRDX,
P10_BUILTIN_ZE_LXVRBX, P10_BUILTIN_ZE_LXVRHX, P10_BUILTIN_ZE_LXVRWX,
P10_BUILTIN_ZE_LXVRDX,
P10_BUILTIN_TR_STXVRBX, P10_BUILTIN_TR_STXVRHX, P10_BUILTIN_TR_STXVRWX,
P10_BUILTIN_TR_STXVRDX,
(altivec_expand_lxvr_builtin): New expansion for load element builtins.
(altivec_expand_stv_builtin): Update to support truncate and store builtins.
(altivec_expand_builtin): Add cases for the load/store rightmost builtins.
(altivec_init_builtins): Add def_builtin entries for
__builtin_altivec_se_lxvrbx, __builtin_altivec_se_lxvrhx,
__builtin_altivec_se_lxvrwx, __builtin_altivec_se_lxvrdx,
__builtin_altivec_ze_lxvrbx, __builtin_altivec_ze_lxvrhx,
__builtin_altivec_ze_lxvrwx, __builtin_altivec_ze_lxvrdx,
__builtin_altivec_tr_stxvrbx, __builtin_altivec_tr_stxvrhx,
__builtin_altivec_tr_stxvrwx, __builtin_altivec_tr_stxvrdx,
__builtin_vec_se_lxvrx, __builtin_vec_ze_lxvrx, __builtin_vec_tr_stxvrx.
* config/rs6000/vsx.md (vsx_lxvrx, vsx_stxvrx, vsx_stxvrx):
New define_insn entries.
* gcc/doc/extend.texi:  Add documentation for vsx_xl_sext, vsx_xl_zext,
and vec_xst_trunc.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/vsx-load-element-extend-char.c: New test.
* gcc.target/powerpc/vsx-load-element-extend-int.c: New test.
* gcc.target/powerpc/vsx-load-element-extend-longlong.c: New test.
* gcc.target/powerpc/vsx-load-element-extend-short.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-char.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-int.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-longlong.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-short.c: New test.

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 8a2dcda01442..df10a8c498dd 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -234,10 +234,13 @@
 #define vec_lde __builtin_vec_lde
 #define vec_ldl __builtin_vec_ldl
 #define vec_lvebx __builtin_vec_lvebx
 #define vec_lvehx __builtin_vec_lvehx
 #define vec_lvewx __builtin_vec_lvewx
+#define vec_xl_zext __builtin_vec_ze_lxvrx
+#define vec_xl_sext __builtin_vec_se_lxvrx
+#define vec_xst_trunc __builtin_vec_tr_stxvrx
 #define vec_neg __builtin_vec_neg
 #define vec_pmsum_be __builtin_vec_vpmsum
 #define vec_shasigma_be __builtin_crypto_vshasigma
 /* Cell only intrinsics.  */
 #ifdef __PPU__
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 3eb55f0ae434..5b05da87f4bf 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1143,10 +1143,18 @@
(RS6000_BTC_ ## ATTR/* ATTR */  \
 | RS6000_BTC_BINARY),  \
CODE_FOR_ ## ICODE) /* ICODE */
 #endif
 
+#define BU_P10V_OVERLOAD_X(ENUM, NAME) \
+  RS6000_BUILTIN_X (P10_BUILTIN_VEC_ ## ENUM,  /* ENUM */  \
+   "__builtin_vec_" NAME,  /* NAME */  \
+   RS6000_BTM_P10, /* MASK */  \
+   

Re: [PATCH] openmp: Implement support for OMP_TARGET_OFFLOAD

2020-10-20 Thread Kwok Cheung Yeung

On 20/10/2020 1:57 pm, Jakub Jelinek wrote:

On Tue, Oct 20, 2020 at 02:17:26PM +0200, Tobias Burnus wrote:

On 10/20/20 2:11 PM, Tobias Burnus wrote:


Unfortunately, the committed patch
(r11-4121-g1bfc07d150790fae93184a79a7cce897655cb37b)
causes build errors.

The error seems to be provoked by function cloning – as the code
itself looks fine:
...
  struct gomp_device_descr *devices_s
 = malloc (num_devices * sizeof (struct gomp_device_descr));
...
   for (i = 0; i < num_devices; i++)
 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
   devices_s[num_devices_after_openmp++] = devices[i];


gomp_target_init.part.0 ()
{
...

   devices_s_1 = malloc (0);
...
   num_devices.16_67 = num_devices;
...
   if (num_devices.16_67 > 0)
 goto ; [89.00%]
   else
 goto ; [11.00%]

Which seems to have an ordering problem.


This patch fixes the warning that breaks the bootstrap, but I haven't
tested it with offloading to check that it doesn't break offloading somehow.



Thank you for the fix.

It appears that the issue only occurs when offloading is not enabled (I tested 
with offloading to Nvidia and AMD GCN devices, but forgot to test no offloading).


Kwok


Re: [PATCH] [PR rtl-optimization/97249]Simplify vec_select of paradoxical subreg.

2020-10-20 Thread Segher Boessenkool
On Tue, Oct 20, 2020 at 11:20:48AM +0800, Hongtao Liu wrote:
> +   unsigned HOST_WIDE_INT subreg_offset = 0;
> +   if (GET_CODE (trueop0) == SUBREG
> +   && GET_MODE_INNER (mode)
> +  == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
> +   && (GET_MODE_NUNITS (mode)).is_constant ()
> +   && constant_multiple_p (SUBREG_BYTE (trueop0),
> +   GET_MODE_UNIT_BITSIZE (mode),
> +   &subreg_offset))
> + {
> +   gcc_assert (XVECLEN (trueop1, 0) == l1);

Why?  If we want to check that, it should be in RTL checking (and maybe
it already is!)

> +   bool success = true;
> +   poly_uint64 nunits
> + = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
> +   for (int i = 0; i != l1; i++)
> + {
> +   rtx idx = XVECEXP (trueop1, 0, i);
> +   if (!CONST_INT_P (idx)
> +   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))

Can that ever happen in valid code?  This seems to just hide problems.

> + {
> +   success = false;
> +   break;
> + }
> + }
> +   if (success)

If you have a huge piece of code like this, factor it?  Esp. if you now
need to have all kinds of booleans where you really just want to do
early returns.


Segher


Re: [PATCH] [PR rtl-optimization/97249]Simplify vec_select of paradoxical subreg.

2020-10-20 Thread Segher Boessenkool
On Thu, Oct 15, 2020 at 04:14:39PM +0800, Hongtao Liu wrote:
> On Thu, Oct 15, 2020 at 1:37 AM Segher Boessenkool
>  wrote:
> > > +   gcc_assert (can_div_trunc_p (SUBREG_BYTE (trueop0),
> > > +GET_MODE_SIZE (GET_MODE_INNER 
> > > (mode)),
> > > +&subreg_offset));
> >
> > Why is this needed?
> 
> I only found this interface for poly_uint64 division to get subreg_offset.

I mean, why do you have this assert at all?

> > > +   if (!CONST_INT_P (j)
> > > +   || known_ge (UINTVAL (j), l2 - subreg_offset))
> > > + {
> > > +   success = false;
> > > +   break;
> > > + }
> > > + }
> >
> > You don't have to test if the input RTL is valid.  You can assume it is.
> >
> 
> This test is for something like (vec_select:v2di (subreg:v4di
> (reg:v2di) 0)(parallel [ (const_int 2) (const_int 3)])).
> const_int 2 here is out of range. Are you meaning the upper rtx wouldn't 
> exist?

Assuming this is LE: yes, this is just invalid.  You can do whatever you
want with it (except ICE :-) )

> > subreg_offset will differ in meaning if big-endian; is this correct
> Yes.
> > there, do all the stars align so this code works out fine there as well?
> 
> I found it's a bit tricky to adjust the selection index for targets with
> BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN,
> especially for component modes smaller than a word.  Is there any interface to handle this?

For most things you want BYTES_BIG_ENDIAN, anything in a subreg here for
example.  I don't know which of those vectors use; I cannot find it in
the documentation, either.


Segher


Re: [patch] Introduce vxworks7r2 support for ppc and ppc64

2020-10-20 Thread Segher Boessenkool
Hi!

On Tue, Oct 20, 2020 at 12:10:59PM +0200, Olivier Hainque wrote:
> This change introduces support for the most recent versions
> of VxWorks on PowerPC targets, for both 32 and 64 bit thanks
> to a bi-arch setup.
> 
> The system compilers are essentially configured as Linux
> toolchains with only a few specificities and we replicate
> that model here.

> +powerpc*-wrs-vxworks7r*)
> +
> + # Wind River 7 post SR0600 is mostly like Linux so we setup
> + # out config in a very similar fashion and adjust to a few
> + # specificities.

"our config"?

> -   analogous changes here too.  */
> +/* The port comes in two very different flavors at this stage:
> +
> +   - For 653 (AE) and regular versions prior to VxWorks 7, the port
> + comes with its own set of definitions, matching a system compiler
> + configured this way as well as the corresponding run-time
> + environment.  This is essentially an eabi system, so changes to
> + eabi.h should usually be reflected here.
> +
> +   - Starting with VxWorks 7 (post SR600), the system environment
> + was made extremely similar to GNU/Linux and this toolchain is
> + builtin on top of the corresponding header files.  */

"built on top"?

> +/
> + * Common definitions first *
> + /

We don't use such decorated comments in GCC.  But it is your header file
of course :-)

I don't see anything wrong with the actual code itself, fwiw :-)


Segher


Re: [PATCH][middle-end][i386][version 3]Add -fzero-call-used-regs=[skip|used-gpr-arg|used-arg|all-arg|used-gpr|all-gpr|used|all]

2020-10-20 Thread Qing Zhao via Gcc-patches



> On Oct 20, 2020, at 10:24 AM, Uros Bizjak  wrote:
> 
> On Tue, Oct 20, 2020 at 4:01 PM Qing Zhao  > wrote:
>> 
>> Hi, Uros,
>> 
>> Thanks a lot for your comments.
>> 
>> On Oct 19, 2020, at 2:30 PM, Uros Bizjak  wrote:
>> 
>> 
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index f684954..620114f 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -3551,6 +3551,161 @@ ix86_function_value_regno_p (const unsigned int 
>> regno)
>> return false;
>> }
>> 
>> +/* Check whether the register REGNO should be zeroed on X86.
>> +   When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
>> +   together, no need to zero it again.
>> +   Stack registers (st0-st7) and mm0-mm7 are aliased with each other.
>> +   very hard to be zeroed individually, don't zero individual st or
>> +   mm registgers at this time.  */
>> +
>> +static bool
>> +zero_call_used_regno_p (const unsigned int regno,
>> + bool all_sse_zeroed)
>> +{
>> +  return GENERAL_REGNO_P (regno)
>> +  || (!all_sse_zeroed && SSE_REGNO_P (regno))
>> +  || MASK_REGNO_P (regno);
>> +}
>> +
>> +/* Return the machine_mode that is used to zero register REGNO.  */
>> +
>> +static machine_mode
>> +zero_call_used_regno_mode (const unsigned int regno)
>> +{
>> +  /* NB: We only need to zero the lower 32 bits for integer registers
>> + and the lower 128 bits for vector registers since destination are
>> + zero-extended to the full register width.  */
>> +  if (GENERAL_REGNO_P (regno))
>> +return SImode;
>> +  else if (SSE_REGNO_P (regno))
>> +return V4SFmode;
>> +  else
>> +return HImode;
>> +}
>> +
>> +/* Generate a rtx to zero all vector registers togetehr if possible,
>> +   otherwise, return NULL.  */
>> +
>> +static rtx
>> +zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  if (!TARGET_AVX)
>> +return NULL;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
>> +  || (TARGET_64BIT
>> +  && (REX_SSE_REGNO_P (regno)
>> +  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)
>> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> +  return NULL;
>> +
>> +  return gen_avx_vzeroall ();
>> +}
>> +
>> +/* Generate a rtx to zero all st and mm registers togetehr if possible,
>> +   otherwise, return NULL.  */
>> +
>> +static rtx
>> +zero_all_st_mm_registers (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  if (!TARGET_MMX)
>> +return NULL;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
>> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> +  return NULL;
>> +
>> +  return gen_mmx_emms ();
>> 
>> 
>> emms is not clearing any register, it only loads x87FPUTagWord with
>> FFFFH. So I think the above is useless, as far as register clearing
>> is concerned.
>> 
>> 
>> Thanks for the info.
>> 
>> So, for mm and st registers, should we clear them, and how?
> 
> I don't know.
> 
> Please note that %mm and %st share the same register file, and
> touching %mm registers will block access to %st until emms is emitted.
> You can't just blindly load 0 to %st registers, because the register
> file can be in MMX mode and vice versa. For 32bit targets, function
> can also  return a value in the %mm0.

If data flow determines that %mm0 does not hold a return value at the return, can we 
clear all the %st registers as follows:

emms
mov %st0, 0
mov %st1, 0
mov %st2, 0
mov %st3, 0
mov %st4, 0
mov %st5, 0
mov %st6, 0
mov %st7, 0

? 

Thanks.

Qing
> 



libgo patch committed: Adjust NetBSD-specific types

2020-10-20 Thread Ian Lance Taylor via Gcc-patches
This libgo patch by Nikhil Benesch adjusts some of the syscall types
and names to maintain backward compatibility on NetBSD.  Specifically,
the RTM_RESOLVE constant must be added if it is missing, and the
stat_t struct must use the suffix "timespec" rather than "tim" for its
time-related fields.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
141243370c0af0c44b125c0c47b129019738245f
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 4ac0e8c6fc6..fb7aa3e6eae 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-6662382a279dd5a5f99307e9b609654717638b24
+b2be94556bbc98f565fc277e30a038c742bf28a4
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/mksysinfo.sh b/libgo/mksysinfo.sh
index 607c97d26fe..deac5ce8d67 100755
--- a/libgo/mksysinfo.sh
+++ b/libgo/mksysinfo.sh
@@ -507,6 +507,13 @@ if grep 'define st_dev st_fsid' gen-sysinfo.go > /dev/null 
2>&1; then
   st_dev='-e s/st_fsid/Dev/'
 fi
 
+# For historical reasons Go uses the suffix "timespec" instead of "tim" for
+# stat_t's time fields on NetBSD.
+st_times='-e s/st_atim/Atim/ -e s/st_mtim/Mtim/ -e s/st_ctim/Ctim/'
+if test "${GOOS}" = "netbsd"; then
+st_times='-e s/st_atim/Atimespec/ -e s/st_mtim/Mtimespec/ -e 
s/st_ctim/Ctimespec/'
+fi
+
 # The stat type.
 # Prefer largefile variant if available.
 stat=`grep '^type _stat64 ' gen-sysinfo.go || true`
@@ -517,6 +524,7 @@ else
 fi | sed -e 's/type _stat64/type Stat_t/' \
  -e 's/type _stat/type Stat_t/' \
  ${st_dev} \
+ ${st_times} \
  -e 's/st_ino/Ino/g' \
  -e 's/st_nlink/Nlink/' \
  -e 's/st_mode/Mode/' \
@@ -526,9 +534,6 @@ fi | sed -e 's/type _stat64/type Stat_t/' \
  -e 's/st_size/Size/' \
  -e 's/st_blksize/Blksize/' \
  -e 's/st_blocks/Blocks/' \
- -e 's/st_atim/Atim/' \
- -e 's/st_mtim/Mtim/' \
- -e 's/st_ctim/Ctim/' \
  -e 's/\([^a-zA-Z0-9_]\)_timeval\([^a-zA-Z0-9_]\)/\1Timeval\2/g' \
  -e 's/\([^a-zA-Z0-9_]\)_timespec_t\([^a-zA-Z0-9_]\)/\1Timespec\2/g' \
  -e 
's/\([^a-zA-Z0-9_]\)_st_timespec_t\([^a-zA-Z0-9_]\)/\1StTimespec\2/g' \
@@ -1055,6 +1060,13 @@ grep '^const _RTCF' gen-sysinfo.go | \
 sed -e 's/^\(const \)_\(RTCF[^= ]*\)\(.*\)$/\1\2 = _\2/' >> ${OUT}
 grep '^const _RTM' gen-sysinfo.go | \
 sed -e 's/^\(const \)_\(RTM[^= ]*\)\(.*\)$/\1\2 = _\2/' >> ${OUT}
+if test "${GOOS}" = "netbsd"; then
+  if ! grep "RTM_RESOLVE" ${OUT} >/dev/null 2>&1; then
+# NetBSD 8.0 removed RTM_RESOLVE, but it is part of the syscall package's
+# stable API, so add it manually.
+echo "const RTM_RESOLVE = 0xb" >> ${OUT}
+  fi
+fi
 grep '^const _RTN' gen-sysinfo.go | \
 sed -e 's/^\(const \)_\(RTN[^= ]*\)\(.*\)$/\1\2 = _\2/' >> ${OUT}
 grep '^const _RTPROT' gen-sysinfo.go | \


c++: block-scope extern decl with default args

2020-10-20 Thread Nathan Sidwell


In adding the DECL_LOCAL_DECL handling, I'd forgotten that the
parm-decls also need cloning -- and resetting of their DECL_CONTEXT.
Also, any default args need dropping when adding an alias, as those are
not propagated.  The std's not totally clear on this latter point when
there's no existing namespace decl, but that seems like the right thing
and is what clang does.

gcc/cp/
* name-lookup.c (push_local_extern_decl_alias): Recontextualize
alias' parm decls.  Drop any default args.
gcc/testsuite/
* g++.dg/lookup/local-extern.C: New.

pushing to trunk

nathan

--
Nathan Sidwell
diff --git c/gcc/cp/name-lookup.c w/gcc/cp/name-lookup.c
index e951fb7885b..46374278068 100644
--- c/gcc/cp/name-lookup.c
+++ w/gcc/cp/name-lookup.c
@@ -2969,6 +2969,52 @@ push_local_extern_decl_alias (tree decl)
 	{
 	  /* No existing namespace-scope decl.  Make one.  */
 	  alias = copy_decl (decl);
+	  if (TREE_CODE (alias) == FUNCTION_DECL)
+	{
+	  /* Recontextualize the parms.  */
+	  for (tree *chain = &DECL_ARGUMENTS (alias);
+		   *chain; chain = &DECL_CHAIN (*chain))
+		{
+		  *chain = copy_decl (*chain);
+		  DECL_CONTEXT (*chain) = alias;
+		}
+
+	  tree type = TREE_TYPE (alias);
+	  for (tree args = TYPE_ARG_TYPES (type);
+		   args; args = TREE_CHAIN (args))
+		if (TREE_PURPOSE (args))
+		  {
+		/* There are default args.  Lose them.  */
+		tree nargs = NULL_TREE;
+		tree *chain = &nargs;
+		for (args = TYPE_ARG_TYPES (type);
+			 args; args = TREE_CHAIN (args))
+		  if (args == void_list_node)
+			{
+			  *chain = args;
+			  break;
+			}
+		  else
+			{
+			  *chain
+			= build_tree_list (NULL_TREE, TREE_VALUE (args));
+			  chain = &TREE_CHAIN (*chain);
+			}
+
+		tree fn_type = build_function_type (TREE_TYPE (type), nargs);
+
+		fn_type = apply_memfn_quals
+		  (fn_type, type_memfn_quals (type));
+
+		fn_type = build_cp_fntype_variant
+		  (fn_type, type_memfn_rqual (type),
+		   TYPE_RAISES_EXCEPTIONS (type),
+		   TYPE_HAS_LATE_RETURN_TYPE (type));
+
+		TREE_TYPE (alias) = fn_type;
+		break;
+		  }
+	}
 
 	  /* This is the real thing.  */
 	  DECL_LOCAL_DECL_P (alias) = false;
diff --git c/gcc/testsuite/g++.dg/lookup/local-extern.C w/gcc/testsuite/g++.dg/lookup/local-extern.C
new file mode 100644
index 000..1d6d8617bde
--- /dev/null
+++ w/gcc/testsuite/g++.dg/lookup/local-extern.C
@@ -0,0 +1,13 @@
+int foo ()
+{
+  extern int baz (int i = 5);
+  return baz ();
+}
+
+int baz (int i = 0);
+
+int bar ()
+{
+  extern int baz (int i = 6);
+  return baz ();
+}


Re: [PATCH 2/8] [RS6000] rs6000_rtx_costs for AND

2020-10-20 Thread Segher Boessenkool
On Thu, Oct 08, 2020 at 09:27:54AM +1030, Alan Modra wrote:
> The existing "case AND" in this function is not sufficient for
> optabs.c:avoid_expensive_constant usage, where the AND is passed in
> outer_code.  We'd like to cost AND of rs6000_is_valid_and_mask
> or rs6000_is_valid_2insn_and variety there, so that those masks aren't
> seen as expensive (ie. better to load to a reg then AND).
> 
>   * config/rs6000/rs6000.c (rs6000_rtx_costs): Combine CONST_INT
>   AND handling with IOR/XOR.  Move costing for AND with
>   rs6000_is_valid_and_mask or rs6000_is_valid_2insn_and to
>   CONST_INT.

Sorry this took so long to review :-(

On 64-bit BE this leads to *bigger* code, and closer observation shows
that some common sequences degrade on all configs.  This seems to mostly
be about "andc" (and its dot form).  It wasn't costed properly before,
but after your patch, a single instruction is replaced by three.

Could you look into this?


Segher


libbacktrace patch committed: Use __attribute__((__fallthrough__))

2020-10-20 Thread Ian Lance Taylor via Gcc-patches
This libbacktrace patch uses __attribute__((__fallthrough__)) rather
than relying on a /*fallthrough*/ comment.  Bootstrapped and ran
libbacktrace tests on x86_64-pc-linux-gnu.  Committed to mainline.

Ian

* internal.h (ATTRIBUTE_FALLTHROUGH): Define.
* elf.c (elf_zlib_inflate): Use ATTRIBUTE_FALLTHROUGH.
diff --git a/libbacktrace/elf.c b/libbacktrace/elf.c
index 941f820d944..d52b86cdeb5 100644
--- a/libbacktrace/elf.c
+++ b/libbacktrace/elf.c
@@ -2081,10 +2081,10 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, 
uint16_t *zdebug_table,
{
case 6:
  *plen++ = prev;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 5:
  *plen++ = prev;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 4:
  *plen++ = prev;
}
@@ -2115,22 +2115,22 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, 
uint16_t *zdebug_table,
{
case 10:
  *plen++ = 0;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 9:
  *plen++ = 0;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 8:
  *plen++ = 0;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 7:
  *plen++ = 0;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 6:
  *plen++ = 0;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 5:
  *plen++ = 0;
- /* fallthrough */
+ ATTRIBUTE_FALLTHROUGH;
case 4:
  *plen++ = 0;
}
diff --git a/libbacktrace/internal.h b/libbacktrace/internal.h
index 047a700c0ce..659db9e21e2 100644
--- a/libbacktrace/internal.h
+++ b/libbacktrace/internal.h
@@ -56,6 +56,14 @@ POSSIBILITY OF SUCH DAMAGE.  */
 # endif
 #endif
 
+#ifndef ATTRIBUTE_FALLTHROUGH
+# if (GCC_VERSION >= 7000)
+#  define ATTRIBUTE_FALLTHROUGH __attribute__ ((__fallthrough__))
+# else
+#  define ATTRIBUTE_FALLTHROUGH
+# endif
+#endif
+
 #ifndef HAVE_SYNC_FUNCTIONS
 
 /* Define out the sync functions.  These should never be called if


Re: [PATCH][middle-end][i386][version 3]Add -fzero-call-used-regs=[skip|used-gpr-arg|used-arg|all-arg|used-gpr|all-gpr|used|all]

2020-10-20 Thread Richard Sandiford via Gcc-patches
Qing Zhao  writes:
> @@ -4959,6 +4963,52 @@ handle_no_split_stack_attribute (tree *node, tree name,
>   return NULL_TREE;
> }
>
> +/* Handle a "zero_call_used_regs" attribute; arguments as in
> +   struct attribute_spec.handler.  */
> +
> +static tree
> +handle_zero_call_used_regs_attribute (tree *node, tree name, tree args,
> +   int ARG_UNUSED (flags),
> +   bool *no_add_attris)

s/attris/attrs/

> +{
> +  tree decl = *node;
> +  tree id = TREE_VALUE (args);
> +
> +  if (TREE_CODE (decl) != FUNCTION_DECL)
> +{
> +  error_at (DECL_SOURCE_LOCATION (decl),
> + "%qE attribute applies only to functions", name);
> +  *no_add_attris = true;
> +  return NULL_TREE;
> +}
> +
> +  if (TREE_CODE (id) != STRING_CST)
> +{
> +  error ("attribute %qE arguments not a string", name);
> +  *no_add_attris = true;
> +  return NULL_TREE;
> +}
> +
> +  if ((strcmp (TREE_STRING_POINTER (id), "skip") != 0)
> +  && (strcmp (TREE_STRING_POINTER (id), "used-gpr-arg") != 0)
> +  && (strcmp (TREE_STRING_POINTER (id), "used-arg") != 0)
> +  && (strcmp (TREE_STRING_POINTER (id), "all-arg") != 0)
> +  && (strcmp (TREE_STRING_POINTER (id), "used-gpr") != 0)
> +  && (strcmp (TREE_STRING_POINTER (id), "all-gpr") != 0)
> +  && (strcmp (TREE_STRING_POINTER (id), "used") != 0)
> +  && (strcmp (TREE_STRING_POINTER (id), "all") != 0))

Any reason we don't support all-gpr-arg?  Seems to be the only
“missing” combination.

Would be good to have a single piece of code that parses these
arguments into a set of flags, rather than have one list here
and one in get_call_used_regs_seq.

Maybe we could do something similar to sanitizer_opts, but that
might not be necessary.

> +{
> +  error ("attribute %qE argument must be one of %qs, %qs, %qs, %qs,"
> +  "%qs, %qs, %qs, or %qs",
> +  name, "skip", "used-gpr-arg", "used-arg", "all-arg",
> +  "used-gpr", "all-gpr", "used", "all");
> +  *no_add_attris = true;
> +  return NULL_TREE;
> +}
> +
> +  return NULL_TREE;
> +}
> +
> /* Handle a "returns_nonnull" attribute; arguments as in
>struct attribute_spec.handler.  */
>
> diff --git a/gcc/coretypes.h b/gcc/coretypes.h
> index 6b6cfcd..0ce5eb4 100644
> --- a/gcc/coretypes.h
> +++ b/gcc/coretypes.h
> @@ -418,6 +418,19 @@ enum symbol_visibility
>   VISIBILITY_INTERNAL
> };
>
> +/* Zero call-used registers type.  */
> +enum zero_call_used_regs {
> +  zero_call_used_regs_unset = 0,
> +  zero_call_used_regs_skip,
> +  zero_call_used_regs_used_gpr_arg,
> +  zero_call_used_regs_used_arg,
> +  zero_call_used_regs_all_arg,
> +  zero_call_used_regs_used_gpr,
> +  zero_call_used_regs_all_gpr,
> +  zero_call_used_regs_used,
> +  zero_call_used_regs_all
> +};

I think a bitmask would be easier to use:

  SKIP
  ONLY_USED
  ONLY_GPR
  ONLY_ARG

Should probably be a class enum given that we're C++11.

> +/* Return true if REGNO is used by the epilogue.  */
> +bool
> +df_epilogue_uses_p (unsigned int regno)
> +{
> +return (EPILOGUE_USES (regno)
> + || TEST_HARD_REG_BIT (crtl->zeroed_reg_set, regno));

Nit: the { … } body should be indented by two spaces rather than four.

> diff --git a/gcc/df.h b/gcc/df.h
> index 8b6ca8c..0f098d7 100644
> --- a/gcc/df.h
> +++ b/gcc/df.h
> @@ -1085,6 +1085,7 @@ extern void df_update_entry_exit_and_calls (void);
> extern bool df_hard_reg_used_p (unsigned int);
> extern unsigned int df_hard_reg_used_count (unsigned int);
> extern bool df_regs_ever_live_p (unsigned int);
> +extern bool df_epilogue_uses_p (unsigned int);
> extern void df_set_regs_ever_live (unsigned int, bool);
> extern void df_compute_regs_ever_live (bool);
> extern void df_scan_verify (void);
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index c9f7299..f56f61a 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -3992,6 +3992,30 @@ performing a link with relocatable output (i.e.@: 
> @code{ld -r}) on them.
> A declaration to which @code{weakref} is attached and that is associated
> with a named @code{target} must be @code{static}.
>
> +@item zero_call_used_regs ("@var{choice}")
> +@cindex @code{zero_call_used_regs} function attribute
> +
> +The @code{zero_call_used_regs} attribute causes the compiler to zero
> +call-used registers at function return according to @var{choice}.
> +This is used to increase the program security by either mitigating
> +Return-Oriented Programming (ROP) or preventing information leak
> +through registers.
> +@samp{skip} doesn't zero call-used registers.
> +
> +@samp{used-arg-gpr} zeros used call-used general purpose registers that

used-gpr-arg

> +pass parameters. @samp{used-arg} zeros used call-used registers that
> +pass parameters. @samp{arg} zeros all call-used registers that pass
> +parameters.  These 3 choices are used for ROP mitigation.
> +
> +@samp{used-gpr} zeros call-used general purpose registers
> +which are 

libgo patch committed: Remove sendfile on NetBSD

2020-10-20 Thread Ian Lance Taylor via Gcc-patches
This libgo patch by Nikhil Benesch removes sendfile from the syscall
package on NetBSD, as NetBSD doesn't have the sendfile system call.
Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
76a9f0acd248bba801e6208d11a96db5b7f940dc
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 7496770f018..4ac0e8c6fc6 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-3b6252d2d3ce559826303dac07538da6e78940d8
+6662382a279dd5a5f99307e9b609654717638b24
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/go/syscall/libcall_bsd.go b/libgo/go/syscall/libcall_bsd.go
deleted file mode 100644
index 93f5710ba03..000
--- a/libgo/go/syscall/libcall_bsd.go
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build darwin dragonfly freebsd netbsd openbsd solaris
-
-// BSD library calls.
-
-package syscall
-
-import (
-   "internal/race"
-   "unsafe"
-)
-
-func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err 
error) {
-   if race.Enabled {
-   race.ReleaseMerge(unsafe.Pointer(&ioSync))
-   }
-   var soff Offset_t
-   var psoff *Offset_t
-   if offset != nil {
-   soff = Offset_t(*offset)
-   psoff = &soff
-   }
-   written, err = sendfile(outfd, infd, psoff, count)
-   if offset != nil {
-   *offset = int64(soff)
-   }
-   return
-}
diff --git a/libgo/go/syscall/libcall_bsd_regfile.go 
b/libgo/go/syscall/libcall_bsd_regfile.go
index 388c8a7d782..0b9d01f2fcf 100644
--- a/libgo/go/syscall/libcall_bsd_regfile.go
+++ b/libgo/go/syscall/libcall_bsd_regfile.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build darwin dragonfly freebsd netbsd openbsd solaris,amd64 solaris,sparc64
+// +build darwin dragonfly freebsd openbsd solaris,amd64 solaris,sparc64
 
 package syscall
 
diff --git a/libgo/go/syscall/libcall_bsd_sendfile.go 
b/libgo/go/syscall/libcall_bsd_sendfile.go
new file mode 100644
index 000..295a1f48969
--- /dev/null
+++ b/libgo/go/syscall/libcall_bsd_sendfile.go
@@ -0,0 +1,31 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd openbsd solaris
+
+// BSD sendfile support.
+
+package syscall
+
+import (
+   "internal/race"
+   "unsafe"
+)
+
+func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err 
error) {
+   if race.Enabled {
+   race.ReleaseMerge(unsafe.Pointer(&ioSync))
+   }
+   var soff Offset_t
+   var psoff *Offset_t
+   if offset != nil {
+   soff = Offset_t(*offset)
+   psoff = &soff
+   }
+   written, err = sendfile(outfd, infd, psoff, count)
+   if offset != nil {
+   *offset = int64(soff)
+   }
+   return
+}


Re: PING – Re: [Patch] collect-utils.c, lto-wrapper + mkoffload: Improve -save-temps filename

2020-10-20 Thread Tobias Burnus

On 10/20/20 11:27 AM, Richard Biener wrote:


OK.


Missed two things – a copy'n'paste issue that used the wrong variable, and
updating a testcase to add the newly added dump files there.

Committed as obvious in r11-4132-g6267bb7a11f53381d77b2c0a6193fcb9115d2b30

Tobias

On 10/13/20 9:37 PM, Tobias Burnus wrote:

This patch avoids putting some [...] files to /tmp/cc* when
-save-temps has been specified.

For my testcase, it now generates:
a.lto_wrapper_args
a.offload_args
a.xnvptx-none.args
a.xnvptx-none.gcc_args
a.xamdgcn-amdhsa.gcc_args
a.xamdgcn-amdhsa.gccnative_args
a.xamdgcn-amdhsa.ld_args


This patch adds an additional argument to collect-utils.c's
collect_execute (and its wrapper fork_execute) which, if not NULL,
is used in 'concat (dumppfx, atsuffix, NULL);'.

This patch adds a suffix to gcc/config/gcn/mkoffload.c,
gcc/config/nvptx/mkoffload.c and gcc/lto-wrapper.c.

It does not (yet) add a suffix to gcc/collect2.c and
gcc/config/i386/intelmic-mkoffload.c but just passes
NULL; for intelmic it is not a work item as it does
not use '@' files at all.

Hopefully, there is no file which is written twice
with the same name (or otherwise overwritten) and
the file names make sense.

OK?

Tobias

PS: There is still cceBdzZk.ofldlist (via lto-plugin/lto-plugin.c),
and @/tmp/cc* in calls to lto1 and collect2. And collect2.c
passes NULL also when use_atfile is true.

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
commit 6267bb7a11f53381d77b2c0a6193fcb9115d2b30
Author: Tobias Burnus 
Date:   Tue Oct 20 19:41:44 2020 +0200

lto-wrapper: Fix -save-temps filename commit

Fix for 'Improve -save-temps filename' patch,
commit b3032d1b84b8ab683f2d7345b6a2d9f783fe946d

gcc/ChangeLog
* lto-wrapper.c (run_gcc): Use proper variable for
%u.ltrans_args dump suffix.

gcc/testsuite/ChangeLog
* gcc.misc-tests/outputs.exp: Add ltrans_args dump files
for 'lto save-temps'.
---
 gcc/lto-wrapper.c|  2 +-
 gcc/testsuite/gcc.misc-tests/outputs.exp | 24 
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index e458347860c..9072a3cc0fe 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -1924,7 +1924,7 @@ cont:
 	{
 	  char argsuffix[sizeof (DUMPBASE_SUFFIX) + 1];
 	  if (save_temps)
-		snprintf (dumpbase, sizeof (DUMPBASE_SUFFIX),
+		snprintf (argsuffix, sizeof (DUMPBASE_SUFFIX),
 			  "ltrans%u.ltrans_args", i);
 	  fork_execute (new_argv[0], CONST_CAST (char **, new_argv),
 			true, save_temps ? argsuffix : NULL);
diff --git a/gcc/testsuite/gcc.misc-tests/outputs.exp b/gcc/testsuite/gcc.misc-tests/outputs.exp
index 1e3cd4122a6..1fdd61a95c0 100644
--- a/gcc/testsuite/gcc.misc-tests/outputs.exp
+++ b/gcc/testsuite/gcc.misc-tests/outputs.exp
@@ -703,20 +703,20 @@ outest "$b lto sing empty dumpdir empty dumpbase namedb" $sing "-dumpdir \"\" -d
 outest "$b lto mult empty dumpdir empty dumpbase namedb" $mult "-dumpdir \"\" -dumpbase \"\" -o dir/$b.exe -O2 -flto -flto-partition=one -fdump-ipa-icf-optimized -fdump-rtl-final -fstack-usage" {dir/} {{.exe} {-1.c.???i.icf !$ltop -1.c.???r.final !0 -2.c.???i.icf !$ltop -2.c.???r.final !0 .wpa.???i.icf .ltrans0.ltrans.???r.final .ltrans0.ltrans.su}}
 
 # Now -flto with -save-temps, not exhaustive.
-outest "$b lto st sing empty dumpbase unnamed" $sing "-dumpbase \"\" -save-temps -O2 -flto -flto-partition=one -fdump-ipa-icf-optimized -fdump-rtl-final -fstack-usage $oaout" {} {{-0.i -0.s -0.o -0.c.???i.icf !$ltop -0.c.???r.final !!$ltop a.lto_wrapper_args !0 a.wpa.???i.icf a.ltrans.out !!$ltop a.res !0 a.ltrans0.o a.ltrans0.ltrans.???r.final a.ltrans0.ltrans.su a.ltrans0.ltrans.s a.ltrans0.ltrans.o $aout}}
-outest "$b lto st mult empty dumpbase unnamed" $mult "-dumpbase \"\" -save-temps -O2 -flto -flto-partition=one -fdump-ipa-icf-optimized -fdump-rtl-final -fstack-usage $oaout" {} {{-1.i -1.s -1.o -1.c.???i.icf !$ltop -1.c.???r.final !0 -2.i -2.s -2.o -2.c.???i.icf !$ltop -2.c.???r.final !!$ltop a.lto_wrapper_args !0 a.wpa.???i.icf a.ltrans.out !!$ltop a.res !0 a.ltrans0.o a.ltrans0.ltrans.???r.final a.ltrans0.ltrans.su a.ltrans0.ltrans.s a.ltrans0.ltrans.o $aout}}
-outest "$b lto st sing dumpdir empty dumpbase named" $sing "-dumpdir dir/ -dumpbase \"\" -o $b-0.exe -save-temps -O2 -flto -flto-partition=one -fdump-ipa-icf-optimized -fdump-rtl-final -fstack-usage" {dir/} {{-0.i -0.s -0.o -0.c.???i.icf !$ltop -0.c.???r.final !!$ltop -0.lto_wrapper_args !0 -0.wpa.???i.icf -0.ltrans.out !!$ltop -0.res !0 -0.ltrans0.o -0.ltrans0.ltrans.???r.final -0.ltrans0.ltrans.su -0.ltrans0.ltrans.s -0.ltrans0.ltrans.o} {-0.exe}}
-outest "$b lto st mult dumpdir empty dumpbase named" $mult "-dumpdir dir/ -dumpbase \"\" -o $b-1.exe -save-temps -O2 -flto -flto-partition=one 

Re: [PATCH] libstdc++: Fix division by zero in std::sample

2020-10-20 Thread Patrick Palka via Gcc-patches
On Tue, 20 Oct 2020, Jonathan Wakely wrote:

> On 21/09/20 16:01 +0100, Jonathan Wakely wrote:
> > On 18/09/20 21:08 -0400, Patrick Palka via Libstdc++ wrote:
> > > This fixes a division by zero in the selection-sampling std::__sample
> > > overload when the input range is empty (and hence __unsampled_sz is 0).
> > > 
> > > Tested on x86_64-pc-linux-gnu.
> > > 
> > > libstdc++-v3/ChangeLog:
> > > 
> > >   * include/bits/stl_algo.h (__sample): Exit early when the
> > >   input range is empty.
> > >   * testsuite/25_algorithms/sample/3.cc: New test.
> > > ---
> > > libstdc++-v3/include/bits/stl_algo.h  |  3 ++
> > > .../testsuite/25_algorithms/sample/3.cc   | 50 +++
> > > 2 files changed, 53 insertions(+)
> > > create mode 100644 libstdc++-v3/testsuite/25_algorithms/sample/3.cc
> > 
> > OK, thanks.
> 
> Could you please backport this to all branches too.

Ah yes, sorry about losing track of this patch.  I should be able to get
the backports done by end of today.



Re: PING: [PATCH] Convert -Wrestrict pass to ranger.

2020-10-20 Thread Andrew MacLeod via Gcc-patches

On 10/19/20 1:23 PM, Aldy Hernandez wrote:

Rebased on current trunk.

There is one adjustment to a C++ test which now gives a false positive.
After talking with Martin Sebor, we've concluded this is expected.  There
is no way to communicate that libstdc++ allocated objects are always
less than PTRDIFF_MAX.


I think you have addressed the various issues with Martin, as much as 
can be.


we'll address a mechanism to access a ranger instance from the memref 
and access classes later, as well as the get_size_range duality. I don't
think we should hold this up as it isn't an issue for other passes, and 
we'll consider a more central solution down the road.


OK

Andrew



OK?

 gcc/ChangeLog:

 * calls.c (get_size_range): Adjust to work with ranger.
 * calls.h (get_size_range): Add ranger argument to prototype.
 * gimple-ssa-warn-restrict.c (class wrestrict_dom_walker): Remove.
 (check_call): Pull out of wrestrict_dom_walker into a
 static function.
 (wrestrict_dom_walker::before_dom_children): Rename to...
 (wrestrict_walk): ...this.
 (pass_wrestrict::execute): Instantiate ranger.
 (class builtin_memref): Add stmt and query fields.
 (builtin_access::builtin_access): Add range_query field.
 (builtin_memref::builtin_memref): Same.
 (builtin_memref::extend_offset_range): Same.
 (builtin_access::builtin_access): Make work with ranger.
 (wrestrict_dom_walker::check_call): Pull out into...
 (check_call): ...here.
 (check_bounds_or_overlap): Add range_query argument.
 * gimple-ssa-warn-restrict.h (check_bounds_or_overlap):
 Add range_query and gimple stmt arguments.

 gcc/testsuite/ChangeLog:

 * gcc.dg/Wrestrict-22.c: New test.
 * g++.dg/torture/pr92421.C: Adjust for ranger.

 libstdc++-v3/ChangeLog:

* testsuite/21_strings/basic_string/capacity/1.cc: Pass
-Wno-stringop-overflow to test.
---
  gcc/calls.c   | 26 -
  gcc/calls.h   |  2 +
  gcc/gimple-ssa-warn-restrict.c| 99 +++
  gcc/gimple-ssa-warn-restrict.h|  3 +
  gcc/testsuite/g++.dg/torture/pr92421.C|  4 +
  gcc/testsuite/gcc.dg/Wrestrict-22.c   |  9 ++
  .../21_strings/basic_string/capacity/1.cc |  2 +
  7 files changed, 104 insertions(+), 41 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/Wrestrict-22.c

diff --git a/gcc/calls.c b/gcc/calls.c
index d3120b23f60..a12b84744c0 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -59,6 +59,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "builtins.h"
  #include "gimple-fold.h"
  #include "attr-fnspec.h"
+#include "value-query.h"
  
  #include "tree-pretty-print.h"
  
@@ -1244,7 +1245,8 @@ alloc_max_size (void)

 in a multi-range, otherwise to the smallest valid subrange.  */
  
  bool

-get_size_range (tree exp, tree range[2], int flags /* = 0 */)
+get_size_range (range_query *query, tree exp, gimple *stmt, tree range[2],
+   int flags /* = 0 */)
  {
if (!exp)
  return false;
@@ -1263,7 +1265,21 @@ get_size_range (tree exp, tree range[2], int flags /* = 
0 */)
enum value_range_kind range_type;
  
if (integral)

-range_type = determine_value_range (exp, &min, &max);
+{
+  value_range vr;
+  if (query && query->range_of_expr (vr, exp, stmt))
+   {
+ range_type = vr.kind ();
+ if (!vr.undefined_p ())
+   {
+ min = wi::to_wide (vr.min ());
+ max = wi::to_wide (vr.max ());
+   }
+   }
+  else
+   range_type = determine_value_range (exp, &min, &max);
+
+}
else
  range_type = VR_VARYING;
  
@@ -1369,6 +1385,12 @@ get_size_range (tree exp, tree range[2], int flags /* = 0 */)

return true;
  }
  
+bool

+get_size_range (tree exp, tree range[2], int flags /* = 0 */)
+{
+  return get_size_range (/*query=*/NULL, exp, /*stmt=*/NULL, range, flags);
+}
+
  /* Diagnose a call EXP to function FN decorated with attribute alloc_size
 whose argument numbers given by IDX with values given by ARGS exceed
 the maximum object size or cause an unsigned oveflow (wrapping) when
diff --git a/gcc/calls.h b/gcc/calls.h
index 644ec45d92c..f32b6308b58 100644
--- a/gcc/calls.h
+++ b/gcc/calls.h
@@ -142,6 +142,8 @@ enum size_range_flags
 SR_USE_LARGEST = 2
};
  extern bool get_size_range (tree, tree[2], int = 0);
+extern bool get_size_range (class range_query *, tree, gimple *,
+   tree[2], int = 0);
  extern rtx rtx_for_static_chain (const_tree, bool);
  extern bool cxx17_empty_base_field_p (const_tree);
  
diff --git a/gcc/gimple-ssa-warn-restrict.c b/gcc/gimple-ssa-warn-restrict.c

index e2734c81456..3a79e7240f9 100644
--- a/gcc/gimple-ssa-warn-restrict.c
+++ 

Re: PING: [PATCH] Convert -Walloca pass to ranger.

2020-10-20 Thread Andrew MacLeod via Gcc-patches

On 10/19/20 1:16 PM, Aldy Hernandez wrote:

Rebased on current trunk.

FWIW, we finally get rid of the Walloca-6.c XFAIL.  This has been a long time
in coming:

-/* { dg-xfail-if "Currently broken but Andrew's work should fix this" { *-*-* 
} } */

:-)

OK?


You are the original author of this pass, and everything seems 
addressed... so i think...


OK
Andrew



Aldy

 gcc/ChangeLog:

 * gimple-ssa-warn-alloca.c (enum alloca_type): Remove
 ALLOCA_BOUND_UNKNOWN and ALLOCA_CAST_FROM_SIGNED.
 (warn_limit_specified_p): New.
 (alloca_call_type_by_arg): Remove.
 (cast_from_signed_p): Remove.
 (is_max): Remove.
 (alloca_call_type): Remove heuristics and replace with call into
 ranger.
 (pass_walloca::execute): Instantiate ranger.

 gcc/testsuite/ChangeLog:

 * gcc.dg/Walloca-1.c: Adjust for ranger.
 * gcc.dg/Walloca-12.c: Same.
 * gcc.dg/Walloca-13.c: Same.
 * gcc.dg/Walloca-2.c: Same.
 * gcc.dg/Walloca-3.c: Same.
 * gcc.dg/Walloca-6.c: Same.
---
  gcc/gimple-ssa-warn-alloca.c  | 351 +++---
  gcc/testsuite/gcc.dg/Walloca-1.c  |   3 +-
  gcc/testsuite/gcc.dg/Walloca-12.c |   2 +-
  gcc/testsuite/gcc.dg/Walloca-13.c |   2 +-
  gcc/testsuite/gcc.dg/Walloca-2.c  |   6 +-
  gcc/testsuite/gcc.dg/Walloca-3.c  |   4 +-
  gcc/testsuite/gcc.dg/Walloca-6.c  |   1 -
  gcc/testsuite/gcc.dg/Wvla-larger-than-2.c |   1 -
  8 files changed, 51 insertions(+), 319 deletions(-)

diff --git a/gcc/gimple-ssa-warn-alloca.c b/gcc/gimple-ssa-warn-alloca.c
index 9e80e5dbbd9..33824a7a091 100644
--- a/gcc/gimple-ssa-warn-alloca.c
+++ b/gcc/gimple-ssa-warn-alloca.c
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "calls.h"
  #include "cfgloop.h"
  #include "intl.h"
+#include "gimple-range.h"
  
  static unsigned HOST_WIDE_INT adjusted_warn_limit (bool);
  
@@ -99,12 +100,6 @@ enum alloca_type {

// Alloca argument may be too large.
ALLOCA_BOUND_MAYBE_LARGE,
  
-  // Alloca argument is bounded but of an indeterminate size.

-  ALLOCA_BOUND_UNKNOWN,
-
-  // Alloca argument was casted from a signed integer.
-  ALLOCA_CAST_FROM_SIGNED,
-
// Alloca appears in a loop.
ALLOCA_IN_LOOP,
  
@@ -135,6 +130,15 @@ public:

}
  };
  
+/* Return TRUE if the user specified a limit for either VLAs or ALLOCAs.  */

+
+static bool
+warn_limit_specified_p (bool is_vla)
+{
+  unsigned HOST_WIDE_INT max = is_vla ? warn_vla_limit : warn_alloca_limit;
+  return max != HOST_WIDE_INT_MAX;
+}
+
  /* Return the value of the argument N to -Walloca-larger-than= or
 -Wvla-larger-than= adjusted for the target data model so that
 when N == HOST_WIDE_INT_MAX, the adjusted value is set to
@@ -158,183 +162,15 @@ adjusted_warn_limit (bool idx)
return limits[idx];
  }
  
-

-// NOTE: When we get better range info, this entire function becomes
-// irrelevant, as it should be possible to get range info for an SSA
-// name at any point in the program.
-//
-// We have a few heuristics up our sleeve to determine if a call to
-// alloca() is within bounds.  Try them out and return the type of
-// alloca call with its assumed limit (if applicable).
-//
-// Given a known argument (ARG) to alloca() and an EDGE (E)
-// calculating said argument, verify that the last statement in the BB
-// in E->SRC is a gate comparing ARG to an acceptable bound for
-// alloca().  See examples below.
-//
-// If set, ARG_CASTED is the possible unsigned argument to which ARG
-// was casted to.  This is to handle cases where the controlling
-// predicate is looking at a casted value, not the argument itself.
-//arg_casted = (size_t) arg;
-//if (arg_casted < N)
-//  goto bb3;
-//else
-//  goto bb5;
-//
-// MAX_SIZE is WARN_ALLOCA= adjusted for VLAs.  It is the maximum size
-// in bytes we allow for arg.
-
-static class alloca_type_and_limit
-alloca_call_type_by_arg (tree arg, tree arg_casted, edge e,
-unsigned HOST_WIDE_INT max_size)
-{
-  basic_block bb = e->src;
-  gimple_stmt_iterator gsi = gsi_last_bb (bb);
-  gimple *last = gsi_stmt (gsi);
-
-  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
-
-  /* When MAX_SIZE is greater than or equal to PTRDIFF_MAX treat
- allocations that aren't visibly constrained as OK, otherwise
- report them as (potentially) unbounded.  */
-  alloca_type unbounded_result = (max_size < maxobjsize.to_uhwi ()
- ? ALLOCA_UNBOUNDED : ALLOCA_OK);
-
-  if (!last || gimple_code (last) != GIMPLE_COND)
-{
-  return alloca_type_and_limit (unbounded_result);
-}
-
-  enum tree_code cond_code = gimple_cond_code (last);
-  if (e->flags & EDGE_TRUE_VALUE)
-;
-  else if (e->flags & EDGE_FALSE_VALUE)
-cond_code = invert_tree_comparison (cond_code, 

Re: [PATCH] libstdc++: Fix division by zero in std::sample

2020-10-20 Thread Jonathan Wakely via Gcc-patches

On 21/09/20 16:01 +0100, Jonathan Wakely wrote:

On 18/09/20 21:08 -0400, Patrick Palka via Libstdc++ wrote:

This fixes a division by zero in the selection-sampling std::__sample
overload when the input range is empty (and hence __unsampled_sz is 0).

Tested on x86_64-pc-linux-gnu.

libstdc++-v3/ChangeLog:

* include/bits/stl_algo.h (__sample): Exit early when the
input range is empty.
* testsuite/25_algorithms/sample/3.cc: New test.
---
libstdc++-v3/include/bits/stl_algo.h  |  3 ++
.../testsuite/25_algorithms/sample/3.cc   | 50 +++
2 files changed, 53 insertions(+)
create mode 100644 libstdc++-v3/testsuite/25_algorithms/sample/3.cc


OK, thanks.


Could you please backport this to all branches too.




Re: [PATCH] Remove vr_values::extract_range_builtin.

2020-10-20 Thread Andrew MacLeod via Gcc-patches

On 10/20/20 12:43 PM, Aldy Hernandez wrote:

As promised.

Now that we know the vr_values and ranger versions are in sync, it
is safe to remove the vr_values version and just call the ranger one.

I am holding off on pushing this for a week or two, or until Fedora gets
rebuilt with the current compiler.


Looks like a clean separation.  Let's let it stew for a week+

Andrew



Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions emitted at -O3

2020-10-20 Thread Richard Sandiford via Gcc-patches
xiezhiheng  writes:
> I made two separate patches for these two groups, get/set register intrinsics 
> and store intrinsics.
>
> Note: It does not matter which patch is applied first.
>
> Bootstrapped and tested on aarch64 Linux platform.

Thanks.  I pushed the get/set patch.  For the store patch, I think
we should have:

const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY | FLAG_AUTO_FP;

since the FP forms don't (for example) read the FPCR.

Thanks,
Richard


Re: [PATCH] openmp: Implement support for OMP_TARGET_OFFLOAD

2020-10-20 Thread Jakub Jelinek via Gcc-patches
On Tue, Oct 20, 2020 at 06:39:39PM +0200, Rainer Orth wrote:
> Hi Tobias,
> 
> > On 10/19/20 8:21 PM, Jakub Jelinek via Gcc-patches wrote:
> >
> >> On Mon, Oct 19, 2020 at 06:57:49PM +0100, Kwok Cheung Yeung wrote:
> >>> --- a/libgomp/target.c
> >>> +++ b/libgomp/target.c
> > ...
> >> Otherwise LGTM.
> >
> > Unfortunately, the committed patch 
> > (r11-4121-g1bfc07d150790fae93184a79a7cce897655cb37b)
> > causes build errors.
> 
> the patch also breaks bootstrap on both i386-pc-solaris2.11 and
> sparc-sun-solaris2.11:
> 
> /vol/gcc/src/hg/master/local/libgomp/env.c: In function 'initialize_env':
> /vol/gcc/src/hg/master/local/libgomp/env.c:414:16: error: 'new_offload' may 
> be used uninitialized in this function [-Werror=maybe-uninitialized]
>   414 |   *offload = new_offload;
>   |   ~^
> /vol/gcc/src/hg/master/local/libgomp/env.c:384:30: note: 'new_offload' was 
> declared here
>   384 |   enum gomp_target_offload_t new_offload;
>   |  ^~~

I can't reproduce that, but I fail to see why we need two separate
variables, one with actual value and one tracking if the value is valid.

So I'd go with:

2020-10-20  Jakub Jelinek  

* env.c (parse_target_offload): Change new_offload var type to int,
preinitialize to -1, remove found var and test new_offload != -1
instead of found.

--- libgomp/env.c.jj2020-10-20 14:37:36.593968443 +0200
+++ libgomp/env.c   2020-10-20 18:43:00.338389023 +0200
@@ -380,8 +380,7 @@ static void
 parse_target_offload (const char *name, enum gomp_target_offload_t *offload)
 {
   const char *env;
-  bool found = false;
-  enum gomp_target_offload_t new_offload;
+  int new_offload = -1;
 
   env = getenv (name);
   if (env == NULL)
@@ -392,24 +391,21 @@ parse_target_offload (const char *name,
   if (strncasecmp (env, "default", 7) == 0)
 {
   env += 7;
-  found = true;
   new_offload = GOMP_TARGET_OFFLOAD_DEFAULT;
 }
   else if (strncasecmp (env, "mandatory", 9) == 0)
 {
   env += 9;
-  found = true;
   new_offload = GOMP_TARGET_OFFLOAD_MANDATORY;
 }
   else if (strncasecmp (env, "disabled", 8) == 0)
 {
   env += 8;
-  found = true;
   new_offload = GOMP_TARGET_OFFLOAD_DISABLED;
 }
   while (isspace ((unsigned char) *env))
 ++env;
-  if (found && *env == '\0')
+  if (new_offload != -1 && *env == '\0')
 {
   *offload = new_offload;
   return;


Jakub



[PATCH] Remove vr_values::extract_range_builtin.

2020-10-20 Thread Aldy Hernandez via Gcc-patches
As promised.

Now that we know the vr_values and ranger versions are in sync, it
is safe to remove the vr_values version and just call the ranger one.

I am holding off on pushing this for a week or two, or until Fedora gets
rebuilt with the current compiler.

gcc/ChangeLog:

* vr-values.h (class vr_values): Remove extract_range_builtin.
* vr-values.c (vr_values::extract_range_basic): Remove call to
extract_range_builtin.
(vr_values::extract_range_builtin): Remove.
---
 gcc/vr-values.c | 277 +---
 gcc/vr-values.h |   1 -
 2 files changed, 3 insertions(+), 275 deletions(-)

diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 11beef82a64..1dac1cfc49f 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -1155,271 +1155,6 @@ check_for_binary_op_overflow (range_query *query,
   return true;
 }
 
-/* Derive a range from a builtin.  Set range in VR and return TRUE if
-   successful.  */
-
-bool
-vr_values::extract_range_builtin (value_range_equiv *vr, gimple *stmt)
-{
-  gcc_assert (is_gimple_call (stmt));
-  tree type = gimple_expr_type (stmt);
-  tree arg;
-  int mini, maxi, zerov = 0, prec;
-  enum tree_code subcode = ERROR_MARK;
-  combined_fn cfn = gimple_call_combined_fn (stmt);
-  scalar_int_mode mode;
-
-  switch (cfn)
-{
-case CFN_BUILT_IN_CONSTANT_P:
-  /* Resolve calls to __builtin_constant_p after inlining.  */
-  if (cfun->after_inlining)
-   {
- vr->set_zero (type);
- vr->equiv_clear ();
- return true;
-   }
-  break;
-  /* Both __builtin_ffs* and __builtin_popcount return
-[0, prec].  */
-CASE_CFN_FFS:
-CASE_CFN_POPCOUNT:
-  arg = gimple_call_arg (stmt, 0);
-  prec = TYPE_PRECISION (TREE_TYPE (arg));
-  mini = 0;
-  maxi = prec;
-  if (TREE_CODE (arg) == SSA_NAME)
-   {
- const value_range_equiv *vr0 = get_value_range (arg);
- /* If arg is non-zero, then ffs or popcount are non-zero.  */
- if (range_includes_zero_p (vr0) == 0)
-   mini = 1;
- /* If some high bits are known to be zero,
-we can decrease the maximum.  */
- if (vr0->kind () == VR_RANGE
- && TREE_CODE (vr0->max ()) == INTEGER_CST
- && !operand_less_p (vr0->min (),
- build_zero_cst (TREE_TYPE (vr0->min ()))))
-   maxi = tree_floor_log2 (vr0->max ()) + 1;
-   }
-  goto bitop_builtin;
-  /* __builtin_parity* returns [0, 1].  */
-CASE_CFN_PARITY:
-  mini = 0;
-  maxi = 1;
-  goto bitop_builtin;
-  /* __builtin_clz* return [0, prec-1], except for
-when the argument is 0, but that is undefined behavior.
-Always handle __builtin_clz* which can be only written
-by user as UB on 0 and so [0, prec-1] range, and the internal-fn
-calls depending on how CLZ_DEFINED_VALUE_AT_ZERO is defined.  */
-CASE_CFN_CLZ:
-  arg = gimple_call_arg (stmt, 0);
-  prec = TYPE_PRECISION (TREE_TYPE (arg));
-  mini = 0;
-  maxi = prec - 1;
-  mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
-  if (gimple_call_internal_p (stmt))
-   {
- if (optab_handler (clz_optab, mode) != CODE_FOR_nothing
- && CLZ_DEFINED_VALUE_AT_ZERO (mode, zerov) == 2)
-   {
- /* Handle only the single common value.  */
- if (zerov == prec)
-   maxi = prec;
- /* Magic value to give up, unless vr0 proves
-arg is non-zero.  */
- else
-   mini = -2;
-   }
-   }
-  if (TREE_CODE (arg) == SSA_NAME)
-   {
- const value_range_equiv *vr0 = get_value_range (arg);
- /* From clz of VR_RANGE minimum we can compute
-result maximum.  */
- if (vr0->kind () == VR_RANGE
- && TREE_CODE (vr0->min ()) == INTEGER_CST
- && integer_nonzerop (vr0->min ()))
-   {
- maxi = prec - 1 - tree_floor_log2 (vr0->min ());
- if (mini == -2)
-   mini = 0;
-   }
- else if (vr0->kind () == VR_ANTI_RANGE
-  && integer_zerop (vr0->min ()))
-   {
- maxi = prec - 1;
- mini = 0;
-   }
- if (mini == -2)
-   break;
- /* From clz of VR_RANGE maximum we can compute
-result minimum.  */
- if (vr0->kind () == VR_RANGE
- && TREE_CODE (vr0->max ()) == INTEGER_CST)
-   {
- int newmini = prec - 1 - tree_floor_log2 (vr0->max ());
- if (newmini == prec)
-   {
- if (maxi == prec)
-   mini = prec;
-   }
- else
-   mini = newmini;
-   }
-   }
-  if (mini == -2)
-   break;
-  goto bitop_builtin;
-  /* __builtin_ctz* return [0, prec-1], except for
-when the 

Re: [PATCH] [PR rtl-optimization/97249]Simplify vec_select of paradoxical subreg.

2020-10-20 Thread Richard Sandiford via Gcc-patches
Hongtao Liu  writes:
>> > +   && (GET_MODE_NUNITS (mode)).is_constant ()
>> > +   && (GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0))))
>> > +   .is_constant ()
>> > +   && known_le (l1, l2)
>>
>> I'm not sure the last two && conditions are really the important ones.
>> I think we should drop them for the suggestion below.
>>
>
> Changed, assume gcc also support something like (vec_select:v4di
> (reg:v2di) (parallel [ (const_int 0) (const_int 1) (const_int 1)
> (const_int 0)]))
> as long as the range of selection guaranteed by
>   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))

Yeah, that vec_select looks OK.

>>
>> > +   if (!CONST_INT_P (idx))
>>
>> Here I think we should check:
>>
>>   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
>>
>> where:
>>
>>poly_uint64 nunits
>>  = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
>>
>
> Changed.
>
>> This makes sure that all indices are in range.  In particular, it's
>> valid for the SUBREG_REG to be narrower than mode, for appropriate
>> vec_select indices
>>
>
> Yes, that's what paradoxical subreg means.

But I was comparing the mode of the vec_select with the mode of the
SUBREG_REG (rather than the mode of trueop0 with the mode of the
SUBREG_REG, which is what matters for paradoxical subregs).

> +   /* Simplify vec_select of a subreg of X to just a vec_select of X
> +  when X has same component mode as vec_select.  */
> +   unsigned HOST_WIDE_INT subreg_offset = 0;
> +   if (GET_CODE (trueop0) == SUBREG
> +   && GET_MODE_INNER (mode)
> +  == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
> +   && (GET_MODE_NUNITS (mode)).is_constant ()

Unnecessary brackets around “GET_MODE_NUNITS (mode)”.

> +   && constant_multiple_p (SUBREG_BYTE (trueop0),
> +   GET_MODE_UNIT_BITSIZE (mode),
> +   &subreg_offset))

Sorry, my bad, this should be:

  && constant_multiple_p (subreg_memory_offset (trueop0),
  GET_MODE_UNIT_BITSIZE (mode),
  &subreg_offset))

> + {
> +   gcc_assert (XVECLEN (trueop1, 0) == l1);
> +   bool success = true;
> +   poly_uint64 nunits
> + = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
> +   for (int i = 0; i != l1; i++)
> + {
> +   rtx idx = XVECEXP (trueop1, 0, i);
> +   if (!CONST_INT_P (idx)
> +   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
> + {
> +   success = false;
> +   break;
> + }
> + }
> +   if (success)
> + {
> +   rtx par = trueop1;
> +   if (subreg_offset)
> + {
> +   rtvec vec = rtvec_alloc (l1);
> +   for (int i = 0; i < l1; i++)
> + RTVEC_ELT (vec, i)
> +   = GEN_INT (INTVAL (XVECEXP (trueop1, 0, i)
> +  + subreg_offset));

This is applying subreg_offset to the pointer rather than the INTVAL.
It should be:

  = GEN_INT (UINTVAL (XVECEXP (trueop1, 0, i))
 + subreg_offset);

OK with those changes, thanks.

Richard


Re: [PATCH] openmp: Implement support for OMP_TARGET_OFFLOAD

2020-10-20 Thread Rainer Orth
Hi Tobias,

> On 10/19/20 8:21 PM, Jakub Jelinek via Gcc-patches wrote:
>
>> On Mon, Oct 19, 2020 at 06:57:49PM +0100, Kwok Cheung Yeung wrote:
>>> --- a/libgomp/target.c
>>> +++ b/libgomp/target.c
> ...
>> Otherwise LGTM.
>
> Unfortunately, the committed patch 
> (r11-4121-g1bfc07d150790fae93184a79a7cce897655cb37b)
> causes build errors.

the patch also breaks bootstrap on both i386-pc-solaris2.11 and
sparc-sun-solaris2.11:

/vol/gcc/src/hg/master/local/libgomp/env.c: In function 'initialize_env':
/vol/gcc/src/hg/master/local/libgomp/env.c:414:16: error: 'new_offload' may be 
used uninitialized in this function [-Werror=maybe-uninitialized]
  414 |   *offload = new_offload;
  |   ~^
/vol/gcc/src/hg/master/local/libgomp/env.c:384:30: note: 'new_offload' was 
declared here
  384 |   enum gomp_target_offload_t new_offload;
  |  ^~~
  
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH 0/2] [OpenACC] Kernels loop annotation

2020-10-20 Thread Sandra Loosemore

On 9/9/20 11:53 AM, Sandra Loosemore wrote:

This set of patches implements C/C++ and Fortran front end support for
adding "acc loop auto" annotations to loop nests in OpenACC kernels
regions.  For background on this, refer to Thomas Schwinge's talk from
last year's cauldron, at

https://gcc.gnu.org/wiki/cauldron2019talks?action=AttachFile&do=view&target=OpenACC+kernels-cauldron2019.pdf

In particular, pages 20-24 describe this part of the work.  We're
trying to identify loops that might be parallelizable and convert them
to ACC_LOOP tree structures for further analysis, instead of lowering
them to goto form early in compilation, as we do with ordinary
for/while/do loops in C/C++ and DO loops in Fortran.


Ping!

Overview: 
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553518.html


C/C++ part: 
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553519.html


Fortran part: 
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553520.html


-Sandra




Re: PING: Fwd: [PATCH] Refactor range handling of builtins in vr_values and ranger.

2020-10-20 Thread Aldy Hernandez via Gcc-patches
On Tue, Oct 20, 2020 at 6:19 PM Andrew MacLeod  wrote:
>
> On 10/19/20 6:03 AM, Aldy Hernandez wrote:
> >
> >
> >
> > -------- Forwarded Message --------
> > Subject: [PATCH] Refactor range handling of builtins in vr_values and
> > ranger.
> > Date: Fri,  9 Oct 2020 14:32:05 +0200
> > From: Aldy Hernandez 
> > To: GCC patches , Jakub Jelinek
> > 
> > CC: Andrew MacLeod , Aldy Hernandez
> > 
> >
> > Hi Jakub.
> >
> > As the last known expert in this area, would you review this, please? :)
> >
> > This sets things up so we can share range handling of builtins between
> > vr_values and ranger.  It is meant to refactor the code so that we can
> > verify that both implementations yield the same results.
> >
> > First, we abstract out gimple_ranger::range_of_builtin_call into an
> > externally
> > visible counterpart that can be called from vr_values.  It will take a
> > range_query since both ranger and vr_values inherit from this base class.
> >
> > Then we abstract out all the builtin handling in vr_values into a
> > separate
> > method that is easier to compare against.
> >
> > Finally, we call the ranger version from vr_values and compare it with
> > the
> > vr_values version.  Since this proves both versions return the same,
> > we can remove vr_values::extract_range_builtin in a follow-up patch.
> >
> > The vr_values::range_of_expr change brings the vr_values version up to
> > par
> > with the ranger version.  It should've handled non-SSA's.  This was
> > a small oversight that went unnoticed because the vr_value version isn't
> > stressed nearly as much as the ranger version.  The change is needed
> > because
> > the ranger code handling builtins calls, may call it for integer
> > arguments
> > in range_of_builtin_ubsan_call.
> >
> > There should be no change in functionality.
> >
> > Tested on x86_64, with aarch64 tests still going.
> >
> > OK provided aarch64 tests finish this century?
>
>
>
>
> IIRC you basically duplicated the builtin code from vr-values and
> adapted it, we just never got back to consolidating them.  Until
> range_query i guess that would have been more difficult.

Yes.  We had a compare and trap in our original ranger branch, and
then it got lost somewhere in the transition to the staging branch :).

>
> I think you should also post the followup patch which removes the old
> builtin range extraction.  There shouldn't be much churn so it's not a
> waste of time?  It would just be useful to see the other half.

Will do.

>
> This is OK, and the plan is to leave the verification code in place for
> a week or two to allow OS builds and various other things to bounce off
> it just as a double check?

Yes.  A week or two would be fine.  I think Jeff runs Fedora builds every week.

Pushed.

Aldy
>
> Andrew
>



Re: PING: Fwd: [PATCH] Refactor range handling of builtins in vr_values and ranger.

2020-10-20 Thread Andrew MacLeod via Gcc-patches

On 10/19/20 6:03 AM, Aldy Hernandez wrote:




-------- Forwarded Message --------
Subject: [PATCH] Refactor range handling of builtins in vr_values and 
ranger.

Date: Fri,  9 Oct 2020 14:32:05 +0200
From: Aldy Hernandez 
To: GCC patches , Jakub Jelinek 

CC: Andrew MacLeod , Aldy Hernandez 



Hi Jakub.

As the last known expert in this area, would you review this, please? :)

This sets things up so we can share range handling of builtins between
vr_values and ranger.  It is meant to refactor the code so that we can
verify that both implementations yield the same results.

First, we abstract out gimple_ranger::range_of_builtin_call into an 
externally

visible counterpart that can be called from vr_values.  It will take a
range_query since both ranger and vr_values inherit from this base class.

Then we abstract out all the builtin handling in vr_values into a 
separate

method that is easier to compare against.

Finally, we call the ranger version from vr_values and compare it with 
the

vr_values version.  Since this proves both versions return the same,
we can remove vr_values::extract_range_builtin in a follow-up patch.

The vr_values::range_of_expr change brings the vr_values version up to 
par

with the ranger version.  It should've handled non-SSA's.  This was
a small oversight that went unnoticed because the vr_value version isn't
stressed nearly as much as the ranger version.  The change is needed 
because
the ranger code handling builtins calls, may call it for integer 
arguments

in range_of_builtin_ubsan_call.

There should be no change in functionality.

Tested on x86_64, with aarch64 tests still going.

OK provided aarch64 tests finish this century?





IIRC you basically duplicated the builtin code from vr-values and 
adapted it, we just never got back to consolidating them.  Until 
range_query i guess that would have been more difficult.


I think you should also post the followup patch which removes the old 
builtin range extraction.  There shouldn't be much churn so it's not a 
waste of time?  It would just be useful to see the other half.


This is OK, and the plan is to leave the verification code in place for
a week or two to allow OS builds and various other things to bounce off 
it just as a double check?


Andrew



Re: [PATCH][middle-end][i386][version 3]Add -fzero-call-used-regs=[skip|used-gpr-arg|used-arg|all-arg|used-gpr|all-gpr|used|all]

2020-10-20 Thread Uros Bizjak via Gcc-patches
On Tue, Oct 20, 2020 at 4:01 PM Qing Zhao  wrote:
>
> Hi, Uros,
>
> Thanks a lot for your comments.
>
> On Oct 19, 2020, at 2:30 PM, Uros Bizjak  wrote:
>
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index f684954..620114f 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -3551,6 +3551,161 @@ ix86_function_value_regno_p (const unsigned int regno)
>  return false;
> }
>
> +/* Check whether the register REGNO should be zeroed on X86.
> +   When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
> +   together, no need to zero it again.
> +   Stack registers (st0-st7) and mm0-mm7 are aliased with each other.
> +   very hard to be zeroed individually, don't zero individual st or
> +   mm registers at this time.  */
> +
> +static bool
> +zero_call_used_regno_p (const unsigned int regno,
> + bool all_sse_zeroed)
> +{
> +  return GENERAL_REGNO_P (regno)
> +  || (!all_sse_zeroed && SSE_REGNO_P (regno))
> +  || MASK_REGNO_P (regno);
> +}
> +
> +/* Return the machine_mode that is used to zero register REGNO.  */
> +
> +static machine_mode
> +zero_call_used_regno_mode (const unsigned int regno)
> +{
> +  /* NB: We only need to zero the lower 32 bits for integer registers
> + and the lower 128 bits for vector registers since destination are
> + zero-extended to the full register width.  */
> +  if (GENERAL_REGNO_P (regno))
> +return SImode;
> +  else if (SSE_REGNO_P (regno))
> +return V4SFmode;
> +  else
> +return HImode;
> +}
> +
> +/* Generate a rtx to zero all vector registers togetehr if possible,
> +   otherwise, return NULL.  */
> +
> +static rtx
> +zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
> +{
> +  if (!TARGET_AVX)
> +return NULL;
> +
> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> +if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
> +  || (TARGET_64BIT
> +  && (REX_SSE_REGNO_P (regno)
> +  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)
> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
> +  return NULL;
> +
> +  return gen_avx_vzeroall ();
> +}
> +
> +/* Generate a rtx to zero all st and mm registers togetehr if possible,
> +   otherwise, return NULL.  */
> +
> +static rtx
> +zero_all_st_mm_registers (HARD_REG_SET need_zeroed_hardregs)
> +{
> +  if (!TARGET_MMX)
> +return NULL;
> +
> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> +if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
> +  return NULL;
> +
> +  return gen_mmx_emms ();
>
>
> emms is not clearing any register, it only loads x87FPUTagWord with
> FFFFH. So I think, the above is useless, as far as register clearing
> is concerned.
>
>
> Thanks for the info.
>
> So, for mm and st registers, should we clear them, and how?

I don't know.

Please note that %mm and %st share the same register file, and
touching %mm registers will block access to %st until emms is emitted.
You can't just blindly load 0 to %st registers, because the register
file can be in MMX mode and vice versa. For 32-bit targets, a function
can also return a value in %mm0.

>
>
> +}
> +
> +/* TARGET_ZERO_CALL_USED_REGS.  */
> +/* Generate a sequence of instructions that zero registers specified by
> +   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
> +   zeroed.  */
> +static HARD_REG_SET
> +ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
> +{
> +  HARD_REG_SET zeroed_hardregs;
> +  bool all_sse_zeroed = false;
> +
> +  /* first, let's see whether we can zero all vector registers together.  */
> +  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
> +  if (zero_all_vec_insn)
> +{
> +  emit_insn (zero_all_vec_insn);
> +  all_sse_zeroed = true;
> +}
> +
> +  /* then, let's see whether we can zero all st+mm registers togeter.  */
> +  rtx zero_all_st_mm_insn = zero_all_st_mm_registers (need_zeroed_hardregs);
> +  if (zero_all_st_mm_insn)
> +emit_insn (zero_all_st_mm_insn);
> +
> +  /* Now, generate instructions to zero all the registers.  */
> +
> +  CLEAR_HARD_REG_SET (zeroed_hardregs);
> +  rtx zero_gpr = NULL_RTX;
> +  rtx zero_vector = NULL_RTX;
> +  rtx zero_mask = NULL_RTX;
> +
> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> +{
> +  if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
> + continue;
> +  if (!zero_call_used_regno_p (regno, all_sse_zeroed))
> + continue;
> +
> +  SET_HARD_REG_BIT (zeroed_hardregs, regno);
> +
> +  rtx reg, tmp;
> +  machine_mode mode = zero_call_used_regno_mode (regno);
> +
> +  reg = gen_rtx_REG (mode, regno);
> +
> +  if (mode == SImode)
> + if (zero_gpr == NULL_RTX)
> +   {
> + zero_gpr = reg;
> + tmp = gen_rtx_SET (reg, const0_rtx);
> + if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
>
>
> No need to complicate here, there is a peephole2 pattern 

[PATCH] Saturate overflows return from SCEV in ranger.

2020-10-20 Thread Aldy Hernandez via Gcc-patches
bounds_of_var_in_loop is returning an overflowed int, which is causing
us to create a range for which we can't compare the bounds causing
an ICE in verify_range.

Overflowed bounds cause compare_values() to return -2, which we
don't handle in verify_range.

We don't represent overflowed ranges in irange, so this patch just
saturates any overflowed end-points to MIN or MAX.

Pushed.

gcc/ChangeLog:

PR 97501/tree-optimization
* gimple-range.cc (gimple_ranger::range_of_ssa_name_with_loop_info):
Saturate overflows returned from SCEV.

gcc/testsuite/ChangeLog:

* gcc.dg/pr97501.c: New test.
---
 gcc/gimple-range.cc|  4 ++--
 gcc/testsuite/gcc.dg/pr97501.c | 14 ++
 2 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr97501.c

diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index e4864ba60f6..ed9609be68e 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -1146,9 +1146,9 @@ gimple_ranger::range_of_ssa_name_with_loop_info (irange 
&r, tree name,
   // ?? We could do better here.  Since MIN/MAX can only be an
   // SSA, SSA +- INTEGER_CST, or INTEGER_CST, we could easily call
   // the ranger and solve anything not an integer.
-  if (TREE_CODE (min) != INTEGER_CST)
+  if (TREE_CODE (min) != INTEGER_CST || TREE_OVERFLOW (min))
min = vrp_val_min (type);
-  if (TREE_CODE (max) != INTEGER_CST)
+  if (TREE_CODE (max) != INTEGER_CST || TREE_OVERFLOW (max))
max = vrp_val_max (type);
   r.set (min, max);
 }
diff --git a/gcc/testsuite/gcc.dg/pr97501.c b/gcc/testsuite/gcc.dg/pr97501.c
new file mode 100644
index 000..aedac83962d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr97501.c
@@ -0,0 +1,14 @@
+// { dg-do compile }
+// { dg-options "-O2" }
+
+static int c = 0;
+
+int main() {
+  int b = 0;
+  if (c) {
+  for (;; b--)
+do
+  b++;
+while (b);
+  }
+}
-- 
2.26.2



preprocessor: Further fix for EOF in macro args [PR97471]

2020-10-20 Thread Nathan Sidwell


My previous attempt at fixing this was incorrect.  The problem occurs
earlier in that _cpp_lex_direct processes the unwinding EOF needs in
collect_args mode.  This patch changes it not to do that, in the same
way as directive parsing works.  Also collect_args shouldn't push_back
such fake EOFs, and neither should funlike_invocation_p.

libcpp/
* lex.c (_cpp_lex_direct): Do not complete EOF processing when
parsing_args.
* macro.c (collect_args): Do not unwind fake EOF.
(funlike_invocation_p): Do not unwind fake EOF.
(cpp_context): Replace abort with gcc_assert.
gcc/testsuite/
* gcc.dg/cpp/endif.c: Move to ...
* c-c++-common/cpp/endif.c: ... here.
* gcc.dg/cpp/endif.h: Move to ...
* c-c++-common/cpp/endif.h: ... here.
* c-c++-common/cpp/eof-2.c: Adjust diagnostic.
* c-c++-common/cpp/eof-3.c: Adjust diagnostic.

pushing to trunk

--
Nathan Sidwell
diff --git c/gcc/testsuite/gcc.dg/cpp/endif.c w/gcc/testsuite/c-c++-common/cpp/endif.c
similarity index 100%
rename from gcc/testsuite/gcc.dg/cpp/endif.c
rename to gcc/testsuite/c-c++-common/cpp/endif.c
diff --git c/gcc/testsuite/gcc.dg/cpp/endif.h w/gcc/testsuite/c-c++-common/cpp/endif.h
similarity index 100%
rename from gcc/testsuite/gcc.dg/cpp/endif.h
rename to gcc/testsuite/c-c++-common/cpp/endif.h
diff --git c/gcc/testsuite/c-c++-common/cpp/eof-2.c w/gcc/testsuite/c-c++-common/cpp/eof-2.c
index 3a4af7f6850..9cc4fed6564 100644
--- c/gcc/testsuite/c-c++-common/cpp/eof-2.c
+++ w/gcc/testsuite/c-c++-common/cpp/eof-2.c
@@ -5,4 +5,4 @@
 #define f(x) x
 
 #include "eof-2.h"
- /* { dg-regexp {[^\n]*eof-2.h:4: error: unterminated argument list invoking macro "f"\n} } */
+ /* { dg-regexp {[^\n]*eof-2.h:4:21: error: unterminated argument list invoking macro "f"\n} } */
diff --git c/gcc/testsuite/c-c++-common/cpp/eof-3.c w/gcc/testsuite/c-c++-common/cpp/eof-3.c
index 316918e3a6c..e309a548c0c 100644
--- c/gcc/testsuite/c-c++-common/cpp/eof-3.c
+++ w/gcc/testsuite/c-c++-common/cpp/eof-3.c
@@ -3,6 +3,6 @@
 /* { dg-do preprocess } */
 /* { dg-additional-options "-include $srcdir/c-c++-common/cpp/eof-2.h" } */
 
- /* { dg-regexp {[^\n]*eof-2.h:4: error: unterminated argument list invoking macro "f"\n} } */
+ /* { dg-regexp {[^\n]*eof-2.h:4:21: error: unterminated argument list invoking macro "f"\n} } */
 
 token )
diff --git c/libcpp/lex.c w/libcpp/lex.c
index 2fe77d162a2..fb222924c8c 100644
--- c/libcpp/lex.c
+++ w/libcpp/lex.c
@@ -2768,7 +2768,10 @@ _cpp_lex_direct (cpp_reader *pfile)
   if (!_cpp_get_fresh_line (pfile))
 	{
 	  result->type = CPP_EOF;
-	  if (!pfile->state.in_directive)
+	  /* Not a real EOF in a directive or arg parsing -- we refuse
+  	 to advance to the next file now, and will once we're out
+  	 of those modes.  */
+	  if (!pfile->state.in_directive && !pfile->state.parsing_args)
 	{
 	  /* Tell the compiler the line number of the EOF token.  */
 	  result->src_loc = pfile->line_table->highest_line;
diff --git c/libcpp/macro.c w/libcpp/macro.c
index 9cb3b10a9a0..0874028b211 100644
--- c/libcpp/macro.c
+++ w/libcpp/macro.c
@@ -1259,13 +1259,10 @@ collect_args (cpp_reader *pfile, const cpp_hashnode *node,
 
   if (token->type == CPP_EOF)
 {
-  /* We still need the CPP_EOF to end directives, to end
-	 pre-expansion of a macro argument, and at the end of the main
-	 file.  We do not want it at the end of a -include'd (forced)
-	 header file.  */
-  if (pfile->state.in_directive
-	  || !pfile->line_table->depth
-	  || pfile->context->prev)
+  /* Unless the EOF is marking the end of an argument, it's a fake
+	 one from the end of a file that _cpp_clean_line will not have
+	 advanced past.  */
+  if (token == &pfile->endarg)
 	_cpp_backup_tokens (pfile, 1);
   cpp_error (pfile, CPP_DL_ERROR,
 		 "unterminated argument list invoking macro \"%s\"",
@@ -1328,13 +1325,19 @@ funlike_invocation_p (cpp_reader *pfile, cpp_hashnode *node,
   pfile->state.parsing_args = 2;
   return collect_args (pfile, node, pragma_buff, num_args);
 }
-
-  /* Back up.  We may have skipped padding, in which case backing
- up more than one token when expanding macros is in general
- too difficult.  We re-insert it in its own context.  */
-  _cpp_backup_tokens (pfile, 1);
-  if (padding)
-_cpp_push_token_context (pfile, NULL, padding, 1);
+  
+  /* Back up.  A CPP_EOF is either an EOF from an argument we're
+ expanding, or a fake one from lex_direct.  We want to backup the
+ former, but not the latter.  We may have skipped padding, in
+ which case backing up more than one token when expanding macros
+ is in general too difficult.  We re-insert it in its own
+ context.  */
+  if (token->type != CPP_EOF || token == &pfile->endarg)
+{
+  _cpp_backup_tokens (pfile, 1);
+  if (padding)
+	_cpp_push_token_context (pfile, NULL, padding, 1);
+}
 
   return NULL;
 }
@@ -2638,8 +2641,7 @@ 

Re: [PATCH 1/2] [target 87767] Refactor AVX512 broadcast patterns with speical memory constraint.

2020-10-20 Thread Vladimir Makarov via Gcc-patches



On 2020-10-20 1:33 a.m., Hongtao Liu wrote:

On Mon, Oct 19, 2020 at 11:38 PM Vladimir Makarov  wrote:


On 2020-10-11 8:58 p.m., Hongtao Liu wrote:

Hi:
This is done in 2 steps:
1. Extend special memory constraint to handle non MEM_P cases, i.e.
(vec_duplicate:V4SF (mem:SF (addr)))
2. Refactor implementation of *_bcst{_1,_2,_3} patterns. Add new
predicate bcst_mem_operand and corresponding constraint "Br" to merge
"$(pattern)_bcst{_1,_2,_3}" into "$(pattern)", also delete those
separate "*_bcst{_1,_2,_3}" patterns.

Bootstrap is ok, regression test on i386 backend is ok.

gcc/ChangeLog:

  PR target/87767
  * ira-costs.c (record_operand_costs): Extract memory operand
  from recog_data.operand[i] for record_address_regs.
  (record_reg_classes): Extract memory operand from OP for
  conditional judgement MEM_P.
  * ira.c (ira_setup_alts): Ditto.
  * lra-constraints.c (extract_mem_from_operand): New function.
  (satisfies_memory_constraint_p): Extract memory operand from
  OP for decompose_mem_address, return false when there's no
  memory operand inside OP.
  (process_alt_operands): Remove MEM_P (op) since it would be
  judged in satisfies_memory_constraint_p.
  * recog.c (asm_operand_ok): Extract memory operand from OP for
  judgement of memory_operand (OP, VOIDmode).
  (constrain_operands): Don't unwrap unary operator when
  there's memory operand inside.
  * rtl.h (extract_mem_from_operand): New decl.


Thank you for working on the PR.  In general patch is ok for me. The
only thing is

+/* For special_memory_operand, it could be false for MEM_P (op),
+   i.e. bcst_mem_operand in i386 backend.
+   Extract and return real memory operand or op.  */
+rtx
+extract_mem_from_operand (rtx op)
+{
+  if (MEM_P (op))
+return op;
+  /* Only allow one memory_operand inside special memory operand.  */

The comment contradicts the code below, which returns the first memory 
operand (not the only one).


Yes.


+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
+{
+  rtx x = *iter;
+  if (MEM_P (x))
+   return x;
+}
+
+  return op;
+}
+

I think the code should look like

/* For special_memory_operand, it could be false for MEM_P (op),
 i.e. bcst_mem_operand in i386 backend.
 Extract and return real memory operand or op.  */
rtx
extract_mem_from_operand (rtx op)
{
if (MEM_P (op))
  return op;
/* Only allow one memory_operand inside special memory operand.  */
subrtx_var_iterator::array_type array;
rtx res = op;
FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
  {
rtx x = *iter;
if (!MEM_P (x) || res != op)
  return op;
res = op;

Assume you want to assign res with x.
Also in the iteration, x would first be op which would be false for
MEM_P, then op would be returned.
That's not what you mean, so i changed to

   /* Only allow one memory_operand inside special memory operand.  */
   subrtx_var_iterator::array_type array;
   rtx res = op;
   FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
 {
   rtx x = *iter;
   if (!MEM_P (x))
 continue;
   /* Return op when there're multiple memory operands.  */
   if (res != op)
 return op;
   else
 res = x;
 }


Actually I wanted to have constraint satisfying rtx with memory covered 
by **only unary** operator(s).  Your code satisfies memory covered by 
non-unary operators (e.g. binary ones).


Why do I prefer less general constraint? Because other operands of 
operator containing the memory might need reloads too and the more 
general constraint will ignore this. If this situation is impossible 
now, it might be possible in the future.


My proposed code is wrong as I forgot that FOR_EACH_SUBRTX_VAR processes 
sub-rtx recursively.  Thank you for starting the discussion.  Now I 
think the code should look like


/* For special_memory_operand, it could be false for MEM_P (op),
i.e. bcst_mem_operand in i386 backend.
Extract and return real memory operand or op.  */
rtx
extract_mem_from_operand (rtx op)
{
  for (rtx x = op;; x = XEXP (x, 0)) {

   if (MEM_P (x))
     return x;
   if (GET_RTX_LENGTH (GET_CODE (x)) != 1 || GET_RTX_FORMAT (GET_CODE 
(x))[0] != 'e')

 break;

  }

  return op;

}

Let me know what you think.



Re: [PATCH] c++: Distinguish btw. alignof and __alignof__ in cp_tree_equal [PR97273]

2020-10-20 Thread Patrick Palka via Gcc-patches
On Fri, 9 Oct 2020, Jason Merrill wrote:

> On 10/9/20 4:48 AM, Jakub Jelinek wrote:
> > On Tue, Oct 06, 2020 at 03:40:52PM -0400, Jason Merrill via Gcc-patches
> > wrote:
> > > On 10/4/20 11:28 PM, Patrick Palka wrote:
> > > > cp_tree_equal currently considers alignof the same as __alignof__, but
> > > > these operators are semantically different ever since r8-7957.  In the
> > > > testcase below, this causes the second static_assert to fail on targets
> > > > where alignof(double) != __alignof__(double) because the specialization
> > > > cache (which uses cp_tree_equal as the equality predicate) conflates the
> > > > two dependent specializations integral_constant<__alignof__(T)> and
> > > > > integral_constant<alignof(T)>.
> > > > 
> > > > This patch makes cp_tree_equal distinguish between these two operators
> > > > by inspecting the ALIGNOF_EXPR_STD_P flag.
> > > > 
> > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, and also verified
> > > > that we now correctly compile the  PR97273 testcase, does this look OK
> > > > for trunk and the release branches?
> > > 
> > > OK.
> > 
> > Shouldn't we then mangle alignof and __alignof__ differently though?
> 
> Good point.  Then I guess __alignof__ should be mangled as v111__alignof__

While working on this mangling change, I noticed that the non-standard
extension alignof(expr) returns the preferred alignment, so it behaves
like __alignof__(type) instead of alignof(type).

Do we want to keep it that way, or should we make it behave like
alignof(type) and return the ABI alignment instead (and therefore set
ALIGNOF_EXPR_STD_P on it and mangle it as "az" instead of
"v111__alignof__z" etc)?  FWIW, on Clang alignof(expr) returns the ABI
alignment.

> 
> Jason
> 
> 



Re: libstdc++: std::iterator is deprecated since C++17

2020-10-20 Thread Jonathan Wakely via Gcc-patches
Patches for libstdc++ need to be sent to both the gcc-patches list and
libstdc++ list, or they will be ignored.

Removing the std::iterator base classes is an ABI break, so not
acceptable.

std::iterator is deprecated, but that doesn't mean the library can't use
it. Even after it gets removed, we can continue to define it as a
non-standard extension, see the similar comments at
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91260

If you want to add deprecated warnings to std::iterator that's fine,
but you'll need to also use #pragma to prevent uses within libstdc++
from giving any warnings.

One way to do that would be to introduce a new class template that
uses std::iterator and replaces all uses of std::iterator with that:

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated"
  template
struct __iterator
: iterator<_Category, _Tp, _Distance, _Pointer, _Reference>
{ };
#pragma GCC diagnostic pop

This will allow the library to use it without warnings, but user code
that refers to std::iterator will get warnings.

That would be a much simpler patch too.


We could also consider making it an alias template for C++11, to avoid
the cost of another class template instantiation:

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated"
  template
#if __cplusplus < 201103L
struct __iterator
: iterator<_Category, _Tp, _Distance, _Pointer, _Reference>
{ };
#else
using __iterator
  = iterator<_Category, _Tp, _Distance, _Pointer, _Reference>;
#endif
#pragma GCC diagnostic pop






[pushed] c++: Add fixed test [PR82239]

2020-10-20 Thread Marek Polacek via Gcc-patches
This test was fixed by r256550 but that commit was fixing another issue,
and just happened to fix this too.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/testsuite/ChangeLog:

PR c++/82239
* g++.dg/cpp0x/static_assert16.C: New test.
---
 gcc/testsuite/g++.dg/cpp0x/static_assert16.C | 14 ++
 1 file changed, 14 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/static_assert16.C

diff --git a/gcc/testsuite/g++.dg/cpp0x/static_assert16.C 
b/gcc/testsuite/g++.dg/cpp0x/static_assert16.C
new file mode 100644
index 000..5b40b774f7b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/static_assert16.C
@@ -0,0 +1,14 @@
+// PR c++/82239
+// { dg-do compile { target c++11 } }
+
+template
+struct C {
+   static constexpr int x = 5;
+   void f()
+   {
+  static_assert(0 < x, "");
+  static_assert(0 < (x), "");
+  static_assert(true || (0 < x), "");
+  static_assert(true || (0 < (x)), "");
+   }
+};

base-commit: 5bcd7de6402595fcaefcf1305e053fcb47dfc424
-- 
2.26.2



Re: [PATCH][middle-end][i386][version 3]Add -fzero-call-used-regs=[skip|used-gpr-arg|used-arg|all-arg|used-gpr|all-gpr|used|all]

2020-10-20 Thread Qing Zhao via Gcc-patches
Hi, Uros,

Thanks a lot for your comments.

> On Oct 19, 2020, at 2:30 PM, Uros Bizjak  wrote:
> 
>> 
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index f684954..620114f 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -3551,6 +3551,161 @@ ix86_function_value_regno_p (const unsigned int 
>> regno)
>>  return false;
>> }
>> 
>> +/* Check whether the register REGNO should be zeroed on X86.
>> +   When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
>> +   together, no need to zero it again.
>> +   Stack registers (st0-st7) and mm0-mm7 are aliased with each other.
>> +   very hard to be zeroed individually, don't zero individual st or
>> +   mm registgers at this time.  */
>> +
>> +static bool
>> +zero_call_used_regno_p (const unsigned int regno,
>> + bool all_sse_zeroed)
>> +{
>> +  return GENERAL_REGNO_P (regno)
>> +  || (!all_sse_zeroed && SSE_REGNO_P (regno))
>> +  || MASK_REGNO_P (regno);
>> +}
>> +
>> +/* Return the machine_mode that is used to zero register REGNO.  */
>> +
>> +static machine_mode
>> +zero_call_used_regno_mode (const unsigned int regno)
>> +{
>> +  /* NB: We only need to zero the lower 32 bits for integer registers
>> + and the lower 128 bits for vector registers since destination are
>> + zero-extended to the full register width.  */
>> +  if (GENERAL_REGNO_P (regno))
>> +return SImode;
>> +  else if (SSE_REGNO_P (regno))
>> +return V4SFmode;
>> +  else
>> +return HImode;
>> +}
>> +
>> +/* Generate a rtx to zero all vector registers togetehr if possible,
>> +   otherwise, return NULL.  */
>> +
>> +static rtx
>> +zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  if (!TARGET_AVX)
>> +return NULL;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
>> +  || (TARGET_64BIT
>> +  && (REX_SSE_REGNO_P (regno)
>> +  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)
>> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> +  return NULL;
>> +
>> +  return gen_avx_vzeroall ();
>> +}
>> +
>> +/* Generate a rtx to zero all st and mm registers togetehr if possible,
>> +   otherwise, return NULL.  */
>> +
>> +static rtx
>> +zero_all_st_mm_registers (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  if (!TARGET_MMX)
>> +return NULL;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
>> + && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> +  return NULL;
>> +
>> +  return gen_mmx_emms ();
> 
> emms is not clearing any register, it only loads x87FPUTagWord with
> FFFFH. So I think, the above is useless, as far as register clearing
> is concerned.

Thanks for the info.

So, for mm and st registers, should we clear them, and how?


> 
>> +}
>> +
>> +/* TARGET_ZERO_CALL_USED_REGS.  */
>> +/* Generate a sequence of instructions that zero registers specified by
>> +   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
>> +   zeroed.  */
>> +static HARD_REG_SET
>> +ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>> +{
>> +  HARD_REG_SET zeroed_hardregs;
>> +  bool all_sse_zeroed = false;
>> +
>> +  /* first, let's see whether we can zero all vector registers together.  */
>> +  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
>> +  if (zero_all_vec_insn)
>> +{
>> +  emit_insn (zero_all_vec_insn);
>> +  all_sse_zeroed = true;
>> +}
>> +
>> +  /* then, let's see whether we can zero all st+mm registers togeter.  */
>> +  rtx zero_all_st_mm_insn = zero_all_st_mm_registers (need_zeroed_hardregs);
>> +  if (zero_all_st_mm_insn)
>> +emit_insn (zero_all_st_mm_insn);
>> +
>> +  /* Now, generate instructions to zero all the registers.  */
>> +
>> +  CLEAR_HARD_REG_SET (zeroed_hardregs);
>> +  rtx zero_gpr = NULL_RTX;
>> +  rtx zero_vector = NULL_RTX;
>> +  rtx zero_mask = NULL_RTX;
>> +
>> +  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>> +{
>> +  if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>> + continue;
>> +  if (!zero_call_used_regno_p (regno, all_sse_zeroed))
>> + continue;
>> +
>> +  SET_HARD_REG_BIT (zeroed_hardregs, regno);
>> +
>> +  rtx reg, tmp;
>> +  machine_mode mode = zero_call_used_regno_mode (regno);
>> +
>> +  reg = gen_rtx_REG (mode, regno);
>> +
>> +  if (mode == SImode)
>> + if (zero_gpr == NULL_RTX)
>> +   {
>> + zero_gpr = reg;
>> + tmp = gen_rtx_SET (reg, const0_rtx);
>> + if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
> 
> No need to complicate here, there is a peephole2 pattern that will perform:
> 
> ;; Attempt to always use XOR for zeroing registers (including FP modes).
> (define_peephole2
>  [(set (match_operand 0 "general_reg_operand")
>(match_operand 1 "const0_operand"))]
> 
> So, simply load a register with 0 and leave to 

[PATCH] Simplify and split irange::copy_legacy_range into two functions.

2020-10-20 Thread Aldy Hernandez via Gcc-patches
copy_legacy_range was a small but complex function.  It was tricky
to understand, and easy to introduce bugs into it.  It also did
unnecessary work on some code paths.

This patch splits the function into two functions that are more
efficient and easier to read (copy_to_legacy and
copy_legacy_to_multi_range).

Pushed.

gcc/ChangeLog:

* value-range.cc (irange::operator=): Split up call to
copy_legacy_range into...
(irange::copy_to_legacy): ...this.
(irange::copy_legacy_to_multi_range): ...and this.
(irange::copy_legacy_range): Remove.
* value-range.h: Remove copy_legacy_range.
Add copy_legacy_to_multi_range and copy_to_legacy.
---
 gcc/value-range.cc | 72 +++---
 gcc/value-range.h  |  3 +-
 2 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index cdcc6c65594..7847104050c 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -35,18 +35,14 @@ along with GCC; see the file COPYING3.  If not see
 irange &
 irange::operator= (const irange &src)
 {
-  if (legacy_mode_p () != src.legacy_mode_p ())
+  if (legacy_mode_p ())
 {
-  copy_legacy_range (src);
+  copy_to_legacy (src);
   return *this;
 }
-  if (legacy_mode_p ())
+  if (src.legacy_mode_p ())
 {
-  gcc_checking_assert (src.legacy_mode_p ());
-  m_num_ranges = src.m_num_ranges;
-  m_base[0] = src.m_base[0];
-  m_base[1] = src.m_base[1];
-  m_kind = src.m_kind;
+  copy_legacy_to_multi_range (src);
   return *this;
 }
 
@@ -81,44 +77,60 @@ irange::maybe_anti_range () const
  && upper_bound () == wi::max_value (precision, sign));
 }
 
-// Copy between a legacy and a multi-range, or vice-versa.
-
 void
-irange::copy_legacy_range (const irange &src)
+irange::copy_legacy_to_multi_range (const irange &src)
 {
-  gcc_checking_assert (src.legacy_mode_p () != legacy_mode_p ());
+  gcc_checking_assert (src.legacy_mode_p ());
+  gcc_checking_assert (!legacy_mode_p ());
   if (src.undefined_p ())
 set_undefined ();
   else if (src.varying_p ())
 set_varying (src.type ());
-  else if (src.kind () == VR_ANTI_RANGE)
-{
-  if (src.legacy_mode_p () && !range_has_numeric_bounds_p ())
-   set_varying (src.type ());
-  else
-   set (src.min (), src.max (), VR_ANTI_RANGE);
-}
-  else if (legacy_mode_p () && src.maybe_anti_range ())
-{
-  int_range<3> tmp (src);
-  tmp.invert ();
-  set (tmp.min (), wide_int_to_tree (src.type (), tmp.upper_bound (0)),
-  VR_ANTI_RANGE);
-}
   else
 {
-  // If copying legacy to int_range, normalize any symbolics.
-  if (src.legacy_mode_p () && !range_has_numeric_bounds_p ())
+  if (range_has_numeric_bounds_p ())
+   set (src.min (), src.max (), src.kind ());
+  else
{
  value_range cst (src);
  cst.normalize_symbolics ();
+ gcc_checking_assert (cst.varying_p () || cst.kind () == VR_RANGE);
  set (cst.min (), cst.max ());
- return;
}
-  set (src.min (), src.max ());
 }
 }
 
+// Copy any type of irange into a legacy.
+
+void
+irange::copy_to_legacy (const irange &src)
+{
+  gcc_checking_assert (legacy_mode_p ());
+  // Copy legacy to legacy.
+  if (src.legacy_mode_p ())
+{
+  m_num_ranges = src.m_num_ranges;
+  m_base[0] = src.m_base[0];
+  m_base[1] = src.m_base[1];
+  m_kind = src.m_kind;
+  return;
+}
+  // Copy multi-range to legacy.
+  if (src.undefined_p ())
+set_undefined ();
+  else if (src.varying_p ())
+set_varying (src.type ());
+  else if (src.maybe_anti_range ())
+{
+  int_range<3> r (src);
+  r.invert ();
+  // Use tree variants to save on tree -> wi -> tree conversions.
+  set (r.tree_lower_bound (0), r.tree_upper_bound (0), VR_ANTI_RANGE);
+}
+  else
+set (src.tree_lower_bound (), src.tree_upper_bound ());
+}
+
 // Swap min/max if they are out of order.  Return TRUE if further
 // processing of the range is necessary, FALSE otherwise.
 
diff --git a/gcc/value-range.h b/gcc/value-range.h
index 63c96204cda..760ee772316 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -124,7 +124,8 @@ protected:
   wide_int legacy_upper_bound (unsigned) const;
   int value_inside_range (tree) const;
   bool maybe_anti_range () const;
-  void copy_legacy_range (const irange &);
+  void copy_to_legacy (const irange &);
+  void copy_legacy_to_multi_range (const irange &);
 
 private:
   unsigned char m_num_ranges;
-- 
2.26.2



Re: [PATCH] openmp: Add support for omp_get_supported_active_levels

2020-10-20 Thread Tobias Burnus

See patch and thread at
https://gcc.gnu.org/pipermail/gcc-patches/2020-October/556570.html

Tobias

On 10/20/20 3:51 PM, David Edelsohn via Gcc-patches wrote:

This patch broke bootstrap on AIX.

/nasfarm/edelsohn/src/src/libgomp/target.c: In function
'gomp_target_init.part.':
/nasfarm/edelsohn/src/src/libgomp/target.c:3367:45: error: array
subscript [-19877956975980120, 19877956975980120] is outside array
bounds of 'struct gomp_device_descr[0]' [-Werror=array-bounds]
  3367 |   devices_s[num_devices_after_openmp++] = devices[i];
   |   ~~^~~~
/nasfarm/edelsohn/src/src/libgomp/target.c:3353:7: note: referencing
an object of size 0 allocated by 'malloc'
  3353 | = malloc (num_devices * sizeof (struct gomp_device_descr));
   |   ^~~~
/nasfarm/edelsohn/src/src/libgomp/target.c:3363:39: error: array
subscript [-19877956975980120, 19877956975980120] is outside array
bounds of 'struct gomp_device_descr[0]' [-Werror=array-bounds]
  3363 |   devices_s[num_devices_openmp++] = devices[i];
   |   ^~~~
/nasfarm/edelsohn/src/src/libgomp/target.c:3353:7: note: referencing
an object of size 0 allocated by 'malloc'
  3353 | = malloc (num_devices * sizeof (struct gomp_device_descr));
   |   ^~~~

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH] openmp: Add support for omp_get_supported_active_levels

2020-10-20 Thread David Edelsohn via Gcc-patches
This patch broke bootstrap on AIX.

/nasfarm/edelsohn/src/src/libgomp/target.c: In function
'gomp_target_init.part.':
/nasfarm/edelsohn/src/src/libgomp/target.c:3367:45: error: array
subscript [-19877956975980120, 19877956975980120] is outside array
bounds of 'struct gomp_device_descr[0]' [-Werror=array-bounds]
 3367 |   devices_s[num_devices_after_openmp++] = devices[i];
  |   ~~^~~~
/nasfarm/edelsohn/src/src/libgomp/target.c:3353:7: note: referencing
an object of size 0 allocated by 'malloc'
 3353 | = malloc (num_devices * sizeof (struct gomp_device_descr));
  |   ^~~~
/nasfarm/edelsohn/src/src/libgomp/target.c:3363:39: error: array
subscript [-19877956975980120, 19877956975980120] is outside array
bounds of 'struct gomp_device_descr[0]' [-Werror=array-bounds]
 3363 |   devices_s[num_devices_openmp++] = devices[i];
  |   ^~~~
/nasfarm/edelsohn/src/src/libgomp/target.c:3353:7: note: referencing
an object of size 0 allocated by 'malloc'
 3353 | = malloc (num_devices * sizeof (struct gomp_device_descr));
  |   ^~~~


Re: [committed][nvptx] Remove -m32

2020-10-20 Thread Tobias Burnus

On 10/15/20 3:26 PM, Tom de Vries wrote:

  PR target/97436
  * config/nvptx/nvptx.opt (m32): Comment out.
  * doc/invoke.texi (NVPTX options): Remove -m32.


This caused the warning: doc/invoke.texi:25617: warning: @itemx should
not begin @table

Fixed by the committed attached patch.

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
commit f7378bbf4778a09ccdf7e46dd81731ea4f7d638b
Author: Tobias Burnus 
Date:   Tue Oct 20 15:32:58 2020 +0200

gcc/doc/invoke.texi: Fix @table syntax

gcc/
* doc/invoke.texi (NVPTX options): Use @item not @itemx.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b3418b43a34..0b87822349f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -25614,7 +25614,7 @@ These options are defined for Nvidia PTX:
 
 @table @gcctabopt
 
-@itemx -m64
+@item -m64
 @opindex m64
 Ignored, but preserved for backward compatibility.  Only 64-bit ABI is
 supported.


Re: [PATCH] openmp: Implement support for OMP_TARGET_OFFLOAD

2020-10-20 Thread Jakub Jelinek via Gcc-patches
On Tue, Oct 20, 2020 at 02:17:26PM +0200, Tobias Burnus wrote:
> On 10/20/20 2:11 PM, Tobias Burnus wrote:
> 
> > Unfortunately, the committed patch
> > (r11-4121-g1bfc07d150790fae93184a79a7cce897655cb37b)
> > causes build errors.
> > 
> > The error seems to be provoked by function cloning – as the code
> > itself looks fine:
> > ...
> >  struct gomp_device_descr *devices_s
> > = malloc (num_devices * sizeof (struct gomp_device_descr));
> > ...
> >   for (i = 0; i < num_devices; i++)
> > if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
> >   devices_s[num_devices_after_openmp++] = devices[i];
> 
> gomp_target_init.part.0 ()
> {
> ...
> 
>   devices_s_1 = malloc (0);
> ...
>   num_devices.16_67 = num_devices;
> ...
>   if (num_devices.16_67 > 0)
> goto ; [89.00%]
>   else
> goto ; [11.00%]
> 
> Which seems to have an ordering problem.

This patch fixes the warning that breaks the bootstrap, but I haven't
tested it with offloading to make sure it doesn't break offloading somehow.

2020-10-20  Jakub Jelinek  

* target.c (gomp_target_init): Inside of the function, use automatic
variables corresponding to num_devices, num_devices_openmp and devices
global variables and update the globals only at the end of the
function.

--- libgomp/target.c.jj 2020-10-20 14:37:36.630967911 +0200
+++ libgomp/target.c2020-10-20 14:52:36.556023803 +0200
@@ -3279,10 +3279,9 @@ gomp_target_init (void)
   const char *suffix = SONAME_SUFFIX (1);
   const char *cur, *next;
   char *plugin_name;
-  int i, new_num_devices;
-
-  num_devices = 0;
-  devices = NULL;
+  int i, new_num_devs;
+  int num_devs = 0, num_devs_openmp;
+  struct gomp_device_descr *devs = NULL;
 
   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
 return;
@@ -3303,7 +3302,7 @@ gomp_target_init (void)
plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
if (!plugin_name)
  {
-   num_devices = 0;
+   num_devs = 0;
break;
  }
 
@@ -3313,16 +3312,16 @@ gomp_target_init (void)
 
if (gomp_load_plugin_for_device (_device, plugin_name))
  {
-   new_num_devices = current_device.get_num_devices_func ();
-   if (new_num_devices >= 1)
+   new_num_devs = current_device.get_num_devices_func ();
+   if (new_num_devs >= 1)
  {
/* Augment DEVICES and NUM_DEVICES.  */
 
-   devices = realloc (devices, (num_devices + new_num_devices)
-  * sizeof (struct gomp_device_descr));
-   if (!devices)
+   devs = realloc (devs, (num_devs + new_num_devs)
+ * sizeof (struct gomp_device_descr));
+   if (!devs)
  {
-   num_devices = 0;
+   num_devs = 0;
free (plugin_name);
break;
  }
@@ -3332,12 +3331,12 @@ gomp_target_init (void)
current_device.type = current_device.get_type_func ();
current_device.mem_map.root = NULL;
current_device.state = GOMP_DEVICE_UNINITIALIZED;
-   for (i = 0; i < new_num_devices; i++)
+   for (i = 0; i < new_num_devs; i++)
  {
current_device.target_id = i;
-   devices[num_devices] = current_device;
-   gomp_mutex_init ([num_devices].lock);
-   num_devices++;
+   devs[num_devs] = current_device;
+   gomp_mutex_init ([num_devs].lock);
+   num_devs++;
  }
  }
  }
@@ -3349,34 +3348,37 @@ gomp_target_init (void)
 
   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
  NUM_DEVICES_OPENMP.  */
-  struct gomp_device_descr *devices_s
-= malloc (num_devices * sizeof (struct gomp_device_descr));
-  if (!devices_s)
+  struct gomp_device_descr *devs_s
+= malloc (num_devs * sizeof (struct gomp_device_descr));
+  if (!devs_s)
 {
-  num_devices = 0;
-  free (devices);
-  devices = NULL;
+  num_devs = 0;
+  free (devs);
+  devs = NULL;
 }
-  num_devices_openmp = 0;
-  for (i = 0; i < num_devices; i++)
-if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
-  devices_s[num_devices_openmp++] = devices[i];
-  int num_devices_after_openmp = num_devices_openmp;
-  for (i = 0; i < num_devices; i++)
-if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
-  devices_s[num_devices_after_openmp++] = devices[i];
-  free (devices);
-  devices = devices_s;
+  num_devs_openmp = 0;
+  for (i = 0; i < num_devs; i++)
+if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+  devs_s[num_devs_openmp++] = devs[i];
+  int num_devs_after_openmp = num_devs_openmp;
+  for (i = 0; i < num_devs; i++)
+if (!(devs[i].capabilities 

Re: [PATCH] Implement no_stack_protect attribute.

2020-10-20 Thread Richard Biener via Gcc-patches
On Tue, Oct 20, 2020 at 1:24 PM Martin Liška  wrote:
>
> PING^5

So can we use the same identifier as clang here as Nick
requests?  Thus, OK with re-naming everything alongside
no_stack_protector.  It isn't really the opposite of the
stack_protect attribute since that only protects when
-fstack-protector-explicit is enabled.

Thanks,
Richard.

> On 8/17/20 2:35 PM, Martin Liška wrote:
> > PING^4
> >
> > On 7/23/20 1:10 PM, Martin Liška wrote:
> >> PING^3
> >>
> >> On 6/24/20 11:09 AM, Martin Liška wrote:
> >>> PING^2
> >>>
> >>> On 6/10/20 10:12 AM, Martin Liška wrote:
>  PING^1
> 
>  On 5/25/20 3:10 PM, Martin Liška wrote:
> > On 5/21/20 4:53 PM, Martin Sebor wrote:
> >> On 5/21/20 5:28 AM, Martin Liška wrote:
> >>> On 5/18/20 10:37 PM, Martin Sebor wrote:
>  I know there are some somewhat complex cases the attribute exclusion
>  mechanism isn't general enough to handle but this seems simple enough
>  that it should work.  Unless I'm missing something that makes it not
>  feasible I would suggest to use it.
> >>>
> >>> Hi Martin.
> >>>
> >>> Do we have a better place where we check for attribute collision?
> >>
> >> If by collision you mean the same thing as the mutual exclusion I was
> >> talking about then that's done by creating an 
> >> attribute_spec::exclusions
> >> array like for instance attr_cold_hot_exclusions in c-attribs.c and
> >> pointing to it from the attribute_spec entries for each of
> >> the mutually exclusive attributes in the attribute table.  Everything
> >> else is handled automatically by decl_attributes.
> >>
> >> Martin
> >
> > Thanks, I'm sending updated version of the patch that utilizes the 
> > conflict
> > detection.
> >
> > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >
> > Ready to be installed?
> > Thanks,
> > Martin
> 
> >>>
> >>
> >
>


Re: [PATCH] openmp: Implement support for OMP_TARGET_OFFLOAD

2020-10-20 Thread Tobias Burnus

On 10/20/20 2:11 PM, Tobias Burnus wrote:


Unfortunately, the committed patch
(r11-4121-g1bfc07d150790fae93184a79a7cce897655cb37b)
causes build errors.

The error seems to be provoked by function cloning – as the code
itself looks fine:
...
 struct gomp_device_descr *devices_s
= malloc (num_devices * sizeof (struct gomp_device_descr));
...
  for (i = 0; i < num_devices; i++)
if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
  devices_s[num_devices_after_openmp++] = devices[i];


gomp_target_init.part.0 ()
{
...

  devices_s_1 = malloc (0);
...
  num_devices.16_67 = num_devices;
...
  if (num_devices.16_67 > 0)
goto ; [89.00%]
  else
goto ; [11.00%]

Which seems to have an ordering problem.

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH] openmp: Implement support for OMP_TARGET_OFFLOAD

2020-10-20 Thread Tobias Burnus

On 10/19/20 8:21 PM, Jakub Jelinek via Gcc-patches wrote:


On Mon, Oct 19, 2020 at 06:57:49PM +0100, Kwok Cheung Yeung wrote:

--- a/libgomp/target.c
+++ b/libgomp/target.c

...

Otherwise LGTM.


Unfortunately, the committed patch 
(r11-4121-g1bfc07d150790fae93184a79a7cce897655cb37b)
causes build errors.

The error seems to be provoked by function cloning – as the code
itself looks fine:


static void
gomp_target_init (void)
{
...
  num_devices = 0;
...
// NEW LINES:
  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
return;
...
 struct gomp_device_descr *devices_s
= malloc (num_devices * sizeof (struct gomp_device_descr));
...
  for (i = 0; i < num_devices; i++)
if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
  devices_s[num_devices_after_openmp++] = devices[i];

I fail to see why this now gives the -Werror warning:

../../../repos/gcc/libgomp/target.c: In function ‘gomp_target_init.part.0’:
../../../repos/gcc/libgomp/target.c:3367:45: error: array subscript 
[-27450512014448737, 27450512014448737] is outside array bounds of ‘struct 
gomp_device_descr[0]’ [-Werror=array-bounds]
 3367 |   devices_s[num_devices_after_openmp++] = devices[i];
  |   ~~^~~~
../../../repos/gcc/libgomp/target.c:3353:7: note: referencing an object of size 
0 allocated by ‘malloc’
 3353 | = malloc (num_devices * sizeof (struct gomp_device_descr));
  |   ^~~~
../../../repos/gcc/libgomp/target.c:3363:39: error: array subscript 
[-27450512014448737, 27450512014448737] is outside array bounds of ‘struct 
gomp_device_descr[0]’ [-Werror=array-bounds]
 3363 |   devices_s[num_devices_openmp++] = devices[i];
  |   ^~~~
../../../repos/gcc/libgomp/target.c:3353:7: note: referencing an object of size 
0 allocated by ‘malloc’
 3353 | = malloc (num_devices * sizeof (struct gomp_device_descr));
  |   ^~~~

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH] arm: Fix multiple inheritance thunks for thumb-1 with -mpure-code

2020-10-20 Thread Richard Earnshaw via Gcc-patches
On 20/10/2020 12:22, Richard Earnshaw wrote:
> On 19/10/2020 17:32, Christophe Lyon via Gcc-patches wrote:
>> On Mon, 19 Oct 2020 at 16:39, Richard Earnshaw
>>  wrote:
>>>
>>> On 12/10/2020 08:59, Christophe Lyon via Gcc-patches wrote:
 On Thu, 8 Oct 2020 at 11:58, Richard Earnshaw
  wrote:
>
> On 08/10/2020 10:07, Christophe Lyon via Gcc-patches wrote:
>> On Tue, 6 Oct 2020 at 18:02, Richard Earnshaw
>>  wrote:
>>>
>>> On 29/09/2020 20:50, Christophe Lyon via Gcc-patches wrote:
 When mi_delta is > 255 and -mpure-code is used, we cannot load delta
 from code memory (like we do without -mpure-code).

 This patch builds the value of mi_delta into r3 with a series of
 movs/adds/lsls.

 We also do some cleanup by not emitting the function address and delta
 via .word directives at the end of the thunk since we don't use them
 with -mpure-code.

 No need for new testcases, this bug was already identified by
 eg. pr46287-3.C

 2020-09-29  Christophe Lyon  

   gcc/
   * config/arm/arm.c (arm_thumb1_mi_thunk): Build mi_delta in r3 
 and
   do not emit function address and delta when -mpure-code is used.
>>>
>> Hi Richard,
>>
>> Thanks for your comments.
>>
>>> There are some optimizations you can make to this code.
>>>
>>> Firstly, for values between 256 and 510 (inclusive), it would be better
>>> to just expand a mov of 255 followed by an add.
>> I now see the splitter for the "Pe" constraint which I hadn't noticed
>> before, so I can write something similar indeed.
>>
>> However, I'm not quite sure I understand the benefit of the split
>> when -mpure-code is NOT used.
>> Consider:
>> int f3_1 (void) { return 510; }
>> int f3_2 (void) { return 511; }
>> Compile with -O2 -mcpu=cortex-m0:
>> f3_1:
>> movsr0, #255
>> lslsr0, r0, #1
>> bx  lr
>> f3_2:
>> ldr r0, .L4
>> bx  lr
>>
>> The splitter makes the code bigger, does it "compensate" for this by
>> not having to load the constant?
>> Actually the constant uses 4 more bytes, which should be taken into
>> account when comparing code size,
>
> Yes, the size of the literal pool entry needs to be taken into account.
>  It might happen that the entry could be shared with another use of that
> literal, but in general that's rare.
>
>> so f3_1 uses 6 bytes, and f3_2 uses 8, so as you say below three
>> thumb1 instructions would be equivalent in size compared to loading
>> from the literal pool. Should the 256-510 range be extended?
>
> It's a bit borderline at three instructions when literal pools are not
> expensive to use, but in thumb1 literal pools tend to be quite small due
> to the limited pc offsets we can use.  I think on balance we probably
> want to use the instruction sequence unless optimizing for size.
>
>>
>>
>>> This is also true for
>>> the literal pools alternative as well, so should be handled before all
>>> this.
>> I am not sure what you mean: with -mpure-code, the above sample is 
>> compiled as:
>> f3_1:
>> movsr0, #255
>> lslsr0, r0, #1
>> bx  lr
>> f3_2:
>> movsr0, #1
>> lslsr0, r0, #8
>> addsr0, r0, #255
>> bx  lr
>>
>> so the "return 510" case is already handled as without -mpure-code.
>
> I was thinking specifically of the thunk sequence where you seem to be
> emitting instructions directly rather than generating RTL.  The examples
> you show here are not thunks.
>
 OK thanks for the clarification.

 Here is an updated version, split into 3 patches to hopefully make
 review easier.
 They apply on top of my other mpure-code patches for PR96967 and PR96770:
 https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554956.html
 https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554957.html

 I kept it this way to make incremental changes easier to understand.

 Patch 1: With the hope to avoid confusion and make maintenance easier,
 I have updated thumb1_gen_const_int() so that it can generate either RTL or
 asm. This way, all the code used to build thumb-1 constants is in the
 same place,
  in case we need to improve/fix it later. We now generate shorter 
 sequences in
 several cases matching your comments.

 Patch 2: Removes the equivalent loop from thumb1_movsi_insn pattern and
 calls thumb1_gen_const_int.

 Patch 3: Update of the original patch in this thread, now calls
 thumb1_gen_const_int.
>>>
>>> Yuk!  Those changes to thumb1_gen_const_int are 

Re: [PATCH] Implement no_stack_protect attribute.

2020-10-20 Thread Martin Liška

PING^5

On 8/17/20 2:35 PM, Martin Liška wrote:

PING^4

On 7/23/20 1:10 PM, Martin Liška wrote:

PING^3

On 6/24/20 11:09 AM, Martin Liška wrote:

PING^2

On 6/10/20 10:12 AM, Martin Liška wrote:

PING^1

On 5/25/20 3:10 PM, Martin Liška wrote:

On 5/21/20 4:53 PM, Martin Sebor wrote:

On 5/21/20 5:28 AM, Martin Liška wrote:

On 5/18/20 10:37 PM, Martin Sebor wrote:

I know there are some somewhat complex cases the attribute exclusion
mechanism isn't general enough to handle but this seems simple enough
that it should work.  Unless I'm missing something that makes it not
feasible I would suggest to use it.


Hi Martin.

Do we have a better place where we check for attribute collision?


If by collision you mean the same thing as the mutual exclusion I was
talking about then that's done by creating an attribute_spec::exclusions
array like for instance attr_cold_hot_exclusions in c-attribs.c and
pointing to it from the attribute_spec entries for each of
the mutually exclusive attributes in the attribute table.  Everything
else is handled automatically by decl_attributes.

Martin


Thanks, I'm sending updated version of the patch that utilizes the conflict
detection.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin












Re: [PATCH] arm: Fix multiple inheritance thunks for thumb-1 with -mpure-code

2020-10-20 Thread Richard Earnshaw via Gcc-patches
On 19/10/2020 17:32, Christophe Lyon via Gcc-patches wrote:
> On Mon, 19 Oct 2020 at 16:39, Richard Earnshaw
>  wrote:
>>
>> On 12/10/2020 08:59, Christophe Lyon via Gcc-patches wrote:
>>> On Thu, 8 Oct 2020 at 11:58, Richard Earnshaw
>>>  wrote:

 On 08/10/2020 10:07, Christophe Lyon via Gcc-patches wrote:
> On Tue, 6 Oct 2020 at 18:02, Richard Earnshaw
>  wrote:
>>
>> On 29/09/2020 20:50, Christophe Lyon via Gcc-patches wrote:
>>> When mi_delta is > 255 and -mpure-code is used, we cannot load delta
>>> from code memory (like we do without -mpure-code).
>>>
>>> This patch builds the value of mi_delta into r3 with a series of
>>> movs/adds/lsls.
>>>
>>> We also do some cleanup by not emitting the function address and delta
>>> via .word directives at the end of the thunk since we don't use them
>>> with -mpure-code.
>>>
>>> No need for new testcases, this bug was already identified by
>>> eg. pr46287-3.C
>>>
>>> 2020-09-29  Christophe Lyon  
>>>
>>>   gcc/
>>>   * config/arm/arm.c (arm_thumb1_mi_thunk): Build mi_delta in r3 and
>>>   do not emit function address and delta when -mpure-code is used.
>>
> Hi Richard,
>
> Thanks for your comments.
>
>> There are some optimizations you can make to this code.
>>
>> Firstly, for values between 256 and 510 (inclusive), it would be better
>> to just expand a mov of 255 followed by an add.
> I now see the splitter for the "Pe" constraint which I hadn't noticed
> before, so I can write something similar indeed.
>
> However, I'm not quite sure I understand the benefit of the split
> when -mpure-code is NOT used.
> Consider:
> int f3_1 (void) { return 510; }
> int f3_2 (void) { return 511; }
> Compile with -O2 -mcpu=cortex-m0:
> f3_1:
> movsr0, #255
> lslsr0, r0, #1
> bx  lr
> f3_2:
> ldr r0, .L4
> bx  lr
>
> The splitter makes the code bigger, does it "compensate" for this by
> not having to load the constant?
> Actually the constant uses 4 more bytes, which should be taken into
> account when comparing code size,

 Yes, the size of the literal pool entry needs to be taken into account.
  It might happen that the entry could be shared with another use of that
 literal, but in general that's rare.

> so f3_1 uses 6 bytes, and f3_2 uses 8, so as you say below three
> thumb1 instructions would be equivalent in size compared to loading
> from the literal pool. Should the 256-510 range be extended?

 It's a bit borderline at three instructions when literal pools are not
 expensive to use, but in thumb1 literal pools tend to be quite small due
 to the limited pc offsets we can use.  I think on balance we probably
 want to use the instruction sequence unless optimizing for size.

>
>
>> This is also true for
>> the literal pools alternative as well, so should be handled before all
>> this.
> I am not sure what you mean: with -mpure-code, the above sample is 
> compiled as:
> f3_1:
> movsr0, #255
> lslsr0, r0, #1
> bx  lr
> f3_2:
> movsr0, #1
> lslsr0, r0, #8
> addsr0, r0, #255
> bx  lr
>
> so the "return 510" case is already handled as without -mpure-code.

 I was thinking specifically of the thunk sequence where you seem to be
 emitting instructions directly rather than generating RTL.  The examples
 you show here are not thunks.

>>> OK thanks for the clarification.
>>>
>>> Here is an updated version, split into 3 patches to hopefully make
>>> review easier.
>>> They apply on top of my other mpure-code patches for PR96967 and PR96770:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554956.html
>>> https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554957.html
>>>
>>> I kept it this way to make incremental changes easier to understand.
>>>
>>> Patch 1: With the hope to avoid confusion and make maintenance easier,
>>> I have updated thumb1_gen_const_int() so that it can generate either RTL or
>>> asm. This way, all the code used to build thumb-1 constants is in the
>>> same place,
>>>  in case we need to improve/fix it later. We now generate shorter sequences 
>>> in
>>> several cases matching your comments.
>>>
>>> Patch 2: Removes the equivalent loop from thumb1_movsi_insn pattern and
>>> calls thumb1_gen_const_int.
>>>
>>> Patch 3: Update of the original patch in this thread, now calls
>>> thumb1_gen_const_int.
>>
>> Yuk!  Those changes to thumb1_gen_const_int are horrible.
>>
>> I think we should be able to leverage the fact that the compiler can use
>> C++ now to do much better than that, for example by making that function
>> a template. 

[PATCH] Fix latch PHI arg lookup in vectorizable_reduction for double-reduction

2020-10-20 Thread Richard Biener
We were using the wrong loop to figure out the latch arg of a
double-reduction PHI.  That isn't a problem when the ->dest_idx values
match up with the outer loop edges - but that's of course not guaranteed.

Bootstrap & regtest pending on x86_64-unknown-linux-gnu.

2020-10-20  Richard Biener  

* tree-vect-loop.c (vectorizable_reduction): Use the correct
loops latch edge for the PHI arg lookup.
---
 gcc/tree-vect-loop.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index dceb65c934a..c8747595a63 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6359,8 +6359,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   /* Verify following REDUC_IDX from the latch def leads us back to the PHI
  and compute the reduction chain length.  Discover the real
  reduction operation stmt on the way (stmt_info and slp_for_stmt_info).  */
-  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
- loop_latch_edge (loop));
+  tree reduc_def
+= PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
+loop_latch_edge
+  (gimple_bb (reduc_def_phi)->loop_father));
   unsigned reduc_chain_length = 0;
   bool only_slp_reduc_chain = true;
   stmt_info = NULL;
-- 
2.26.2


Re: [PATCH] Avoid re-allocating PHIs in split_edge

2020-10-20 Thread Richard Biener via Gcc-patches
On Tue, Aug 22, 2017 at 4:11 PM Jeff Law  wrote:
>
> On 08/22/2017 03:03 AM, Richard Biener wrote:
> >
> > The following patch makes sure to not grow the number of incoming
> > edges in the destination when doing split_edge on GIMPLE.  That's
> > easy by first redirecting the existing edge to the destination
> > to the new block rather than creating the new fallthru from the
> > new block to the destination.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> >
> > Richard.
> >
> > 2017-08-22  Richard Biener  
> >
> >   * tree-cfg.c (gimple_split_edge): Avoid reallocating target
> >   PHI nodes.
> Definitely a good thing.  Having PHIs get reallocated has led to some
> subtle bugs.  I realize this isn't a complete solution to that problem,
> but every bit helps.

So this causes PHI args to be swapped which I need to avoid now.

Thus the following followup, bootstrapped on x86_64-unknown-linux-gnu,
testing in progress.

Richard.

>From b17d1ec8ef94f1ab87fcfedf7c947815e60e42e7 Mon Sep 17 00:00:00 2001
From: Richard Biener 
Date: Tue, 20 Oct 2020 12:52:31 +0200
Subject: [PATCH] Avoid changing PHIs in GIMPLE split_edge
To: gcc-patches@gcc.gnu.org

Previously I've changed gimple_split_edge to avoid PHI node
re-allocation, but this introduced swapping of PHI arguments
due to the way edge redirection works.  This is now a problem
for me, which can be solved with the following approach that
reduces the overhead of split_edge even more.  We can simply
pretend there are no PHI nodes if we can make sure the
new fallthru will have the same dest_idx as the old edge
into the destination.

2020-10-20  Richard Biener  

* tree-cfg.c (reinstall_phi_args): Remove.
(gimple_split_edge): Remove PHIs around the edge redirection
to avoid touching them at all.


p
Description: Binary data


Re: [PATCH] Hashtable refactoring and simplification

2020-10-20 Thread Jonathan Wakely via Gcc-patches

On 15/10/20 19:31 +0200, François Dumont via Libstdc++ wrote:
I'd like to integrate some of the refactoring I had to do as part of 
my work on support of custom pointers in unordered containers.


Those are of course the abi compatible changes.

Thanks to the changes on _Hash_node type definition I was able to get 
rid of _Hash_code_base partial specialization. I also removed the 
smaller _Hashtable_base::_Equal_hash_code helper type. For this one I 
wonder if you prefer to keep it in place of the new 
_S_equals/_S_node_equals overloads ?



No, I like your changes. It is a nice simplification.

OK for trunk, thanks.




[committed 2/2] libstdc++: Define noop coroutine details private and inline [PR 95917]

2020-10-20 Thread Jonathan Wakely via Gcc-patches

libstdc++: Define noop coroutine details private and inline [PR 95917]

This moves the __noop_coro_frame type, the __noop_coro_fr global
variable, and the __dummy_resume_destroy function from namespace scope,
replacing them with private members of the specialization
coroutine_handle.

The function and variable are also declared inline, so that they
generate no code unless used.

libstdc++-v3/ChangeLog:

PR libstdc++/95917
* include/std/coroutine (__noop_coro_frame): Replace with
noop_coroutine_handle::__frame.
(__dummy_resume_destroy): Define inline in __frame.
(__noop_coro_fr): Replace with noop_coroutine_handle::_S_fr
and define as inline.
* testsuite/18_support/coroutines/95917.cc: New test.

Tested powerpc64le-linux. Committed to trunk.



[committed 1/2] libstdc++: Remove inheritance from std::coroutine_handle<> [LWG 3460]

2020-10-20 Thread Jonathan Wakely via Gcc-patches
This removes the coroutine_handle<> base class from the primary template
and the noop_coroutine_promise explicit specialization. To preserve the
API various members are added, as they are no longer inherited from the
base class.

I've also tweaked some indentation and formatting, and replaced
subclause numbers from the standard with stable names like
[coroutine.handle.con].

libstdc++-v3/ChangeLog:

* include/std/coroutine (coroutine_handle<_Promise>): Remove
base class. Add constructors, conversions, accessors etc. as
proposed for LWG 3460.
(coroutine_handle): Likewise.
* testsuite/18_support/coroutines/lwg3460.cc: New test.

Tested powerpc64le-linux. Committed to trunk.

commit 2c2278f300cdd5f3181fe7df4dd1d869a67266a9
Author: Jonathan Wakely 
Date:   Tue Oct 20 11:18:35 2020

libstdc++: Remove inheritance from std::coroutine_handle<> [LWG 3460]

This removes the coroutine_handle<> base class from the primary template
and the noop_coroutine_promise explicit specialization. To preserve the
API various members are added, as they are no longer inherited from the
base class.

I've also tweaked some indentation and formatting, and replaced
subclause numbers from the standard with stable names like
[coroutine.handle.con].

libstdc++-v3/ChangeLog:

* include/std/coroutine (coroutine_handle<_Promise>): Remove
base class. Add constructors, conversions, accessors etc. as
proposed for LWG 3460.
(coroutine_handle): Likewise.
* testsuite/18_support/coroutines/lwg3460.cc: New test.

diff --git a/libstdc++-v3/include/std/coroutine 
b/libstdc++-v3/include/std/coroutine
index 468d1107557..6e1cf141579 100644
--- a/libstdc++-v3/include/std/coroutine
+++ b/libstdc++-v3/include/std/coroutine
@@ -87,7 +87,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 coroutine_handle
 {
 public:
-  // 17.12.3.1, construct/reset
+  // [coroutine.handle.con], construct/reset
   constexpr coroutine_handle() noexcept : _M_fr_ptr(0) {}
 
   constexpr coroutine_handle(std::nullptr_t __h) noexcept
@@ -101,7 +101,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 
 public:
-  // 17.12.3.2, export/import
+  // [coroutine.handle.export.import], export/import
   constexpr void* address() const noexcept { return _M_fr_ptr; }
 
   constexpr static coroutine_handle from_address(void* __a) noexcept
@@ -112,7 +112,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 
 public:
-  // 17.12.3.3, observers
+  // [coroutine.handle.observers], observers
   constexpr explicit operator bool() const noexcept
   {
return bool(_M_fr_ptr);
@@ -120,7 +120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   bool done() const noexcept { return __builtin_coro_done(_M_fr_ptr); }
 
-  // 17.12.3.4, resumption
+  // [coroutine.handle.resumption], resumption
   void operator()() const { resume(); }
 
   void resume() const { __builtin_coro_resume(_M_fr_ptr); }
@@ -131,10 +131,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   void* _M_fr_ptr;
   };
 
-  // 17.12.3.6 Comparison operators
-  /// [coroutine.handle.compare]
-  constexpr bool operator==(coroutine_handle<> __a,
-   coroutine_handle<> __b) noexcept
+  // [coroutine.handle.compare], comparison operators
+
+  constexpr bool
+  operator==(coroutine_handle<> __a, coroutine_handle<> __b) noexcept
   {
 return __a.address() == __b.address();
   }
@@ -142,76 +142,107 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #if _COROUTINES_USE_SPACESHIP
   constexpr strong_ordering
   operator<=>(coroutine_handle<> __a, coroutine_handle<> __b) noexcept
-  { return std::compare_three_way()(__a.address(), __b.address()); }
+  {
+return std::compare_three_way()(__a.address(), __b.address());
+  }
 #else
   // These are to enable operation with std=c++14,17.
-  constexpr bool operator!=(coroutine_handle<> __a,
-   coroutine_handle<> __b) noexcept
+  constexpr bool
+  operator!=(coroutine_handle<> __a, coroutine_handle<> __b) noexcept
   {
 return !(__a == __b);
   }
 
-  constexpr bool operator<(coroutine_handle<> __a,
-  coroutine_handle<> __b) noexcept
+  constexpr bool
+  operator<(coroutine_handle<> __a, coroutine_handle<> __b) noexcept
   {
 return less()(__a.address(), __b.address());
   }
 
-  constexpr bool operator>(coroutine_handle<> __a,
-  coroutine_handle<> __b) noexcept
+  constexpr bool
+  operator>(coroutine_handle<> __a, coroutine_handle<> __b) noexcept
   {
 return __b < __a;
   }
 
-  constexpr bool operator<=(coroutine_handle<> __a,
-   coroutine_handle<> __b) noexcept
+  constexpr bool
+  operator<=(coroutine_handle<> __a, coroutine_handle<> __b) noexcept
   {
 return !(__a > __b);
   }
 
-  constexpr bool operator>=(coroutine_handle<> __a,
-   

[committed] MSP430: Support a memory operand for op1 of andneghi3

2020-10-20 Thread Jozef Lawrynowicz
The attached patch fixes an ICE caused by an unrecognizable insn
generated when compiling gcc.c-torture/execute/pr97386-1.c at -O0 for
msp430-elf.

Successfully regtested on trunk and committed as obvious.
>From 8c3846e80210ba437644b5b91d9bd9c564ca565a Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Tue, 20 Oct 2020 11:26:20 +0100
Subject: [PATCH] MSP430: Support a memory operand for op1 of andneghi3

This fixes an ICE caused by an unrecognizable insn generated when
compiling gcc.c-torture/execute/pr97386-1.c at -O0.

gcc/ChangeLog:

* config/msp430/msp430.md (andneghi3): Allow general operand for
op1 and update output assembler template.
---
 gcc/config/msp430/msp430.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md
index f70e61b97dd..ad244bb0f33 100644
--- a/gcc/config/msp430/msp430.md
+++ b/gcc/config/msp430/msp430.md
@@ -1346,12 +1346,12 @@ (define_insn "bis_SR"
 ;; instructions, so we provide a pattern to support it here.
 (define_insn "andneghi3"
   [(set (match_operand:HI 0 "register_operand" "=r")
-   (and:HI (neg:HI (match_operand:HI 1 "register_operand"  "r"))
+   (and:HI (neg:HI (match_operand:HI 1 "general_operand"  "rm"))
(match_operand2 "immediate_operand" "n")))]
   ""
   "*
 if (REGNO (operands[0]) != REGNO (operands[1]))
-  return \"MOV.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
+  return \"MOV%X1.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
 else
   return \"INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
   "
-- 
2.28.0



[PATCH] aarch64: Add vstN_lane_bf16 + vstNq_lane_bf16 intrinsics

2020-10-20 Thread Andrea Corallo via Gcc-patches
Hi all,

I'd like to submit the following patch implementing the bfloat16_t neon
related store intrinsics: vst2_lane_bf16, vst2q_lane_bf16,
vst3_lane_bf16, vst3q_lane_bf16 vst4_lane_bf16, vst4q_lane_bf16.

Please refer to:
ACLE 
ISA  

Regtested and bootstrapped.

Thanks!

  Andrea

>From e94cea950403c8c61fc46d405521dcaf609555b1 Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Thu, 8 Oct 2020 11:02:09 +0200
Subject: [PATCH] aarch64: Add vstN_lane_bf16 + vstNq_lane_bf16 intrinsics

gcc/ChangeLog

2020-10-19  Andrea Corallo  

* config/aarch64/arm_neon.h (__STX_LANE_FUNC): Move to the bottom
of the file so we can use these also for defining the bf16 related
intrinsics.
(vst2_lane_bf16, vst2q_lane_bf16, vst3_lane_bf16, vst3q_lane_bf16)
(vst4_lane_bf16, vst4q_lane_bf16): Add new intrinsics.

gcc/testsuite/ChangeLog

2020-10-19  Andrea Corallo  

* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
(hbfloat16_t): Define type.
(CHECK_FP): Make it work for bfloat types.
* gcc.target/aarch64/advsimd-intrinsics/bf16_vstN_lane_1.c: New file.
* gcc.target/aarch64/advsimd-intrinsics/bf16_vstN_lane_2.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vst2_lane_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vst3_lane_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vst4_lane_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_bf16_indices_1.c:
Likewise.
---
 gcc/config/aarch64/arm_neon.h | 534 +-
 .../aarch64/advsimd-intrinsics/arm-neon-ref.h |   4 +-
 .../advsimd-intrinsics/bf16_vstN_lane_1.c | 227 
 .../advsimd-intrinsics/bf16_vstN_lane_2.c |  52 ++
 .../vst2_lane_bf16_indices_1.c|  16 +
 .../vst2q_lane_bf16_indices_1.c   |  16 +
 .../vst3_lane_bf16_indices_1.c|  16 +
 .../vst3q_lane_bf16_indices_1.c   |  16 +
 .../vst4_lane_bf16_indices_1.c|  16 +
 .../vst4q_lane_bf16_indices_1.c   |  16 +
 10 files changed, 656 insertions(+), 257 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstN_lane_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstN_lane_2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_bf16_indices_1.c

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2bb20e15069..0088ea9896f 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10873,262 +10873,6 @@ __STRUCTN (poly, 8, 4)
 __STRUCTN (float, 64, 4)
 #undef __STRUCTN
 
-
-#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode,   \
-   qmode, ptr_mode, funcsuffix, signedtype) \
-__extension__ extern __inline void  \
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
-vst2_lane_ ## funcsuffix (ptrtype *__ptr,   \
- intype __b, const int __c) \
-{   \
-  __builtin_aarch64_simd_oi __o;\
-  largetype __temp; \
-  __temp.val[0]
 \
-= vcombine_##funcsuffix (__b.val[0],\
-vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-  __temp.val[1]
 \
-= vcombine_##funcsuffix (__b.val[1],\
-vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-  __o = __builtin_aarch64_set_qregoi##qmode (__o,   \
-(signedtype) __temp.val[0], 0); \
-  __o = __builtin_aarch64_set_qregoi##qmode (__o,

[patch] Introduce vxworks7r2 support for ppc and ppc64

2020-10-20 Thread Olivier Hainque
Hello,

This change introduces support for the most recent versions
of VxWorks on PowerPC targets, for both 32 and 64 bit thanks
to a bi-arch setup.

The system compilers are essentially configured as Linux
toolchains with only a few specificities and we replicate
that model here.

The most visible specificities are the use of secureplt by
default, the pre-definition of some macros that the system
headers still rely on (_VX_CPU and _VX_CPU_FAMILY, for example),
and of course some variations related to the typical VxWorks
kernel vs RTP mode distinction.

In addition to the introduction of config.gcc and libgcc
configuration chunks, much inspired by the linux ones, the
change

- Reworks rs6000/vxworks.h file to feature bits common to the
  Vx6 and Vx7 port then a separate section for each, where the
  Vx7 part is very short as we rely on the Linux definitions
  for most things.

- Adjusts the CPU macro predefinitions in CPP_SPEC to resort to
  "_VX_CPU" instead of "CPU" for Vx7, to better match the more recent
  system headers expectations,

- Adds a cpu definition case for e6500.

- Changes to use SUB3TARGET_OVERRIDE_OPTIONS instead of
  SUBSUBTARGET_OVERRIDE_OPTIONS for specifics, so we don't override
  the Linux's version of the latter for vx7.


We have been using this for a while in gcc-9 production toolchains
with Ada, C & C++ support.

I have also verified that I could

- build and pass quite a few tests with a gcc-10 based ongoing port
  (an in-house testsuite for cross configurations and Ada acats for
  both kernel and RTP setups), for both VxWorks 6.9 and 7.2

- build a VxWorks 6.9 and 7.2 compiler with mainline sources, as well
  as an Ada+C VxWorks 653 compiler with only a couple of minor configuration
  self-spec presets (-DVTHREADS -D_WRS_VXWORKS_MAJOR=5).

The base ports were initially developed by Doug Rupp, then reworked a
bit to fit with other ongoing work in the VxWorks area.

Olivier


2020-10-20  Douglas Rupp  
Olivier Hainque  

gcc/
* config.gcc (powerpc*-wrs-vxworks7r*): New case.
* config/rs6000/vxworks.h: Rework to handle VxWorks7. Refactor as
common bits + vx6 vs vx7 ones. For the latter, rely essentially on
Linux configuration and adjust CPU to _VX_CPU in CPP_SPEC. Add case
for e6500. Use SUB3TARGET_OVERRIDE_OPTIONS for specifics so we don't
override the Linux's version of the latter for vx7.

libgcc/
* config.host (powerpc*-wrs-vxworks7*): New case.
* configure.ac: Handle powerpc*-*-vxworks7* as powerpc*-*-linux*
for ppc-fp_type.
* configure: Regenerate.

diff --git a/gcc/config.gcc b/gcc/config.gcc
index b79c544c9fa4..2cc121ac3686 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2976,6 +2976,27 @@ powerpc*-*-linux*)
tm_file="rs6000/secureplt.h ${tm_file}"
fi
;;
+powerpc*-wrs-vxworks7r*)
+
+   # Wind River 7 post SR0600 is mostly like Linux so we setup
+   # out config in a very similar fashion and adjust to a few
+   # specificities.
+
+   # The system compiler is configured with secureplt by default.
+   tm_file="${tm_file} rs6000/secureplt.h"
+
+   tm_file="${tm_file} elfos.h gnu-user.h linux.h freebsd-spec.h"
+   tm_file="${tm_file} rs6000/sysv4.h rs6000/biarch64.h rs6000/default64.h 
rs6000/linux64.h"
+   tm_file="${tm_file} vx-common.h vxworks.h rs6000/vxworks.h"
+
+   extra_options="${extra_options} rs6000/sysv4.opt linux.opt 
rs6000/linux64.opt"
+
+   tmake_file="${tmake_file} t-linux rs6000/t-linux64 rs6000/t-fprules 
rs6000/t-ppccomm"
+   tmake_file="${tmake_file} rs6000/t-vxworks"
+
+   tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC"
+   extra_objs="$extra_objs linux.o rs6000-linux.o"
+   ;;
 powerpc-wrs-vxworks*)
tm_file="${tm_file} elfos.h gnu-user.h freebsd-spec.h rs6000/sysv4.h"
tmake_file="${tmake_file} rs6000/t-fprules rs6000/t-ppccomm 
rs6000/t-vxworks"
diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h
index 60e1ef42390f..ed324a9b94b8 100644
--- a/gcc/config/rs6000/vxworks.h
+++ b/gcc/config/rs6000/vxworks.h
@@ -18,10 +18,21 @@ You should have received a copy of the GNU General Public 
License
 along with GCC; see the file COPYING3.  If not see
 .  */
 
-/* Note to future editors: VxWorks is mostly an EABI target.  We do
-   not use rs6000/eabi.h because we would have to override most of
-   it anyway.  However, if you change that file, consider making
-   analogous changes here too.  */
+/* The port comes in two very different flavors at this stage:
+
+   - For 653 (AE) and regular versions prior to VxWorks 7, the port
+ comes with its own set of definitions, matching a system compiler
+ configured this way as well as the corresponding run-time
+ environment.  This is essentially an eabi system, so changes to
+ eabi.h should usually be reflected here.
+
+   - Starting with VxWorks 7 (post 

Re: PING – Re: [Patch] collect-utils.c, lto-wrapper + mkoffload: Improve -save-temps filename

2020-10-20 Thread Richard Biener
On Mon, 19 Oct 2020, Tobias Burnus wrote:

> PING.
> 
> Attached patch has a minor change: the renamed suffixes for nvptx as
> suggested by Tom
> (and for consistency, 'ltrans.args' -> 'ltrans_args').
> 
> OK?

OK.

Thanks,
Richard.

> Tobias
> 
> On 10/13/20 9:37 PM, Tobias Burnus wrote:
> > This patch avoids putting some [...] files to /tmp/cc* when
> > -save-temps has been specified.
> >
> > For my testcase, it now generates:
> > a.lto_wrapper_args
> > a.offload_args
> > a.xnvptx-none.args
> > a.xnvptx-none.gcc_args
> > a.xamdgcn-amdhsa.gcc_args
> > a.xamdgcn-amdhsa.gccnative_args
> > a.xamdgcn-amdhsa.ld_args
> >
> >
> > This patch adds an additional argument to collect-utils.c's
> > collect_execute (and its wrapper fork_execute) which, if not NULL,
> > is used in 'concat (dumppfx, atsuffix, NULL);'.
> >
> > This patch adds a suffix to gcc/config/gcn/mkoffload.c,
> > gcc/config/nvptx/mkoffload.c and gcc/lto-wrapper.c.
> >
> > It does not (yet) add a suffix to gcc/collect2.c and
> > gcc/config/i386/intelmic-mkoffload.c but just passes
> > NULL; for intelmic it is not a work item as it does
> > not use '@' files at all.
> >
> > Hopefully, there is no file which is written twice
> > with the same name (or otherwise overridden) and
> > the files names do make sense.
> >
> > OK?
> >
> > Tobias
> >
> > PS: There is still cceBdzZk.ofldlist (via lto-plugin/lto-plugin.c),
> > and @/tmp/cc* in calls to lto1 and collect2. And collect2.c
> > passes NULL also when use_atfile is true.
> -
> Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
> Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander
> Walter
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend


Re: [PATCH] Libsanitizer: merge from master.

2020-10-20 Thread Martin Liška

On 10/20/20 10:09 AM, Tobias Burnus wrote:

On 10/19/20 9:39 AM, Tobias Burnus wrote:

Filed as https://bugs.llvm.org/show_bug.cgi?id=47896


That issue was fixed. What's the proper way to apply this patch? Simply
committing the attached patch* or is there more required?


We normally do only a cherry-pick. I've just done so and pushed the commit.
Typically you don't want to do a merge and process all steps mentioned in
libsanitizer/HOWTO_MERGE.

Thanks,
Martin



Tobias

*Namely, from LLVM: git diff 51ff04567b2f8d06b2062bd3ed72eab2e93e4466..
compiler-rt/lib/sanitizer_common/; I ignored the changes in
compiler-rt/lib/{gwp_asan,memprof}.

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter




[PATCH] gcov-profile: use static pool for TOPN first

2020-10-20 Thread Martin Liška

Hello.

As noticed in the PR, it's quite tricky to not run malloc (or calloc)
in context of libgcov. I'm suggesting a new approach where we'll first
use the pre-allocated static buffer in hope that malloc function is initialized
and so every call to calloc can happen. That's why I increased number of KVP
to 64 and I believe one reaches malloc pretty soon in an application run.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

PR gcov-profile/97461
* gcov-io.h (GCOV_PREALLOCATED_KVP): Pre-allocate 64
static counters.

libgcc/ChangeLog:

PR gcov-profile/97461
* libgcov.h (gcov_counter_add): Use first static counters
as it should help to have malloc wrappers set up.

gcc/testsuite/ChangeLog:

PR gcov-profile/97461
* gcc.dg/tree-prof/pr97461.c: New test.
---
 gcc/gcov-io.h|  2 +-
 gcc/testsuite/gcc.dg/tree-prof/pr97461.c | 58 
 libgcc/libgcov.h | 24 +++---
 3 files changed, 65 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-prof/pr97461.c

diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 4dba01c78ce..4e95c7c82ee 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -293,7 +293,7 @@ GCOV_COUNTERS
 #define GCOV_TOPN_MAXIMUM_TRACKED_VALUES 32
 
 /* Number of pre-allocated gcov_kvp structures.  */

-#define GCOV_PREALLOCATED_KVP 16
+#define GCOV_PREALLOCATED_KVP 64
 
 /* Convert a counter index to a tag.  */

 #define GCOV_TAG_FOR_COUNTER(COUNT)\
diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr97461.c 
b/gcc/testsuite/gcc.dg/tree-prof/pr97461.c
new file mode 100644
index 000..8d21a3ef421
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-prof/pr97461.c
@@ -0,0 +1,58 @@
+/* PR gcov-profile/97461 */
+/* { dg-options "-O2 -ldl" } */
+
+#define _GNU_SOURCE
+
+#include 
+#include 
+#include 
+
+static int malloc_depth = 0;
+
+static char memory[128* 1024];
+static size_t memory_p = 0;
+
+void f1(void) {}
+void f2(void) {}
+
+typedef void (*fun_t)(void);
+static const fun_t funs[2] = { f1, f2, };
+
+static void * malloc_impl(size_t size) {
+void * r = [memory_p];
+memory_p += size;
+
+// force TOPN profile
+funs[size % 2]();
+return r;
+}
+
+// Override default malloc, check it it get s called recursively
+void * malloc(size_t size) {
+// Must not be called recursively. Malloc implementation does not support 
it.
+if (malloc_depth != 0) __builtin_trap();
+
+++malloc_depth;
+  void * r = malloc_impl(size);
+--malloc_depth;
+return r;
+}
+
+// Called from gcov
+void *calloc(size_t nmemb, size_t size) {
+// Must not be called recursively.  Malloc implementation does not support 
it.
+if (malloc_depth != 0) __builtin_trap();
+
+++malloc_depth;
+  void * r = malloc_impl(size * nmemb);
+  memset(r, 0, size * nmemb);
+--malloc_depth;
+return r;
+}
+
+void free(void *ptr){}
+
+int main() {
+void * p = malloc(8);
+return p != 0 ? 0 : 1;
+}
diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
index 8be5bebcac0..e70cf63b414 100644
--- a/libgcc/libgcov.h
+++ b/libgcc/libgcov.h
@@ -404,22 +404,16 @@ gcov_counter_add (gcov_type *counter, gcov_type value,
 *counter += value;
 }
 
-/* Allocate gcov_kvp from heap.  If we are recursively called, then allocate

-   it from a list of pre-allocated pool.  */
+/* Allocate gcov_kvp from statically pre-allocated pool,
+   or use heap otherwise.  */
 
 static inline struct gcov_kvp *

 allocate_gcov_kvp (void)
 {
   struct gcov_kvp *new_node = NULL;
 
-  static

-#if defined(HAVE_CC_TLS)
-__thread
-#endif
-  volatile unsigned in_recursion ATTRIBUTE_UNUSED = 0;
-
 #if !defined(IN_GCOV_TOOL) && !defined(L_gcov_merge_topn)
-  if (__builtin_expect (in_recursion, 0))
+  if (__gcov_kvp_pool_index < GCOV_PREALLOCATED_KVP)
 {
   unsigned index;
 #if GCOV_SUPPORTS_ATOMIC
@@ -430,17 +424,11 @@ __thread
 #endif
   if (index < GCOV_PREALLOCATED_KVP)
new_node = &__gcov_kvp_pool[index];
-  else
-   /* Do not crash in the situation.  */
-   return NULL;
 }
-  else
 #endif
-{
-  in_recursion = 1;
-  new_node = (struct gcov_kvp *)xcalloc (1, sizeof (struct gcov_kvp));
-  in_recursion = 0;
-}
+
+  if (new_node == NULL)
+new_node = (struct gcov_kvp *)xcalloc (1, sizeof (struct gcov_kvp));
 
   return new_node;

 }
--
2.28.0



Re: [PATCH] Handle right shifts by zero in range-ops.

2020-10-20 Thread Aldy Hernandez via Gcc-patches




On 10/19/20 4:22 PM, Jakub Jelinek wrote:

On Mon, Oct 19, 2020 at 10:19:48AM -0400, Andrew MacLeod via Gcc-patches wrote:

On 10/19/20 5:38 AM, Aldy Hernandez wrote:

If the shift amount in operator_lshift::op1_range was zero, an invalid range
of [1, 0] was being created.


Should we do the same thing with rshift::op1_range?     ie, return the LHS
if the shift is 0 instead of trying to figure it out


Shift by 0 is a noop, so yes, for all of left/right shifts and rotates
the result range should be the range of the first operand.


Sounds good.

Tested on x86-64 Linux.

Pushed.

Special case shifting by zero in operator_rshift::op1_range.

gcc/ChangeLog:

* range-op.cc (operator_rshift::op1_range): Special case
shifting by zero.

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 0efa00186e8..40d45b1ce47 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1633,6 +1633,11 @@ operator_rshift::op1_range (irange ,
wi::uhwi (prec, TYPE_PRECISION (TREE_TYPE (shift))),
UNSIGNED))
return false;
+  if (wi::to_wide (shift) == 0)
+   {
+ r = lhs;
+ return true;
+   }

   // Folding the original operation may discard some impossible
   // ranges from the LHS.



Re: [Ada,FYI] revamp ada.numerics.aux

2020-10-20 Thread Rainer Orth
Hi Alexandre,

> On Oct 19, 2020, Andreas Schwab  wrote:
>
>> -nostdinc a-nallfl.ads -o a-nallfl.o
>> a-nallfl.ads:48:13: warning: intrinsic binding type mismatch on return value
>> a-nallfl.ads:48:13: warning: intrinsic binding type mismatch on argument 1
>> a-nallfl.ads:48:13: warning: profile of "Sin" doesn't match the builtin
>> it binds
>
> Thanks for the report.  Ada's Standard.Long_Long_Float is mapped to C
> double rather than long double on this target.
>
> Here's a workaround, for aarch64-* and ppc*-linux-gnu, where I've
> observed the mismatch so far.

your patch similarly broke sparc*-sun-solaris* bootstrap.  The attached
snippet on top of this workaround fixes that.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl
--- a/gcc/ada/Makefile.rtl
+++ b/gcc/ada/Makefile.rtl
@@ -1466,6 +1468,7 @@ endif
 ifeq ($(strip $(filter-out sparc% sun solaris%,$(target_cpu) $(target_vendor) $(target_os))),)
   LIBGNAT_TARGET_PAIRS = \
   a-intnam.ads

Re: [PATCH] Libsanitizer: merge from master.

2020-10-20 Thread Tobias Burnus

On 10/19/20 9:39 AM, Tobias Burnus wrote:

Filed as https://bugs.llvm.org/show_bug.cgi?id=47896


That issue was fixed. What's the proper way to apply this patch? Simply
committing the attached patch* or is there more required?

Tobias

*Namely, from LLVM: git diff 51ff04567b2f8d06b2062bd3ed72eab2e93e4466..
compiler-rt/lib/sanitizer_common/; I ignored the changes in
compiler-rt/lib/{gwp_asan,memprof}.

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
libsanitizer: merge from master


 libsanitizer/MERGE| 2 +-
 libsanitizer/sanitizer_common/sanitizer_allocator_report.cpp  | 8 
 libsanitizer/sanitizer_common/sanitizer_allocator_report.h| 1 +
 libsanitizer/sanitizer_common/sanitizer_flags.inc | 5 +++--
 libsanitizer/sanitizer_common/sanitizer_internal_defs.h   | 3 +++
 libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp | 7 ++-
 .../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp | 4 
 libsanitizer/sanitizer_common/sanitizer_tls_get_addr.cpp  | 7 ++-
 8 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/libsanitizer/MERGE b/libsanitizer/MERGE
index d2a25927007..f9657fc6728 100644
--- a/libsanitizer/MERGE
+++ b/libsanitizer/MERGE
@@ -1,4 +1,4 @@
-51ff04567b2f8d06b2062bd3ed72eab2e93e4466
+f97ca48b1cbbf5da065e94271cb3af4f1c907dd4
 
 The first line of this file holds the git revision number of the
 last merge done from the master library sources.
diff --git a/libsanitizer/sanitizer_common/sanitizer_allocator_report.cpp b/libsanitizer/sanitizer_common/sanitizer_allocator_report.cpp
index d74e08010d5..1c6520819ef 100644
--- a/libsanitizer/sanitizer_common/sanitizer_allocator_report.cpp
+++ b/libsanitizer/sanitizer_common/sanitizer_allocator_report.cpp
@@ -134,4 +134,12 @@ void NORETURN ReportOutOfMemory(uptr requested_size, const StackTrace *stack) {
   Die();
 }
 
+void NORETURN ReportRssLimitExceeded(const StackTrace *stack) {
+  {
+ScopedAllocatorErrorReport report("rss-limit-exceeded", stack);
+Report("ERROR: %s: allocator exceeded the RSS limit\n", SanitizerToolName);
+  }
+  Die();
+}
+
 }  // namespace __sanitizer
diff --git a/libsanitizer/sanitizer_common/sanitizer_allocator_report.h b/libsanitizer/sanitizer_common/sanitizer_allocator_report.h
index 0653c365c1c..6e4e6b13549 100644
--- a/libsanitizer/sanitizer_common/sanitizer_allocator_report.h
+++ b/libsanitizer/sanitizer_common/sanitizer_allocator_report.h
@@ -33,6 +33,7 @@ void NORETURN ReportInvalidPosixMemalignAlignment(uptr alignment,
 void NORETURN ReportAllocationSizeTooBig(uptr user_size, uptr max_size,
  const StackTrace *stack);
 void NORETURN ReportOutOfMemory(uptr requested_size, const StackTrace *stack);
+void NORETURN ReportRssLimitExceeded(const StackTrace *stack);
 
 }  // namespace __sanitizer
 
diff --git a/libsanitizer/sanitizer_common/sanitizer_flags.inc b/libsanitizer/sanitizer_common/sanitizer_flags.inc
index d1412478fd2..d8e809b0609 100644
--- a/libsanitizer/sanitizer_common/sanitizer_flags.inc
+++ b/libsanitizer/sanitizer_common/sanitizer_flags.inc
@@ -82,8 +82,9 @@ COMMON_FLAG(bool, print_summary, true,
 "If false, disable printing error summaries in addition to error "
 "reports.")
 COMMON_FLAG(int, print_module_map, 0,
-"OS X only (0 - don't print, 1 - print only once before process "
-"exits, 2 - print after each report).")
+"Print the process module map where supported (0 - don't print, "
+"1 - print only once before process exits, 2 - print after each "
+"report).")
 COMMON_FLAG(bool, check_printf, true, "Check printf arguments.")
 #define COMMON_FLAG_HANDLE_SIGNAL_HELP(signal) \
 "Controls custom tool's " #signal " handler (0 - do not registers the " \
diff --git a/libsanitizer/sanitizer_common/sanitizer_internal_defs.h b/libsanitizer/sanitizer_common/sanitizer_internal_defs.h
index a6c55148705..8b34e54137d 100644
--- a/libsanitizer/sanitizer_common/sanitizer_internal_defs.h
+++ b/libsanitizer/sanitizer_common/sanitizer_internal_defs.h
@@ -448,5 +448,8 @@ using namespace __sanitizer;
 namespace __hwasan {
 using namespace __sanitizer;
 }
+namespace __memprof {
+using namespace __sanitizer;
+}
 
 #endif  // SANITIZER_DEFS_H
diff --git a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp
index af077439478..2bd5564ae05 100644
--- a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp
+++ b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -385,12 +385,9 @@ uptr ThreadSelf() {
   descr_addr = reinterpret_cast(__builtin_thread_pointer()) -
   ThreadDescriptorSize();
 #elif SANITIZER_RISCV64
-  uptr 

Re: [PATCH] [PR target/97194] [AVX2] Support variable index vec_set.

2020-10-20 Thread Richard Biener via Gcc-patches
On Tue, Oct 20, 2020 at 4:35 AM Hongtao Liu  wrote:
>
> On Mon, Oct 19, 2020 at 5:55 PM Richard Biener
>  wrote:
> >
> > On Mon, Oct 19, 2020 at 11:37 AM Hongtao Liu  wrote:
> > >
> > > On Mon, Oct 19, 2020 at 5:07 PM Richard Biener
> > >  wrote:
> > > >
> > > > On Mon, Oct 19, 2020 at 10:21 AM Hongtao Liu  wrote:
> > > > >
> > > > > Hi:
> > > > >   It's implemented as below:
> > > > > V setg (V v, int idx, T val)
> > > > >
> > > > > {
> > > > >   V idxv = (V){idx, idx, idx, idx, idx, idx, idx, idx};
> > > > >   V valv = (V){val, val, val, val, val, val, val, val};
> > > > >   V mask = ((V){0, 1, 2, 3, 4, 5, 6, 7} == idxv);
> > > > >   v = (v & ~mask) | (valv & mask);
> > > > >   return v;
> > > > > }
> > > > >
> > > > > Bootstrap is fine, regression test for i386/x86-64 backend is ok.
> > > > > Ok for trunk?
> > > >
> > > > Hmm, I guess you're trying to keep the code for !AVX512BW simple
> > > > but isn't just splitting the compare into
> > > >
> > > >  clow = {0, 1, 2, 3 ... } == idxv
> > > >  chigh = {16, 17, 18, ... } == idxv;
> > > >  cmp = {clow, chigh}
> > > >
> > >
> > > We also don't have 512-bits byte/word blend instructions without
> > > TARGET_AVX512BW, so how to use 512-bits cmp?
> >
> > Oh, I see.  Guess two back-to-back vpternlog could emulate
>
> Yes, we can have something like vpternlogd %zmm0, %zmm1, %zmm2, 0xD8,
> but since we don't have 512-bits bytes/word broadcast instruction,
> It would need 2 broadcast and 1 vec_concat to get 1 512-bits vector.
> it wouldn't save many instructions compared to my version(as below).
>
> ---
> leal-16(%rsi), %eax
> vmovd   %edi, %xmm2
> vmovdqa .LC0(%rip), %ymm4
> vextracti64x4   $0x1, %zmm0, %ymm3
> vmovd   %eax, %xmm1
> vpbroadcastw%xmm2, %ymm2
> vpbroadcastw%xmm1, %ymm1
> vpcmpeqw%ymm4, %ymm1, %ymm1
> vpblendvb   %ymm1, %ymm2, %ymm3, %ymm3
> vmovd   %esi, %xmm1
> vpbroadcastw%xmm1, %ymm1
> vpcmpeqw%ymm4, %ymm1, %ymm1
> vpblendvb   %ymm1, %ymm2, %ymm0, %ymm0
> vinserti64x4$0x1, %ymm3, %zmm0, %zmm0
> ---
>
> > the blend?  Not sure if important - I recall only knl didn't have bw?
> >
>
> Yes, after(including) SKX, all avx512 targets will support AVX512BW.
> And i don't think performance for V32HI/V64QI without AVX512BW is important.

True.

I have no further comments on the patch then - it still needs i386 maintainer
approval though.

Thanks,
Richard.

>
> > > cut from i386-expand.c:
> > > in ix86_expand_sse_movcc
> > >  3682case E_V64QImode:
> > >  3683  gen = gen_avx512bw_blendmv64qi; ---> TARGET_AVX512BW needed
> > >  3684  break;
> > >  3685case E_V32HImode:
> > >  3686  gen = gen_avx512bw_blendmv32hi; --> TARGET_AVX512BW needed
> > >  3687  break;
> > >  3688case E_V16SImode:
> > >  3689  gen = gen_avx512f_blendmv16si;
> > >  3690  break;
> > >  3691case E_V8DImode:
> > >  3692  gen = gen_avx512f_blendmv8di;
> > >  3693  break;
> > >  3694case E_V8DFmode:
> > >
> > > > faster, smaller and eventually even easier during expansion?
> > > >
> > > > +  gcc_assert (ix86_expand_vector_init_duplicate (false, mode, valv, 
> > > > val));
> > > > +  gcc_assert (ix86_expand_vector_init_duplicate (false, cmp_mode,
> > > > idxv, idx_tmp));
> > > >
> > > > side-effects in gcc_assert is considered bad style, use
> > > >
> > > >   ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
> > > >   gcc_assert (ok);
> > > >
> > > > +  vec[5] = constv;
> > > > +  ix86_expand_int_vcond (vec);
> > > >
> > > > this also returns a bool you probably should assert true.
> > > >
> > >
> > > Yes, will change.
> > >
> > > > Otherwise thanks for tackling this.
> > > >
> > > > Richard.
> > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > > PR target/97194
> > > > > * config/i386/i386-expand.c (ix86_expand_vector_set_var): New 
> > > > > function.
> > > > > * config/i386/i386-protos.h (ix86_expand_vector_set_var): New 
> > > > > Decl.
> > > > > * config/i386/predicates.md (vec_setm_operand): New predicate,
> > > > > true for const_int_operand or register_operand under 
> > > > > TARGET_AVX2.
> > > > > * config/i386/sse.md (vec_set): Support both constant
> > > > > and variable index vec_set.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > > * gcc.target/i386/avx2-vec-set-1.c: New test.
> > > > > * gcc.target/i386/avx2-vec-set-2.c: New test.
> > > > > * gcc.target/i386/avx512bw-vec-set-1.c: New test.
> > > > > * gcc.target/i386/avx512bw-vec-set-2.c: New test.
> > > > > * gcc.target/i386/avx512f-vec-set-2.c: New test.
> > > > > * gcc.target/i386/avx512vl-vec-set-2.c: New test.
> > > > >
> > > > > --
> > > > > BR,
> > > > > Hongtao
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
>
>
>
> --
> BR,
> Hongtao


[PATCH] c++: Fix up constexpr evaluation of arguments passed by invisible reference [PR97388]

2020-10-20 Thread Jakub Jelinek via Gcc-patches
Hi!

For arguments passed by invisible reference, in the IL until genericization
we have the source types on the callee side and while on the caller side
we already pass references to the actual argument slot in the caller, we
undo that in cxx_bind_parameters_in_call's
  if (TREE_ADDRESSABLE (type))
/* Undo convert_for_arg_passing work here.  */
x = convert_from_reference (x);
This works fine most of the time, except when the type also has constexpr
destructor; in that case the destructor is invoked in the caller and thus
the unsharing we do to make sure that the callee doesn't modify caller's
values is in that case undesirable, it prevents the changes done in the
callee propagating to the caller which should see them for the constexpr
dtor evaluation.

The following patch fixes that.  While it could be perhaps done for all
TREE_ADDRESSABLE types, I don't see the need to change the behavior
if there is no constexpr non-trivial dtor.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-10-20  Jakub Jelinek  

PR c++/97388
* constexpr.c (cxx_eval_call_expression): Don't unshare arguments
if they have TREE_ADDRESSABLE type with non-trivial dtor.

* g++.dg/cpp2a/constexpr-dtor5.C: New test.
* g++.dg/cpp2a/constexpr-dtor6.C: New test.
* g++.dg/cpp2a/constexpr-dtor7.C: New test.

--- gcc/cp/constexpr.c.jj   2020-10-19 09:32:28.0 +0200
+++ gcc/cp/constexpr.c  2020-10-19 15:34:17.707431525 +0200
@@ -2579,6 +2579,7 @@ cxx_eval_call_expression (const constexp
  for (int i = 0; i < TREE_VEC_LENGTH (bound); ++i)
{
  tree arg = TREE_VEC_ELT (bound, i);
+ tree orig_arg = arg;
  if (entry)
{
  /* Unshare args going into the hash table to separate them
@@ -2587,13 +2588,24 @@ cxx_eval_call_expression (const constexp
  arg = unshare_expr_without_location (arg);
  TREE_VEC_ELT (bound, i) = arg;
}
- /* Don't share a CONSTRUCTOR that might be changed.  This is not
-redundant with the unshare just above; we also don't want to
-change the argument values in the hash table.  XXX Could we
-unshare lazily in cxx_eval_store_expression?  */
- arg = unshare_constructor (arg);
- if (TREE_CODE (arg) == CONSTRUCTOR)
-   vec_safe_push (ctors, arg);
+ /* For arguments passed by invisible reference, if they have
+non-trivial dtors, use the original argument without any
+unsharing, because changes in the function call should be
+reflected in the caller.  */
+ if (TREE_ADDRESSABLE (TREE_TYPE (arg))
+ && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (arg)))
+   arg = orig_arg;
+ else
+   {
+ /* Don't share a CONSTRUCTOR that might be changed.  This is
+not redundant with the unshare just above; we also don't
+want to change the argument values in the hash table.
+XXX Could we unshare lazily in
+cxx_eval_store_expression?  */
+ arg = unshare_constructor (arg);
+ if (TREE_CODE (arg) == CONSTRUCTOR)
+   vec_safe_push (ctors, arg);
+   }
  ctx->global->values.put (remapped, arg);
  remapped = DECL_CHAIN (remapped);
}
--- gcc/testsuite/g++.dg/cpp2a/constexpr-dtor5.C.jj 2020-10-19 
15:48:19.460200816 +0200
+++ gcc/testsuite/g++.dg/cpp2a/constexpr-dtor5.C2020-10-19 
15:47:47.715662068 +0200
@@ -0,0 +1,26 @@
+// PR c++/97388
+// { dg-do compile { target c++20 } }
+
+struct S {
+  int m;
+  constexpr S () : m(1) {}
+  constexpr ~S () noexcept (false) { if (m == 1) { throw; } }
+};
+
+constexpr bool
+foo (S v)
+{
+  v.m = 2;
+  return true;
+}
+
+constexpr bool
+bar ()
+{
+  return foo (S ());
+}
+
+static_assert (foo (S ()));
+static_assert (bar ());
+constexpr bool x = foo (S ());
+constexpr bool y = bar ();
--- gcc/testsuite/g++.dg/cpp2a/constexpr-dtor6.C.jj  2020-10-19 15:48:22.475157012 +0200
+++ gcc/testsuite/g++.dg/cpp2a/constexpr-dtor6.C  2020-10-19 15:47:56.204538722 +0200
@@ -0,0 +1,27 @@
+// PR c++/97388
+// { dg-do compile { target c++20 } }
+
+struct S {
+  int *s;
+  constexpr S () : s(new int ()) {}
+  constexpr S (S &&x) noexcept : s(x.s) { x.s = nullptr; }
+  constexpr ~S () noexcept { delete s; }
+};
+
+constexpr bool
+foo (S v)
+{
+  auto x = static_cast<S &&> (v);
+  return true;
+}
+
+constexpr bool
+bar ()
+{
+  return foo (S ());
+}
+
+static_assert (foo (S ()));
+static_assert (bar ());
+constexpr bool x = foo (S ());
+constexpr bool y = bar ();
--- gcc/testsuite/g++.dg/cpp2a/constexpr-dtor7.C.jj 2020-10-19 
15:48:25.316115731 +0200
+++ 

[Ada] Refine type for type constraining routines

2020-10-20 Thread Pierre-Marie de Rodat
Parameters of Constrain_Decimal, Constrain_Enumeration, etc. routines
are type entities, which is now reflected in the parameter type. Cleanup
only; behavior is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch3.adb (Constrain_Decimal, Constrain_Enumeration,
Constrain_Float, Constrain_Integer, Constrain_Ordinary_Fixed):
Refine parameter type from Node_Id to Entity_Id.

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -410,7 +410,7 @@ package body Sem_Ch3 is
--  When constraining a protected type or task type with discriminants,
--  constrain the corresponding record with the same discriminant values.
 
-   procedure Constrain_Decimal (Def_Id : Node_Id; S : Node_Id);
+   procedure Constrain_Decimal (Def_Id : Entity_Id; S : Node_Id);
--  Constrain a decimal fixed point type with a digits constraint and/or a
--  range constraint, and build E_Decimal_Fixed_Point_Subtype entity.
 
@@ -426,11 +426,11 @@ package body Sem_Ch3 is
--  Constrain_Concurrent. See Build_Discriminated_Subtype for an explanation
--  of For_Access.
 
-   procedure Constrain_Enumeration (Def_Id : Node_Id; S : Node_Id);
+   procedure Constrain_Enumeration (Def_Id : Entity_Id; S : Node_Id);
--  Constrain an enumeration type with a range constraint. This is identical
--  to Constrain_Integer, but for the Ekind of the resulting subtype.
 
-   procedure Constrain_Float (Def_Id : Node_Id; S : Node_Id);
+   procedure Constrain_Float (Def_Id : Entity_Id; S : Node_Id);
--  Constrain a floating point type with either a digits constraint
--  and/or a range constraint, building a E_Floating_Point_Subtype.
 
@@ -447,10 +447,10 @@ package body Sem_Ch3 is
--  array. The Related_Id and Suffix parameters are used to build the
--  associated Implicit type name.
 
-   procedure Constrain_Integer (Def_Id : Node_Id; S : Node_Id);
+   procedure Constrain_Integer (Def_Id : Entity_Id; S : Node_Id);
--  Build subtype of a signed or modular integer type
 
-   procedure Constrain_Ordinary_Fixed (Def_Id : Node_Id; S : Node_Id);
+   procedure Constrain_Ordinary_Fixed (Def_Id : Entity_Id; S : Node_Id);
--  Constrain an ordinary fixed point type with a range constraint, and
--  build an E_Ordinary_Fixed_Point_Subtype entity.
 
@@ -13800,7 +13800,7 @@ package body Sem_Ch3 is
-- Constrain_Decimal --
---
 
-   procedure Constrain_Decimal (Def_Id : Node_Id; S : Node_Id) is
+   procedure Constrain_Decimal (Def_Id : Entity_Id; S : Node_Id) is
   T   : constant Entity_Id  := Entity (Subtype_Mark (S));
   C   : constant Node_Id:= Constraint (S);
   Loc : constant Source_Ptr := Sloc (C);
@@ -14017,7 +14017,7 @@ package body Sem_Ch3 is
-- Constrain_Enumeration --
---
 
-   procedure Constrain_Enumeration (Def_Id : Node_Id; S : Node_Id) is
+   procedure Constrain_Enumeration (Def_Id : Entity_Id; S : Node_Id) is
   T : constant Entity_Id := Entity (Subtype_Mark (S));
   C : constant Node_Id   := Constraint (S);
 
@@ -14040,7 +14040,7 @@ package body Sem_Ch3 is
-- Constrain_Float --
--
 
-   procedure Constrain_Float (Def_Id : Node_Id; S : Node_Id) is
+   procedure Constrain_Float (Def_Id : Entity_Id; S : Node_Id) is
   T: constant Entity_Id := Entity (Subtype_Mark (S));
   C: Node_Id;
   D: Node_Id;
@@ -14249,7 +14249,7 @@ package body Sem_Ch3 is
-- Constrain_Integer --
---
 
-   procedure Constrain_Integer (Def_Id : Node_Id; S : Node_Id) is
+   procedure Constrain_Integer (Def_Id : Entity_Id; S : Node_Id) is
   T : constant Entity_Id := Entity (Subtype_Mark (S));
   C : constant Node_Id   := Constraint (S);
 
@@ -14272,7 +14272,7 @@ package body Sem_Ch3 is
-- Constrain_Ordinary_Fixed --
--
 
-   procedure Constrain_Ordinary_Fixed (Def_Id : Node_Id; S : Node_Id) is
+   procedure Constrain_Ordinary_Fixed (Def_Id : Entity_Id; S : Node_Id) is
   T: constant Entity_Id := Entity (Subtype_Mark (S));
   C: Node_Id;
   D: Node_Id;




[Ada] Add support for Unreferenced aspect on formal parameters

2020-10-20 Thread Pierre-Marie de Rodat
This is useful in particular to be able to mark a formal parameter of an
expression function as unreferenced:

   function F (Param : Integer with Unreferenced) return Integer is (1);

We also add the infrastructure to support other aspects on formal
parameters in the future.

Put this feature under -gnatX for now since the Ada RM doesn't support
this syntax (yet).

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* aspects.adb (Has_Aspect_Specifications_Flag): Add
N_Parameter_Specification.
* par-ch13.adb (Aspect_Specifications_Present): Also handle case
of an unknown aspect on the last formal parameter (terminated by
a Tok_Right_Paren). Minor reformatting.
* par-ch6.adb (P_Formal_Part): Scan aspects on formal
parameters.
* par.adb: Fix typos.
* sem_ch6.adb (Process_Formals): Add processing of aspects and
in particular Unreferenced aspect for now.
* sinfo.ads: Allow ASPECT_SPECIFICATIONS on a
PARAMETER_SPECIFICATION.
* doc/gnat_rm/implementation_defined_aspects.rst
(Aspect Unreferenced): Update documentation.
* gnat_rm.texi: Regenerate.

diff --git a/gcc/ada/aspects.adb b/gcc/ada/aspects.adb
--- a/gcc/ada/aspects.adb
+++ b/gcc/ada/aspects.adb
@@ -455,6 +455,7 @@ package body Aspects is
   N_Package_Instantiation  => True,
   N_Package_Specification  => True,
   N_Package_Renaming_Declaration   => True,
+  N_Parameter_Specification=> True,
   N_Private_Extension_Declaration  => True,
   N_Private_Type_Declaration   => True,
   N_Procedure_Instantiation=> True,


diff --git a/gcc/ada/doc/gnat_rm/implementation_defined_aspects.rst b/gcc/ada/doc/gnat_rm/implementation_defined_aspects.rst
--- a/gcc/ada/doc/gnat_rm/implementation_defined_aspects.rst
+++ b/gcc/ada/doc/gnat_rm/implementation_defined_aspects.rst
@@ -564,9 +564,11 @@ Aspect Unreferenced
 ===
 .. index:: Unreferenced
 
-This boolean aspect is equivalent to :ref:`pragma Unreferenced`. Note that
-in the case of formal parameters, it is not permitted to have aspects for
-a formal parameter, so in this case the pragma form must be used.
+This boolean aspect is equivalent to :ref:`pragma Unreferenced`.
+
+When using the ``-gnatX`` switch, this aspect is also supported on formal
+parameters, which is in particular the only form possible for expression
+functions.
 
 Aspect Unreferenced_Objects
 ===


diff --git a/gcc/ada/gnat_rm.texi b/gcc/ada/gnat_rm.texi
--- a/gcc/ada/gnat_rm.texi
+++ b/gcc/ada/gnat_rm.texi
@@ -9923,9 +9923,11 @@ This boolean aspect is equivalent to @ref{10b,,pragma Unmodified}.
 
 @geindex Unreferenced
 
-This boolean aspect is equivalent to @ref{10c,,pragma Unreferenced}. Note that
-in the case of formal parameters, it is not permitted to have aspects for
-a formal parameter, so in this case the pragma form must be used.
+This boolean aspect is equivalent to @ref{10c,,pragma Unreferenced}.
+
+When using the @code{-gnatX} switch, this aspect is also supported on formal
+parameters, which is in particular the only form possible for expression
+functions.
 
 @node Aspect Unreferenced_Objects,Aspect Value_Size,Aspect Unreferenced,Implementation Defined Aspects
 @anchor{gnat_rm/implementation_defined_aspects aspect-unreferenced-objects}@anchor{15e}


diff --git a/gcc/ada/par-ch13.adb b/gcc/ada/par-ch13.adb
--- a/gcc/ada/par-ch13.adb
+++ b/gcc/ada/par-ch13.adb
@@ -153,7 +153,8 @@ package body Ch13 is
 Result := True;
  else
 Scan; -- past identifier
-Result := Token in Tok_Arrow | Tok_Comma | Tok_Is | Tok_Semicolon;
+Result := Token in
+  Tok_Arrow | Tok_Comma | Tok_Is | Tok_Semicolon | Tok_Right_Paren;
  end if;
 
   --  If earlier than Ada 2012, check for valid aspect identifier (possibly
@@ -956,7 +957,7 @@ package body Ch13 is
  --  If Decl is Error, we ignore the aspects, and issue a message
 
  elsif Decl = Error
-or else not Permits_Aspect_Specifications (Decl)
+   or else not Permits_Aspect_Specifications (Decl)
  then
 Error_Msg ("aspect specifications not allowed here", Ptr);
 


diff --git a/gcc/ada/par-ch6.adb b/gcc/ada/par-ch6.adb
--- a/gcc/ada/par-ch6.adb
+++ b/gcc/ada/par-ch6.adb
@@ -1627,6 +1627,25 @@ package body Ch6 is
 Scan; -- past right paren
 exit Specification_Loop;
 
+ --  Support for aspects on formal parameters is a GNAT extension for
+ --  the time being.
+
+ elsif Token = Tok_With then
+if not Extensions_Allowed then
+   Error_Msg_SP ("aspect on formal parameter requires -gnatX");
+end if;
+
+P_Aspect_Specifications (Specification_Node, False);
+
+if Token = Tok_Right_Paren then
+ 

[Ada] AI12-0339: Empty function for Aggregate aspect of Ada containers

2020-10-20 Thread Pierre-Marie de Rodat
The specification of the aspect Aggregate includes a primitive operation
Empty that returns the initial value to be used when building an
aggregate for the corresponding composite type. For bounded containers,
the function Empty includes an explicit parameter that corresponds to
the discriminant of the object being built.

This patch also implements the uniform resolution rule for aggregates,
specified in AI12-0307, so that the ambiguities that arise as a consequence
of the new resolution rule are properly diagnosed by GNAT.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sinfo.ads, sinfo.adb: The flag Box_Present can appear in
Iterated_Element_Association nodes.
* sem_aggr.adb (Resolve_Aggregate): Call
Resolve_Container_Aggregate when type of context has
corresponding aspect.
* sem_type.adb (Covers): In Ada_2020 an aggregate is compatible
with a type that carries the corresponding aspect.
* exp_ch3.adb (Make_Controlling_Function_Wrappers): Do not
create declarations and bodies for inherited primitive functions
of null extensions that dispatch on result, when current scope
includes an immediately visible non-overloadable homonym of the
function.
* libgnat/a-cborse.adb, libgnat/a-cborse.ads,
libgnat/a-cbhase.ads, libgnat/a-cbhase.adb,
libgnat/a-cborma.adb, libgnat/a-cborma.ads,
libgnat/a-cbhama.adb, libgnat/a-cbhama.ads,
libgnat/a-cbdlli.adb, libgnat/a-cbdlli.ads,
libgnat/a-convec.ads, libgnat/a-ciorse.ads,
libgnat/a-cihase.ads, libgnat/a-cihase.adb,
libgnat/a-ciorma.ads, libgnat/a-cihama.ads,
libgnat/a-cihama.adb, libgnat/a-cidlli.ads,
libgnat/a-cidlli.adb, libgnat/a-coinve.adb,
libgnat/a-cobove.adb, libgnat/a-cobove.ads,
libgnat/a-convec.adb, libgnat/a-coinve.ads,
libgnat/a-coorse.ads, libgnat/a-cohase.adb,
libgnat/a-cohase.ads, libgnat/a-coorma.ads,
libgnat/a-cohama.adb, libgnat/a-cohama.ads,
libgnat/a-cdlili.ads: Add primitive function Empty for use in
aspect Aggregate, and add corresponding body or expression
function.

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -9471,6 +9471,31 @@ package body Exp_Ch3 is
  (Is_Null_Extension (Etype (Subp))
and then Etype (Alias (Subp)) /= Etype (Subp))
  then
+--  If there is a non-overloadable homonym in the current
+--  scope, the implicit declaration remains invisible.
+--  We check the current entity with the same name, or its
+--  homonym in case the derivation takes place after the
+--  hiding object declaration.
+
+if Present (Current_Entity (Subp)) then
+   declare
+  Curr : constant Entity_Id := Current_Entity (Subp);
+  Prev : constant Entity_Id := Homonym (Curr);
+   begin
+  if (Comes_From_Source (Curr)
+and then Scope (Curr) = Current_Scope
+and then not Is_Overloadable (Curr))
+  or else
+(Present (Prev)
+  and then Comes_From_Source (Prev)
+  and then Scope (Prev) = Current_Scope
+  and then not Is_Overloadable (Prev))
+  then
+ goto Next_Prim;
+  end if;
+   end;
+end if;
+
 Formal_List := No_List;
 Formal := First_Formal (Subp);
 


diff --git a/gcc/ada/libgnat/a-cbdlli.adb b/gcc/ada/libgnat/a-cbdlli.adb
--- a/gcc/ada/libgnat/a-cbdlli.adb
+++ b/gcc/ada/libgnat/a-cbdlli.adb
@@ -518,6 +518,17 @@ is
   return Position.Container.Nodes (Position.Node).Element;
end Element;
 
+   ---
+   -- Empty --
+   ---
+
+   function Empty (Capacity : Count_Type := 10) return List is
+   begin
+  return Result : List (Capacity) do
+ null;
+  end return;
+   end Empty;
+
--
-- Finalize --
--


diff --git a/gcc/ada/libgnat/a-cbdlli.ads b/gcc/ada/libgnat/a-cbdlli.ads
--- a/gcc/ada/libgnat/a-cbdlli.ads
+++ b/gcc/ada/libgnat/a-cbdlli.ads
@@ -56,7 +56,7 @@ is
   Variable_Indexing => Reference,
   Default_Iterator  => Iterate,
   Iterator_Element  => Element_Type,
-  Aggregate => (Empty=> Empty_List,
+  Aggregate => (Empty=> Empty,
 Add_Unnamed  => Append_One);
pragma Preelaborable_Initialization (List);
 
@@ -67,6 +67,8 @@ is
 
No_Element : constant Cursor;
 
+   function Empty (Capacity : Count_Type := 10) return List;
+
function Has_Element (Position : Cursor) return Boolean;
 
package List_Iterator_Interfaces is new


diff --git a/gcc/ada/libgnat/a-cbhama.adb b/gcc/ada/libgnat/a-cbhama.adb
--- 

[Ada] Prevent crashes when pretty-printing freeze nodes from gdb

2020-10-20 Thread Pierre-Marie de Rodat
With recently enabled data validity checking in development builds, the
routine "ps", when called from gdb to examine the AST, might crash on
invalid data.  This is because its Sprint_Node_Actual callee reads the
Dump_Freeze_Null global variable when processing N_Freeze_Entity nodes,
and this global variable is not initialized by default.

Found while working on inheritance of the Default_Initial_Condition
pragma.  Compilation is not affected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sprint.adb (po): Set Dump_Freeze_Null to False; align colons.
(ps): Likewise.

diff --git a/gcc/ada/sprint.adb b/gcc/ada/sprint.adb
--- a/gcc/ada/sprint.adb
+++ b/gcc/ada/sprint.adb
@@ -441,7 +441,8 @@ package body Sprint is
procedure po (Arg : Union_Id) is
begin
   Dump_Generated_Only := False;
-  Dump_Original_Only := True;
+  Dump_Original_Only  := True;
+  Dump_Freeze_Null:= False;
   Current_Source_File := No_Source_File;
 
   if Arg in List_Range then
@@ -473,7 +474,8 @@ package body Sprint is
procedure ps (Arg : Union_Id) is
begin
   Dump_Generated_Only := False;
-  Dump_Original_Only := False;
+  Dump_Original_Only  := False;
+  Dump_Freeze_Null:= False;
   Current_Source_File := No_Source_File;
 
   if Arg in List_Range then




[Ada] Implement AI12-0280 Making 'Old more flexible

2020-10-20 Thread Pierre-Marie de Rodat
AI12-0280 relaxes the "potentially unevaluated" restrictions on the use
of the Old attribute in postconditions. A complex attribute prefix is
allowed in cases that were previously forbidden if the conditions
governing whether the attribute value will be needed during evaluation
of the postcondition are "known on entry". In this newly-allowed case,
evaluation of the 'Old attribute prefix upon entry to the subprogram is
performed conditionally. It turns out that this is similar to something
that was already part of the implementation of the Contract_Cases
attribute, and that bugs having to do with 'Old attribute references in
Contract_Cases consequences are fixed by sharing code with this new
implementation via a new package Sem_Util.Old_Attr_Util.Indirect_Temps.
In particular, such an attribute reference would previously generate a
malformed tree if the subtype of the prefix was indefinite; for example,
we might previously have generated an illegal declaration like
"Temp123 : String;" .

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.ads: Declare a new package, Old_Attr_Util, which in
turn declares two more packages, Conditional_Evaluation and
Indirect_Temps. Conditional_Evaluation provides a predicate for
deciding whether a given 'Old attribute reference is eligible
for conditional evaluation and, in the case where it is
eligible, a function that constructs the Boolean-valued
condition that is to be evaluated at run time in deciding
whether to evaluate the attribute prefix.  Indirect_Temps
provides support for declaring a temporary which is only
initialized conditionally; more specifically, an access type and
a variable of that type are declared (unconditionally) and then
the variable is (conditionally) initialized with an allocator.
The existence of the access type and the pointer variable is
hidden from clients, except that a predicate,
Is_Access_Type_For_Indirect_Temp, is provided for identifying
such access types. This is needed because we want such an access
type to be treated like a "normal" access type (specifically
with respect to finalization of allocated objects). Other parts
of the compiler treat access types differently if
Comes_From_Source is False, or if the secondary stack storage
pool is used; this predicate is used to disable this special
treatment.
* sem_attr.adb (Uneval_Old_Msg): Improve message text to reflect
Ada202x changes.
(Analyze_Attribute): A previously-illegal 'Old attribute
reference is accepted in Ada2020 if it is eligible for
conditional evaluation.
* sem_res.adb (Valid_Conversion): Do not treat a rewritten 'Old
attribute like other rewrite substitutions. This makes a
difference, for example, in the case where we are generating the
expansion of a membership test of the form "Saooaaat'Old in
Named_Access_Type"; in this case Valid_Conversion needs to
return True (otherwise the expansion will be False - see the
call site in exp_ch4.adb).
* exp_attr.adb (Expand_N_Attribute_Reference): When expanding a
'Old attribute reference, test for the case where the reference
is eligible for conditional evaluation. In that case, use the
new "indirect temporary" mechanism provided by Sem_Util.
* exp_prag.adb
(Expand_Attributes_In_Consequence.Expand_Attributes): If
Sem_Util.Indirect_Temp_Needed indicates that there could be
correctness problems associated with the old expansion scheme
for dealing with 'Old attributes in contract cases consequences,
then we use the new "indirect temporary" mechanism provided by
Sem_Util instead. We do not want to do this unconditionally.
* sem_util.adb: Provide a body for the new Old_Attr_Util
package. Further work is needed in several areas for
correctness:
- The function Is_Repeatedly_Evaluated does not deal with
container aggregates yet.
- The function Is_Known_On_Entry does not deal with interactions
with the Global aspect.
Each area where more work is needed is indicated with a "???"
comment in the code; a more detailed description can be found
there. Some optimization opportunities are similarly indicated
with a "???" comment.
* exp_ch3.adb (Freeze_Type): In deciding whether to generate
expansion for the list controller of an access type, take the
predicate Is_Access_Type_For_Indirect_Temp into account. If the
predicate yields True, then generate the expansion.
* exp_util.adb (Build_Allocate_Deallocate_Proc): We don't
normally finalize allocated objects that are allocated on the
secondary stack. Add an exception to this rule if the 

[Ada] Issue with gnatbind -V switch and Ada 2012

2020-10-20 Thread Pierre-Marie de Rodat
When compiling the binder generated file in Ada 2012 mode and using
the gnatbind -V switch, invalid characters are flagged.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* bindgen.adb (Gen_Bind_Env_String): Generate Ada 2012 compatible
strings. Code cleanup.

diff --git a/gcc/ada/bindgen.adb b/gcc/ada/bindgen.adb
--- a/gcc/ada/bindgen.adb
+++ b/gcc/ada/bindgen.adb
@@ -33,7 +33,6 @@ with Osint;use Osint;
 with Osint.B;  use Osint.B;
 with Output;   use Output;
 with Rident;   use Rident;
-with Stringt;  use Stringt;
 with Table;
 with Targparm; use Targparm;
 with Types;use Types;
@@ -1161,19 +1160,18 @@ package body Bindgen is
   procedure Write_Name_With_Len (Nam : Name_Id) is
   begin
  Get_Name_String (Nam);
-
- Start_String;
- Store_String_Char (Character'Val (Name_Len));
- Store_String_Chars (Name_Buffer (1 .. Name_Len));
-
- Write_String_Table_Entry (End_String);
+ Write_Str ("Character'Val (");
+ Write_Int (Int (Name_Len));
+ Write_Str (") & """);
+ Write_Str (Name_Buffer (1 .. Name_Len));
+ Write_Char ('"');
   end Write_Name_With_Len;
 
   --  Local variables
 
-  Amp : Character;
-  KN  : Name_Id := No_Name;
-  VN  : Name_Id := No_Name;
+  First : Boolean := True;
+  KN: Name_Id := No_Name;
+  VN: Name_Id := No_Name;
 
--  Start of processing for Gen_Bind_Env_String
 
@@ -1187,21 +1185,26 @@ package body Bindgen is
   Set_Special_Output (Write_Bind_Line'Access);
 
   WBI ("   Bind_Env : aliased constant String :=");
-  Amp := ' ';
+
   while VN /= No_Name loop
- Write_Str (" " & Amp & ' ');
+ if First then
+Write_Str (" ");
+ else
+Write_Str (" & ");
+ end if;
+
  Write_Name_With_Len (KN);
  Write_Str (" & ");
  Write_Name_With_Len (VN);
  Write_Eol;
 
  Bind_Environment.Get_Next (KN, VN);
- Amp := '&';
+ First := False;
   end loop;
+
   WBI (" & ASCII.NUL;");
 
   Cancel_Special_Output;
-
   Bind_Env_String_Built := True;
end Gen_Bind_Env_String;
 




[Ada] gnatpp: Fix documentation of threshold switches

2020-10-20 Thread Pierre-Marie de Rodat
The gnatpp switches --call-threshold and --par-threshold were documented
with "_" instead of "-" (as in "--par_threshold").  It does accept that
form, but for consistency, the documentation is changed to "-".

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* doc/gnat_ugn/gnat_utility_programs.rst: Change "_" to "-".

diff --git a/gcc/ada/doc/gnat_ugn/gnat_utility_programs.rst b/gcc/ada/doc/gnat_ugn/gnat_utility_programs.rst
--- a/gcc/ada/doc/gnat_ugn/gnat_utility_programs.rst
+++ b/gcc/ada/doc/gnat_ugn/gnat_utility_programs.rst
@@ -2153,18 +2153,18 @@ building specialized scripts.
A_Very_Very_Very_Very_Very_Very_Very_Very_Long_One);
 
 
-   .. index:: --call_threshold (gnatpp)
+   .. index:: --call-threshold (gnatpp)
 
-   :switch:`--call_threshold={nnn}`
+   :switch:`--call-threshold={nnn}`
  If the number of parameter associations is greater than ``nnn`` and if at
  least one association uses named notation, start each association from
  a new line. If ``nnn`` is 0, no check for the number of associations
  is made; this is the default.
 
 
-   .. index:: --par_threshold (gnatpp)
+   .. index:: --par-threshold (gnatpp)
 
-   :switch:`--par_threshold={nnn}`
+   :switch:`--par-threshold={nnn}`
  If the number of parameter specifications is greater than ``nnn``
  (or equal to ``nnn`` in case of a function), start each specification from
  a new line. If ``nnn`` is 0, and :switch:`--no-separate-is` was not specified, then




[Ada] New warning on not fully initialized box aggregate

2020-10-20 Thread Pierre-Marie de Rodat
This new warning detects cases of aggregates of the form (others => <>)
where the type doesn't have full default values for all its components.

Refine handling of -gnatwv by not warning when an object of a type with
partial initialization is declared (and used): this generates a better
ratio of useful messages vs uninteresting ones and in particular does
not generate a warning when e.g. a controlled type with effects
performed via Initialize/Finalize is declared.

Fix a few latent bugs along the way related to the use of continuation
lines with no primary line, now visible with the suppressed warning in
a-except.ads.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_aggr.adb (Resolve_Aggregate): Warn on not fully
initialized box aggregate.
* sem_aggr.ads: Fix typo.
* sem_res.adb (Resolve_Actuals): Fix typo in error message
format marking it incorrectly as a continuation message.
* sem_elab.adb (Check_Internal_Call_Continue): Similarly, add
missing primary message in case of a call to an actual generic
subprogram.
* sem_warn.adb (Check_References): Do not warn on read but never
assigned variables if the type is partially initialized.
* libgnat/a-except.ads, libgnat/a-ststun.ads,
libgnat/g-sechas.ads, libgnat/a-cbdlli.ads,
libgnat/a-cfdlli.ads, libgnat/a-cobove.ads,
libgnat/a-cohata.ads, libgnat/a-crbltr.ads,
libgnat/a-cbmutr.ads, libgnat/a-crdlli.ads,
libgnat/a-cbsyqu.ads: Address new warning.
* doc/gnat_ugn/building_executable_programs_with_gnat.rst:
Update doc on -gnatwv.
* gnat_ugn.texi: Regenerate.

gcc/testsuite/

* gnat.dg/opt11.adb: Add new expected warning.

diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -3865,8 +3865,14 @@ of the pragma in the :title:`GNAT_Reference_manual`).
 
   This switch activates warnings for access to variables which
   may not be properly initialized. The default is that
-  such warnings are generated.
+  such warnings are generated. This switch will also be emitted when
+  initializing an array or record object via the following aggregate:
 
+  .. code-block:: ada
+
+   Array_Or_Record : XXX := (others => <>);
+
+  unless the relevant type fully initializes all components.
 
 .. index:: -gnatwV  (gcc)
 
@@ -3875,17 +3881,6 @@ of the pragma in the :title:`GNAT_Reference_manual`).
 
   This switch suppresses warnings for access to variables which
   may not be properly initialized.
-  For variables of a composite type, the warning can also be suppressed in
-  Ada 2005 by using a default initialization with a box. For example, if
-  Table is an array of records whose components are only partially uninitialized,
-  then the following code:
-
-  .. code-block:: ada
-
-   Tab : Table := (others => <>);
-
-  will suppress warnings on subsequent statements that access components
-  of variable Tab.
 
 
 .. index:: -gnatw.v  (gcc)


diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -12224,7 +12224,14 @@ that no warnings are given for comparisons or subranges for any type.
 
 This switch activates warnings for access to variables which
 may not be properly initialized. The default is that
-such warnings are generated.
+such warnings are generated. This switch will also be emitted when
+initializing an array or record object via the following aggregate:
+
+@example
+Array_Or_Record : XXX := (others => <>);
+@end example
+
+unless the relevant type fully initializes all components.
 @end table
 
 @geindex -gnatwV (gcc)
@@ -12238,17 +12245,6 @@ such warnings are generated.
 
 This switch suppresses warnings for access to variables which
 may not be properly initialized.
-For variables of a composite type, the warning can also be suppressed in
-Ada 2005 by using a default initialization with a box. For example, if
-Table is an array of records whose components are only partially uninitialized,
-then the following code:
-
-@example
-Tab : Table := (others => <>);
-@end example
-
-will suppress warnings on subsequent statements that access components
-of variable Tab.
 @end table
 
 @geindex -gnatw.v (gcc)


diff --git a/gcc/ada/libgnat/a-cbdlli.ads b/gcc/ada/libgnat/a-cbdlli.ads
--- a/gcc/ada/libgnat/a-cbdlli.ads
+++ b/gcc/ada/libgnat/a-cbdlli.ads
@@ -274,7 +274,7 @@ private
type Node_Array is array (Count_Type range <>) of Node_Type;
 
type List (Capacity : Count_Type) is tagged record
-  Nodes  : Node_Array (1 .. Capacity) := (others => <>);
+  Nodes  : Node_Array (1 .. Capacity);
   Free   : Count_Type'Base := -1;
   First  : Count_Type := 0;
   Last   : Count_Type := 0;



[Ada] Fixes for pretty command-line GNATprove output with -gnatdF

2020-10-20 Thread Pierre-Marie de Rodat
Various fixes are applied to the recent pretty output mode for GNATprove,
activated under debug switch -gnatdF:
- do not separate info messages from previous ones
- do not display source code line for info messages
- display source code lines closer to the format adopted in GCC
- do not set the exit status to error when only check messages are issued

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* errout.adb (Write_Source_Code_Line): Adopt display closer to
GCC format.
(Output_Messages): Deal specially with info messages.
* erroutc.adb (Prescan_Message): Fix bug leading to check
messages being considered as error messages in pretty output
mode.

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -1840,7 +1840,6 @@ package body Errout is
   procedure Write_Source_Code_Line (Loc : Source_Ptr);
   --  Write the source code line corresponding to Loc, as follows:
   --
-  --   |
   --  line |  actual code line here with Loc somewhere
   --   | ^ here
   --
@@ -2041,26 +2040,50 @@ package body Errout is
   
 
   procedure Write_Source_Code_Line (Loc : Source_Ptr) is
- Line: constant Pos := Pos (Get_Physical_Line_Number (Loc));
+
+ function Image (X : Positive; Width : Positive) return String;
+ --  Output number X over Width characters, with whitespace padding.
+ --  Only output the low-order Width digits of X, if X is larger than
+ --  Width digits.
+
+ ---
+ -- Image --
+ ---
+
+ function Image (X : Positive; Width : Positive) return String is
+Str  : String (1 .. Width);
+Curr : Natural := X;
+ begin
+for J in reverse 1 .. Width loop
+   if Curr > 0 then
+  Str (J) := Character'Val (Character'Pos ('0') + Curr mod 10);
+  Curr := Curr / 10;
+   else
+  Str (J) := ' ';
+   end if;
+end loop;
+
+return Str;
+ end Image;
+
+ --  Local variables
+
+ Line: constant Pos := Pos (Get_Physical_Line_Number (Loc));
  Col : constant Natural := Natural (Get_Column_Number (Loc));
- Padding : constant String (1 .. Int'Image (Line)'Length) :=
-  (others => ' ');
+ Width   : constant := 5;
 
  Buf : Source_Buffer_Ptr;
  Cur_Loc : Source_Ptr := Loc;
+
+  --  Start of processing for Write_Source_Code_Line
+
   begin
  if Loc >= First_Source_Ptr then
 Buf := Source_Text (Get_Source_File_Index (Loc));
 
---  First line
-
-Write_Str (Padding);
-Write_Char ('|');
-Write_Eol;
-
---  Second line with the actual source code line
+--  First line with the actual source code line
 
-Write_Int (Line);
+Write_Str (Image (Positive (Line), Width => Width));
 Write_Str (" |");
 Write_Str (String (Buf (Loc - Source_Ptr (Col) + 1  .. Loc - 1)));
 
@@ -2073,10 +2096,10 @@ package body Errout is
 
 Write_Eol;
 
---  Third line with carret sign pointing to location Loc
+--  Second line with carret sign pointing to location Loc
 
-Write_Str (Padding);
-Write_Char ('|');
+Write_Str (String'(1 .. Width => ' '));
+Write_Str (" |");
 Write_Str (String'(1 .. Col - 1 => ' '));
 Write_Str ("^ here");
 Write_Eol;
@@ -2117,9 +2140,10 @@ package body Errout is
  while E /= No_Error_Msg loop
 
 --  If -gnatdF is used, separate main messages from previous
---  messages with a newline and make continuation messages
---  follow the main message with only an indentation of two
---  space characters, without repeating file:line:col: prefix.
+--  messages with a newline (unless it is an info message) and
+--  make continuation messages follow the main message with only
+--  an indentation of two space characters, without repeating
+--  file:line:col: prefix.
 
 Use_Prefix :=
   not (Debug_Flag_FF and then Errors.Table (E).Msg_Cont);
@@ -2129,7 +2153,7 @@ package body Errout is
if Debug_Flag_FF then
   if Errors.Table (E).Msg_Cont then
  Write_Str ("  ");
-  else
+  elsif not Errors.Table (E).Info then
  Write_Eol;
   end if;
end if;
@@ -2158,7 +2182,14 @@ package body Errout is
Output_Msg_Text (E);
Write_Eol;
 
-   if Debug_Flag_FF 

[Ada] Crash on cond expression as actual for anonymous access formal

2020-10-20 Thread Pierre-Marie de Rodat
This patch fixes a bug in the compiler whereby a conditional expression
used as an actual for an anonymous access formal when the condition is
known at compile time triggers a crash during compilation.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch6.adb (Expand_Branch): Properly anticipate expansion of
conditional expressions producing object declarations in
addition to assignment statements, and rename formal.

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -3961,42 +3961,47 @@ package body Exp_Ch6 is
 
procedure Insert_Level_Assign (Branch : Node_Id) is
 
-  procedure Expand_Branch (Assn : Node_Id);
+  procedure Expand_Branch (Res_Assn : Node_Id);
   --  Perform expansion or iterate further within
-  --  nested conditionals.
+  --  nested conditionals given the object
+  --  declaration or assignment to result object
+  --  created during expansion which represents
+  --  a branch of the conditional expression.
 
   ---
   -- Expand_Branch --
   ---
 
-  procedure Expand_Branch (Assn : Node_Id) is
+  procedure Expand_Branch (Res_Assn : Node_Id) is
   begin
- pragma Assert (Nkind (Assn) =
- N_Assignment_Statement);
+ pragma Assert (Nkind (Res_Assn) in
+ N_Assignment_Statement |
+ N_Object_Declaration);
 
  --  There are more nested conditional
  --  expressions so we must go deeper.
 
- if Nkind (Expression (Assn)) =
+ if Nkind (Expression (Res_Assn)) =
   N_Expression_With_Actions
and then
  Nkind
-   (Original_Node (Expression (Assn))) in
- N_Case_Expression | N_If_Expression
+   (Original_Node (Expression (Res_Assn)))
+ in N_Case_Expression | N_If_Expression
  then
-Insert_Level_Assign (Expression (Assn));
+Insert_Level_Assign
+  (Expression (Res_Assn));
 
  --  Add the level assignment
 
  else
-Insert_Before_And_Analyze (Assn,
+Insert_Before_And_Analyze (Res_Assn,
   Make_Assignment_Statement (Loc,
 Name   =>
   New_Occurrence_Of
 (Lvl, Loc),
 Expression =>
   Dynamic_Accessibility_Level
-(Expression (Assn;
+(Expression (Res_Assn;
  end if;
   end Expand_Branch;
 




[Ada] Make attribute Update an obsolescent feature

2020-10-20 Thread Pierre-Marie de Rodat
In the new version of Ada, the GNAT-specific 'Update attribute is replaced
by delta aggregates. Add a warning on occurrences of 'Update.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_attr.adb (Analyze_Attribute): Emit a warning on 'Update
when Warn_On_Obsolescent_Feature is set to True.

diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -6899,6 +6899,11 @@ package body Sem_Attr is
   --  Start of processing for Update
 
   begin
+ if Warn_On_Obsolescent_Feature then
+Error_Msg_N ("?j?attribute Update is an obsolescent feature", N);
+Error_Msg_N ("\?j?use a delta aggregate instead", N);
+ end if;
+
  Check_E1;
 
  if not Is_Object_Reference (P) then




[Ada] Refine result type of Get_Accessibility

2020-10-20 Thread Pierre-Marie de Rodat
Routine Get_Accessibility returns either the result of
Minimum_Accessibility or Extra_Accessibility functions, both of which
return Entity_Id, so Get_Accessibility should return Entity_Id as well,
not just Node_Id.

Cleanup only; behavior is not affected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.ads, sem_util.adb (Get_Accessibility): Refine result
type from Node_Id to Entity_Id.

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -9568,7 +9568,7 @@ package body Sem_Util is
-- Get_Accessibility --
---
 
-   function Get_Accessibility (E : Entity_Id) return Node_Id is
+   function Get_Accessibility (E : Entity_Id) return Entity_Id is
begin
   --  When minimum accessibility is set for E then we utilize it - except
   --  in a few edge cases like the expansion of select statements where


diff --git a/gcc/ada/sem_util.ads b/gcc/ada/sem_util.ads
--- a/gcc/ada/sem_util.ads
+++ b/gcc/ada/sem_util.ads
@@ -1015,7 +1015,7 @@ package Sem_Util is
--  discriminants. Otherwise all components of the parent must be included
--  in the subtype for semantic analysis.
 
-   function Get_Accessibility (E : Entity_Id) return Node_Id;
+   function Get_Accessibility (E : Entity_Id) return Entity_Id;
--  Obtain the accessibility level for a given entity formal taking into
--  account both extra and minimum accessibility.
 




[Ada] Hang on cond expression as actual for anonymous access formal

2020-10-20 Thread Pierre-Marie de Rodat
This patch fixes a bug in the compiler whereby a conditional expression
used as an actual for an anonymous access formal when the condition is
known at compile time causes the compiler to hang.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch6.adb (Expand_Call_Helper): Properly handle the case
where the condition of a conditional expression has been
optimized out when calculating the value of an extra
accessibility parameter.

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -4014,20 +4014,23 @@ package body Exp_Ch6 is
   --  Find the relevant statement in the actions
 
   Cond := First (Actions (Branch));
-  loop
+  while Present (Cond) loop
  exit when Nkind (Cond) in
  N_Case_Statement | N_If_Statement;
 
  Next (Cond);
-
- if No (Cond) then
-raise Program_Error;
- end if;
   end loop;
 
+  --  The conditional expression may have been
+  --  optimized away, so examine the actions in
+  --  the branch.
+
+  if No (Cond) then
+ Expand_Branch (Last (Actions (Branch)));
+
   --  Iterate through if expression branches
 
-  if Nkind (Cond) = N_If_Statement then
+  elsif Nkind (Cond) = N_If_Statement then
  Expand_Branch (Last (Then_Statements (Cond)));
  Expand_Branch (Last (Else_Statements (Cond)));
 




[Ada] Spurious discriminant check on "for of" loop

2020-10-20 Thread Pierre-Marie de Rodat
When using a "for ... of" loop with an element of a record type with
defaulted discriminants, a spurious discriminant check is emitted.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch8.adb (Check_Constrained_Object): Suppress discriminant
checks when the type has default discriminants and comes from
expansion of a "for of" loop.

diff --git a/gcc/ada/sem_ch8.adb b/gcc/ada/sem_ch8.adb
--- a/gcc/ada/sem_ch8.adb
+++ b/gcc/ada/sem_ch8.adb
@@ -776,8 +776,9 @@ package body Sem_Ch8 is
   --
 
   procedure Check_Constrained_Object is
- Typ  : constant Entity_Id := Etype (Nam);
- Subt : Entity_Id;
+ Typ : constant Entity_Id := Etype (Nam);
+ Subt: Entity_Id;
+ Loop_Scheme : Node_Id;
 
   begin
  if Nkind (Nam) in N_Function_Call | N_Explicit_Dereference
@@ -821,6 +822,29 @@ package body Sem_Ch8 is
Rewrite (Subtype_Mark (N), New_Occurrence_Of (Subt, Loc));
Set_Etype (Nam, Subt);
 
+   --  Suppress discriminant checks on this subtype if the original
+   --  type has defaulted discriminants and Id is a "for of" loop
+   --  iterator.
+
+   if Has_Defaulted_Discriminants (Typ)
+ and then Nkind (Original_Node (Parent (N))) = N_Loop_Statement
+   then
+  Loop_Scheme := Iteration_Scheme (Original_Node (Parent (N)));
+
+  if Present (Loop_Scheme)
+and then Present (Iterator_Specification (Loop_Scheme))
+and then
+  Defining_Identifier
+(Iterator_Specification (Loop_Scheme)) = Id
+  then
+ Set_Checks_May_Be_Suppressed (Subt);
+ Push_Local_Suppress_Stack_Entry
+   (Entity   => Subt,
+Check=> Discriminant_Check,
+Suppress => True);
+  end if;
+   end if;
+
--  Freeze subtype at once, to prevent order of elaboration
--  issues in the backend. The renamed object exists, so its
--  type is already frozen in any case.




[Ada] Inlining nonstatic calls to static expression functions

2020-10-20 Thread Pierre-Marie de Rodat
Static expression functions were already effectively inlined in the
case of calls to them with static arguments, but calls with nonstatic
arguments were not generally being inlined, but now they are, as a result
of setting the Has_Pragma_Inline and Is_Inlined flags on such functions.
It appears that it isn't strictly necessary to set the latter flag,
but it still seems reasonable to set it and may be needed in general.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch6.adb (Analyze_Expression_Function): Mark static
expression functions as inlined.

diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -609,6 +609,12 @@ package body Sem_Ch6 is
   Set_Expression
 (Original_Node (Subprogram_Spec (Def_Id)),
  New_Copy_Tree (Expr));
+
+  --  Mark static expression functions as inlined, to ensure
+  --  that even calls with nonstatic actuals will be inlined.
+
+  Set_Has_Pragma_Inline (Def_Id);
+  Set_Is_Inlined (Def_Id);
end if;
 end if;
  end;




[Ada] Propagate predicate function to a full view of a private subtype

2020-10-20 Thread Pierre-Marie de Rodat
When the ancestor in subtype declaration is a private type, routine
Analyze_Subtype_Declaration creates private and full view, but only
the private view had Predicate_Function inherited from the ancestor.

Now also the full view has Predicate_Function inherited. This doesn't
appear to affect compilation, but fixes a crash in GNATprove, which
expects Predicate_Function to be set on both views.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch3.adb (Analyze_Subtype_Declaration): Propagate predicate
function to full view of the created type as well, if it was
created.

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -5713,6 +5713,16 @@ package body Sem_Ch3 is
   then
  Set_Subprograms_For_Type (Id, Subprograms_For_Type (T));
 
+ --  If the current declaration created both a private and a full view,
+ --  then propagate Predicate_Function to the latter as well.
+
+ if Present (Full_View (Id))
+   and then No (Predicate_Function (Full_View (Id)))
+ then
+Set_Subprograms_For_Type
+  (Full_View (Id), Subprograms_For_Type (Id));
+ end if;
+
  if Has_Static_Predicate (T) then
 Set_Has_Static_Predicate (Id);
 Set_Static_Discrete_Predicate (Id, Static_Discrete_Predicate (T));




[Ada] Fix internal error on bit-aligned component of function call

2020-10-20 Thread Pierre-Marie de Rodat
This fixes an internal error in the code generator triggered by a
reference generated by the front-end to a bit-aligned component of
the result of a function call.  Such a reference cannot be handled
by the code generator because the object is not aligned on a byte
boundary, so it is replaced by a renaming that is fully handled by
the front-end because of the potential nonstandard representation.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_util.adb (Remove_Side_Effects): Always generate a renaming
that is handled by the front-end in the case of an indexed or a
selected component whose prefix has a nonstandard representation.

diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -11530,7 +11530,26 @@ package body Exp_Util is
 Insert_Action (Exp, E);
  end if;
 
-  --  For expressions that denote names, we can use a renaming scheme.
+  --  If this is a packed array component or a selected component with a
+  --  nonstandard representation, we cannot generate a reference because
+  --  the component may be unaligned, so we must use a renaming and this
+  --  renaming must be handled by the front end, as the back end may balk
+  --  at the nonstandard representation (see Exp_Ch2.Expand_Renaming).
+
+  elsif Nkind (Exp) in N_Indexed_Component | N_Selected_Component
+and then Has_Non_Standard_Rep (Etype (Prefix (Exp)))
+  then
+ Def_Id := Build_Temporary (Loc, 'R', Exp);
+ Res := New_Occurrence_Of (Def_Id, Loc);
+
+ Insert_Action (Exp,
+   Make_Object_Renaming_Declaration (Loc,
+ Defining_Identifier => Def_Id,
+ Subtype_Mark=> New_Occurrence_Of (Exp_Type, Loc),
+ Name=> Relocate_Node (Exp)));
+
+  --  For an expression that denotes a name, we can use a renaming scheme
+  --  that is handled by the back end, instead of the front end as above.
   --  This is needed for correctness in the case of a volatile object of
   --  a nonvolatile type because the Make_Reference call of the "default"
   --  approach would generate an illegal access value (an access value
@@ -11553,21 +11572,7 @@ package body Exp_Util is
  Subtype_Mark=> New_Occurrence_Of (Exp_Type, Loc),
  Name=> Relocate_Node (Exp)));
 
- --  If this is a packed reference, or a selected component with
- --  a nonstandard representation, a reference to the temporary
- --  will be replaced by a copy of the original expression (see
- --  Exp_Ch2.Expand_Renaming). Otherwise the temporary must be
- --  elaborated by gigi, and is of course not to be replaced in-line
- --  by the expression it renames, which would defeat the purpose of
- --  removing the side effect.
-
- if Nkind (Exp) in N_Selected_Component | N_Indexed_Component
-   and then Has_Non_Standard_Rep (Etype (Prefix (Exp)))
- then
-null;
- else
-Set_Is_Renaming_Of_Object (Def_Id, False);
- end if;
+ Set_Is_Renaming_Of_Object (Def_Id, False);
 
   --  Avoid generating a variable-sized temporary, by generating the
   --  reference just for the function call. The transformation could be




[Ada] Crash on precondition, discriminant and protected objects

2020-10-20 Thread Pierre-Marie de Rodat
This combination led to a hole in function Is_Prologue_Renaming, causing
the generation of precondition checks too early.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* contracts.adb (Is_Prologue_Renaming): This function was
missing support for E_Constant which can also be generated in
protected objects.

diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb
--- a/gcc/ada/contracts.adb
+++ b/gcc/ada/contracts.adb
@@ -2333,7 +2333,7 @@ package body Contracts is
   --  A renamed private component is just a component of
   --  _object, with an arbitrary name.
 
-  elsif Ekind (Obj) = E_Variable
+  elsif Ekind (Obj) in E_Variable | E_Constant
 and then Nkind (Pref) = N_Identifier
 and then Chars (Pref) = Name_uObject
 and then Nkind (Sel) = N_Identifier




[Ada] Flexible AST node structure

2020-10-20 Thread Pierre-Marie de Rodat
This patch is preliminary work in preparation for changing the node
structure so that nodes and entities can be of different sizes depending
on the Node_Kind or Entity_Kind.

This is cleanup/simplification combined with efficiency improvements.

We rearrange the subranges of the various union types in types.ads to
allow for more Node_Ids. Node_Id is now most of the nonnegative range.
The other ranges remain the same length (100 million, except for Uint),
but are now negative numbers. Fix various places that assumed the
numbers are positive.

Note that -gnaten switch is currently undocumented, and might
be removed.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* atree.ads: Make Default_Node a constant.  Remove the
modification of Comes_From_Source, and use a separate flag for
that.  Change Sloc to 0; it is always overwritten, and never left
as the No_Location value.
(Print_Statistics): Move to spec so we can call it from
gnat1drv.
(Num_Nodes): Rename to clarify that this is approximate.
Correct comment: nodes and entities are never deleted, the count
is never decremented, and this is not used by Xref.
(Initialize): Correct comment: Error_List is not created here.
Other minor naming and comment changes.
* atree.adb (Extend_Node, New_Copy, New_Entity, New_Node):
Streamline these. Simplify and improve efficiency.  Move code
from Allocate_Initialize_Node to these, where it can be executed
unconditionally.  Take advantage of automatic zeroing of the
Nodes table.
(Allocate_Initialize_Node): Remove this. It was an efficiency
bottleneck, and somewhat complicated, because it was called from
4 places, and had all sorts of conditionals to check where it
was called from. Better to move most of that code to the call
sites, where it can be executed (or not) unconditionally.
(Allocate_New_Node): New procedure to partly replace
Allocate_Initialize_Node (called from just 2 of those 4 places).
(Comes_From_Source_Default): New flag written/read by
Set_Comes_From_Source_Default/Get_Comes_From_Source_Default.
This allows us to make Default_Node into a constant with
all-zeros value.
(Set_Paren_Count_Of_Copy): New procedure to avoid duplicated
code.
(Report): New procedure to encapsulate the call to the reporting
procedure.
(Atree_Private_Part): We now need a body for this package, to
contain package body Nodes.
(Approx_Num_Nodes_And_Entities): Was Num_Nodes.  For efficiency,
compute the answer from Nodes.Last. That way we don't need to
increment a counter on every node creation. Other minor naming
and comment changes.
* gnat1drv.adb: Call Atree.Print_Statistics if -gnatd.A switch
was given.  Add comment documenting the new order dependency (we
must process the command line before calling Atree.Initialize).
* debug.adb: Document -gnatd.A.
* einfo.adb, sinfo.adb: Remove useless Style_Checks pragmas.
* nlists.ads (Allocate_List_Tables): Inline makes node creation
a little faster.
* nlists.adb (Initialize): Remove local constant E, which didn't
seem to add clarity.
* treepr.adb (Print_Init): Use renamed
Approx_Num_Nodes_And_Entities function.
* types.ads: Change the Low and High bounds as described above.
* types.h: Change Low and High bounds to match types.ads.
* sem_ch8.adb, namet.adb, namet.ads: Move the computation of
Last_Name_Id from sem_ch8 to namet, and correct it to not assume
Name_Ids are positive.
* ali.adb, ali-util.adb, bindo-writers.adb, exp_dist.adb,
fmap.adb, fname-uf.adb, osint.adb: Fix various hash functions to
avoid assuming the various ranges are positive. Note that "mod"
returns a nonnegative result when the second operand is
positive. "rem" can return negative values in that case (in
particular, if the first operand is negative, which it now is).
* switch-c.adb: Allow switch -gnaten to control the value of
Nodes_Size_In_Meg.
* doc/gnat_ugn/building_executable_programs_with_gnat.rst:
Remove trailing whitespace.
* opt.ads (Nodes_Size_In_Meg): New Variable.

patch.diff.gz
Description: application/gzip


[Ada] Remove extra validity check in case statement

2020-10-20 Thread Pierre-Marie de Rodat
Recognize code of the form:

  if X'Valid then
 case X is
  [...]

and suppress the redundant validity check on X done as part of the case
statement.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch5.adb (Expand_N_Case_Statement): Do not generate
validity check when possible.

diff --git a/gcc/ada/exp_ch5.adb b/gcc/ada/exp_ch5.adb
--- a/gcc/ada/exp_ch5.adb
+++ b/gcc/ada/exp_ch5.adb
@@ -3115,7 +3115,35 @@ package body Exp_Ch5 is
  if Validity_Check_Default
and then not Predicates_Ignored (Etype (Expr))
  then
-Ensure_Valid (Expr);
+--  Recognize the simple case where Expr is an object reference
+--  and the case statement is directly preceded by an
+--  "if Obj'Valid then": in this case, do not emit another validity
+--  check.
+
+declare
+   Check_Validity : Boolean := True;
+   Attr   : Node_Id;
+begin
+   if Nkind (Expr) = N_Identifier
+ and then Nkind (Parent (N)) = N_If_Statement
+ and then Nkind (Original_Node (Condition (Parent (N
+   = N_Attribute_Reference
+ and then No (Prev (N))
+   then
+  Attr := Original_Node (Condition (Parent (N)));
+
+  if Attribute_Name (Attr) = Name_Valid
+and then Nkind (Prefix (Attr)) = N_Identifier
+and then Entity (Prefix (Attr)) = Entity (Expr)
+  then
+ Check_Validity := False;
+  end if;
+   end if;
+
+   if Check_Validity then
+  Ensure_Valid (Expr);
+   end if;
+end;
  end if;
 
  --  If there is only a single alternative, just replace it with the




[Ada] Ada_2020: Further code cleanup for container aggregates

2020-10-20 Thread Pierre-Marie de Rodat
This patch corrects a potential use of an uninitialized variable
uncovered by CodePeer in Expand_Iterated_Component, and removes
some code duplication.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_aggr.adb (Expand_Iterated_Component): Reorganize code to
ensure that Loop_Id is properly initialized on all paths, and
remove code duplication.

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -6905,6 +6905,7 @@ package body Exp_Aggr is
  L_Range: Node_Id;
  L_Iteration_Scheme : Node_Id;
  Loop_Stat  : Node_Id;
+ Params : List_Id;
  Stats  : List_Id;
 
   begin
@@ -6936,31 +6937,39 @@ package body Exp_Aggr is
Loop_Parameter_Specification =>
  Loop_Parameter_Specification (Comp));
Loop_Id :=
-  Make_Defining_Identifier (Loc,
-Chars => Chars (Defining_Identifier
-   (Loop_Parameter_Specification (Comp;
+ Make_Defining_Identifier (Loc,
+   Chars => Chars (Defining_Identifier
+  (Loop_Parameter_Specification (Comp;
Set_Defining_Identifier
-  (Loop_Parameter_Specification
- (L_Iteration_Scheme), Loop_Id);
+ (Loop_Parameter_Specification
+(L_Iteration_Scheme), Loop_Id);
 end if;
+ else
 
- elsif Present (Iterator_Specification (Comp)) then
-L_Iteration_Scheme :=
-  Make_Iteration_Scheme (Loc,
-Iterator_Specification => Iterator_Specification (Comp));
+--  Iterated_Component_Association.
 
- else
-L_Range := Relocate_Node (First (Discrete_Choices (Comp)));
 Loop_Id :=
   Make_Defining_Identifier (Loc,
 Chars => Chars (Defining_Identifier (Comp)));
 
-L_Iteration_Scheme :=
-  Make_Iteration_Scheme (Loc,
-Loop_Parameter_Specification =>
-  Make_Loop_Parameter_Specification (Loc,
-Defining_Identifier => Loop_Id,
-Discrete_Subtype_Definition => L_Range));
+if Present (Iterator_Specification (Comp)) then
+   L_Iteration_Scheme :=
+ Make_Iteration_Scheme (Loc,
+   Iterator_Specification => Iterator_Specification (Comp));
+
+else
+   --  Loop_Parameter_Specifcation is parsed with a choice list.
+   --  where the range is the first (and only) choice.
+
+   L_Range := Relocate_Node (First (Discrete_Choices (Comp)));
+
+   L_Iteration_Scheme :=
+ Make_Iteration_Scheme (Loc,
+   Loop_Parameter_Specification =>
+ Make_Loop_Parameter_Specification (Loc,
+   Defining_Identifier => Loop_Id,
+   Discrete_Subtype_Definition => L_Range));
+end if;
  end if;
 
  --  Build insertion statement. For a positional aggregate, only the
@@ -6983,23 +6992,19 @@ package body Exp_Aggr is
 --  possibly with a specified key_expression.
 
 if Present (Key_Expr) then
-   Stats := New_List
- (Make_Procedure_Call_Statement (Loc,
-Name => New_Occurrence_Of (Entity (Add_Named_Subp), Loc),
-Parameter_Associations =>
-  New_List (New_Occurrence_Of (Temp, Loc),
-New_Copy_Tree (Key_Expr),
-New_Copy_Tree (Expr;
-
+   Params := New_List (New_Occurrence_Of (Temp, Loc),
+New_Copy_Tree (Key_Expr),
+New_Copy_Tree (Expr));
 else
-   Stats := New_List
- (Make_Procedure_Call_Statement (Loc,
-Name => New_Occurrence_Of (Entity (Add_Named_Subp), Loc),
-Parameter_Associations =>
-  New_List (New_Occurrence_Of (Temp, Loc),
-New_Occurrence_Of (Loop_Id, Loc),
-New_Copy_Tree (Expr;
+   Params := New_List (New_Occurrence_Of (Temp, Loc),
+New_Occurrence_Of (Loop_Id, Loc),
+New_Copy_Tree (Expr));
 end if;
+
+Stats := New_List
+  (Make_Procedure_Call_Statement (Loc,
+ Name => New_Occurrence_Of (Entity (Add_Named_Subp), Loc),
+ Parameter_Associations => Params));
  end if;
 
  Loop_Stat :=  Make_Implicit_Loop_Statement




[Ada] Support for new aspect Subprogram_Variant on recursive subprograms

2020-10-20 Thread Pierre-Marie de Rodat
This patch introduces a new aspect "Subprogram_Variant" with expressions
that are meant to increase/decrease with each recursive call of the
annotated subprogram. It is inspired by the existing pragma
Loop_Variant, whose expressions are meant to increase/decrease with each
iteration of the loop. Both of those annotations are primarily used by
GNATprove to verify that subprograms and loops terminate, but GNAT does
all the legality checks and expansion for the dynamic semantics.

The new aspect is processed in the following steps:
1. Translate aspect to a corresponding pragma, so that we can reuse the
   existing circuitry for similar aspects; in particular, we heavily
   mimic processing of aspect Contract_Cases, which appears to be the
   most similar one.
2. Validate placement of the pragma.
3. Analyse increase/decrease expressions, which might contain references
   to both the subprogram parameters and forward references to global
   objects (just like expressions of the aspects Pre/Post/Contract_Cases
   and Global/Depends, so analysis is delayed).
4. Build declarations of constant objects that capture values of the
   variant expressions at subprogram entry and a procedure that compares
   those constants with values of the corresponding expressions at a
   recursive call.
5. Add call to the above procedure at recursive calls of the subprogram.

Steps 1-3 are needed for both GNAT and GNATprove (which requires
increases/decreases expressions to be analyzed); steps 4-5 are only
needed for GNAT to implement dynamic semantics.

No existing GNAT functionality is affected by this new aspect.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* aspects.ads: Introduce Subprogram_Variant aspect with the
following properties: GNAT-specific, with mandatory expression,
not a representation aspect, never delayed.
* contracts.adb (Expand_Subprogram_Contract): Mention new aspect
in the comment.
(Add_Contract_Item): Support addition of pragma
Subprogram_Variant to N_Contract node.
(Analyze_Entry_Or_Subprogram_Contract): Mention new aspect in
the comment; add pragma Subprogram_Variant to N_Contract node.
(Build_Postconditions_Procedure): Adapt call to
Insert_Before_First_Source_Declaration, which is now reused in
expansion of new aspect.
(Process_Contract_Cases_For): Also process Subprogram_Variant,
which is stored in N_Contract node together with Contract_Cases.
* contracts.ads (Analyze_Entry_Or_Subprogram_Contract): Mention
new aspect in the comment.
(Analyze_Entry_Or_Subprogram_Body_Contract): Likewise.
* einfo.adb (Get_Pragma): Support retrieval of new pragma.
* einfo.ads (Get_Pragma): Likewise.
* exp_ch6.adb (Check_Subprogram_Variant): New routine for
emitting call to check Subprogram_Variant expressions at run
time.
(Expand_Call_Helper): Check Subprogram_Variant expressions at
recursive calls.
* exp_prag.adb (Make_Op): Moved from expansion of pragma
Loop_Variant to Exp_Util, so it is now reused for expansion of
pragma Subprogram_Variant.
(Process_Variant): Adapt call to Make_Op after moving it to
Exp_Util.
(Expand_Pragma_Subprogram_Variant): New routine.
* exp_prag.ads (Expand_Pragma_Subprogram_Variant): Likewise.
* exp_util.adb (Make_Variant_Comparison): Moved from Exp_Prag
(see above).
* exp_util.ads (Make_Variant_Comparison): Likewise.
* inline.adb (Remove_Aspects_And_Pragmas): Handle aspect/pragma
Subprogram_Variant just like similar contracts.
* par-prag.adb (Prag): Likewise.
* sem.adb (Insert_Before_First_Source_Declaration): Moved from
Contracts (see above).
* sem.ads (Insert_Before_First_Source_Declaration): Likewise.
* sem_ch12.adb: Mention new aspect in the comment about
"Implementation of Generic Contracts", just like similar aspects
are mentioned there.
* sem_ch13.adb (Insert_Pragma): Mention new aspect in the
comment, because this routine is now used for Subprogram_Variant
just like for other similar aspects.
(Analyze_Aspect_Specifications): Mention new aspect in comments;
it is handled just like aspect Contract_Cases.
(Check_Aspect_At_Freeze_Point): Do not expect aspect
Subprogram_Variant just like we don't expect aspect
Contract_Cases.
* sem_prag.adb (Ensure_Aggregate_Form): Now also used for pragma
Subprogram_Variant, so update comment.
(Analyze_Pragma): Add initial checks for pragma
Subprogram_Variant.
(Analyze_Subprogram_Variant_In_Decl_Part): New routine with
secondary checks on the new pragma.
(Sig_Flags): Handle references within pragma Subprogram_Variant
expression just like references in similar pragma

[Ada] Display source code pointing at locations in messages for GNATprove

2020-10-20 Thread Pierre-Marie de Rodat
In GNATprove, output of messages is adapted (under debug switch -gnatdF)
so that both for the location of messages, and extra locations appearing
as line insertion in continuation messages, the corresponding line of
source code is displayed. For example:

incr.adb:3:11: medium: overflow check might fail
  |
3 |   X := X + 1;
  |  ^ here
  e.g. when X = Integer'Last
  reason for check: result of addition must fit in a 32-bits machine integer
  possible fix: subprogram at line 1 should mention X in a precondition
  |
1 |procedure Incr (X : in out Integer) is
  |^ here

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* errout.adb (Error_Msg_Internal): Pass the location for a line
insertion if any in the message.
(Output_Messages): Add display of source code lines if -gnatdF is
set.
(Write_Source_Code_Line): Code clean up.
* erroutc.adb (Prescan_Message): Apply prescan for continuation
lines when -gnatdF is set, and record presence of line
insertion.
* erroutc.ads (Has_Insertion_Line): New global for prescan.
(Error_Msg_Object): Add field to record line insertion if
present.
* errutil.adb (Error_Msg): Pass no location for Insertion_Sloc.

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -1119,6 +1119,8 @@ package body Errout is
   Prev=> No_Error_Msg,
   Sptr=> Sptr,
   Optr=> Optr,
+  Insertion_Sloc  => (if Has_Insertion_Line then Error_Msg_Sloc
+  else No_Location),
   Sfile   => Get_Source_File_Index (Sptr),
   Line=> Get_Physical_Line_Number (Sptr),
   Col => Get_Column_Number (Sptr),
@@ -1823,8 +1825,8 @@ package body Errout is
-
 
procedure Output_Messages is
-  E: Error_Msg_Id;
-  Err_Flag : Boolean;
+
+  --  Local subprograms
 
   procedure Write_Error_Summary;
   --  Write error summary
@@ -1835,6 +1837,15 @@ package body Errout is
   procedure Write_Max_Errors;
   --  Write message if max errors reached
 
+  procedure Write_Source_Code_Line (Loc : Source_Ptr);
+  --  Write the source code line corresponding to Loc, as follows:
+  --
+  --   |
+  --  line |  actual code line here with Loc somewhere
+  --   | ^ here
+  --
+  --  where the carret on the last line points to location Loc.
+
   -
   -- Write_Error_Summary --
   -
@@ -2025,6 +2036,59 @@ package body Errout is
  end if;
   end Write_Max_Errors;
 
+  
+  -- Write_Source_Code_Line --
+  
+
+  procedure Write_Source_Code_Line (Loc : Source_Ptr) is
+ Line: constant Pos := Pos (Get_Physical_Line_Number (Loc));
+ Col : constant Natural := Natural (Get_Column_Number (Loc));
+ Padding : constant String (1 .. Int'Image (Line)'Length) :=
+  (others => ' ');
+
+ Buf : Source_Buffer_Ptr;
+ Cur_Loc : Source_Ptr := Loc;
+  begin
+ if Loc >= First_Source_Ptr then
+Buf := Source_Text (Get_Source_File_Index (Loc));
+
+--  First line
+
+Write_Str (Padding);
+Write_Char ('|');
+Write_Eol;
+
+--  Second line with the actual source code line
+
+Write_Int (Line);
+Write_Str (" |");
+Write_Str (String (Buf (Loc - Source_Ptr (Col) + 1  .. Loc - 1)));
+
+while Cur_Loc <= Buf'Last
+  and then Buf (Cur_Loc) /= ASCII.LF
+loop
+   Write_Char (Buf (Cur_Loc));
+   Cur_Loc := Cur_Loc + 1;
+end loop;
+
+Write_Eol;
+
+--  Third line with carret sign pointing to location Loc
+
+Write_Str (Padding);
+Write_Char ('|');
+Write_Str (String'(1 .. Col - 1 => ' '));
+Write_Str ("^ here");
+Write_Eol;
+ end if;
+  end Write_Source_Code_Line;
+
+  --  Local variables
+
+  E  : Error_Msg_Id;
+  Err_Flag   : Boolean;
+  Use_Prefix : Boolean;
+
--  Start of processing for Output_Messages
 
begin
@@ -2051,12 +2115,16 @@ package body Errout is
 
  E := First_Error_Msg;
  while E /= No_Error_Msg loop
-if not Errors.Table (E).Deleted and then not Debug_Flag_KK then
 
-   --  If -gnatdF is used, separate main messages from previous
-   --  messages with a newline and make continuation messages
-   --  follow the main message with only an indentation of two
-   --  space characters, without repeating 

Re: preprocessor: Fix non-fn fn-like macro at EOF [PR97471]

2020-10-20 Thread Jakub Jelinek via Gcc-patches
On Mon, Oct 19, 2020 at 11:00:34AM -0400, Nathan Sidwell wrote:
> We inject EOF tokens between macro argument lists, but had
> confused/stale logic in the non-fn invocation.  Renamed the magic
> 'eof' token, as it's now only used for macro argument termination.
> Always rewind the non-OPEN_PAREN token.
> 
> libcpp/
> * internal.h (struct cpp_reader): Rename 'eof' field to 'endarg'.
> * init.c (cpp_create_reader): Adjust.
> * macro.c (collect_args): Use endarg for separator.  Always rewind
> in the not-fn case.
> gcc/testsuite/
> * c-c++-common/cpp/pr97471.c: New.
> 
> pushing to trunk,

I think this regressed
FAIL: gcc.dg/cpp/endif.c (test for excess errors)

Jakub



[PATCH] tree-optimization/97496 - avoid SLP externs in patterns

2020-10-20 Thread Richard Biener
I forgot to guard the promotion to external for the case where the
def is in a pattern.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-10-20  Richard Biener  

PR tree-optimization/97496
* tree-vect-slp.c (vect_get_and_check_slp_defs): Guard extern
promotion with not in pattern.

* gcc.dg/vect/bb-slp-pr97496.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr97496.c | 14 ++
 gcc/tree-vect-slp.c|  3 ++-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr97496.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr97496.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr97496.c
new file mode 100644
index 00000000000..fa9e9801583
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr97496.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+
+int a;
+int b[1024];
+void c(unsigned g) {
+  if (a) {
+long e = g, d = e;
+int f = 0;
+for (; f < 4; f++) {
+  b[f] = d;
+  d >>= 8;
+}
+  }
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f36d8d1c642..0c1447e7aa0 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -582,7 +582,8 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
  continue;
}
 
- if (is_a <bb_vec_info> (vinfo))
+ if (is_a <bb_vec_info> (vinfo)
+ && !oprnd_info->any_pattern)
{
  /* Now for commutative ops we should see whether we can
 make the other operand matching.  */
-- 
2.26.2


[PATCH] Add { target int128 } to gcc.dg/pr97488.c

2020-10-20 Thread Aldy Hernandez via Gcc-patches
__int128 does not exist on 32-bit targets.

Pushed.

gcc/testsuite/ChangeLog:

* gcc.dg/pr97488.c: Add target int128 predicate.
---
 gcc/testsuite/gcc.dg/pr97488.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr97488.c b/gcc/testsuite/gcc.dg/pr97488.c
index 96dc33cf258..de7396cd4ec 100644
--- a/gcc/testsuite/gcc.dg/pr97488.c
+++ b/gcc/testsuite/gcc.dg/pr97488.c
@@ -1,4 +1,4 @@
-// { dg-do compile }
+// { dg-do compile { target int128 } }
 // { dg-options "-O1 -ftree-vrp" }
 
 __int128
-- 
2.26.2