[PATCH][rs6000][PR92379] fix UB shift of 64-bit type by 64 bits

2020-03-13 Thread Aaron Sawdey via Gcc-patches
This is a fix for PR92379. Passes regstrap on ppc64le. Pre-approved by 
Segher, committing after posting.


2020-03-13  Aaron Sawdey 

    PR target/92379
    * config/rs6000/rs6000.c (num_insns_constant_multi) Don't shift a
    64-bit value by 64 bits (UB).

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 24598aff663..5798f924472 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5612,7 +5612,10 @@ num_insns_constant_multi (HOST_WIDE_INT value, 
machine_mode mode)

   && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
 insns = 2;
   total += insns;
-  value >>= BITS_PER_WORD;
+  /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
+     it all at once would be UB. */
+  value >>= (BITS_PER_WORD - 1);
+  value >>= 1;
 }
   return total;
 }







[committed] analyzer: handle NOP_EXPR in get_lvalue [PR94099, PR94105]

2020-03-13 Thread David Malcolm via Gcc-patches
PR analyzer/94099 and PR analyzer/94105 both report ICEs relating to
calling region_model::get_lvalue on a NOP_EXPR.

PR analyzer/94099's ICE happens when generating a checker_path when
encountering an unhandled tree code (NOP_EXPR) in get_lvalue with a
NULL context (from for_each_state_change).

PR analyzer/94105 ICE happens when handling an ARRAY_REF where the
first operand is a NOP_EXPR: the unhandled tree code gives us
a symbolic_region, but the case for ARRAY_REF assumes we have an
array_region.

This patch fixes the ICEs by handling NOP_EXPR within
region_model::get_lvalue, and bulletproofs both of the above sources
of failure.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as 5c048755ec98645f8436b630df3f9294ca9cbc2a.

gcc/analyzer/ChangeLog:
PR analyzer/94099
PR analyzer/94105
* diagnostic-manager.cc (for_each_state_change): Bulletproof
against errors in get_rvalue by passing a
tentative_region_model_context and rejecting if there's an error.
* region-model.cc (region_model::get_lvalue_1): When handling
ARRAY_REF, handle results of error-handling.  Handle NOP_EXPR.

gcc/testsuite/ChangeLog:
PR analyzer/94099
PR analyzer/94105
* gcc.dg/analyzer/pr94099.c: New test.
* gcc.dg/analyzer/pr94105.c: New test.
---
 gcc/analyzer/diagnostic-manager.cc  |  5 +++--
 gcc/analyzer/region-model.cc| 14 -
 gcc/testsuite/gcc.dg/analyzer/pr94099.c | 27 +
 gcc/testsuite/gcc.dg/analyzer/pr94105.c |  3 +++
 4 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr94099.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr94105.c

diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index 1b2c3ce68fa..bea566da9fc 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -768,9 +768,10 @@ for_each_state_change (const program_state _state,
  if (dst_pv->m_stack_depth
  >= src_state.m_region_model->get_stack_depth ())
continue;
+ tentative_region_model_context ctxt;
  svalue_id src_sid
-   = src_state.m_region_model->get_rvalue (*dst_pv, NULL);
- if (src_sid.null_p ())
+   = src_state.m_region_model->get_rvalue (*dst_pv, );
+ if (src_sid.null_p () || ctxt.had_errors_p ())
continue;
  state_machine::state_t src_sm_val = src_smap.get_state (src_sid);
  if (dst_sm_val != src_sm_val)
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 87980e7c8cd..45a190299ea 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -4749,7 +4749,18 @@ region_model::get_lvalue_1 (path_var pv, 
region_model_context *ctxt)
 
region_id array_rid = get_lvalue (array, ctxt);
svalue_id index_sid = get_rvalue (index, ctxt);
-   array_region *array_reg = get_region (array_rid);
+   region *base_array_reg = get_region (array_rid);
+   array_region *array_reg  = base_array_reg->dyn_cast_array_region ();
+   if (!array_reg)
+ {
+   /* Normally, array_rid ought to refer to an array_region, since
+  array's type will be ARRAY_TYPE.  However, if we have an
+  unexpected tree code for array, we could have a
+  symbolic_region here.  If so, we're in error-handling. */
+   gcc_assert (base_array_reg->get_type () == NULL_TREE);
+   return make_region_for_unexpected_tree_code (ctxt, expr,
+dump_location_t ());
+ }
return array_reg->get_element (this, array_rid, index_sid, ctxt);
   }
   break;
@@ -4849,6 +4860,7 @@ region_model::get_lvalue_1 (path_var pv, 
region_model_context *ctxt)
   }
   break;
 
+case NOP_EXPR:
 case VIEW_CONVERT_EXPR:
   {
tree obj = TREE_OPERAND (expr, 0);
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94099.c 
b/gcc/testsuite/gcc.dg/analyzer/pr94099.c
new file mode 100644
index 000..0a34f561821
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr94099.c
@@ -0,0 +1,27 @@
+/* { dg-additional-options "-O1" } */
+
+struct cg {
+  int hk;
+  int *bg;
+};
+
+union vb {
+  struct cg gk;
+};
+
+void
+l3 (union vb *);
+
+void
+pl (void)
+{
+  union vb th = { 0, };
+  int sc;
+
+  for (sc = 0; sc < 1; ++sc)
+{
+  th.gk.hk = 0;
+  th.gk.bg[sc] = 0; /* { dg-warning "uninitialized" } */
+  l3 ();
+}
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94105.c 
b/gcc/testsuite/gcc.dg/analyzer/pr94105.c
new file mode 100644
index 000..8220723bf6c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr94105.c
@@ -0,0 +1,3 @@
+/* { dg-do compile } */
+
+#include "../../c-c++-common/torture/pr58794-1.c"
-- 
2.21.0



Re: [RS6000] make PLT loads volatile

2020-03-13 Thread Alan Modra via Gcc-patches
On Fri, Mar 13, 2020 at 10:40:38AM -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Mar 13, 2020 at 10:06:01AM +1030, Alan Modra wrote:
> > On Thu, Mar 12, 2020 at 11:57:17AM -0500, Segher Boessenkool wrote:
> > > On Thu, Mar 12, 2020 at 01:18:50PM +1030, Alan Modra wrote:
> > > > With lazy PLT resolution the first load of a PLT entry may be a value
> > > > pointing at a resolver stub.  gcc's loop processing can result in the
> > > > PLT load in inline PLT calls being hoisted out of a loop in the
> > > > mistaken idea that this is an optimisation.  It isn't.  If the value
> > > > hoisted was that for a resolver stub then every call to that function
> > > > in the loop will go via the resolver, slowing things down quite
> > > > dramatically.
> > > > 
> > > > The PLT really is volatile, so teach gcc about that.
> > > 
> > > It would be nice if we could keep it cached after it has been resolved
> > > once, this has potential for regressing performance if we don't?  And
> > > LD_BIND_NOW should keep working just as fast as it is now, too?
> > 
> > Using a call-saved register to cache a load out of the PLT looks
> > really silly
> 
> Who said anything about using call-saved registers?  GCC will usually
> make a stack slot for this, and only use a non-volatile register when
> that is profitable.  (I know it is a bit too aggressive with it, but
> that is a generic problem).

Using a stack slot comes about due to hoisting then running out of
call-saved registers in the loop.  Score another reason not to hoist
PLT loads.

> > when the inline PLT call is turned back into a direct
> > call by the linker.
> 
> Ah, so yeah, for direct calls we do not want this.  I was thinking this
> was about indirect calls (via a bctrl that is), dunno how I got that
> misperception.  Sorry.
> 
> What is this like for indirect calls (at C level)?  Does your patch do
> anything to those?

No effect at all.  To put your mind at rest on this point you can
verify quite easily by noticing that UNSPECV_PLT* is only generated in
rs6000_longcall_ref, and calls to that function are conditional on
GET_CODE (func_desc) == SYMBOL_REF.

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH 0/2] Make C front end share the C++ tree representation of loops and switches

2020-03-13 Thread David Malcolm via Gcc-patches
On Thu, 2020-03-12 at 14:21 -0600, Sandra Loosemore wrote:
> On 1/27/20 2:02 PM, Jeff Law wrote:
> > [snip]
> > 
> > While I think we've missed the boat for gcc-10, I think these
> > patches 
> > should go forward in gcc-11. I'll own getting the paths sorted so
> > that 
> > this problem is avoided.

> I recently retested these patches on trunk,

For reference, the patches are here:
 cover letter: 
https://gcc.gnu.org/pipermail/gcc-patches/2019-November/534142.html
 [1/2] https://gcc.gnu.org/pipermail/gcc-patches/2019-November/534145.html
 [2/2] https://gcc.gnu.org/pipermail/gcc-patches/2019-November/534144.html

> and I found some new 
> regressions in the analyzer tests.
> 
> FAIL: default/gcc.sum:gcc.dg/analyzer/edges-1.c  (test for bogus 
> messages, line 16)
> 
> This one may be a genuine analyzer bug?  It is apparently failing to 
> prune the control flow paths per commit 
> 004f2c07b6d3fa543f0fe86c55a7b3c227de2bb6.

I think that what's happened is that your patch improves the location
information for the gimple stmts, and this uncovers a bug in how I wrote
the test.

Without your patch 2, edges-1.c's test_1 has this output:

  edges-1.c:19:6: warning: leak of FILE 'fp' [CWE-775] [-Wanalyzer-file-leak]
  edges-1.c:10:14: note: (1) opened here
  edges-1.c:12:6: note: (2) assuming 'fp' is non-NULL
  edges-1.c:12:6: note: (3) following 'false' branch (when 'fp' is non-NULL)...
  cc1: note: (4) ...to here
  edges-1.c:19:6: note: (5) following 'false' branch (when 'flag == 0')...
  cc1: note: (6) ...to here
  edges-1.c:19:6: note: (7) 'fp' leaks here; was opened at (1)

With both of your patches I get:

  edges-1.c:19:6: warning: leak of FILE 'fp' [CWE-775] [-Wanalyzer-file-leak]
  edges-1.c:10:14: note: (1) opened here
  edges-1.c:12:6: note: (2) assuming 'fp' is non-NULL
  edges-1.c:12:6: note: (3) following 'false' branch (when 'fp' is non-NULL)...
  edges-1.c:16:10: note: (4) ...to here
  edges-1.c:19:6: note: (5) following 'false' branch (when 'flag == 0')...
  cc1: note: (6) ...to here
  edges-1.c:19:6: note: (7) 'fp' leaks here; was opened at (1)

Note how the patch improves event (4) in the path from being at an
unknown location to being at line 16 (the "while").

I think I underspecified the dg-bogus at line 16: my intent is for it to
verify that we don't emit any events relating to the "while" test, but
the above now has the "...to here".

The following patch fixes that dg-bogus at line 16; the analyzer is
still correctly not reporting the control flow for the while-loop
itself, as that's not needed to trigger the leak (I verified that the
updated dg-bogus is still triggered if I supply -fanalyzer-verbosity=3
to disable the purging of redundant control-flow).

Note how event (6) still doesn't have a location; that's the destination
of the final "if" that's at the end of the function (falling off the
end).  That may well be an issue with the analyzer, though, not your
code.

> PASS -> FAIL: default/gcc.sum:gcc.dg/analyzer/explode-2.c  (test for 
> warnings, line 39)
> PASS -> FAIL: default/gcc.sum:gcc.dg/analyzer/explode-2.c  (test for 
> warnings, line 46)
> PASS -> FAIL: default/gcc.sum:gcc.dg/analyzer/explode-2.c (test for 
> excess errors)
> 
> This one is hitting the "--Wanalyzer-too-complex" diagnostic and
> could 
> probably be fixed by adjusting the parameters at the top of the
> testcase.

I tried doubling the limits but it didn't help; something unexpected
seems to be going on - but I'm in the middle of a rewrite of this code,
so I'd prefer to postpone looking into this for now.

> PASS -> FAIL: default/gcc.sum:gcc.dg/analyzer/loop-4.c  (test for 
> warnings, line 28)
> PASS -> FAIL: default/gcc.sum:gcc.dg/analyzer/loop-4.c  (test for 
> warnings, line 37)
> PASS -> FAIL: default/gcc.sum:gcc.dg/analyzer/loop-4.c (test for
> excess 
> errors)
> 
> It's now printing "2 processed enodes" in both warnings instead of 3
> or 
> 4 (respectively).  At first glance, it's not obvious what the 
> significance of the the things being tested here is, so I'm not sure
> if 
> this is an analyzer bug or just patterns that are too fragile.

The test is trying to measure how many nodes we create at each measured
program point in the exploded_graph - ideally the fewer the better; it
seems that the patch improves the analyzer here, and the test should be
updated to reflect that.

Here's a lightly-tested patch which fixes the edges-1.c and loop-4.c
failures for me.

David

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/edges-1.c: Strengthen the dg-bogus directives to
check for the word "branch" rather than any message, since
test_1's if's "...to here" message now has the location of the
"while" stmt.
* gcc.dg/analyzer/loop-4.c: Reduce number of expected enodes in
two places.
---
 gcc/testsuite/gcc.dg/analyzer/edges-1.c | 4 ++--
 gcc/testsuite/gcc.dg/analyzer/loop-4.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git 

Re: [PATCH] avoid treating more incompatible redeclarations as builtin-ins [PR94040]

2020-03-13 Thread Joseph Myers
On Fri, 13 Mar 2020, Martin Sebor via Gcc-patches wrote:

> On 3/12/20 7:17 PM, Joseph Myers wrote:
> > On Thu, 5 Mar 2020, Martin Sebor wrote:
> > 
> > > Tested on x86_64-linux.  Is this acceptable for GCC 10?  How about 9?
> > 
> > OK for GCC 10.
> 
> Thank you.  I committed it to trunk in r10-7162.
> 
> Do you not want me to commit it to GCC 9 or are you leaving it up to me?

I don't think this is that safe for GCC 9.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [RFA] [PR rtl-optimization/90275] Handle nop reg->reg copies in cse

2020-03-13 Thread Jeff Law via Gcc-patches
On Fri, 2020-03-13 at 09:09 +0100, Christophe Lyon wrote:
> Hi,
> 
> 
> On Thu, 12 Mar 2020 at 23:12, Jeff Law via Gcc-patches
>  wrote:
> > On Thu, 2020-03-12 at 13:23 -0500, Segher Boessenkool wrote:
> > > On Thu, Mar 12, 2020 at 12:03:08PM -0600, Jeff Law wrote:
> > > > On Sat, 2020-02-08 at 10:41 -0600, Segher Boessenkool wrote:
> > > > > I don't think each stanza of code should use it's own "noop-ness", no.
> > > > Richard's patch is actually better than mine in that regard as it 
> > > > handles
> > > > mem
> > > > and
> > > > reg nop moves in an indentical way.  I don't think refactoring the cse.c
> > > > code
> > > > is
> > > > advisable now though -- but I do want to fix the multiply-reported ICE 
> > > > on
> > > > ARM
> > > > and
> > > > Richard's cse changes are the cleanest way to do that that I can see.
> > > 
> > > It looks pretty simple, yeah...  All of CSE is hopelessly fragile, but
> > > this patch does not make things worse.
> > > 
> > > > > I don't know if this patch makes matters worse or not.  It doesn't 
> > > > > seem
> > > > > suitable for stage 4 though.  And Richard said the cse.c part breaks
> > > > > rs6000, if that is true, yes I do object ;-)
> > > > The rs6000 port breakage is trivial to fix.  In fact, I did so and ran 
> > > > it
> > > > through
> > > > my tester, which includes ppc64le and ppc64 a slew of *-elf targets x86
> > > > native
> > > > and more.
> > > 
> > > I don't see anything rs6000 below?  Is it just this generic code?
> > > 
> > > > @@ -5324,9 +5324,11 @@ cse_insn (rtx_insn *insn)
> > > > }
> > > > 
> > > >   /* Similarly, lots of targets don't allow no-op
> > > > -(set (mem x) (mem x)) moves.  */
> > > > +(set (mem x) (mem x)) moves.  Even (set (reg x) (reg x))
> > > > +might be impossible for certain registers (like CC
> > > > registers).  */
> > > >   else if (n_sets == 1
> > > > -  && MEM_P (trial)
> > > > +  && ! CALL_P (insn)
> > > > +  && (MEM_P (trial) || REG_P (trial))
> > > >&& MEM_P (dest)
> > > >&& rtx_equal_p (trial, dest)
> > > >&& !side_effects_p (dest)
> > > 
> > > This adds the !CALL_P (no space btw) condition, why is that?
> > > 
> > > (Is that CCmode reg-reg move comment about rs6000?  Huh, we *do* have
> > > patterns for that, or *should* have at least!)
> > I fixed the extraneous whitespace and committed the change.
> > 
> 
> The new test fails on ARM:
THanks.  I see what's happening, though I'm not sure *how* it happened.  Anyway,
doing some testing on the fix now.

jeff
> 



Re: c: ignore initializers for elements of variable-size types [PR93577]

2020-03-13 Thread Joseph Myers
On Fri, 13 Mar 2020, Christophe Lyon via Gcc-patches wrote:

> The attached small patch fixes the problem (tested on arm and aarch64).
> OK?
> 
> gcc/c/ChangeLog:
> 
> 2020-03-13  Christophe Lyon  
> 
> * c-typeck.c (process_init_element): Handle constructor_type with
> type size represented by POLY_INT_CST.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-03-13  Christophe Lyon  
> 
> * gcc.target/aarch64/sve/acle/general-c/sizeless-1.c: Remove
> superfluous dg-error.
> * gcc.target/aarch64/sve/acle/general-c/sizeless-2.c: Likewise.

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: Remove redundant zero extend

2020-03-13 Thread Jeff Law via Gcc-patches
On Thu, 2020-03-12 at 14:43 -0700, Jim Wilson wrote:
> On Thu, Mar 12, 2020 at 2:38 AM Richard Biener via Gcc-patches
>  wrote:
> > On Thu, Mar 12, 2020 at 4:06 AM Jeff Law via Gcc-patches
> >  wrote:
> > > On Wed, 2020-03-11 at 13:04 +, Nidal Faour via Gcc-patches wrote:
> > > > This patch is a code density oriented and attempt to remove redundant
> > > > sign/zero
> > > > extension from assignment statement.
> > > > The approach taken is to use VRP data while expanding the assignment to
> > > > RTL to
> > > > determine whether a sign/zero extension is necessary.
> > > > Thought the motivation of the patch is code density but it also good for
> > > > speed.
> > > > 
> > > > for example:
> > > > extern unsigned int func ();
> > > > 
> > > >   unsigned char
> > > >   foo (unsigned int arg)
> > > >   {
> > > > if (arg == 2)
> > > >   return 0;
> > > > 
> > > > return (func() == arg || arg == 7);
> > > >   }
> > > > 
> 
> When we reach combine, we have
>   if func result == arg
>  r73 = 1
>   else
>  r73 = arg-7 == 0
>   r75 = zero_extend subreg:QI r73
> 
> On this platform a scc operation always produces 0 or 1, so the
> zero_extend is redundant.  We would need something pushing the zero
> extend back into the if_then_else and then noticing that it is
> redundant on both branches, or something propagating the value ranges
> forward.
> 
> We almost have the latter bit in combine.  Right before we start
> combining instructions, when we set nonzero_sign_valid to 1, we have
> 
> (gdb) print reg_stat[73]
> $1 = (reg_stat_type &) @0x28ad908: {last_death = 0x75de0500,
>   last_set = 0x75de02c0, last_set_value = 0x75ddc490,
>   last_set_table_tick = 6, last_set_label = 5, last_set_nonzero_bits = 1,
>   last_set_sign_bit_copies = 31 '\037', last_set_mode = E_SImode,
>   last_set_invalid = 0 '\000', sign_bit_copies = 31 '\037',
>   nonzero_bits = 1, truncation_label = 0, truncated_to_mode = E_VOIDmode}
> (gdb)
> 
> so we know that r73 has 31 valid sign bits and only one bit is
> nonzero.  This info is still valid when we reach the zero extend insn.
> Unfortunately, we don't use this info to do anything useful.  The
> zero_extend is the only insn in its basic block (not counting the
> unconditional branch that ends the block), so it doesn't get combined
> with anything, and hence doesn't get simplified.  If it did get
> combined with something, then expand_compound_operation would
> eliminate the zero_extend and subreg.  That makes me wonder if there
> is any value in trying to handle single-instruction combinations, just
> to get the simplifications we can get from the nonzero_bits info, but
> it isn't obvious how we would detect when a single insn combine is
> better than the original insn.  Maybe rtx cost info can be used for
> that.
> 
> I looked at combine because I'm familiar with that pass, but the ree
> pass might be the right place to handle this.  I don't know if it has
> any support for handling if statements.  If not, maybe it could be
> extended to handle cases like this.
I've speculated that handling (any_extend (subreg)) would be helpful in ree.c. 
It
doesn't seem like it'd be terribly hard.  The structure of ree.c is painful to
follow sometimes, but it's doable.

jeff



[PATCH] Support the new ("v0") mangling scheme in rust-demangle.

2020-03-13 Thread Eduard-Mihai Burtescu
This is the libiberty (mainly for binutils/gdb) counterpart of
https://github.com/alexcrichton/rustc-demangle/pull/23.

Relevant links for the new Rust mangling scheme (aka "v0"):
* Rust RFC: https://github.com/rust-lang/rfcs/pull/2603
* tracking issue: https://github.com/rust-lang/rust/issues/60705
* implementation: https://github.com/rust-lang/rust/pull/57967

This implementation includes full support for UTF-8 identifiers
via punycode, so I've included a testcase for that as well.
(Let me know if it causes any issues and I'll take it out)

Last year I've submitted several small patches to rust-demangle
in preparation for upstreaming the entire new demangler, and
feedback from that has proven useful.
For example, I started with error-handling macros, but instead
the code now has "rdm->errored = 1;" before several returns/gotos.

The patch is attached instead of inline, as it's over 1000 lines long.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Also, I have no commit access, so I'd be thankful if
someone would commit this for me if/once approved.From 689442d88b87453a697d2121e45a55071cb9056f Mon Sep 17 00:00:00 2001
From: Eduard-Mihai Burtescu 
Date: Fri, 13 Mar 2020 21:07:28 +0200
Subject: [PATCH] Support the new ("v0") mangling scheme in rust-demangle.

This is the libiberty (mainly for binutils/gdb) counterpart of
https://github.com/alexcrichton/rustc-demangle/pull/23.

Relevant links for the new Rust mangling scheme (aka "v0"):
* Rust RFC: https://github.com/rust-lang/rfcs/pull/2603
* tracking issue: https://github.com/rust-lang/rust/issues/60705
* implementation: https://github.com/rust-lang/rust/pull/57967

This implementation includes full support for UTF-8 identifiers
via punycode, so I've included a testcase for that as well.

2020-03-13  Eduard-Mihai Burtescu  
libiberty/ChangeLog:
	* rust-demangle.c (struct rust_demangler): Add
	skipping_printing and bound_lifetime_depth fields.
	(eat): Add (v0-only).
	(parse_integer_62): Add (v0-only).
	(parse_opt_integer_62): Add (v0-only).
	(parse_disambiguator): Add (v0-only).
	(struct rust_mangled_ident): Add punycode{,_len} fields.
	(parse_ident): Support v0 identifiers.
	(print_str): Respect skipping_printing.
	(print_uint64): Add (v0-only).
	(print_uint64_hex): Add (v0-only).
	(print_ident): Respect skipping_printing,
	Support v0 identifiers.
	(print_lifetime_from_index): Add (v0-only).
	(demangle_binder): Add (v0-only).
	(demangle_path): Add (v0-only).
	(demangle_generic_arg): Add (v0-only).
	(demangle_type): Add (v0-only).
	(demangle_path_maybe_open_generics): Add (v0-only).
	(demangle_dyn_trait): Add (v0-only).
	(demangle_const): Add (v0-only).
	(demangle_const_uint): Add (v0-only).
	(basic_type): Add (v0-only).
	(rust_demangle_callback): Support v0 symbols.
	* testsuite/rust-demangle-expected: Add v0 testcases.

diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c
index b87365c85fe9..9a31392458e0 100644
--- a/libiberty/rust-demangle.c
+++ b/libiberty/rust-demangle.c
@@ -1,6 +1,7 @@
 /* Demangler for the Rust programming language
Copyright (C) 2016-2020 Free Software Foundation, Inc.
Written by David Tolnay (dtol...@gmail.com).
+   Rewritten by Eduard-Mihai Burtescu (ed...@lyken.rs) for v0 support.
 
 This file is part of the libiberty library.
 Libiberty is free software; you can redistribute it and/or
@@ -64,11 +65,16 @@ struct rust_demangler
   /* Non-zero if any error occurred. */
   int errored;
 
+  /* Non-zero if nothing should be printed. */
+  int skipping_printing;
+
   /* Non-zero if printing should be verbose (e.g. include hashes). */
   int verbose;
 
   /* Rust mangling version, with legacy mangling being -1. */
   int version;
+
+  uint64_t bound_lifetime_depth;
 };
 
 /* Parsing functions. */
@@ -81,6 +87,18 @@ peek (const struct rust_demangler *rdm)
   return 0;
 }
 
+static int
+eat (struct rust_demangler *rdm, char c)
+{
+  if (peek (rdm) == c)
+{
+  rdm->next++;
+  return 1;
+}
+  else
+return 0;
+}
+
 static char
 next (struct rust_demangler *rdm)
 {
@@ -92,11 +110,58 @@ next (struct rust_demangler *rdm)
   return c;
 }
 
+static uint64_t
+parse_integer_62 (struct rust_demangler *rdm)
+{
+  char c;
+  uint64_t x;
+
+  if (eat (rdm, '_'))
+return 0;
+
+  x = 0;
+  while (!eat (rdm, '_'))
+{
+  c = next (rdm);
+  x *= 62;
+  if (ISDIGIT (c))
+x += c - '0';
+  else if (ISLOWER (c))
+x += 10 + (c - 'a');
+  else if (ISUPPER (c))
+x += 10 + 26 + (c - 'A');
+  else
+{
+  rdm->errored = 1;
+  return 0;
+}
+}
+  return x + 1;
+}
+
+static uint64_t
+parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
+{
+  if (!eat (rdm, tag))
+return 0;
+  return 1 + parse_integer_62 (rdm);
+}
+
+static uint64_t
+parse_disambiguator (struct rust_demangler *rdm)
+{
+  return parse_opt_integer_62 (rdm, 's');
+}
+
 struct rust_mangled_ident
 {
   /* ASCII part of the identifier. */
   const char *ascii;
  

Re: Review request summary

2020-03-13 Thread Richard Sandiford
Vasee Vinayagamoorthy  writes:
> Hello,
>
> The commit [1], as referenced below, which added a new test case,
> contains a typo in the dg-final directive, which eventually causes
> the gcc.log file to not be produced. This patch fixes the typo.
>
> Tested by running regressions for aarch64-none-elf.
>
> OK for trunk?
>
>
>
>
> gcc/testsuite/ChangeLog:
>
> 2020-03-11  Vasee Vinayagamoorthy  

(stray backslash btw, probably a mailer-ism)

>
>   * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: Fix DejaGnu 
> typo.

Looks good, thanks, and sorry again for messing this up.

Pushed to master.

Richard
>
>
> Regards
> Vasee Vinayagamoorthy
>
> PS: I do not have commit rights, therefore could I request
> someone to commit it on my behalf if this patch is approved.
> Thanks in advance.
>
> [1] 
> https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1f520d3412962e22b0338461d82f41abba8a4f12
>
> diff --git 
> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c 
> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
> index c2631a541ea..05c305840b6 100644
> --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
> @@ -1,7 +1,7 @@
>  /* { dg-do assemble { target { aarch64*-*-* } } } */
>  /* { dg-require-effective-target aarch64_asm_bf16_ok } */
>  /* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */
> -/* { dg-final { check-function-bodies "**" "" "-O[^0]" } } */
> +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
>  
>  #include 
>  


Review request summary

2020-03-13 Thread Vasee Vinayagamoorthy
Hello,

The commit [1], as referenced below, which added a new test case,
contains a typo in the dg-final directive, which eventually causes
the gcc.log file to not be produced. This patch fixes the typo.

Tested by running regressions for aarch64-none-elf.

OK for trunk?




gcc/testsuite/ChangeLog:

2020-03-11  Vasee Vinayagamoorthy  

* gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: Fix DejaGnu 
typo.


Regards
Vasee Vinayagamoorthy

PS: I do not have commit rights, therefore could I request
someone to commit it on my behalf if this patch is approved.
Thanks in advance.

[1] 
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1f520d3412962e22b0338461d82f41abba8a4f12
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
index c2631a541ea..05c305840b6 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
@@ -1,7 +1,7 @@
 /* { dg-do assemble { target { aarch64*-*-* } } } */
 /* { dg-require-effective-target aarch64_asm_bf16_ok } */
 /* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */
-/* { dg-final { check-function-bodies "**" "" "-O[^0]" } } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
 
 #include 
 


[PATCH] libstdc++: Skip 91371.cc for x32.

2020-03-13 Thread Uros Bizjak via Gcc-patches
x32 does not support MS ABI, skip testcases that require it.

2020-03-13  Uroš Bizjak  

* testsuite/20_util/bind/91371.cc: Skip for x32.
* testsuite/20_util/is_function/91371.cc: Ditto.
* testsuite/20_util/is_member_function_pointer/91371.cc: Ditto.
* testsuite/20_util/is_object/91371.cc: Ditto.

Tested on x86_64-linux-gnu {,-mx32}.

Fixes -mx32 libstdc++ failures in [1].

OK for mainline?

[1] https://gcc.gnu.org/pipermail/gcc-testresults/2020-March/556130.html

Uros.
diff --git a/libstdc++-v3/testsuite/20_util/bind/91371.cc 
b/libstdc++-v3/testsuite/20_util/bind/91371.cc
index a076177ff73..5c872f14148 100644
--- a/libstdc++-v3/testsuite/20_util/bind/91371.cc
+++ b/libstdc++-v3/testsuite/20_util/bind/91371.cc
@@ -15,7 +15,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-do compile { target { { i?86-*-* x86_64-*-* } && { ! x32 } } } }
 // { dg-require-effective-target c++11 }
 
 #include 
diff --git a/libstdc++-v3/testsuite/20_util/is_function/91371.cc 
b/libstdc++-v3/testsuite/20_util/is_function/91371.cc
index eccb3e0c121..700ee60aafd 100644
--- a/libstdc++-v3/testsuite/20_util/is_function/91371.cc
+++ b/libstdc++-v3/testsuite/20_util/is_function/91371.cc
@@ -15,7 +15,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-do compile { target { { i?86-*-* x86_64-*-* } && { ! x32 } } } }
 // { dg-require-effective-target c++11 }
 
 #include 
diff --git a/libstdc++-v3/testsuite/20_util/is_member_function_pointer/91371.cc 
b/libstdc++-v3/testsuite/20_util/is_member_function_pointer/91371.cc
index ace05e041c3..376be9e116e 100644
--- a/libstdc++-v3/testsuite/20_util/is_member_function_pointer/91371.cc
+++ b/libstdc++-v3/testsuite/20_util/is_member_function_pointer/91371.cc
@@ -15,7 +15,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-do compile { target { { i?86-*-* x86_64-*-* } && { ! x32 } } } }
 // { dg-require-effective-target c++11 }
 
 #include 
diff --git a/libstdc++-v3/testsuite/20_util/is_object/91371.cc 
b/libstdc++-v3/testsuite/20_util/is_object/91371.cc
index 8387cdbed9e..6fc3fd85d2e 100644
--- a/libstdc++-v3/testsuite/20_util/is_object/91371.cc
+++ b/libstdc++-v3/testsuite/20_util/is_object/91371.cc
@@ -15,7 +15,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-do compile { target { { i?86-*-* x86_64-*-* } && { ! x32 } } } }
 // { dg-require-effective-target c++11 }
 
 #include 


[PATCH][Arm][2/4] Custom Datapath Extension intrinsics: instructions using FPU/MVE S/D registers

2020-03-13 Thread Dennis Zhang
Hi all,

This patch is part of a series that adds support for the ARMv8.m Custom 
Datapath Extension (CDE).
It enables the ACLE intrinsics calling VCX1, VCX2, and VCX3 
instructions who work with FPU/MVE 32-bit/64-bit registers.

This patch depends on the CDE feature patch: 
https://gcc.gnu.org/pipermail/gcc-patches/2020-March/541921.html
It also depends on the MVE framework patch: 
https://gcc.gnu.org/pipermail/gcc-patches/2020-February/540415.html
ISA has been announced at 
https://developer.arm.com/architectures/instruction-sets/custom-instructions

Regtested and bootstrapped for arm-none-linux-gnueabi-armv8-m.main.

Is it OK for commit please?

Cheers
Dennis

gcc/ChangeLog:

2020-03-12  Dennis Zhang  
 Matthew Malcomson 

* config/arm/arm-builtins.c (CX_IMM_QUALIFIERS): New macro.
(CX_UNARY_QUALIFIERS, CX_BINARY_QUALIFIERS): Likewise.
(CX_TERNARY_QUALIFIERS): Likewise.
(ARM_BUILTIN_CDE_PATTERN_START): Likewise.
(ARM_BUILTIN_CDE_PATTERN_END): Likewise.
(arm_init_acle_builtins): Initialize CDE builtins.
(arm_expand_acle_builtin): Check CDE constant operands.
* config/arm/arm.h (ARM_CDE_CONST_COPROC): New macro to set the range
of CDE constant operand.
(ARM_VCDE_CONST_1, ARM_VCDE_CONST_2, ARM_VCDE_CONST_3): Likewise.
* config/arm/arm_cde.h (__arm_vcx1_u32): New macro of ACLE interface.
(__arm_vcx1a_u32, __arm_vcx2_u32, __arm_vcx2a_u32): Likewise.
(__arm_vcx3_u32, __arm_vcx3a_u32, __arm_vcx1d_u64): Likewise.
(__arm_vcx1da_u64, __arm_vcx2d_u64, __arm_vcx2da_u64): Likewise.
(__arm_vcx3d_u64, __arm_vcx3da_u64): Likewise.
* config/arm/arm_cde_builtins.def: New file.
* config/arm/iterators.md (V_reg): New attribute of SI.
* config/arm/predicates.md (const_int_coproc_operand): New.
(const_int_vcde1_operand, const_int_vcde2_operand): New.
(const_int_vcde3_operand): New.
* config/arm/unspecs.md (UNSPEC_VCDE, UNSPEC_VCDEA): New.
* config/arm/vfp.md (arm_vcx1): New entry.
(arm_vcx1a, arm_vcx2, arm_vcx2a): Likewise.
(arm_vcx3, arm_vcx3a): Likewise.

gcc/testsuite/ChangeLog:

2020-03-12  Dennis Zhang  

* gcc.target/arm/acle/cde_v_1.c: New test.
* gcc.target/arm/acle/cde_v_1_err.c: New test.
* gcc.target/arm/acle/cde_v_1_mve.c: New test.diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 4d31405cf6e09e3a61faa3e8142940bbdb23c60a..89142a276b071b069cddabb5170ad0d4ca213d20 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -305,6 +305,35 @@ arm_mrrc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define MRRC_QUALIFIERS \
   (arm_mrrc_qualifiers)
 
+/* T (immediate, unsigned immediate).  */
+static enum arm_type_qualifiers
+arm_cx_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_immediate, qualifier_unsigned_immediate };
+#define CX_IMM_QUALIFIERS (arm_cx_imm_qualifiers)
+
+/* T (immediate, T, unsigned immediate).  */
+static enum arm_type_qualifiers
+arm_cx_unary_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_immediate, qualifier_none,
+  qualifier_unsigned_immediate };
+#define CX_UNARY_QUALIFIERS (arm_cx_unary_qualifiers)
+
+/* T (immediate, T, T, unsigned immediate).  */
+static enum arm_type_qualifiers
+arm_cx_binary_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_immediate,
+  qualifier_none, qualifier_none,
+  qualifier_unsigned_immediate };
+#define CX_BINARY_QUALIFIERS (arm_cx_binary_qualifiers)
+
+/* T (immediate, T, T, T, unsigned immediate).  */
+static enum arm_type_qualifiers
+arm_cx_ternary_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_immediate,
+  qualifier_none, qualifier_none, qualifier_none,
+  qualifier_unsigned_immediate };
+#define CX_TERNARY_QUALIFIERS (arm_cx_ternary_qualifiers)
+
 /* The first argument (return type) of a store should be void type,
which we represent with qualifier_void.  Their first operand will be
a DImode pointer to the location to store to, so we must use
@@ -438,7 +467,23 @@ static arm_builtin_datum acle_builtin_data[] =
 };
 
 #undef VAR1
+/* IMM_MAX sets the maximum valid value of the CDE immediate operand.
+   ECF_FLAG sets the flag used for set_call_expr_flags.  */
+#define VAR1(T, N, A, IMM_MAX, ECF_FLAG) \
+  {{#N #A, UP (A), CODE_FOR_arm_##N##A, 0, T##_QUALIFIERS}, IMM_MAX, ECF_FLAG},
+
+typedef struct {
+  arm_builtin_datum base;
+  unsigned int imm_max;
+  int ecf_flag;
+} arm_builtin_cde_datum;
+
+static arm_builtin_cde_datum cde_builtin_data[] =
+{
+#include "arm_cde_builtins.def"
+};
 
+#undef VAR1
 #define VAR1(T, N, X) \
   ARM_BUILTIN_NEON_##N##X,
 
@@ -732,6 +777,14 @@ enum arm_builtins
 
 #include "arm_acle_builtins.def"
 
+#undef VAR1
+#define VAR1(T, N, X, ... ) \
+  ARM_BUILTIN_##N##X,
+
+  ARM_BUILTIN_CDE_BASE,
+
+#include "arm_cde_builtins.def"
+
   ARM_BUILTIN_MAX
 };
 
@@ 

[PATCH] LRA: fix for PR92303

2020-03-13 Thread Vladimir Makarov via Gcc-patches

  The following committed patch solves

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92303

  The patch was successfully bootstrapped and tested on x86-64.


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d63b83194d5..4ea81e6c404 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2020-03-13  Vladimir Makarov  
+
+	PR rtl-optimization/92303
+	* lra-spills.c (remove_pseudos): Try to simplify memory subreg.
+
 2020-03-13  Segher Boessenkool  
 
 	PR rtl-optimization/94148
diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c
index 0022611785c..01256e711bc 100644
--- a/gcc/lra-spills.c
+++ b/gcc/lra-spills.c
@@ -421,7 +421,16 @@ remove_pseudos (rtx *loc, rtx_insn *insn)
   if (*loc == NULL_RTX)
 return res;
   code = GET_CODE (*loc);
-  if (code == REG && (i = REGNO (*loc)) >= FIRST_PSEUDO_REGISTER
+  if (code == SUBREG && REG_P (SUBREG_REG (*loc)))
+{
+  /* Try to remove memory subregs to simplify LRA job
+ and avoid LRA cycling in case of subreg memory reload.  */
+  res = remove_pseudos (_REG (*loc), insn);
+  if (GET_CODE (SUBREG_REG (*loc)) == MEM)
+	alter_subreg (loc, false);
+  return res;
+}
+  else if (code == REG && (i = REGNO (*loc)) >= FIRST_PSEUDO_REGISTER
   && lra_get_regno_hard_regno (i) < 0
   /* We do not want to assign memory for former scratches because
 	 it might result in an address reload for some targets.	 In


[committed] correct a built-in declaration in gcc.dg/torture/pr54261-1.c

2020-03-13 Thread Martin Sebor via Gcc-patches

Jeff's build-bot reported warnings in the test as a result of the recent
tightening up of the -Wbuiltin-declaration-mismatch checking. The commit
at the following link adjusts the test to avoid the warning:

  https://gcc.gnu.org/pipermail/gcc-cvs/2020-March/271705.html

Tested on x86_64-linux both with and without the built-in declaration.

Martin


Re: [PATCH] avoid treating more incompatible redeclarations as builtin-ins [PR94040]

2020-03-13 Thread Martin Sebor via Gcc-patches

On 3/6/20 2:11 AM, Richard Biener wrote:

On Fri, Mar 6, 2020 at 2:04 AM Martin Sebor  wrote:


Treating incompatible redeclarations of built-in functions as built-ins
is a problem not just for the middle-end but even for the C front-end
itself, when different parts of it make  different assumptions about
what is and isn't valid.  The test case that is the subject of this
bug report (a GCC 9 and 10 regression) is one such example: it shows
that the attribute format validation assumes the function declaration
the attribute applies to has passed the prerequisite validation.  But
that's not the case when the function is an incompatibly redeclared
built-in where a format attribute's positional argument refers to
parameter of an invalid/nonsensical type.

The attached patch further adjusts the front-end to consider even more
incompatible redeclarations as built-ins: in particular, redeclarations
whose pointer arguments point to incompatible variants of unqualified
types (e.g., char* vs int*, though not char* vs const char*).

Besides avoiding the front-end and some middle-end ICEs, the effect
of the patch is also to diagnose more incompatible redeclarations
of built-ins than before, but fewer invalid calls to such functions
(since they're no longer considered built-ins).  That seems like
an unavoidable trade-off.

Tested on x86_64-linux.  Is this acceptable for GCC 10?  How about 9?


The actual patch needs reviewing from a FE maintainer but I'd support
putting this in for GCC 10.

It would be nice if we can put in code like the following to catch "bogus"
built-in declarations.  I've put it in call verification because there it's
where it matters most, free-lang-data would be another canonical place
which would then process all "reachable" function declarations but we
don't yet call free-lang-data when not doing LTO.


I committed the patch to trunk as it was but I wasn't brave enough
to also add this.  I have tested it though and it fails in pr89998*.c
which declares sprintf to return unsigned int while the built-in has
it return signed in.  GCC does diagnose these mismatches but only
with -Wextra and it still accepts such redeclarations as benign.

The patch also fails in c-c++-common/dfp/signbit-2.c which declares
int signbit (double) while GCC declares it as a variadic function.
(There a few more similar failures like this.)

I'm sure these expected mismatches can be dealt with and I'm up for
tightening up the checking even more and requiring exact matches but
I'd rather not do it at this stage.

Martin



diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index f7b817d94e6..ae695891bd4 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3356,6 +3356,17 @@ verify_gimple_call (gcall *stmt)
 return true;
   }

+  if (fndecl
+  && fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+  && !types_compatible_p (TREE_TYPE (fndecl),
+ TREE_TYPE (builtin_decl_explicit
+   (DECL_FUNCTION_CODE (fndecl)
+{
+  error ("function declaration declared built-in but does not "
+"match expected type");
+  return true;
+}
+
tree lhs = gimple_call_lhs (stmt);
if (lhs
&& (!is_gimple_lvalue (lhs)



Martin




Re: [PATCH] df: Don't abuse bb->aux (PR94148)

2020-03-13 Thread Segher Boessenkool
On Fri, Mar 13, 2020 at 07:30:17AM +0100, Richard Biener wrote:
> On March 12, 2020 11:29:45 PM GMT+01:00, Segher Boessenkool 
>  wrote:
> >The df dataflow solvers use the aux field in the basic_block struct,
> >although that is reserved for any use by passes.  And not only that,
> >it is required that you set all such fields to NULL before calling
> >the solvers, or you quietly get wrong results.
> >
> >This changes the solvers to use a local array for last_change_age
> >instead, just like it already had a local array for last_visit_age.
> >
> >Tested on powerpc64-linux {-m32,-m64}.  Also tested with the tests for
> >PR94042, which it also solves (I need to confirm that still though,
> >there are other testsuite problems interfering with my testing).
> 
> OK for trunk and affected branches (can you report back how old this issue 
> is?) 

It's from r161197:
commit 1a0f3fa13745c4052c53e6d84c64539fb5f57e00
Author: Jan Hubicka 
Date:   Tue Jun 22 17:51:15 2010 +0200

This is https://patchwork.ozlabs.org/patch/55394/ btw, which has the ML
discussion and everything.

$ git branch -r --contains 1a0f3fa13745|grep fsf.releases
  fsf/releases/gcc-4.6
  fsf/releases/gcc-4.7
  fsf/releases/gcc-4.8
  fsf/releases/gcc-4.9
  fsf/releases/gcc-5
  fsf/releases/gcc-6
  fsf/releases/gcc-7
  fsf/releases/gcc-8
  fsf/releases/gcc-9

(Maybe there was a backport to 4.5, but that is far back enough already ;-) )


Segher


Re: [PATCH] avoid treating more incompatible redeclarations as builtin-ins [PR94040]

2020-03-13 Thread Martin Sebor via Gcc-patches

On 3/12/20 7:17 PM, Joseph Myers wrote:

On Thu, 5 Mar 2020, Martin Sebor wrote:


Tested on x86_64-linux.  Is this acceptable for GCC 10?  How about 9?


OK for GCC 10.


Thank you.  I committed it to trunk in r10-7162.

Do you not want me to commit it to GCC 9 or are you leaving it up to me?

Martin


Re: [PATCH v2] generate EH info for volatile asm statements (PR93981)

2020-03-13 Thread J.W. Jagersma via Gcc-patches
On 2020-03-13 14:31, Richard Sandiford wrote:
> "J.W. Jagersma"  writes:
>> On 2020-03-12 10:59, Richard Sandiford wrote:
>>> The other case I mentioned was equivalent to:
>>>
>>>  int
>>>  test_mem2 ()
>>>  {
>>>int i = 2;
>>>try
>>>  {
>>>asm volatile ("ud2; mov%z0 $1, %0" : "=m" (i));
>>>  }
>>>catch (const illegal_opcode&)
>>>  {
>>>if (i == 2) return 0;
>>>  }
>>>return i;
>>>  }
>>>
>>> Is this test expected to pass?
>>
>> Good point.  Yes, this should pass, and I thought it did, but it seems
>> I was mistaken.  To fix that would require transforming "=m" into "+m"
>> as Segher suggested.
>>
>>> However, these "mem" tests are testing gimple register types, so they'll
>>> still be SSA names outside of the asm.  It would also be good to test what
>>> happens for non-register types, e.g. something like:
>>>
>>>  int
>>>  test_mem3 ()
>>>  {
>>>int i[5] = { 1, 2, 3, 4, 5 };
>>>try
>>>  {
>>>asm volatile ("ud2; " : "=m" (i));
>>>  }
>>>catch (const illegal_opcode&)
>>>  {
>>>if (i[0] == 1 && ...) return 0;
>>>  }
>>>return ...;
>>>  }
>>>
>>> and similarly for ud2 after the store.
>>
>> I think I see what you mean.  Would such a test not also cover what the
>> current test_mem() function does?  If so then that could be removed.
> 
> I think it's better to have tests for both the is_gimple_reg_type case
> and the !is_gimple_reg_type case, so keeping test_mem sounds better.

Especially because the initial 'int i = 2;' is currently eliminated
for gimple register types that end up in memory, I see.

>> Also in my current patch I used: (tree-eh.c:2104)
>>
>> if (tree_could_throw_p (opval)
>> || !is_gimple_reg_type (TREE_TYPE (opval))
>> || !is_gimple_reg (get_base_address (opval)))
>>
>> to determine the difference between a register and memory type.  Could
>> there potentially be a case where that identifies an operand as gimple
>> register type, but ends up compiled as a memory operand to an asm?
> 
> The constraints can support both registers and memory, e.g. via "rm"
> or "g".  I'm not sure off-hand what we do with those for gimple.

In gimplify_asm_expr (gimplify.c:6281), I see that:

if (!allows_reg && allows_mem)
  mark_addressable (TREE_VALUE (link));

So TREE_ADDRESSABLE I think is a good predicate to distinguish between
pure memory constraints and everything else.  I have changed the above
to:

   if (!allows_reg && allows_mem)
-   mark_addressable (TREE_VALUE (link));
+   {
+ mark_addressable (TREE_VALUE (link));
+ /* If non-call exceptions are enabled, mark each memory output
+as inout, so that previous assignments are not eliminated.  */
+ if (cfun->can_throw_non_call_exceptions)
+   is_inout = true;
+   }

So that memory operands are converted to in+out.  Then in
lower_eh_constructs_2 we can use TREE_ADDRESSABLE to find non-memory
operands:

+   if (tree_could_throw_p (opval)
+   || TREE_ADDRESSABLE (opval))
+ continue;

(tree_could_throw_p may be superfluous here)
Then I have a new test case that forces an "=rm" into memory, and then
confirm that it behaves the same as "=r":

+int
+rm ()
+{
+  int i = 2;
+  try
+{
+  asm volatile ("mov%z0 $1, %0; ud2" : "=rm" (i) :
+: "eax", "ebx", "ecx", "edx", "edi", "esi"
+#ifdef __x86_64__
+, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#endif
+);
+}
+  catch (const illegal_opcode&)
+{
+  if (i == 2) return 0;
+}
+  return i;
+}

>>> It wasn't clear from my message above, but: I was mostly worried about
>>> requiring the asm to treat memory operands in a certain way when the
>>> exception is thrown.  IMO it would be better to say that the values of
>>> memory operands are undefined on the exception edge.
>>
>> I'm not sure about the semantic difference between undefined and
>> unspecified.  But gcc should not write to any memory after a throw,
>> because that write operation itself may have side effects.  Likewise
>> asm memory operands should not be redirected to a temporary for the
>> same reason, and also because gcc can't possibly know which parts of
>> an (offsettable) memory operand are written to.
> 
> This might not be what you mean, but for:
> 
> int
> f1 (void)
> {
>   int x = 1;
>   asm volatile ("": "=m" (x));
>   return x;
> }
> 
> struct s { int i[5]; };
> struct s
> f2 (void)
> {
>   struct s x = { 1, 2, 3, 4, 5 };
>   asm volatile ("": "=m" (x));
>   return x;
> }
> 
> we do delete "x = 1" for f1.   I think that's the expected behaviour.
> We don't yet delete the initialisation in f2, but I think in principle
> we could.
> 
> So the kind of contract I was advocating was:
> 
> - the compiler can't make any assumptions 

Order ID: 645-8653-8267689766

2020-03-13 Thread Order Shipping

[https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRRRtgAilx9qs7bQ5XpBzodzVNZfCkpc_jR3xdjbzBzdqjR2KSv]
Shipping Confirmation
Order ID: 645-8653-8267689766
Hello Customer,
Your order is on the way. If you need to return an item from this shipment or 
manage other orders, please call   .
If you need further assistance with your order, Please call Customer Service 
1-855-948-3687
Arriving:
Sunday, March 15


Your package was sent to:
Charles Harrison
1310 Virginia Blvd,
San Antonio, TX 78210

Shipment Details
[https://zdnet3.cbsistatic.com/hub/i/r/2019/08/05/b2e40423-7c4c-48b5-9c7a-ea7ee92f96fe/thumbnail/770x433/c0942922b4c437cffdac1b9d2b0fd7e6/13-inch-mbpro-header.jpg]
New MacBook Pro (13-inch, 8GB RAM, 128GB Storage) - Space Gray
$1199.99

Item Subtotal:
$1199.99
Shipping & Handling:
$0.00
POD Convenience Fee:
$0.00
Shipment Total:
$1199.99
If you need further assistance with your order, please call Customer Service 
1-855-948-3687
We hope to see you again soon!







Re: [PATCH PR94026] combine missed opportunity to simplify comparisons with zero

2020-03-13 Thread Segher Boessenkool
On Fri, Mar 13, 2020 at 03:21:18AM +, Yangfei (Felix) wrote:
> > On Wed, Mar 04, 2020 at 08:39:36AM +, Yangfei (Felix) wrote:
> > >   This is a simple fix for PR94026.
> > >   With this fix, combine will try make an extraction if we are in a 
> > > equality
> > comparison and this is an AND
> > >   with a constant which is power of two minus one.  Shift here should be 
> > > an
> > constant.  For example, combine
> > >   will transform (compare (and (lshiftrt x 8) 6) 0) to (compare 
> > > (zero_extract
> > (x 2 9)) 0).
> > 
> > Why is that a good thing?
> 
> The reported test case is reduced from spec2017 541.leela_r.  I have pasted 
> original code snippet on the bugzilla.  
> We found other compilers like aocc/llvm can catch this pattern and simplify 
> it.  

That wasn't my question, let me rephrase: why would writing it as
zero_extract (instead of as a more canonical form) be wanted?

The aarch backend only has zero_extract formulations for most of the
bitfield instructions.  If you fix that problem, all of this should go
away?  Like, the testcase in the PR starts with

Trying 7 -> 8:
7: r99:SI=r103:SI>>r104:SI#0
  REG_DEAD r104:SI
  REG_DEAD r103:SI
8: r100:SI=r99:SI&0x6
  REG_DEAD r99:SI
Failed to match this instruction:
(set (reg:SI 100)
(and:SI (ashiftrt:SI (reg:SI 103)
(subreg:QI (reg:SI 104) 0))
(const_int 6 [0x6])))

and that should match already (that's an ubfm (ubfx))?


> > (There should be thorough tests on many archs, showing it helps on average,
> > and it doesn't regress anything.  I can do that for you, but not right now).
> 
> I only have aarch64 & x86_64 linux available and have tested this patch with 
> spec17 on both platforms.  
> No obvious improvement & regression witnessed.  This is expected as only one 
> instruction is reduced here.  

What should be tested is what new combinations are done, and which are
*no longer* done.

> > In general, we should have *fewer* zero_extract, not more.

Some reasons for that:

1) All those can be expressed with simpler operations as well;
2) Most very similar expressions cannot be expressed as zero_extract,
although many architectures can handle (some of) those just fine;
3) The optimizers do not handle zero_extract very well at all (this
includes simplify-rtx, to start with).

sign_extract is nastier -- we really want to have a sign_extend that
works on separate bits, not as coarse as address units as we have now --
but it currently isn't handled much either.


Segher


Re: [PATCH v2] generate EH info for volatile asm statements (PR93981)

2020-03-13 Thread Segher Boessenkool
On Fri, Mar 13, 2020 at 02:06:13AM +0100, J.W. Jagersma wrote:
> I don't want to unnecessarily pessimize register outputs if it can be
> avoided.  And even if you do change register outputs to in+out, they
> are still more likely to contain some intermediate value that you would
> want to discard on throwing.

But it also may contain values that you should keep, if it threw the
exception on an earlier machine instruction.

> If you think of it in terms of a function call, the closest equivalent
> of register vs memory outputs would be:
> reg = f ();
> vs.
> f ();
> If f() throws, then in the first case, 'reg' is never assigned to. For
> the second case, you can't expect 'mem' to remain unmodified.

But you also cannot expect it to be modified: the old value may remain
a live value.

> There is also the case of "=rm" and similar which have not been
> discussed so far, but are (in my experience) the most common operand
> constraint.

On some CISC targets, sure.  Not common on load-store architectures
(like most things from the last 30+ years).


Segher


Re: [RS6000] make PLT loads volatile

2020-03-13 Thread Segher Boessenkool
Hi!

On Fri, Mar 13, 2020 at 10:06:01AM +1030, Alan Modra wrote:
> On Thu, Mar 12, 2020 at 11:57:17AM -0500, Segher Boessenkool wrote:
> > On Thu, Mar 12, 2020 at 01:18:50PM +1030, Alan Modra wrote:
> > > With lazy PLT resolution the first load of a PLT entry may be a value
> > > pointing at a resolver stub.  gcc's loop processing can result in the
> > > PLT load in inline PLT calls being hoisted out of a loop in the
> > > mistaken idea that this is an optimisation.  It isn't.  If the value
> > > hoisted was that for a resolver stub then every call to that function
> > > in the loop will go via the resolver, slowing things down quite
> > > dramatically.
> > > 
> > > The PLT really is volatile, so teach gcc about that.
> > 
> > It would be nice if we could keep it cached after it has been resolved
> > once, this has potential for regressing performance if we don't?  And
> > LD_BIND_NOW should keep working just as fast as it is now, too?
> 
> Using a call-saved register to cache a load out of the PLT looks
> really silly

Who said anything about using call-saved registers?  GCC will usually
make a stack slot for this, and only use a non-volatile register when
that is profitable.  (I know it is a bit too aggressive with it, but
that is a generic problem).

> when the inline PLT call is turned back into a direct
> call by the linker.

Ah, so yeah, for direct calls we do not want this.  I was thinking this
was about indirect calls (via a bctrl that is), dunno how I got that
misperception.  Sorry.

What is this like for indirect calls (at C level)?  Does your patch do
anything to those?


Segher


[commited] testsuite: Assorted x32 testsuite fixes

2020-03-13 Thread Uros Bizjak via Gcc-patches
2020-03-13  Uroš Bizjak  

* gcc.target/i386/pr64409.c: Do not limit compilation to x32 targets.
(dg-error): Quote 'ms_abi' attribute.
* gcc.target/i386/pr71958.c: Do not limit compilation to x32 targets.
Require maybe_x32 effective target.
(dg-options): Add -mx32.
(dg-error): Quote 'ms_abi' attribute.
* gcc.target/i386/pr90096.c (dg-error): Update relative
location of target x32 error.

Tested on x86_64-linux-gnu {,-mx32}.

Uros.
diff --git a/gcc/testsuite/gcc.target/i386/pr64409.c 
b/gcc/testsuite/gcc.target/i386/pr64409.c
index 7bf9d1e398d..9df9c1817d4 100644
--- a/gcc/testsuite/gcc.target/i386/pr64409.c
+++ b/gcc/testsuite/gcc.target/i386/pr64409.c
@@ -1,6 +1,6 @@
-/* { dg-do compile { target x32 } } */
+/* { dg-do compile } */
 /* { dg-require-effective-target maybe_x32 } */
 /* { dg-options "-O0 -mx32" } */
 
 int a;
-int* __attribute__ ((ms_abi)) fn1 () { return  } /* { dg-error "X32 does 
not support ms_abi attribute" } */
+int* __attribute__ ((ms_abi)) fn1 () { return  } /* { dg-error "X32 does 
not support 'ms_abi' attribute" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr71958.c 
b/gcc/testsuite/gcc.target/i386/pr71958.c
index 5e60d1787d6..81102fc8eeb 100644
--- a/gcc/testsuite/gcc.target/i386/pr71958.c
+++ b/gcc/testsuite/gcc.target/i386/pr71958.c
@@ -1,6 +1,7 @@
-/* { dg-do compile { target x32 } } */
-/* { dg-options "-mabi=ms" } */
-/* { dg-error "-mabi=ms not supported with X32 ABI" "" { target *-*-* } 0 } */
+/* { dg-do compile } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -mabi=ms" } */
+/* { dg-error "'-mabi=ms' not supported with X32 ABI" "" { target *-*-* } 0 } 
*/
 
 void main ()
 {
diff --git a/gcc/testsuite/gcc.target/i386/pr90096.c 
b/gcc/testsuite/gcc.target/i386/pr90096.c
index fe29e3c76fe..871e0ffc691 100644
--- a/gcc/testsuite/gcc.target/i386/pr90096.c
+++ b/gcc/testsuite/gcc.target/i386/pr90096.c
@@ -19,6 +19,6 @@ bar (__m128 *p)
 {
   return _mm_cvtt_roundss_u64 (*p, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC);
   /* { dg-error "needs isa option -m64 -mavx512f" "" { target lp64 } .-1 } */
-  /* { dg-error "needs isa option -mx32 -mavx512f" "" { target x32 } .-1 } */
+  /* { dg-error "needs isa option -mx32 -mavx512f" "" { target x32 } .-2 } */
 }
 #endif


Re: [PATCH] Increase min-lto-partition.

2020-03-13 Thread Martin Liška

And using EPYC2 with 64 cores I get:

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o -fno-checking 
--param lto-partitions=4 -r

real0m11.040s
user0m33.479s
sys 0m0.718s

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o -fno-checking 
--param lto-partitions=8 -r

real0m6.542s
user0m39.334s
sys 0m0.945s

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o -fno-checking 
--param lto-partitions=128 -r

real0m4.945s
user0m59.344s
sys 0m2.475s

So here the growth of user time is only about 100%.
And baseline is:

time g++ -O2 /tmp/gimple-match.ii -c

real0m39.783s
user0m39.385s
sys 0m0.372s


Martin


Re: [PATCH] Increase min-lto-partition.

2020-03-13 Thread Martin Liška

On 3/13/20 4:11 PM, Jan Hubicka wrote:

$ time g++ -O2 /tmp/gimple-match.ii -c -flto -fno-checking
real0m8.709s
user0m8.543s

WPA+LTRANS:

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o --param lto-partitions=4  -fno-checking
real0m11.220s
user0m33.067s

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o --param lto-partitions=6  -fno-checking
real0m9.880s
user0m35.599s

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o --param lto-partitions=8  -fno-checking
real0m6.681s
user0m39.746s

default:
$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o -fno-checking
real0m6.065s
user1m22.698s


I did
/aux/hubicka/trunk-git/build2/./prev-gcc/xg++ 
-B/aux/hubicka/trunk-git/build2/./prev-gcc/ 
-B/usr/local/x86_64-pc-linux-gnu/bin/ -nostdinc++ 
-B/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/src/.libs 
-B/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/libsupc++/.libs
 
-I/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu
 -I/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/include 
-I/aux/hubicka/trunk-git/libstdc++-v3/libsupc++ 
-L/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/src/.libs 
-L/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/libsupc++/.libs
 -fno-PIE -c   -g -O2 -fchecking=0  -DIN_GCC -fno-exceptions -fno-rtti 
-fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wno-error=format-diag -Wmissing-format-attribute 
-Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
-Wno-overlength-strings -Werror -fno-common -Wno-unused -DHAVE_CONFIG_H -I. -I. 
-I../../gcc -I../../gcc/.  -I../../gcc/../include -I../../gcc/../libcpp/include 
-I/aux/hubicka/trunk-git/build2/./gmp -I/aux/hubicka/trunk-git/gmp 
-I/aux/hubicka/trunk-git/build2/./mpfr/src -I/aux/hubicka/trunk-git/mpfr/src 
-I/aux/hubicka/trunk-git/mpc/src -I../../gcc/../libdecnumber 
-I../../gcc/../libdecnumber/bid -I../libdecnumber -I../../gcc/../libbacktrace 
-I/aux/hubicka/trunk-git/build2/./isl/include 
-I/aux/hubicka/trunk-git/isl/include  -o gimple-match.o -MT gimple-match.o -MMD 
-MP -MF ./.deps/gimple-match.TPo gimple-match.c -flto

(copying from build disabling checking and adding -flto) and I get:
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=128 -r

real0m10.394s
user2m13.809s
sys 0m3.896s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=8 -r

real0m21.033s
user2m3.063s
sys 0m2.539s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=6 -r

real0m23.975s
user1m56.139s
sys 0m2.595s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=4 -r

real0m32.383s
user1m39.411s
sys 0m2.213s

With debug info disabled (like you do, but I guess in less realistic
setting) I get:

hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=128 -r

real0m10.905s
user1m55.065s
sys 0m2.956s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=8 -r

real0m17.297s
user1m26.513s
sys 0m1.626s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=6 -r

real0m22.365s
user1m30.969s
sys 0m1.386s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=4 -r

real0m26.534s
user1m21.593s
sys 0m0.902s

So I do not see such notable idfference in user times (but they are
consistently worse than yours). Perhaps, can you try to perf it
including the system profile? It may give us some idea why things behave
differently.


That's strange. So let's take my gimple-match.ii:
https://drive.google.com/file/d/1B8d3bIvz1KA_ksIo8h-JgkaJTCRiSPR4/view?usp=sharing

For gcc9 package (LTO+PGO) I get:

$ time g++ -O2 gimple-match.ii -c -flto
real0m8.180s
user0m7.992s

$ time 

Re: [PATCH] Increase min-lto-partition.

2020-03-13 Thread Jan Hubicka
> > $ time g++ -O2 /tmp/gimple-match.ii -c -flto -fno-checking
> > real0m8.709s
> > user0m8.543s
> > 
> > WPA+LTRANS:
> > 
> > $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> > gimple-match2.o --param lto-partitions=4  -fno-checking
> > real0m11.220s
> > user0m33.067s
> > 
> > $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> > gimple-match2.o --param lto-partitions=6  -fno-checking
> > real0m9.880s
> > user0m35.599s
> > 
> > $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> > gimple-match2.o --param lto-partitions=8  -fno-checking
> > real0m6.681s
> > user0m39.746s
> > 
> > default:
> > $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> > gimple-match2.o -fno-checking
> > real0m6.065s
> > user1m22.698s

I did
/aux/hubicka/trunk-git/build2/./prev-gcc/xg++ 
-B/aux/hubicka/trunk-git/build2/./prev-gcc/ 
-B/usr/local/x86_64-pc-linux-gnu/bin/ -nostdinc++ 
-B/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/src/.libs 
-B/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/libsupc++/.libs
 
-I/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu
 -I/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/include 
-I/aux/hubicka/trunk-git/libstdc++-v3/libsupc++ 
-L/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/src/.libs 
-L/aux/hubicka/trunk-git/build2/prev-x86_64-pc-linux-gnu/libstdc++-v3/libsupc++/.libs
 -fno-PIE -c   -g -O2 -fchecking=0  -DIN_GCC -fno-exceptions -fno-rtti 
-fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wno-error=format-diag -Wmissing-format-attribute 
-Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
-Wno-overlength-strings -Werror -fno-common -Wno-unused -DHAVE_CONFIG_H -I. -I. 
-I../../gcc -I../../gcc/.  -I../../gcc/../include -I../../gcc/../libcpp/include 
-I/aux/hubicka/trunk-git/build2/./gmp -I/aux/hubicka/trunk-git/gmp 
-I/aux/hubicka/trunk-git/build2/./mpfr/src -I/aux/hubicka/trunk-git/mpfr/src 
-I/aux/hubicka/trunk-git/mpc/src -I../../gcc/../libdecnumber 
-I../../gcc/../libdecnumber/bid -I../libdecnumber -I../../gcc/../libbacktrace 
-I/aux/hubicka/trunk-git/build2/./isl/include 
-I/aux/hubicka/trunk-git/isl/include  -o gimple-match.o -MT gimple-match.o -MMD 
-MP -MF ./.deps/gimple-match.TPo gimple-match.c -flto

(copying from build disabling checking and adding -flto) and I get:
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=128 -r

real0m10.394s
user2m13.809s
sys 0m3.896s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=8 -r

real0m21.033s
user2m3.063s
sys 0m2.539s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=6 -r

real0m23.975s
user1m56.139s
sys 0m2.595s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time 
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel 
gimple-match.o -fno-checking --param lto-partitions=4 -r

real0m32.383s
user1m39.411s
sys 0m2.213s

With debug info disabled (like you do, but I guess in less realistic
setting) I get:

hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=128 -r

real0m10.905s
user1m55.065s
sys 0m2.956s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=8 -r

real0m17.297s
user1m26.513s
sys 0m1.626s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=6 -r

real0m22.365s
user1m30.969s
sys 0m1.386s
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build2/gcc$ time
/aux/hubicka/trunk-install/bin/gcc -flto=auto -flinker-output=nolto-rel
gimple-match.o -fno-checking --param lto-partitions=4 -r

real0m26.534s
user1m21.593s
sys 0m0.902s

So I do not see such notable idfference in user times (but they are
consistently worse than yours). Perhaps, can you try to perf it
including the system profile? It may give us some idea why things behave
differently.

Compiler binary I use is profiledbootstrapped with LTO.

Honza
> > 
> > So I would recommend to set the param value to 75000, which leads to 6 
> > 

Re: [PATCH v2] generate EH info for volatile asm statements (PR93981)

2020-03-13 Thread Segher Boessenkool
Hi!

On Fri, Mar 13, 2020 at 01:31:19PM +, Richard Sandiford wrote:
> This might not be what you mean, but for:
> 
> int
> f1 (void)
> {
>   int x = 1;
>   asm volatile ("": "=m" (x));
>   return x;
> }
> 
> struct s { int i[5]; };
> struct s
> f2 (void)
> {
>   struct s x = { 1, 2, 3, 4, 5 };
>   asm volatile ("": "=m" (x));
>   return x;
> }
> 
> we do delete "x = 1" for f1.   I think that's the expected behaviour.
> We don't yet delete the initialisation in f2, but I think in principle
> we could.

Right.  And this is incorrect if the asm may throw.

> So the kind of contract I was advocating was:
> 
> - the compiler can't make any assumptions about what the asm does
>   or doesn't do to output operands when an exception is raised
> 
> - the source code can't make any assumption about the values bound
>   to output operands when an exception is raised

And the easiest (and only feasible?) way to do this is for the compiler
to automatically make an input for every output as well, imo.


Segher


Re: [PATCH] Increase min-lto-partition.

2020-03-13 Thread Jan Hubicka
> Hi.
> 
> I played a bit with -flinker-output=nolto-rel of gimple-match.ii and I 
> identified
> that current default of min-lto-partition leads to too many LTRANS. We pay 
> with
> LTO overhead and so that user time is high.
> 
> Base is:
> $ g++ -O2 /tmp/gimple-match.ii -c -fno-checking -o x.o
> real  0m40.130s
> user  0m39.911s

Did you configured compiler with checking? If so, I think the benchmarks
are not that good, because -fchecking does not control everything.
It would be relevant for gcc bootstrap but notmuch else. In that case I
would go with explicit --param in our Makefile.

I tried your experiment with linking tramp3d (with -O2 -flto on EPYC)
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=1

real0m12.574s
user1m5.010s
sys 0m0.970s
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=2

real0m17.926s
user1m1.259s
sys 0m1.153s
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=3

real0m22.115s
user0m56.964s
sys 0m0.892s
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=4

real0m23.510s
user0m50.783s
sys 0m0.983s
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=5

real0m28.410s
user0m46.146s
sys 0m0.680s
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=6

real0m32.304s
user0m46.114s
sys 0m0.720s
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=7

real0m42.332s
user0m50.521s
sys 0m0.749s

So going from 1 to 7 seems to decarese user time from 65 to 50s
(30% reduction) however the overall linktime goes up from 12s to 42s
(3.5 times)

Which does not seem that great tradeoff. Moreover I seem to get best
results with:
hubicka@lomikamen-jh:~$ time /aux/hubicka/trunk-install/bin/g++
-flto=auto tramp3d-v44.o --param lto-min-partition=1 --param
lto-partitions=200

real0m5.752s
user1m8.949s
sys 0m3.826s

Both genmatch and tramp3d seems bit extreme sources, but perhaps we want
to explore thi bit further..
I will try to re-measure your results on my setup so we get idea how
much sensitive it is :)

Honza

> 
> LGEN:
> 
> $ time g++ -O2 /tmp/gimple-match.ii -c -flto -fno-checking
> real  0m8.709s
> user  0m8.543s
> 
> WPA+LTRANS:
> 
> $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> gimple-match2.o --param lto-partitions=4  -fno-checking
> real  0m11.220s
> user  0m33.067s
> 
> $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> gimple-match2.o --param lto-partitions=6  -fno-checking
> real  0m9.880s
> user  0m35.599s
> 
> $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> gimple-match2.o --param lto-partitions=8  -fno-checking
> real  0m6.681s
> user  0m39.746s
> 
> default:
> $ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
> gimple-match2.o -fno-checking
> real  0m6.065s
> user  1m22.698s
> 
> So I would recommend to set the param value to 75000, which leads to 6 
> partitions. That would be:
> 
> 9+10s = 19s vs. 40s (total real time 44s). That seems reasonable to me.
> 
> Thoughts?
> Thanks,
> Martin
> 
> gcc/ChangeLog:
> 
> 2020-03-13  Martin Liska  
> 
>   * params.opt: Bump min-lto-partition in order to not create
>   too many LTRANS.
> ---
>  gcc/params.opt | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> 

> diff --git a/gcc/params.opt b/gcc/params.opt
> index e39216aa7d0..49fafac20af 100644
> --- a/gcc/params.opt
> +++ b/gcc/params.opt
> @@ -363,7 +363,7 @@ Common Joined UInteger 
> Var(param_max_lto_streaming_parallelism) Init(32) Integer
>  maximal number of LTO partitions streamed in parallel.
>  
>  -param=lto-min-partition=
> -Common Joined UInteger Var(param_min_partition_size) Init(1) Param
> +Common Joined UInteger Var(param_min_partition_size) Init(75000) Param
>  Minimal size of a partition for LTO (in estimated instructions).
>  
>  -param=lto-partitions=
> 



[PATCH] Increase min-lto-partition.

2020-03-13 Thread Martin Liška

Hi.

I played a bit with -flinker-output=nolto-rel of gimple-match.ii and I 
identified
that current default of min-lto-partition leads to too many LTRANS. We pay with
LTO overhead and so that user time is high.

Base is:
$ g++ -O2 /tmp/gimple-match.ii -c -fno-checking -o x.o
real0m40.130s
user0m39.911s

LGEN:

$ time g++ -O2 /tmp/gimple-match.ii -c -flto -fno-checking
real0m8.709s
user0m8.543s

WPA+LTRANS:

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o --param lto-partitions=4  -fno-checking
real0m11.220s
user0m33.067s

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o --param lto-partitions=6  -fno-checking
real0m9.880s
user0m35.599s

$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o --param lto-partitions=8  -fno-checking
real0m6.681s
user0m39.746s

default:
$ time gcc -flto=auto -flinker-output=nolto-rel gimple-match.o  -r -o 
gimple-match2.o -fno-checking
real0m6.065s
user1m22.698s

So I would recommend to set the param value to 75000, which leads to 6 
partitions. That would be:

9+10s = 19s vs. 40s (total real time 44s). That seems reasonable to me.

Thoughts?
Thanks,
Martin

gcc/ChangeLog:

2020-03-13  Martin Liska  

* params.opt: Bump min-lto-partition in order to not create
too many LTRANS.
---
 gcc/params.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


diff --git a/gcc/params.opt b/gcc/params.opt
index e39216aa7d0..49fafac20af 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -363,7 +363,7 @@ Common Joined UInteger Var(param_max_lto_streaming_parallelism) Init(32) Integer
 maximal number of LTO partitions streamed in parallel.
 
 -param=lto-min-partition=
-Common Joined UInteger Var(param_min_partition_size) Init(1) Param
+Common Joined UInteger Var(param_min_partition_size) Init(75000) Param
 Minimal size of a partition for LTO (in estimated instructions).
 
 -param=lto-partitions=



Re: [PATCH v2] generate EH info for volatile asm statements (PR93981)

2020-03-13 Thread Richard Sandiford
"J.W. Jagersma"  writes:
> On 2020-03-12 10:59, Richard Sandiford wrote:
>> The other case I mentioned was equivalent to:
>> 
>>  int
>>  test_mem2 ()
>>  {
>>int i = 2;
>>try
>>  {
>>asm volatile ("ud2; mov%z0 $1, %0" : "=m" (i));
>>  }
>>catch (const illegal_opcode&)
>>  {
>>if (i == 2) return 0;
>>  }
>>return i;
>>  }
>> 
>> Is this test expected to pass?
>
> Good point.  Yes, this should pass, and I thought it did, but it seems
> I was mistaken.  To fix that would require transforming "=m" into "+m"
> as Segher suggested.
>
>> However, these "mem" tests are testing gimple register types, so they'll
>> still be SSA names outside of the asm.  It would also be good to test what
>> happens for non-register types, e.g. something like:
>> 
>>  int
>>  test_mem3 ()
>>  {
>>int i[5] = { 1, 2, 3, 4, 5 };
>>try
>>  {
>>asm volatile ("ud2; " : "=m" (i));
>>  }
>>catch (const illegal_opcode&)
>>  {
>>if (i[0] == 1 && ...) return 0;
>>  }
>>return ...;
>>  }
>> 
>> and similarly for ud2 after the store.
>
> I think I see what you mean.  Would such a test not also cover what the
> current test_mem() function does?  If so then that could be removed.

I think it's better to have tests for both the is_gimple_reg_type case
and the !is_gimple_reg_type case, so keeping test_mem sounds better.

> Also in my current patch I used: (tree-eh.c:2104)
>
> if (tree_could_throw_p (opval)
> || !is_gimple_reg_type (TREE_TYPE (opval))
> || !is_gimple_reg (get_base_address (opval)))
>
> to determine the difference between a register and memory type.  Could
> there potentially be a case where that identifies an operand as gimple
> register type, but ends up compiled as a memory operand to an asm?

The constraints can support both registers and memory, e.g. via "rm"
or "g".  I'm not sure off-hand what we do with those for gimple.

>> It wasn't clear from my message above, but: I was mostly worried about
>> requiring the asm to treat memory operands in a certain way when the
>> exception is thrown.  IMO it would be better to say that the values of
>> memory operands are undefined on the exception edge.
>
> I'm not sure about the semantic difference between undefined and
> unspecified.  But gcc should not write to any memory after a throw,
> because that write operation itself may have side effects.  Likewise
> asm memory operands should not be redirected to a temporary for the
> same reason, and also because gcc can't possibly know which parts of
> an (offsettable) memory operand are written to.

This might not be what you mean, but for:

int
f1 (void)
{
  int x = 1;
  asm volatile ("": "=m" (x));
  return x;
}

struct s { int i[5]; };
struct s
f2 (void)
{
  struct s x = { 1, 2, 3, 4, 5 };
  asm volatile ("": "=m" (x));
  return x;
}

we do delete "x = 1" for f1.   I think that's the expected behaviour.
We don't yet delete the initialisation in f2, but I think in principle
we could.

So the kind of contract I was advocating was:

- the compiler can't make any assumptions about what the asm does
  or doesn't do to output operands when an exception is raised

- the source code can't make any assumption about the values bound
  to output operands when an exception is raised

Thanks,
Richard


Re: [PATCH] Do not strcat to result of getenv.

2020-03-13 Thread Martin Liška

On 3/13/20 1:39 PM, Jakub Jelinek wrote:

   collect_gcc_options = concat (collect_gcc_options,
xassembler_opts_string, NULL);
instead?


Ah, I forgot that concat is available here. I would choose
this way, even though it's a small memory leak.

I'm going to install the patch.
Martin
>From 4f287d2b028b61b47f4d8cec7b7db14636122653 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 12 Mar 2020 19:56:47 +0100
Subject: [PATCH] Do not strcat to result of getenv.

gcc/ChangeLog:

2020-03-12  Martin Liska  

	PR lto/94157
	* lto-wrapper.c (run_gcc): Use concat for appending
	to collect_gcc_options.

gcc/testsuite/ChangeLog:

2020-03-12  Martin Liska  

	PR lto/94157
	* gcc.dg/lto/pr94157_0.c: New test.
---
 gcc/lto-wrapper.c| 3 ++-
 gcc/testsuite/gcc.dg/lto/pr94157_0.c | 6 ++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr94157_0.c

diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index b8a35c85714..46a88b233f6 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -1317,7 +1317,8 @@ run_gcc (unsigned argc, char *argv[])
 
   char *xassembler_opts_string
 	= XOBFINISH (_obstack, char *);
-  strcat (collect_gcc_options, xassembler_opts_string);
+  collect_gcc_options = concat (collect_gcc_options, xassembler_opts_string,
+NULL);
 }
 
   get_options_from_collect_gcc_options (collect_gcc, collect_gcc_options,
diff --git a/gcc/testsuite/gcc.dg/lto/pr94157_0.c b/gcc/testsuite/gcc.dg/lto/pr94157_0.c
new file mode 100644
index 000..3bca677c4fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr94157_0.c
@@ -0,0 +1,6 @@
+/* { dg-lto-do link } */
+/* { dg-lto-options { { -O0 -fipa-vrp -flto -Wa,--noexecstack -Wa,--noexecstack -Wa,--execstack  -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack } } } */
+
+int main() {
+
+}
-- 
2.25.1



Re: [PATCH] Do not strcat to result of getenv.

2020-03-13 Thread Jakub Jelinek via Gcc-patches
On Fri, Mar 13, 2020 at 01:26:02PM +0100, Martin Liška wrote:
> diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
> index b8a35c85714..84a0bd24e91 100644
> --- a/gcc/lto-wrapper.c
> +++ b/gcc/lto-wrapper.c
> @@ -1317,7 +1317,11 @@ run_gcc (unsigned argc, char *argv[])
>  
>char *xassembler_opts_string
>   = XOBFINISH (_obstack, char *);
> -  strcat (collect_gcc_options, xassembler_opts_string);
> +  char *collect = (char *)xmalloc (strlen (collect_gcc_options)
> ++ strlen (xassembler_opts_string) + 1);
> +  strcpy (collect, collect_gcc_options);
> +  strcat (collect, xassembler_opts_string);

  collect_gcc_options = concat (collect_gcc_options,
xassembler_opts_string, NULL);
instead?
Though, even that looks like a memory leak.
So, e.g. record that collect_gcc_options has been allocated in some bool
and XDELETE it at the end of function?
The string could be as well built in the temporary_obstack by
obstack_grow (_obstack, collect_gcc_options);
before the prepend_xassembler_to_collect_as_options call (which btw seems
incorrectly formatted, the function name should be at the start of line),
but you'd then need to finish the obstack at the end of function.

> +  collect_gcc_options = collect;

Jakub



Re: c: ignore initializers for elements of variable-size types [PR93577]

2020-03-13 Thread Christophe Lyon via Gcc-patches
On Wed, 11 Mar 2020 at 00:37, Joseph Myers  wrote:
>
> On Tue, 10 Mar 2020, Christophe Lyon wrote:
>
> > sizeless-1.c and sizeless-2.c have the same code, but the latter is
> > compiled with -msve-vector-bits=256 and expects different
> > warnings/errors.
> > For line 33:
> > svint8_t *invalid_sve_sc_ptr = &(svint8_t) { *global_sve_sc_ptr };
> > we now have:
> > sizeless-1.c:33:44: error: empty scalar initializer
> > sizeless-1.c:33:44: note: (near initialization for '(anonymous)')
> > and
> > sizeless-2.c:33:44: error: initializer element is not constant
> > sizeless-2.c:33:44: note: (near initialization for 'invalid_sve_sc_ptr')
> > sizeless-2.c:33:44: error: SVE type 'svint8_t' does not have a fixed size
> > so I think the error comes from the compound literal being treated
> > differently with -msve-vector-bits=256
>
> I think the sizeless-2.c diagnostics are correct while there's a problem
> in the sizeless-1.c case (the initializer is not empty, so it should not
> be diagnosed as such).
>
> Does the process_init_element code
>
>   /* Ignore elements of an initializer for a variable-size type.
>  Those are diagnosed in digest_init.  */
>   if (COMPLETE_TYPE_P (constructor_type)
>   && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST)
> return;
>
> fire for the sizeless-1.c case?  If so, maybe it needs to be restricted in
> some way to apply only to variable size structs / unions / arrays rather
> than whatever kind of variable-size type the SVE types are.

It does. Type_size has POLY_INT_CST type.

The attached small patch fixes the problem (tested on arm and aarch64).
OK?

gcc/c/ChangeLog:

2020-03-13  Christophe Lyon  

* c-typeck.c (process_init_element): Handle constructor_type with
type size represented by POLY_INT_CST.

gcc/testsuite/ChangeLog:

2020-03-13  Christophe Lyon  

* gcc.target/aarch64/sve/acle/general-c/sizeless-1.c: Remove
superfluous dg-error.
* gcc.target/aarch64/sve/acle/general-c/sizeless-2.c: Likewise.


diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index d8025de..1302486 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -9968,7 +9968,8 @@ process_init_element (location_t loc, struct
c_expr value, bool implicit,
   /* Ignore elements of an initializer for a variable-size type.
  Those are diagnosed in digest_init.  */
   if (COMPLETE_TYPE_P (constructor_type)
-  && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST)
+  && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST
+  && TREE_CODE (TYPE_SIZE (constructor_type)) != POLY_INT_CST)
 return;

   if (!implicit && warn_designated_init && !was_designated
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c
index ec892a3..e53b871 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c
@@ -83,7 +83,6 @@ statements (int n)
   svint8_t array[2]; /* { dg-error {array elements cannot have SVE
type 'svint8_t'} } */
   svint8_t zero_length_array[0]; /* { dg-error {array elements cannot
have SVE type 'svint8_t'} } */
   svint8_t empty_init_array[] = {}; /* { dg-error {array elements
cannot have SVE type 'svint8_t'} } */
-   /* { dg-error {empty scalar
initializer} "" { target *-*-* } .-1 } */
   typedef svint8_t vla_type[n]; /* { dg-error {array elements cannot
have SVE type 'svint8_t'} } */

   /* Assignment.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c
index 7174393..9986d27 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c
@@ -83,7 +83,6 @@ statements (int n)
   svint8_t array[2]; /* { dg-error {array elements cannot have SVE
type 'svint8_t'} } */
   svint8_t zero_length_array[0]; /* { dg-error {array elements cannot
have SVE type 'svint8_t'} } */
   svint8_t empty_init_array[] = {}; /* { dg-error {array elements
cannot have SVE type 'svint8_t'} } */
-   /* { dg-error {empty scalar
initializer} "" { target *-*-* } .-1 } */
   typedef svint8_t vla_type[n]; /* { dg-error {array elements cannot
have SVE type 'svint8_t'} } */

   /* Assignment.  */

>
> --
> Joseph S. Myers
> jos...@codesourcery.com
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index d8025de..1302486 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -9968,7 +9968,8 @@ process_init_element (location_t loc, struct c_expr 
value, bool implicit,
   /* Ignore elements of an initializer for a variable-size type.
  Those are diagnosed in digest_init.  */
   if (COMPLETE_TYPE_P (constructor_type)
-  && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST)
+  && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST

[PATCH] Do not strcat to result of getenv.

2020-03-13 Thread Martin Liška

Hi.

The patch fixed wrong strcat to a char pointer received
with getenv.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2020-03-12  Martin Liska  

PR lto/94157
* lto-wrapper.c (run_gcc): Allocate a new buffer.
Do not strcat to result of getenv.

gcc/testsuite/ChangeLog:

2020-03-12  Martin Liska  

PR lto/94157
* gcc.dg/lto/pr94157_0.c: New test.
---
 gcc/lto-wrapper.c| 6 +-
 gcc/testsuite/gcc.dg/lto/pr94157_0.c | 6 ++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr94157_0.c


diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index b8a35c85714..84a0bd24e91 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -1317,7 +1317,11 @@ run_gcc (unsigned argc, char *argv[])
 
   char *xassembler_opts_string
 	= XOBFINISH (_obstack, char *);
-  strcat (collect_gcc_options, xassembler_opts_string);
+  char *collect = (char *)xmalloc (strlen (collect_gcc_options)
+   + strlen (xassembler_opts_string) + 1);
+  strcpy (collect, collect_gcc_options);
+  strcat (collect, xassembler_opts_string);
+  collect_gcc_options = collect;
 }
 
   get_options_from_collect_gcc_options (collect_gcc, collect_gcc_options,
diff --git a/gcc/testsuite/gcc.dg/lto/pr94157_0.c b/gcc/testsuite/gcc.dg/lto/pr94157_0.c
new file mode 100644
index 000..3bca677c4fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr94157_0.c
@@ -0,0 +1,6 @@
+/* { dg-lto-do link } */
+/* { dg-lto-options { { -O0 -fipa-vrp -flto -Wa,--noexecstack -Wa,--noexecstack -Wa,--execstack  -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack -Wa,--execstack } } } */
+
+int main() {
+
+}



Re: [PATCH][GCC][AArch64]: Break apart paradoxical subregs for VSTRUCT writes (PR target/94052)

2020-03-13 Thread Richard Sandiford
Tamar Christina  writes:
> Hi Richard,
>
> I have updated the patch, changelog is the same.
>
> bootstrapped and regtested on aarch64-none-linux-gnu and no issues.
>
> OK for gcc 9 and 8?
>
> Thanks,
> Tamar
>
> [...]
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 89aaf8c018e3340dd2d53fc2a6538d3d1220b103..16abe70be0ad90d3befdc5c4dfdb1471f2c98c58
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -5330,6 +5330,26 @@ (define_expand "mov"
>if (GET_CODE (operands[0]) != REG)
>   operands[1] = force_reg (mode, operands[1]);
>  }
> +
> +  /* If we have a paradoxical subreg trying to write to  from and the
> + registers don't overlap then we need to break it apart.  What it's 
> trying
> + to do is give two kind of information at the same time.  It's trying to
> + convey liveness information by saying that the entire register will be
> + written to eventually, but it also only wants to write a single part of 
> the
> + register.  Hence the paradoxical subreg.
> +
> + Instead of allowing this we will split the two concerns.  The liveness
> + information will be conveyed using a clobber and then we break apart the
> + paradoxical subreg into just a normal write of the part that it wanted 
> to
> + write originally.  */
> +
> +  if (REG_P(operands[0]) && paradoxical_subreg_p (operands[1]))

Sorry for the nit, but: space between REG_P and (

OK for backports with that change, once the bug is fixed in master.

Thanks,
Richard

> +{
> +  if (!reg_overlap_mentioned_p (operands[0], operands[1]))
> + emit_clobber (operands[0]);
> +  operands[1] = SUBREG_REG (operands[1]);
> +  operands[0] = gen_lowpart (GET_MODE (operands[1]), operands[0]);
> +}
>  })
 
 


Re: [PATCH] lra: Tighten check for reloading paradoxical subregs [PR94052]

2020-03-13 Thread Richard Sandiford
Eric Botcazou  writes:
>> I think there are two problems with this:
>> 
>> (1) It never actually checks whether the hard register is valid for the
>> outer mode (in the hard_regno_mode_ok sense).  If it isn't, any attempt
>> to reload in the outer mode is likely to cycle, because the implied
>> regno/mode combination will be just as invalid next time
>> curr_insn_transform sees the subreg.
>> 
>> (2) The check is valid for little-endian only.  For big-endian we need
>> to move hard_regno backwards.
>> 
>> Using simplify_subreg_regno should avoid both problems.
>
> We have apparently a cycle in LRA on the SPARC, see PR rtl-opt/92303.

The patch doesn't help with that PR unfortunately.  It looks like the
cycling there is on a partial subreg rather than a paradoxical one.

Thanks,
Richard


Re: [PATCH] lra: Tighten check for reloading paradoxical subregs [PR94052]

2020-03-13 Thread Eric Botcazou
> I think there are two problems with this:
> 
> (1) It never actually checks whether the hard register is valid for the
> outer mode (in the hard_regno_mode_ok sense).  If it isn't, any attempt
> to reload in the outer mode is likely to cycle, because the implied
> regno/mode combination will be just as invalid next time
> curr_insn_transform sees the subreg.
> 
> (2) The check is valid for little-endian only.  For big-endian we need
> to move hard_regno backwards.
> 
> Using simplify_subreg_regno should avoid both problems.

We have apparently a cycle in LRA on the SPARC, see PR rtl-opt/92303.

-- 
Eric Botcazou


Re: [RFA] [PR rtl-optimization/90275] Handle nop reg->reg copies in cse

2020-03-13 Thread Richard Sandiford
Jeff, thanks for picking this up.

Jeff Law  writes:
> On Thu, 2020-03-12 at 15:26 -0500, Segher Boessenkool wrote:
>> On Thu, Mar 12, 2020 at 12:47:04PM -0600, Jeff Law wrote:
>> > On Thu, 2020-03-12 at 13:23 -0500, Segher Boessenkool wrote:
>> > > >  else if (n_sets == 1
>> > > > - && MEM_P (trial)
>> > > > + && ! CALL_P (insn)
>> > > > + && (MEM_P (trial) || REG_P (trial))
>> > > >   && MEM_P (dest)
>> > > >   && rtx_equal_p (trial, dest)
>> > > >   && !side_effects_p (dest)
>> > > 
>> > > This adds the !CALL_P (no space btw) condition, why is that?
>> > Because n_sets is not valid for CALL_P insns which resulted in a failure on
>> > ppc. 
>> > See find_sets_in_insn which ignores the set on the LHS of a call.  So 
>> > imagine
>> > if
>> > we had a nop register set in parallel with a (set (reg) (call ...)).  We'd
>> > end up
>> > deleting the entire PARALLEL which is obviously wrong.
>> 
>> Ah, I see.  So this is exposed on Power by the TOC stuff, I guess?  CSE
>> sees a TOC set parallel with a call as a no-op because it is set to the
>> same value (an unspec, not an unspec_volatile) that GCC can derive is
>> already in the TOC reg?  Or is this some other case?
> Not entirely sure.  Richard's message didn't include the precise details. 

Yeah, that was exactly it.

On the bright side, removing many calls as dead made for an easy-to-debug
bootstrap failure :-)

Richard


[PATCH] lra: Tighten check for reloading paradoxical subregs [PR94052]

2020-03-13 Thread Richard Sandiford
[See:

  https://gcc.gnu.org/pipermail/gcc-patches/2020-March/541694.html
  https://gcc.gnu.org/pipermail/gcc-patches/2020-March/541759.html

for a walkthrough of what goes wrong in the testcase, but hopefully
the change makes sense on first principles.]

simplify_operand_subreg tries to detect whether the allocation for
a pseudo in a paradoxical subreg is also valid for the outer mode.
The condition it used to check for an invalid combination was:

  else if (REG_P (reg)
   && REGNO (reg) >= FIRST_PSEUDO_REGISTER
   && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0
   && (hard_regno_nregs (hard_regno, innermode)
   < hard_regno_nregs (hard_regno, mode))
   && (regclass = lra_get_allocno_class (REGNO (reg)))
   && (type != OP_IN
   || !in_hard_reg_set_p (reg_class_contents[regclass],
  mode, hard_regno)
   || overlaps_hard_reg_set_p (lra_no_alloc_regs,
   mode, hard_regno)))

I think there are two problems with this:

(1) It never actually checks whether the hard register is valid for the
outer mode (in the hard_regno_mode_ok sense).  If it isn't, any attempt
to reload in the outer mode is likely to cycle, because the implied
regno/mode combination will be just as invalid next time
curr_insn_transform sees the subreg.

(2) The check is valid for little-endian only.  For big-endian we need
to move hard_regno backwards.

Using simplify_subreg_regno should avoid both problems.

As the existing comment says, IRA should always take subreg references
into account when allocating hard registers, so this fix-up should only
really be needed for pseudos allocated by LRA itself.

Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.  OK for master?
I think for backports it would be safer to go with Tamar's patch.

Thanks,
Richard


gcc/
2020-03-10  Richard Sandiford  

PR rtl-optimization/94052
* lra-constraints.c (simplify_operand_subreg): Reload the inner
register of a paradoxical subreg if simplify_subreg_regno fails
to give a valid hard register for the outer mode.

gcc/testsuite/
2020-03-05  Tamar Christina  

PR target/94052
* gcc.target/aarch64/pr94052.C: New test.
---
 gcc/lra-constraints.c  |  24 +--
 gcc/testsuite/g++.target/aarch64/pr94052.C | 174 +
 2 files changed, 188 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/pr94052.C

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index f71e0c9ff8d..bf6d4a2fd4b 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -1489,7 +1489,7 @@ static bool process_address (int, bool, rtx_insn **, 
rtx_insn **);
 static bool
 simplify_operand_subreg (int nop, machine_mode reg_mode)
 {
-  int hard_regno;
+  int hard_regno, inner_hard_regno;
   rtx_insn *before, *after;
   machine_mode mode, innermode;
   rtx reg, new_reg;
@@ -1735,15 +1735,19 @@ simplify_operand_subreg (int nop, machine_mode reg_mode)
  for the new uses.  */
   else if (REG_P (reg)
   && REGNO (reg) >= FIRST_PSEUDO_REGISTER
-  && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0
-  && (hard_regno_nregs (hard_regno, innermode)
-  < hard_regno_nregs (hard_regno, mode))
-  && (regclass = lra_get_allocno_class (REGNO (reg)))
-  && (type != OP_IN
-  || !in_hard_reg_set_p (reg_class_contents[regclass],
- mode, hard_regno)
-  || overlaps_hard_reg_set_p (lra_no_alloc_regs,
-  mode, hard_regno)))
+  && paradoxical_subreg_p (operand)
+  && (inner_hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0
+  && ((hard_regno
+   = simplify_subreg_regno (inner_hard_regno, innermode,
+SUBREG_BYTE (operand), mode)) < 0
+  || ((hard_regno_nregs (inner_hard_regno, innermode)
+   < hard_regno_nregs (hard_regno, mode))
+  && (regclass = lra_get_allocno_class (REGNO (reg)))
+  && (type != OP_IN
+  || !in_hard_reg_set_p (reg_class_contents[regclass],
+ mode, hard_regno)
+  || overlaps_hard_reg_set_p (lra_no_alloc_regs,
+  mode, hard_regno)
 {
   /* The class will be defined later in curr_insn_transform.  */
   enum reg_class rclass
diff --git a/gcc/testsuite/g++.target/aarch64/pr94052.C 
b/gcc/testsuite/g++.target/aarch64/pr94052.C
new file mode 100644
index 000..d36c9bdc158
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/pr94052.C
@@ -0,0 +1,174 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=gnu++11 -w" } */
+
+namespace c 

Re: [PATCH] aarch64: Fix another bug in aarch64_add_offset_1 [PR94121]

2020-03-13 Thread Richard Earnshaw (lists)

On 12/03/2020 14:24, Jakub Jelinek via Gcc-patches wrote:

On Thu, Mar 12, 2020 at 12:27:48PM +0100, Andreas Schwab wrote:

I'm getting this ICE with -mabi=ilp32:

during RTL pass: fwprop1
/opt/gcc/gcc-20200312/gcc/testsuite/gcc.dg/pr94121.c: In function 'bar':
/opt/gcc/gcc-20200312/gcc/testsuite/gcc.dg/pr94121.c:16:1: internal compiler 
error: in decompose, at rtl.h:2279


That is a preexisting issue, caused by another bug in the same function.
When mode is SImode and moffset is 0x8000 (or anything else with the
bit 31 set), we need to sign-extend it.

Fixed thusly, ok for trunk if it passes bootstrap/regtest on aarch64-linux?

2020-03-12  Jakub Jelinek  

PR target/94121
* config/aarch64/aarch64.c (aarch64_add_offset_1): Use gen_int_mode
instead of GEN_INT.


OK.

R.


--- gcc/config/aarch64/aarch64.c.jj 2020-03-12 15:05:20.610726090 +0100
+++ gcc/config/aarch64/aarch64.c2020-03-12 15:18:35.390025244 +0100
@@ -3757,7 +3757,8 @@ aarch64_add_offset_1 (scalar_int_mode mo
if (emit_move_imm)
  {
gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
-  temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
+  temp1 = aarch64_force_temporary (mode, temp1,
+  gen_int_mode (moffset, mode));
  }
insn = emit_insn (offset < 0
? gen_sub3_insn (dest, src, temp1)


Jakub





[patch] Fix PR rtl-optimization/94119

2020-03-13 Thread Eric Botcazou
This is a regression present on all active branches: the dbr pass goes awry on 
a branchy testcase compiled at -O2 for the MIPS, with specific set of options 
(-fno-reorder-blocks -mlong-calls) which leave the CFG in a convoluted state.

The underlynig issue is that relax_delay_slots can streamline the CFG in these 
cases, in particular remove BARRIERs, but removing BARRIERs changes the way 
the instructions are associated with (basic) blocks by the liveness analysis 
code in resource.c (find_basic_block) and thus can cause entries in the cache 
maintained by resource.c to become outdated, thus producing wrong answers.

The fix is to invalidate the cache entries affected by the removal of BARRIERs 
in relax_delay_slots, i.e. for the instructions down to the next BARRIER.

Bootstrapped/regtested on SPAR/Solaris & SPARC64/Linux, applied on all active 
branches (without a testcase because it's MIPS specific and I don't understand 
hows gcc.target/mips works, see mips.exp for the gory details).


2019-03-13  Eric Botcazou  

PR rtl-optimization/94119
* resource.h (clear_hashed_info_until_next_barrier): Declare.
* resource.c (clear_hashed_info_until_next_barrier): New function.
* reorg.c (add_to_delay_list): Fix formatting.
(relax_delay_slots): Call clear_hashed_info_until_next_barrier on
the next instruction after removing a BARRIER.

-- 
Eric Botcazoudiff --git a/gcc/reorg.c b/gcc/reorg.c
index dfd7494bf79..84beb9395aa 100644
--- a/gcc/reorg.c
+++ b/gcc/reorg.c
@@ -575,8 +575,9 @@ add_to_delay_list (rtx_insn *insn, vec *delay_list)
 {
   /* If INSN has its block number recorded, clear it since we may
  be moving the insn to a new block.  */
-  clear_hashed_info_for_insn (insn);
-  delay_list->safe_push (insn);
+  clear_hashed_info_for_insn (insn);
+
+  delay_list->safe_push (insn);
 }
 
 /* Delete INSN from the delay slot of the insn that it is in, which may
@@ -3211,7 +3212,14 @@ relax_delay_slots (rtx_insn *first)
 
 	  if (invert_jump (jump_insn, label, 1))
 		{
-		  delete_related_insns (next);
+		  rtx_insn *from = delete_related_insns (next);
+
+		  /* We have just removed a BARRIER, which means that the block
+		 number of the next insns has effectively been changed (see
+		 find_basic_block in resource.c), so clear it.  */
+		  if (from)
+		clear_hashed_info_until_next_barrier (from);
+
 		  next = jump_insn;
 		}
 
@@ -3484,18 +3492,22 @@ relax_delay_slots (rtx_insn *first)
 
 	  if (invert_jump (delay_jump_insn, label, 1))
 		{
-		  int i;
-
 		  /* Must update the INSN_FROM_TARGET_P bits now that
 		 the branch is reversed, so that mark_target_live_regs
 		 will handle the delay slot insn correctly.  */
-		  for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
+		  for (int i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
 		{
 		  rtx slot = XVECEXP (PATTERN (insn), 0, i);
 		  INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
 		}
 
-		  delete_related_insns (next);
+		  /* We have just removed a BARRIER, which means that the block
+		 number of the next insns has effectively been changed (see
+		 find_basic_block in resource.c), so clear it.  */
+		  rtx_insn *from = delete_related_insns (next);
+		  if (from)
+		clear_hashed_info_until_next_barrier (from);
+
 		  next = insn;
 		}
 
diff --git a/gcc/resource.c b/gcc/resource.c
index d26217c056b..5ce53e61057 100644
--- a/gcc/resource.c
+++ b/gcc/resource.c
@@ -1283,6 +1283,25 @@ clear_hashed_info_for_insn (rtx_insn *insn)
 }
 }
 
+/* Clear any hashed information that we have stored for instructions
+   between INSN and the next BARRIER that follow a JUMP or a LABEL.  */
+
+void
+clear_hashed_info_until_next_barrier (rtx_insn *insn)
+{
+  while (insn && !BARRIER_P (insn))
+{
+  if (JUMP_P (insn) || LABEL_P (insn))
+	{
+	  rtx_insn *next = next_active_insn (insn);
+	  if (next)
+	clear_hashed_info_for_insn (next);
+	}
+
+  insn = next_nonnote_insn (insn);
+}
+}
+
 /* Increment the tick count for the basic block that contains INSN.  */
 
 void
diff --git a/gcc/resource.h b/gcc/resource.h
index e3edb242703..c4f8aa20141 100644
--- a/gcc/resource.h
+++ b/gcc/resource.h
@@ -46,6 +46,7 @@ extern void mark_set_resources (rtx, struct resources *, int,
 enum mark_resource_type);
 extern void mark_referenced_resources (rtx, struct resources *, bool);
 extern void clear_hashed_info_for_insn (rtx_insn *);
+extern void clear_hashed_info_until_next_barrier (rtx_insn *);
 extern void incr_ticks_for_insn (rtx_insn *);
 extern void mark_end_of_function_resources (rtx, bool);
 extern void init_resource_info (rtx_insn *);


Re: [PATCH] aarch64: Fix another bug in aarch64_add_offset_1 [PR94121]

2020-03-13 Thread Jakub Jelinek via Gcc-patches
On Thu, Mar 12, 2020 at 03:24:31PM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Thu, Mar 12, 2020 at 12:27:48PM +0100, Andreas Schwab wrote:
> > I'm getting this ICE with -mabi=ilp32:
> > 
> > during RTL pass: fwprop1
> > /opt/gcc/gcc-20200312/gcc/testsuite/gcc.dg/pr94121.c: In function 'bar':
> > /opt/gcc/gcc-20200312/gcc/testsuite/gcc.dg/pr94121.c:16:1: internal 
> > compiler error: in decompose, at rtl.h:2279
> 
> That is a preexisting issue, caused by another bug in the same function.
> When mode is SImode and moffset is 0x8000 (or anything else with the
> bit 31 set), we need to sign-extend it.
> 
> Fixed thusly, ok for trunk if it passes bootstrap/regtest on aarch64-linux?

Now successfully bootstrapped/regtested on aarch64-linux.

> 2020-03-12  Jakub Jelinek  
> 
>   PR target/94121
>   * config/aarch64/aarch64.c (aarch64_add_offset_1): Use gen_int_mode
>   instead of GEN_INT.
> 
> --- gcc/config/aarch64/aarch64.c.jj   2020-03-12 15:05:20.610726090 +0100
> +++ gcc/config/aarch64/aarch64.c  2020-03-12 15:18:35.390025244 +0100
> @@ -3757,7 +3757,8 @@ aarch64_add_offset_1 (scalar_int_mode mo
>if (emit_move_imm)
>  {
>gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
> -  temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
> +  temp1 = aarch64_force_temporary (mode, temp1,
> +gen_int_mode (moffset, mode));
>  }
>insn = emit_insn (offset < 0
>   ? gen_sub3_insn (dest, src, temp1)
> 

Jakub



Re: [RFA] [PR rtl-optimization/90275] Handle nop reg->reg copies in cse

2020-03-13 Thread Christophe Lyon via Gcc-patches
Hi,


On Thu, 12 Mar 2020 at 23:12, Jeff Law via Gcc-patches
 wrote:
>
> On Thu, 2020-03-12 at 13:23 -0500, Segher Boessenkool wrote:
> > On Thu, Mar 12, 2020 at 12:03:08PM -0600, Jeff Law wrote:
> > > On Sat, 2020-02-08 at 10:41 -0600, Segher Boessenkool wrote:
> > > > I don't think each stanza of code should use it's own "noop-ness", no.
> > > Richard's patch is actually better than mine in that regard as it handles 
> > > mem
> > > and
> > > reg nop moves in an indentical way.  I don't think refactoring the cse.c 
> > > code
> > > is
> > > advisable now though -- but I do want to fix the multiply-reported ICE on 
> > > ARM
> > > and
> > > Richard's cse changes are the cleanest way to do that that I can see.
> >
> > It looks pretty simple, yeah...  All of CSE is hopelessly fragile, but
> > this patch does not make things worse.
> >
> > > > I don't know if this patch makes matters worse or not.  It doesn't seem
> > > > suitable for stage 4 though.  And Richard said the cse.c part breaks
> > > > rs6000, if that is true, yes I do object ;-)
> > > The rs6000 port breakage is trivial to fix.  In fact, I did so and ran it
> > > through
> > > my tester, which includes ppc64le and ppc64 a slew of *-elf targets x86
> > > native
> > > and more.
> >
> > I don't see anything rs6000 below?  Is it just this generic code?
> >
> > > @@ -5324,9 +5324,11 @@ cse_insn (rtx_insn *insn)
> > > }
> > >
> > >   /* Similarly, lots of targets don't allow no-op
> > > -(set (mem x) (mem x)) moves.  */
> > > +(set (mem x) (mem x)) moves.  Even (set (reg x) (reg x))
> > > +might be impossible for certain registers (like CC registers).  
> > > */
> > >   else if (n_sets == 1
> > > -  && MEM_P (trial)
> > > +  && ! CALL_P (insn)
> > > +  && (MEM_P (trial) || REG_P (trial))
> > >&& MEM_P (dest)
> > >&& rtx_equal_p (trial, dest)
> > >&& !side_effects_p (dest)
> >
> > This adds the !CALL_P (no space btw) condition, why is that?
> >
> > (Is that CCmode reg-reg move comment about rs6000?  Huh, we *do* have
> > patterns for that, or *should* have at least!)
> I fixed the extraneous whitespace and committed the change.
>

The new test fails on ARM:
FAIL: gcc.c-torture/compile/pr90275.c   -O3 -g  (internal compiler error)
during RTL pass: cse_local
/gcc/testsuite/gcc.c-torture/compile/pr90275.c: In function 'e':
/gcc/testsuite/gcc.c-torture/compile/pr90275.c:25:1: internal compiler
error: in insert_regs, at cse.c:1128
0x15725bd insert_regs
/gcc/cse.c:1128
0x1579731 cse_insn
/gcc/cse.c:5957
0x157aff6 cse_extended_basic_block
/gcc/cse.c:6615
0x157aff6 cse_main
/gcc/cse.c:6794
0x157bc0d rest_of_handle_cse_after_global_opts
/gcc/cse.c:7766
0x157bc0d execute
/gcc/cse.c:7817
Please submit a full bug report,


Is the patch supposed to fix all the ICEs on ARM?

I see this with cross-compilers, it seems OK on native builds? (I
can't find error reports about this on gcc-testresults)

Christophe


> THanks,
> jeff
> >
>


[PATCH] tree-optimization/94163 constrain alignment set by PRE

2020-03-13 Thread Richard Biener


This avoids HWI -> unsigned truncation to end up with zero alignment
which set_ptr_info_alignment ICEs on.

Bootstrap and regtest in progress.

Richard.

2020-03-13  Richard Biener  

PR tree-optimization/94163
* tree-ssa-pre.c (create_expression_by_pieces): Check
whether alignment would be zero.
---
 gcc/tree-ssa-pre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 8bd17b82368..7b171bc4e8f 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -2810,7 +2810,8 @@ create_expression_by_pieces (basic_block block, pre_expr 
expr,
  unsigned HOST_WIDE_INT hmisalign
= args.length () == 3 ? tree_to_uhwi (args[2]) : 0;
  if ((halign & (halign - 1)) == 0
- && (hmisalign & ~(halign - 1)) == 0)
+ && (hmisalign & ~(halign - 1)) == 0
+ && (unsigned int)halign != 0)
set_ptr_info_alignment (get_ptr_info (forcedname),
halign, hmisalign);
}
-- 
2.16.4


Re: [patch] Fix PR middle-end/92071

2020-03-13 Thread Richard Biener via Gcc-patches
On Fri, Mar 13, 2020 at 8:37 AM Eric Botcazou  wrote:
>
> Hi,
>
> this is a regression present on the mainline for the ARM in the form of an
> assertion failure at -O2 in the back-end, which rightfully refuses to generate
> a load from an unaligned memory location (the ARM target is strict-alignment).
>
> It comes from a __builtin_memcpy generating:
>
> MEM  [(char * {ref-all})] = MEM  [(char
> * {ref-all})a.0_1];
>
> in the .optimized dump, where b is a small union.  As a result, store_field is
> invoked with:
>
> (gdb) p debug_rtx(target)
> (reg/v:DI 113 [ b ])
>
> and sends:
>
> (gdb) p debug_rtx(temp)
> (mem:BLK (reg/f:SI 115) [0 MEM  [(char * {ref-all})a.0_1]+0
> S8 A8])
>
> to store_bit_field with 64 bits and BLKmode.  The unaligned memory reference
> is then generated by store_integral_bit_field, which does:
>
> rtx value_word = operand_subword_force (value, wordnum, value_mode);
>
> thus creating an unaligned word mode (SImode) load.
>
> store_integral_bit_field is ready to handle BLKmode fields, there is even a
> subtlety with their handling on big-endian targets, see PR middle-end/50325,
> but not if they are unaligned, so the fix is simply to call extract_bit_field
> for them in order to generate an unaligned load.  As a bonus, this subsumes
> the big-endian specific path that was added under PR middle-end/50325.
>
> Bootstrapped/regtested on x86-64/Linux and SPARC/Solaris, OK for mainline?

OK.

Thanks,
Richard.

>
> 2019-03-13  Eric Botcazou  
>
> PR middle-end/92071
> * expmed.c (store_integral_bit_field): For fields larger than a word,
> call extract_bit_field on the value if the mode is BLKmode.  Remove
> specific path for big-endian targets and tidy things up a little bit.
>
>
> 2019-03-13  Eric Botcazou  
>
> * gcc.c-torture/compile/20200313-1.c: New test.
>
> --
> Eric Botcazou


[patch] Fix PR middle-end/92071

2020-03-13 Thread Eric Botcazou
Hi,

this is a regression present on the mainline for the ARM in the form of an 
assertion failure at -O2 in the back-end, which rightfully refuses to generate 
a load from an unaligned memory location (the ARM target is strict-alignment).

It comes from a __builtin_memcpy generating:

MEM  [(char * {ref-all})] = MEM  [(char 
* {ref-all})a.0_1];

in the .optimized dump, where b is a small union.  As a result, store_field is 
invoked with:

(gdb) p debug_rtx(target)
(reg/v:DI 113 [ b ])

and sends:

(gdb) p debug_rtx(temp)
(mem:BLK (reg/f:SI 115) [0 MEM  [(char * {ref-all})a.0_1]+0 
S8 A8])

to store_bit_field with 64 bits and BLKmode.  The unaligned memory reference 
is then generated by store_integral_bit_field, which does:

rtx value_word = operand_subword_force (value, wordnum, value_mode);

thus creating an unaligned word mode (SImode) load.

store_integral_bit_field is ready to handle BLKmode fields, there is even a 
subtlety with their handling on big-endian targets, see PR middle-end/50325, 
but not if they are unaligned, so the fix is simply to call extract_bit_field 
for them in order to generate an unaligned load.  As a bonus, this subsumes 
the big-endian specific path that was added under PR middle-end/50325.

Bootstrapped/regtested on x86-64/Linux and SPARC/Solaris, OK for mainline?


2019-03-13  Eric Botcazou  

PR middle-end/92071
* expmed.c (store_integral_bit_field): For fields larger than a word,
call extract_bit_field on the value if the mode is BLKmode.  Remove
specific path for big-endian targets and tidy things up a little bit.


2019-03-13  Eric Botcazou  

* gcc.c-torture/compile/20200313-1.c: New test.

-- 
Eric Botcazoudiff --git a/gcc/expmed.c b/gcc/expmed.c
index 04610276209..e7c03fbf92c 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -933,8 +933,7 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 	 However, only do that if the value is not BLKmode.  */
 
   const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
-  unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
-  unsigned int i;
+  const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
   rtx_insn *last;
 
   /* This is the mode we must force value to, so that there will be enough
@@ -950,35 +949,31 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 
   last = get_last_insn ();
-  for (i = 0; i < nwords; i++)
+  for (int i = 0; i < nwords; i++)
 	{
-	  /* If I is 0, use the low-order word in both field and target;
-	 if I is 1, use the next to lowest word; and so on.  */
-	  unsigned int wordnum = (backwards
-  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
-  - i - 1
-  : i);
-	  unsigned int bit_offset = (backwards ^ reverse
- ? MAX ((int) bitsize - ((int) i + 1)
-	* BITS_PER_WORD,
-	0)
- : (int) i * BITS_PER_WORD);
-	  rtx value_word = operand_subword_force (value, wordnum, value_mode);
-	  unsigned HOST_WIDE_INT new_bitsize =
-	MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
-
-	  /* If the remaining chunk doesn't have full wordsize we have
-	 to make sure that for big-endian machines the higher order
-	 bits are used.  */
-	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
-	{
-	  int shift = BITS_PER_WORD - new_bitsize;
-	  rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
-	  value_word = simplify_expand_binop (word_mode, lshr_optab,
-		  value_word, shift_rtx,
-		  NULL_RTX, true,
-		  OPTAB_LIB_WIDEN);
-	}
+	  /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
+	 except maybe for the last iteration.  */
+	  const unsigned HOST_WIDE_INT new_bitsize
+	= MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
+	  /* Bit offset from the starting bit number in the target.  */
+	  const unsigned int bit_offset
+	= backwards ^ reverse
+	  ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
+	  : i * BITS_PER_WORD;
+	  /* Starting word number in the value.  */
+	  const unsigned int wordnum
+	= backwards
+	  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
+	  : i;
+	  /* The chunk of the value in word_mode.  We use bit-field extraction
+	  in BLKmode to handle unaligned memory references and to shift the
+	  last chunk right on big-endian machines if need be.  */
+	  rtx value_word
+	= fieldmode == BLKmode
+	  ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
+   1, NULL_RTX, word_mode, word_mode, false,
+   NULL)
+	  : operand_subword_force (value, wordnum, value_mode);
 
 	  if (!store_bit_field_1 (op0, new_bitsize,
   bitnum + bit_offset,
/* PR middle-end/92071 */
/* Testcase by David Binderman  */

void *a;
union U { double c; char d[8]; };
vo

Re: [PATCH] df: Don't abuse bb->aux (PR94148)

2020-03-13 Thread Richard Biener via Gcc-patches
On March 12, 2020 11:29:45 PM GMT+01:00, Segher Boessenkool 
 wrote:
>The df dataflow solvers use the aux field in the basic_block struct,
>although that is reserved for any use by passes.  And not only that,
>it is required that you set all such fields to NULL before calling
>the solvers, or you quietly get wrong results.
>
>This changes the solvers to use a local array for last_change_age
>instead, just like it already had a local array for last_visit_age.
>
>Tested on powerpc64-linux {-m32,-m64}.  Also tested with the tests for
>PR94042, which it also solves (I need to confirm that still though,
>there are other testsuite problems interfering with my testing).

OK for trunk and affected branches (can you report back how old this issue is?) 

Thanks, 
Richard. 

>   PR rtl-optimization/94148
>   PR rtl-optimization/94042
>   * df-core.c (BB_LAST_CHANGE_AGE): Delete.
>   (df_worklist_propagate_forward): New parameter last_change_age, use
>   that instead of bb->aux.
>   (df_worklist_propagate_backward): Ditto.
>   (df_worklist_dataflow_doublequeue): Use a local array last_change_age.
>
>---
> gcc/df-core.c | 35 ++-
> 1 file changed, 18 insertions(+), 17 deletions(-)
>
>diff --git a/gcc/df-core.c b/gcc/df-core.c
>index 346849e..9875a26 100644
>--- a/gcc/df-core.c
>+++ b/gcc/df-core.c
>@@ -871,9 +871,6 @@ make_pass_df_finish (gcc::context *ctxt)
>The general data flow analysis engine.
>*/
> 
>-/* Return time BB when it was visited for last time.  */
>-#define BB_LAST_CHANGE_AGE(bb) ((ptrdiff_t)(bb)->aux)
>-
> /* Helper function for df_worklist_dataflow.
>Propagate the dataflow forward.
>Given a BB_INDEX, do the dataflow propagation
>@@ -897,7 +894,8 @@ df_worklist_propagate_forward (struct dataflow
>*dataflow,
>unsigned *bbindex_to_postorder,
>bitmap pending,
>sbitmap considered,
>- ptrdiff_t age)
>+ vec _change_age,
>+ int age)
> {
>   edge e;
>   edge_iterator ei;
>@@ -908,7 +906,8 @@ df_worklist_propagate_forward (struct dataflow
>*dataflow,
>   if (EDGE_COUNT (bb->preds) > 0)
> FOR_EACH_EDGE (e, ei, bb->preds)
>   {
>-if (age <= BB_LAST_CHANGE_AGE (e->src)
>+  if (bbindex_to_postorder[e->src->index] < last_change_age.length ()
>+  && age <= last_change_age[bbindex_to_postorder[e->src->index]]
>   && bitmap_bit_p (considered, e->src->index))
>   changed |= dataflow->problem->con_fun_n (e);
>   }
>@@ -942,7 +941,8 @@ df_worklist_propagate_backward (struct dataflow
>*dataflow,
> unsigned *bbindex_to_postorder,
> bitmap pending,
> sbitmap considered,
>-  ptrdiff_t age)
>+  vec _change_age,
>+  int age)
> {
>   edge e;
>   edge_iterator ei;
>@@ -953,7 +953,8 @@ df_worklist_propagate_backward (struct dataflow
>*dataflow,
>   if (EDGE_COUNT (bb->succs) > 0)
> FOR_EACH_EDGE (e, ei, bb->succs)
>   {
>-if (age <= BB_LAST_CHANGE_AGE (e->dest)
>+  if (bbindex_to_postorder[e->dest->index] < last_change_age.length ()
>+  && age <= last_change_age[bbindex_to_postorder[e->dest->index]]
>   && bitmap_bit_p (considered, e->dest->index))
>   changed |= dataflow->problem->con_fun_n (e);
>   }
>@@ -991,10 +992,10 @@ df_worklist_propagate_backward (struct dataflow
>*dataflow,
>  worklists (we are processing WORKLIST and storing new BBs to visit in
>PENDING).
> 
>-   As an optimization we maintain ages when BB was changed (stored in
>bb->aux)
>-   and when it was last visited (stored in last_visit_age).  This
>avoids need
>-   to re-do confluence function for edges to basic blocks whose source
>-   did not change since destination was visited last time.  */
>+   As an optimization we maintain ages when BB was changed (stored in
>+   last_change_age) and when it was last visited (stored in
>last_visit_age).
>+   This avoids need to re-do confluence function for edges to basic
>blocks
>+   whose source did not change since destination was visited last
>time.  */
> 
> static void
> df_worklist_dataflow_doublequeue (struct dataflow *dataflow,
>@@ -1010,11 +1011,11 @@ df_worklist_dataflow_doublequeue (struct
>dataflow *dataflow,
>   int age = 0;
>   bool changed;
>   vec last_visit_age = vNULL;
>+  vec last_change_age = vNULL;
>   int prev_age;
>-  basic_block bb;
>-  int i;
> 
>   last_visit_age.safe_grow_cleared (n_blocks);
>+  last_change_age.safe_grow_cleared (n_blocks);
> 
>   /* Double-queueing. Worklist is for the current iteration,
>  and pending is for the next. */
>@@ -1032,30 +1033,30 @@