Re: [PATCH] [Ada] Make clockid_t type public on GNU/kFreeBSD

2019-07-03 Thread Arnaud Charlet
OK, thanks.

> From: James Clarke 
> 
> Monotonic_Clock and RT_Resolution in the recently-added s-tpopmo.adb
> call clock_gettime/clock_getres with the integral constants from OSC and
> thus rely on clockid_t being an integral type, so we cannot hide it on
> GNU/kFreeBSD. Instead, make the definition public to match all the other
> implementations.
> 
> gcc/ada
>   * libgnarl/s-osinte__kfreebsd-gnu.ads (clockid_t): Make type
>   definition public.
>   (CLOCK_REALTIME): Make value public.
> ---
>  gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads | 8 ++--
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads 
> b/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads
> index 408187314..b60ffd2c0 100644
> --- a/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads
> +++ b/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads
> @@ -206,9 +206,8 @@ package System.OS_Interface is
> function nanosleep (rqtp, rmtp : access timespec) return int;
> pragma Import (C, nanosleep, "nanosleep");
> 
> -   type clockid_t is private;
> -
> -   CLOCK_REALTIME : constant clockid_t;
> +   type clockid_t is new int;
> +   CLOCK_REALTIME : constant clockid_t := 0;
> 
> function clock_gettime
>   (clock_id : clockid_t;
> @@ -607,9 +606,6 @@ private
> end record;
> pragma Convention (C, timespec);
> 
> -   type clockid_t is new int;
> -   CLOCK_REALTIME : constant clockid_t := 0;
> -
> type pthread_attr_t is record
>detachstate   : int;
>schedpolicy   : int;
> --
> 2.17.1
> 


Re: [SVE] [fwprop] PR88833 - Redundant moves for WHILELO-based loops

2019-07-03 Thread Prathamesh Kulkarni
On Wed, 3 Jul 2019 at 17:06, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> > On Tue, 2 Jul 2019 at 18:22, Richard Sandiford
> >  wrote:
> >>
> >> Prathamesh Kulkarni  writes:
> >> > On Tue, 2 Jul 2019 at 16:59, Richard Sandiford
> >> >  wrote:
> >> >>
> >> >> Thanks for fixing this.
> >> >>
> >> >> Prathamesh Kulkarni  writes:
> >> >> > diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
> >> >> > index 89a46a933fa..79bd0cfbd28 100644
> >> >> > --- a/gcc/simplify-rtx.c
> >> >> > +++ b/gcc/simplify-rtx.c
> >> >> > @@ -6697,6 +6697,19 @@ simplify_subreg (machine_mode outermode, rtx 
> >> >> > op,
> >> >> >   }
> >> >> >  }
> >> >> >
> >> >> > +  /* If op is a vector comparison operator, rewrite it in a new mode.
> >> >> > + This probably won't match, but may allow further 
> >> >> > simplifications.
> >> >> > + Also check if number of elements and size of each element
> >> >> > + match for outermode and innermode.  */
> >> >> > +
> >> >>
> >> >> Excess blank line after the comment.  IMO the second part of the comment
> >> >> reads too much like an afterthought.  How about:
> >> >>
> >> >>   /* If OP is a vector comparison and the subreg is not changing the
> >> >>  number of elements or the size of the elements, change the result
> >> >>  of the comparison to the new mode.  */
> >> >>
> >> >> > +  if (COMPARISON_P (op)
> >> >> > +  && VECTOR_MODE_P (outermode)
> >> >> > +  && VECTOR_MODE_P (innermode)
> >> >> > +  && (known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS 
> >> >> > (innermode)))
> >> >> > +  && (known_eq (GET_MODE_UNIT_SIZE (outermode),
> >> >> > + GET_MODE_UNIT_SIZE (innermode
> >> >>
> >> >> Redundant brackets around the known_eq calls.
> >> >>
> >> >> > +return gen_rtx_fmt_ee (GET_CODE (op), outermode, XEXP (op, 0), 
> >> >> > XEXP (op, 1));
> >> >>
> >> >> This should use simplify_gen_relational, so that we try to simplify
> >> >> the new expression.
> >> > Does the attached version look OK ?
> >>
> >> OK with a suitable changelog (remember to post those!) if it passes 
> >> testing.
> > The change to simplify_subreg regressed avx2-pr54700-1.C -;)
> >
> > For following test-case:
> > __attribute__((noipa)) __v8sf
> > f7 (__v8si a, __v8sf b, __v8sf c)
> > {
> >   return a < 0 ? b : c;
> > }
> >
> > Before patch, combine shows:
> > Trying 8, 9 -> 10:
> > 8: r87:V8SI=const_vector
> > 9: r89:V8SI=r87:V8SI>r90:V8SI
> >   REG_DEAD r90:V8SI
> >   REG_DEAD r87:V8SI
> >10: r86:V8SF=unspec[r92:V8SF,r91:V8SF,r89:V8SI#0] 104
> >   REG_DEAD r92:V8SF
> >   REG_DEAD r89:V8SI
> >   REG_DEAD r91:V8SF
> > Successfully matched this instruction:
> > (set (reg:V8SF 86)
> > (unspec:V8SF [
> > (reg:V8SF 92)
> > (reg:V8SF 91)
> > (subreg:V8SF (lt:V8SI (reg:V8SI 90)
> > (const_vector:V8SI [
> > (const_int 0 [0]) repeated x8
> > ])) 0)
> > ] UNSPEC_BLENDV))
> > allowing combination of insns 8, 9 and 10
> >
> > After applying patch, combine shows:
> >
> > Trying 8, 9 -> 10:
> > 8: r87:V8SI=const_vector
> > 9: r89:V8SI=r87:V8SI>r90:V8SI
> >   REG_DEAD r90:V8SI
> >   REG_DEAD r87:V8SI
> >10: r86:V8SF=unspec[r92:V8SF,r91:V8SF,r89:V8SI#0] 104
> >   REG_DEAD r92:V8SF
> >   REG_DEAD r89:V8SI
> >   REG_DEAD r91:V8SF
> > Failed to match this instruction:
> > (set (reg:V8SF 86)
> > (unspec:V8SF [
> > (reg:V8SF 92)
> > (reg:V8SF 91)
> > (lt:V8SF (reg:V8SI 90)
> > (const_vector:V8SI [
> > (const_int 0 [0]) repeated x8
> > ]))
> > ] UNSPEC_BLENDV))
>
> Bah.  If the port isn't self-consistent about whether a subreg should
> be used, it's tempting to say that a subreg should be used and fix the
> places that don't.  At least that way we'd avoid the abomination -
> ABOMINATION! - of using NaNs to represent truth.
>
> But I agree it looks like this is the only pattern affected.
>
> > Adjusting UNSPEC_BLENDV pattern to match the simplified subreg seems to 
> > work.
> > Does it look OK ?
> >
> > Testing the attached patch in progress.
> > (A quick comparison for i386.exp now shows no difference before/after 
> > patch).
> >
> > Thanks,
> > Prathamesh
> >>
> >> Thanks,
> >> Richard
> >
> > 2019-07-03  Prathamesh Kulkarni  
> >
> >   * fwprop.c (reg_single_def_p): New function.
> >   (propagate_rtx_1): Add unconditional else inside RTX_EXTRA case.
> >   (forward_propagate_into): New parameter reg_prop_only
> >   with default value false.
> >   Propagate def's src into loop only if SET_SRC and SET_DEST
> >   of def_set have single definitions.
> >   Likewise if reg_prop_only is set to true.
> >   (fwprop): New param fwprop_addr_p.
> >   Integrate fwprop_addr into fwprop.
> >   (fwprop_addr): Remove.
> >   (pass_rtl_fwpro

Re: [PATCH] Fix PR91069

2019-07-03 Thread Jakub Jelinek
On Wed, Jul 03, 2019 at 12:49:34PM +0200, Richard Biener wrote:
> 2019-07-03  Richard Biener  
> 
>   PR middle-end/91069
>   * match.pd (vec_perm -> bit_insert): Fix element read from
>   first vector.
> 
>   * gcc.dg/pr91069.c: New testcase.

I've noticed the testcase FAILs on ia32, the problem is that long is
32-bit and so v2di type isn't v2di we are expecting, but v4si and mismatches
the v2df in number of elements and element precision.

Fixed thusly, regtested on x86_64-linux and i686-linux, committed to trunk.

Note, still not truly portable, if double and long long have different
sizes on some target, this will still fail.

2019-07-04  Jakub Jelinek  

PR middle-end/91069
* gcc.dg/pr91069.c (v2df): Use 2 * sizeof (double) instead of
hardcoded 16 for better portability.
(v2di): Change from long vector to long long vector.  Use
2 * sizeof (long long) instead of hardcoded 16.

--- gcc/testsuite/gcc.dg/pr91069.c.jj   2019-07-04 00:18:31.510099301 +0200
+++ gcc/testsuite/gcc.dg/pr91069.c  2019-07-04 07:08:24.944046933 +0200
@@ -1,8 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-std=gnu11" } */
 
-typedef double v2df __attribute__((vector_size(16)));
-typedef long v2di __attribute__((vector_size(16)));
+typedef double v2df __attribute__((vector_size(2 * sizeof (double;
+typedef long long v2di __attribute__((vector_size(2 * sizeof (long long;
 
 void foo (v2df *res, v2df *src)
 {


Jakub


[committed] Call lower_omp on GIMPLE_OMP_SCAN body

2019-07-03 Thread Jakub Jelinek
Hi!

For is_simd, we change GIMPLE_OMP_SCAN statements immediately to GIMPLE_NOPs
and emit their bodies after them, so the lower_omp caller keeps iterating
over those statements, but for is_for we don't do that, so we should
lower_omp the bodies.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-07-04  Jakub Jelinek  

* omp-low.c (lower_omp_scan): Call lower_omp on stmt's body
in worksharing loop scans.

--- gcc/omp-low.c.jj2019-07-03 07:02:16.460989884 +0200
+++ gcc/omp-low.c   2019-07-03 21:02:10.097104445 +0200
@@ -8874,8 +8874,10 @@ lower_omp_scan (gimple_stmt_iterator *gs
   gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
   gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
   gsi_replace (gsi_p, gimple_build_nop (), true);
+  return;
 }
-  else if (before)
+  lower_omp (gimple_omp_body_ptr (stmt), octx);
+  if (before)
 {
   gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (stmt));
   gsi_insert_seq_before (&gsi, before, GSI_SAME_STMT);

Jakub


Re: [PATCH] simplify-rtx.c: Change BITSIZE to UNIT_PRECISION in simplification

2019-07-03 Thread John Darrington
On Wed, Jul 03, 2019 at 04:32:54PM -0600, Jeff Law wrote:

 John, I assume you're doing this for an out of tree port (s12z?)?

That is correct.

 Otherwise it'd also be useful if you could include a test which triggers
 the assert.

Once I have the other aspects of the port sorted out I can create a test
case (although it wouldn't be necessary because it happens building
libgcc).
 
 If you could confirm that Richard's suggestion of using
 GET_MODE_PRECISION rather than GET_MODE_UNIT_PRECISION works it'd be
 appreciated.

I had wondered about the difference between the two.   Unfortunately, 
neither are mentioned in gccint.texinfo   - I ended up deciding upon
GET_MOE_UNIT_PRECISION because there is a similar situation a few lines
above in simplify-rtx.c which uses it.   

If GET_MODE_UNIT_PRECISION is inappropriate, then please change it.


Regards,

John

-- 
Avoid eavesdropping.  Send strong encrypted email.
PGP Public key ID: 1024D/2DE827B3 
fingerprint = 8797 A26D 0854 2EAB 0285  A290 8A67 719C 2DE8 27B3
See http://sks-keyservers.net or any PGP keyserver for public key.



[committed] Fix ICE on scan* OpenMP tests on various targets (PR tree-optimization/91074)

2019-07-03 Thread Jakub Jelinek
Hi!

In this case we create
  temp = .MUL_OVERFLOW (arg1, arg2);
  temp2 = REALPART_EXPR ;
  temp3 = IMAGPART_EXPR ;
x86 has been happy in the testing even without the following patch,
though temp hasn't been rewritten into SSA, but on various other targets
the expansion assumes that .???_OVERFLOW result must be a SSA_NAME.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk.

2019-07-04  Jakub Jelinek  

PR tree-optimization/91074
* omp-low.c (lower_omp_for_scan): Set DECL_GIMPLE_REG_P on cplx
temporary.

--- gcc/omp-low.c.jj2019-07-03 07:02:16.460989884 +0200
+++ gcc/omp-low.c   2019-07-03 16:59:25.819391742 +0200
@@ -9699,6 +9699,7 @@ lower_omp_for_scan (gimple_seq *body_p,
   gimple_seq_add_stmt (body_p, g);
 
   tree cplx = create_tmp_var (build_complex_type (unsigned_type_node, false));
+  DECL_GIMPLE_REG_P (cplx) = 1;
   g = gimple_build_call_internal (IFN_MUL_OVERFLOW, 2, thread_nump1, twok);
   gimple_call_set_lhs (g, cplx);
   gimple_seq_add_stmt (body_p, g);

Jakub


[PATCH] Fix vect_init_vector regression (PR tree-optimization/91063)

2019-07-03 Thread Jakub Jelinek
Hi!

A recent change in vect_init_vector emits new statements into a gimple_seq
stmts and then calls vect_init_vector_1 on each of those statements.

This doesn't work well, because vect_init_vector_1 places the given
statement into another sequence (body of some bb, on the edge, ...),
and when the caller holds a gimple_stmt_iterator pointing to that statement
across its insertion into another sequence the ->prev/->next bookkeeping may
go wrong, such as on the testcase where stmt == stmt->prev == stmt->next
because of that.

Fixed by first removing the statement from the stmts sequence (thus
gsi_remove already updates the iterator to the next statement).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-07-04  Jakub Jelinek  

PR tree-optimization/91063
* tree-vect-stmts.c (vect_init_vector): Call gsi_remove to remove
stmt from stmts sequence before calling vect_init_vector_1.
Formatting fix.

* gcc.dg/gomp/pr91063.c: New test.

--- gcc/tree-vect-stmts.c.jj2019-07-03 10:24:33.463768431 +0200
+++ gcc/tree-vect-stmts.c   2019-07-03 12:35:48.998435660 +0200
@@ -1496,15 +1496,19 @@ vect_init_vector (stmt_vec_info stmt_inf
   promotion of invariant/external defs.  */
val = gimple_convert (&stmts, TREE_TYPE (type), val);
  for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
-  !gsi_end_p (gsi2); gsi_next (&gsi2))
-   vect_init_vector_1 (stmt_info, gsi_stmt (gsi2), gsi);
+  !gsi_end_p (gsi2); )
+   {
+ init_stmt = gsi_stmt (gsi2);
+ gsi_remove (&gsi2, false);
+ vect_init_vector_1 (stmt_info, init_stmt, gsi);
+   }
}
}
   val = build_vector_from_val (type, val);
 }
 
   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
-  init_stmt = gimple_build_assign  (new_temp, val);
+  init_stmt = gimple_build_assign (new_temp, val);
   vect_init_vector_1 (stmt_info, init_stmt, gsi);
   return new_temp;
 }
--- gcc/testsuite/gcc.dg/gomp/pr91063.c.jj  2019-07-03 12:50:55.123799217 
+0200
+++ gcc/testsuite/gcc.dg/gomp/pr91063.c 2019-07-03 12:50:43.320989864 +0200
@@ -0,0 +1,17 @@
+/* PR tree-optimization/91063 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp-simd" } */
+/* { dg-additional-options "-mavx512f" { target { i?86-*-* x86_64-*-* } } } */
+
+struct S { void *s; };
+
+int
+foo (struct S *x)
+{
+  int r = 0;
+  int i;
+#pragma omp simd reduction (+ : r)
+  for (i = 0; i < 64; ++i)
+r += (int) (x->s != 0);
+  return r;
+}

Jakub


Go patch committed: Optimize 0, 1, 2-case select statements

2019-07-03 Thread Ian Lance Taylor
This Go patch by Cherry Zhang optimizes 0,1,2-case select statements.
For a select statement with zero-, one-, or two-case with a default
case, we can generate simpler code instead of calling the generic
selectgo.  A zero-case select is just blocking the execution.  A
one-case select is mostly just executing the case.  A two-case select
with a default case is a non-blocking send or receive.  We add these
special cases for lowering a select statement, using libgo code
already written for the gc compiler.  Bootstrapped and ran Go
testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 273032)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-197b6fdfb861f07bab7365e350b5b855cfccc290
+7a8e10be0ddb8909ce25a264d03b24cee4df60cc
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/gogo.cc
===
--- gcc/go/gofrontend/gogo.cc   (revision 273009)
+++ gcc/go/gofrontend/gogo.cc   (working copy)
@@ -6262,7 +6262,8 @@ Function_declaration::get_or_make_decl(G
 
  if (this->asm_name_ == "runtime.gopanic"
  || this->asm_name_ == "__go_runtime_error"
-  || this->asm_name_ == "runtime.panicdottype")
+  || this->asm_name_ == "runtime.panicdottype"
+  || this->asm_name_ == "runtime.block")
flags |= Backend::function_does_not_return;
}
 
Index: gcc/go/gofrontend/runtime.def
===
--- gcc/go/gofrontend/runtime.def   (revision 272944)
+++ gcc/go/gofrontend/runtime.def   (working copy)
@@ -204,6 +204,22 @@ DEF_GO_RUNTIME(CHANRECV2, "runtime.chanr
 DEF_GO_RUNTIME(SELECTGO, "runtime.selectgo", P3(POINTER, POINTER, INT),
   R2(INT, BOOL))
 
+// Non-blocking send a value on a channel, used for two-case select
+// statement with a default case.
+DEF_GO_RUNTIME(SELECTNBSEND, "runtime.selectnbsend", P2(CHAN, POINTER), 
R1(BOOL))
+
+// Non-blocking receive a value from a channel, used for two-case select
+// statement with a default case.
+DEF_GO_RUNTIME(SELECTNBRECV, "runtime.selectnbrecv", P2(POINTER, CHAN), 
R1(BOOL))
+
+// Non-blocking tuple receive from a channel, used for two-case select
+// statement with a default case.
+DEF_GO_RUNTIME(SELECTNBRECV2, "runtime.selectnbrecv2", P3(POINTER, POINTER, 
CHAN),
+   R1(BOOL))
+
+// Block execution.  Used for zero-case select.
+DEF_GO_RUNTIME(BLOCK, "runtime.block", P0(), R0())
+
 
 // Panic.
 DEF_GO_RUNTIME(GOPANIC, "runtime.gopanic", P1(EFACE), R0())
Index: gcc/go/gofrontend/statements.cc
===
--- gcc/go/gofrontend/statements.cc (revision 273032)
+++ gcc/go/gofrontend/statements.cc (working copy)
@@ -5665,6 +5665,28 @@ Select_statement::do_lower(Gogo* gogo, N
   Block* b = new Block(enclosing, loc);
 
   int ncases = this->clauses_->size();
+
+  // Zero-case select.  Just block the execution.
+  if (ncases == 0)
+{
+  Expression* call = Runtime::make_call(Runtime::BLOCK, loc, 0);
+  Statement *s = Statement::make_statement(call, false);
+  b->add_statement(s);
+  this->is_lowered_ = true;
+  return Statement::make_block_statement(b, loc);
+}
+
+  // One-case select.  It is mostly just to run the case.
+  if (ncases == 1)
+return this->lower_one_case(b);
+
+  // Two-case select with one default case.  It is a non-blocking
+  // send/receive.
+  if (ncases == 2
+  && (this->clauses_->at(0).is_default()
+  || this->clauses_->at(1).is_default()))
+return this->lower_two_case(b);
+
   Type* scase_type = Channel_type::select_case_type();
   Expression* ncases_expr =
 Expression::make_integer_ul(ncases, NULL,
@@ -5733,6 +5755,213 @@ Select_statement::do_lower(Gogo* gogo, N
   return Statement::make_block_statement(b, loc);
 }
 
+// Lower a one-case select statement.
+
+Statement*
+Select_statement::lower_one_case(Block* b)
+{
+  Select_clauses::Select_clause& scase = this->clauses_->at(0);
+  Location loc = this->location();
+  Expression* chan = scase.channel();
+  if (chan != NULL)
+{
+  // Lower this to
+  //   if chan == nil { block() }; send/recv; body
+  Temporary_statement* chantmp = Statement::make_temporary(NULL, chan, 
loc);
+  b->add_statement(chantmp);
+  Expression* chanref = Expression::make_temporary_reference(chantmp, loc);
+
+  Expression* nil = Expression::make_nil(loc);
+  Expression* cond = Expression::make_binary(OPERATOR_EQEQ, chanref, nil, 
loc);
+  Block* bnil = new Block(b, loc);
+  Expression* call = Runtime::make_call(Runtime::BLOCK, loc, 0);
+  Statement* s = Statement::make_statement(call, false);
+  bnil->add_statement(s);
+ 

Go patch committed: Fix indentation of select statement AST dump

2019-07-03 Thread Ian Lance Taylor
This Go frontend patch by Cherry Zhang fixes the indentation of select
statements in the AST dump.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 273026)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-bf66d40bc7adb438dcfac85d73bfa7b17217eed9
+197b6fdfb861f07bab7365e350b5b855cfccc290
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/statements.cc
===
--- gcc/go/gofrontend/statements.cc (revision 272944)
+++ gcc/go/gofrontend/statements.cc (working copy)
@@ -5766,6 +5766,7 @@ Select_statement::do_dump_statement(Ast_
 {
   ast_dump_context->ostream() << " {" << dsuffix(location()) << std::endl;
   this->clauses_->dump_clauses(ast_dump_context);
+  ast_dump_context->print_indent();
   ast_dump_context->ostream() << "}";
 }
   ast_dump_context->ostream() << std::endl;


Re: [PATCH,RFC,V3 0/5] Support for CTF in GCC

2019-07-03 Thread Indu Bhagat



On 07/03/2019 05:31 AM, Richard Biener wrote:

On Wed, Jul 3, 2019 at 5:18 AM Jeff Law  wrote:

On 7/2/19 11:54 AM, Indu Bhagat wrote:

Ping.
Can someone please review these patches ? We would like to get the
support for CTF integrated soon.

I'm not sure there's really even consensus that we want CTF support in
GCC.  Though I think that the changes you've made in the last several
weeks do make it somewhat more palatable.  But ultimately the first step
is to get that consensus.

I'd hazard a guess that Jakub in particular isn't on board as he's been
pushing to some degree for post-processing or perhaps doing it via a
plug in.

Richi has been guiding you a bit through how to make the changes easier
to integrate, but I haven't seen him state one way or the other his
preference on whether or not CTF support is something we want.

I'm mostly worried about the lack of a specification and the appearant
restriction on a subset of C (the patches have gcc_unreachable ()
in paths that can be reached by VECTOR_TYPE or COMPLEX_TYPE
not to mention FIXED_POINT_TYPE, etc...).


RE lack of specification : I cannot agree more; This does need to absolutely 
exist
if we envision CTF support in toolchain to be useful to the community.
We plan on getting to this task once the Linker changes are scoped and closer
to done (~ a couple of weeks from now). Will this work ?

RE subset of C : It is true that CTF format currently does leave out a very
small subset of C like FIXED_POINT as you noted ( CTF does have representation
for COMPLEX_TYPE, if my code paths culminate to gcc_unreachable () for that, I
should fix them ).  The end goal is to make it support all of C, and not just a
subset.

Meanwhile, I intend to make the compiler skip types when a C construct is not
supported instead of crashing because of gcc_unreachable (). (You may have also
noted stubs with "TBD WARN instead" notes in the patch series I sent.)




While CTF might be easy and fast to parse and small I fear it will
go the STABS way of being not extensible and bitrotten.


FWIW, I can understand this. We will maintain it. And I hope it will also be a
community effort thereafter with active consumers, so there is a positive
feedback loop.



Given it appears to generate only debug info for symbols and no locations
or whatnot it should be sufficient to introspect the compilation to generate
the CTF info on the side and then merge it in at link-time.  Which makes
me wonder if this shouldn't be a plugin for now until it is more complete
and can be evaluated better (comments in the patches indicate even the
on-disk format is in flux?).  Adding plugin hook invocations to the three
places the CTF info generation hooks off should be easy.


Yes, some bits of the on-disk format are being adapted to make it easier to
adopt the CTF format across the board. E.g., we recently added CU name in the
CTF header. As another example, we added CTF_K_SLICE type because there existed
no way in CTF to represent enum bitfields. For the most part though, CTF format
has stayed as is.

Hmm...a GCC plugin for CTF generation at compile-time may work out for a single
compilation unit.  But I am not sure how will LTO be supported in that case.
Basically, for LTO and -gtLEVEL to work together, I need the lto-wrapper to be
aware of the presence of .ctf sections (so I think). I will need to combine the
.ctf sections from multiple compilation units into a CTF archive, which the
linker can then de-duplicate.

Even if I assume that the technical hurdle in the above paragraph is solvable
within the purview of a plugin, I fear worse problems of adoption, maintenance
and distribution in the long run, if CTF support unfortunately ever remains to 
be
done via a plugin for reasons unforeseen.

Going the plugin route for the short term, will continue to suffer similar
problems of distribution and support.

- Is the plugin infrastructure supported on most platforms ? Also, I see that
  the plugin infrastructure supports all gcc versions from 4.5 onwards.
  Can someone confirm ? ( We minimally want the toolchain support with
  GCC 4.8.5 and GCC 8 and later, for now. )

- How will the plugin be distributed for a variety of platforms and
  architectures outside of what Oracle Linux commits to support ?

  Unless you are suggesting that the GCC plugin be distributed within GCC,
  meanwhile ? Well, that may be acceptable in the short term, depending on how
  I resolve some points raised above.




That said, the patch series isn't ready for integration since it will
crash left and right -- did you bootstrap and run the testsuite
with -gt?



Bootstrap and Testsuite : Yes, I have.  On x86_64/linux, sparc64/linux,
  aarch64/linux.
Run testsuite with -gt : Not yet. Believe me, it's on my plate. And I already
 regret not having done it sooner :)
Bootstrap with -gt : Not yet. I should try soon.

(I have compiled libdtrace-ctf with -gt and parsed the .ctf sect

Re: [PATCH,RFC,V3 0/5] Support for CTF in GCC

2019-07-03 Thread Indu Bhagat




On 07/02/2019 08:18 PM, Jeff Law wrote:

On 7/2/19 11:54 AM, Indu Bhagat wrote:

Ping.
Can someone please review these patches ? We would like to get the
support for CTF integrated soon.

I'm not sure there's really even consensus that we want CTF support in
GCC.  Though I think that the changes you've made in the last several
weeks do make it somewhat more palatable.  But ultimately the first step
is to get that consensus.


Thanks for your message.  Absolutely, consensus is the first step.  We are
happy to take all the constructive feedback and answer all the concerns to make
certain that CTF support in toolchain will be a useful and worthwhile
contribution.



I'd hazard a guess that Jakub in particular isn't on board as he's been
pushing to some degree for post-processing or perhaps doing it via a
plug in.

Richi has been guiding you a bit through how to make the changes easier
to integrate, but I haven't seen him state one way or the other his
preference on whether or not CTF support is something we want.

I'm hesitant to add CTF support in GCC, but can understand how it might
be useful given the kernel's aversion to everything dwarf.  But if the
kernel is the primary consumer than I'd lean towards post-processing.


Kernel is just *one* of the consumers. There are other applications, external
and internal to Oracle, that have shown interest. Not just that, a couple of
distro and package maintainers have shown interest in enabling CTF by default.

Post-processing in kernel and other internally available large applications has
been a deterrent for adoption because of high space and compile-time costs. I
answered some of Jakub's concerns in the post here
https://gcc.gnu.org/ml/gcc-patches/2019-06/msg00131.html.

I would even argue that the usecases will only grow if CTF is properly
supported in the toolchain.

Thanks



Re: [PATH] Patch to fix -finstrument-functions-exclude-function-list handling of namespaces and escaped commas

2019-07-03 Thread Jeff Law
On 6/12/19 12:25 PM, Oliver Browne wrote:
> Patch fixes following PRs:
> c++/90816 - -finstrument-functions-exclude-function-list improperly
> handles namespace/class definitions
> c++/90809 - -finstrument-functions-exclude-function-list mishandles
> comma escaping
> 
> Fixes as follows:
> At flag_instrument_functions_exclude_p [gimplify.c]
> Using lang_hooks.decl_printable_name (fndecl, 1) to get namespace /
> class information as part of printable name to allow for
> inclusion of namespace / class specification when passing symbols to
> -finstrument-functions-exclude-function-list. Was
> previously lang_hooks.decl_printable_name (fndecl, 0).
> 
> At add_comma_separated_to_vector [opts.c]
> Added writing of a null character to w after primary loop finishes, to
> account for offset between r and w when r reaches end of
> passed string.
> 
> from Oliver Browne 
> PR c++/90816
> PR c++/90809
>  * gimplify.c (flag_instrument_functions_exclude_p): include namespace
>information as part of decl name
>  * opts.c (add_comma_separated_to_vector): add null character to correct
>position in last token added to token vector
> Index: gimplify.c
> ===
> --- gimplify.c 2019-06-12 19:07:26.872077000 +0100
> +++ gimplify.c 2019-06-12 18:55:10.609255000 +0100
> @@ -13987,11 +13987,17 @@ flag_instrument_functions_exclude_p (tre
>  {
>const char *name;
> -  int i;
> +  unsigned int i;
>char *s;
> 
> -  name = lang_hooks.decl_printable_name (fndecl, 0);
> -  FOR_EACH_VEC_ELT (*v, i, s)
> +  name = lang_hooks.decl_printable_name (fndecl, 1);
> +   for(i = 0; i < v->length(); i++){
> + s = (*v)[i];
> + if(strstr(name, s) != NULL){
> +   return(true);
> + }
> +   }
> +/*  FOR_EACH_VEC_ELT (*v, i, s)
>   if (strstr (name, s) != NULL)
> -   return true;
> + return true;*/
>  }
So why did you drop the FOR_EACH_VEC_ELT and open-code the loop?  I
don't see that as being a necessary change.  Leaving the
FOR_EACH_VEC_ELT in place would also avoid the mis-formatting you've
introduced as well as removing clutter of a commented-out hunk of code.

> 
> @@ -14278,3 +14284,3 @@ gimplify_hasher::equal (const elt_t *p1,
> 
>return true;
> -}
> \ No newline at end of file
> +}
> Index: opts.c
> ===
> --- opts.c 2019-06-12 19:10:04.354612000 +0100
> +++ opts.c 2019-06-12 18:53:43.675852000 +0100
> @@ -263,7 +263,8 @@ add_comma_separated_to_vector (void **pv
>   *w++ = *r++;
>  }
> -  if (*token_start != '\0')
> +  *w = '\0';
> +  if (*token_start != '\0'){
>  v->safe_push (token_start);
> -
> +  }
>*pvec = v;
So why introduce the unnecessary { } scope?  And why do it in a way that
is different than 99% of the other code in GCC (where the { and } will
be on individual lines with 2 spaces of indention?

Jeff


Re: [PATCH] Add -fprofile-note option.

2019-07-03 Thread Sandra Loosemore

On 7/2/19 6:37 AM, Martin Liška wrote:


@@ -12403,6 +12403,11 @@ the profile feedback data files. See 
@option{-fprofile-dir}.
 To optimize the program based on the collected profile information, use
 @option{-fprofile-use}.  @xref{Optimize Options}, for more information.
 
+@item -fprofile-note=@var{path}

+@opindex fprofile-note
+
+If @var{path} is specified, GCC saves gcno filename into @var{path} location.
+
 @item -fprofile-update=@var{method}
 @opindex fprofile-update
 



"gcno filename" is implementor-speak with no context.  In other places 
the documentation uses "@file{.gcno} file".  Please use that here as 
well, and add a @cindex entry on the main definition/discussion of these 
things and a cross-reference here.


I assume this option only makes sense with some other profiling options. 
 What are they?


Can there be more than one of these files per gcc invocation?  E.g. if 
you specify a command line like


gcc -c foo.c bar.c

??  It looks like the code part of the patch would cause the file to be 
overwritten.  Maybe this should be like -o and diagnose an error?


-Sandra



Merge from trunk to gccgo branch

2019-07-03 Thread Ian Lance Taylor
I merged trunk revision 273026 to the gccgo branch.

Ian


Re: [range-ops] patch 05/04: bonus round!

2019-07-03 Thread Jeff Law
On 7/1/19 4:24 AM, Aldy Hernandez wrote:
> This is completely unrelated to range-ops itself, but may yield better
> results in value_range intersections.  It's just something I found while
> working on VRP, and have been dragging around on our branch.
> 
> If we know the intersection of two ranges is the empty set, there is no
> need to conservatively add anything to the result.
> 
> Tested on x86-64 Linux with --enable-languages=all.
> 
> Aldy
> 
> range-ops-intersect-undefined.patch
> 
> commit 4f9aa7bd1066267eee92f622ff29d78534158e20
> Author: Aldy Hernandez 
> Date:   Fri Jun 28 11:34:19 2019 +0200
> 
> Do not try to further refine a VR_UNDEFINED result when intersecting
> value_ranges.
> 
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 01fb97cedb2..b0d78ee6871 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,9 @@
> +2019-07-01  Aldy Hernandez  
> +
> + * tree-vrp.c (intersect_ranges): If we know the intersection is
> + empty, there is no need to conservatively add anything else to
> + the set.
Do we have a test where this improves the code or at least the computed
ranges?

jeff


Re: [PATCH] Fix out-of-ssa with unsupported vector types (PR rtl-optimization/90756)

2019-07-03 Thread Jeff Law
On 7/3/19 1:59 AM, Jakub Jelinek wrote:
> Hi!
> 
> This weird hink has been added by Alex in r228175, it isn't clear why
> nor how it ever can be correct.  While say for DECL_MODE we have the problem
> that for global vars when switching between functions with different ISA
> selections the mode might not be ok, TYPE_MODE is stored as a raw vector
> mode that a function overrides to BLKmode if that particular vector mode is
> not supported.  This hunk breaks that assumption and leaks unsupported
> vector modes in the IL of the functions which then have no way to handle
> that, but even before that happens usually it breaks because we try to
> convert_mode between BLKmode and the unsupported vector mode or vice versa
> on PHI nodes.
> 
> Alex, do you remember why this has been done?
> 
> Patch has been bootstrapped/regtested on x86_64-linux and i686-linux (the
> latter didn't have SSE enabled by default), Jeff said he'll test it on many
> crosses.  Ok for trunk if that testing succeeds?
> 
> 2019-07-03  Jakub Jelinek  
> 
>   PR rtl-optimization/90756
>   * explow.c (promote_ssa_mode): Always use TYPE_MODE, don't bypass it
>   for VECTOR_TYPE_P.
> 
>   * gcc.dg/pr90756.c: New test.
Nothing tripped related to this patch in the various targets in my tester.

jeff


Re: [range-ops] patch 02/04: enforce canonicalization in value_range

2019-07-03 Thread Jeff Law
On 7/3/19 3:35 AM, Aldy Hernandez wrote:
> On 7/2/19 5:36 PM, Jeff Law wrote:
> 
>> I don't see anything inherently concerning here.  I do wonder if there's
>> any value in having a debugging function in the class that would iterate
>> over the ranges and check them for proper canonicalization, verify that
>> VR_{VARYING,UNDEFINED} objects do not have equivalences, etc.  Thoughts?
> 
> In patch 01 we have:
> 
>> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
>> index 594ee9adc17..97046c22ed1 100644
>> --- a/gcc/tree-vrp.c
>> +++ b/gcc/tree-vrp.c
>> @@ -86,6 +86,8 @@ value_range_base::set (enum value_range_kind kind,
>> tree min, t
>> ree max)
>>  void
>>  value_range::set_equiv (bitmap equiv)
>>  {
>> +  if (undefined_p () || varying_p ())
>> +    equiv = NULL;
> 
> So it shouldn't be possible to attach an equivalence to a VARYING /
> UNDEFINED range.  Plus, we already have a sanity checker:
> 
>> void
>> value_range::check ()
>> {
>>   value_range_base::check ();
>>   switch (m_kind)
>>     {
>>     case VR_UNDEFINED:
>>     case VR_VARYING:
>>   gcc_assert (!m_equiv || bitmap_empty_p (m_equiv));
>>     default:;
>>     }
>> }
> 
> We have no methods for altering a range, except for changing
> equivalences.  So it shouldn't be possible to create a non-canonicalized
> range.
Ah.  Cool.  Thanks for enlightening me ;-)

jeff


Re: [MIPS][Testsuite] specify msa-fmadd.c abis

2019-07-03 Thread Jeff Law
On 7/3/19 12:52 AM, Paul Hua wrote:
> Hi,
> 
> The msa-fmadd.c fails on abi=64, the attached patch fixed by specify the abis.
> 
> spawn -ignore SIGHUP
> /home/xuchenghua/GCC/test/gcc-r272929_obj/gcc/xgcc
> -B/home/xuchenghua/GCC/test/gcc-r272929_obj/gcc/
> /home/xuchenghua/GCC/gcc_git_trunk/gcc/testsuite/gcc.target/mips/msa-fmadd.c
> -fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers
> -fdiagnostics-color=never -O1 -DNOMIPS16=__attribute__((nomips16))
> -DNOMICROMIPS=__attribute__((nomicromips))
> -DNOCOMPRESSION=__attribute__((nocompression)) -EL -mhard-float
> -mdouble-float -mfp64 -mno-mips16 -modd-spreg -mmsa
> -flax-vector-conversions -ffat-lto-objects -fno-ident -S -o
> msa-fmadd.s^M
> /home/xuchenghua/GCC/gcc_git_trunk/gcc/testsuite/gcc.target/mips/msa-fmadd.c:11:16:
> warning: call-clobbered register used for global register variable^M
> /home/xuchenghua/GCC/gcc_git_trunk/gcc/testsuite/gcc.target/mips/msa-fmadd.c:12:16:
> warning: call-clobbered register used for global register variable^M
> FAIL: gcc.target/mips/msa-fmadd.c   -O1  (test for excess errors)
> Excess errors:
> /home/xuchenghua/GCC/gcc_git_trunk/gcc/testsuite/gcc.target/mips/msa-fmadd.c:11:16:
> warning: call-clobbered register used for global register variable
> /home/xuchenghua/GCC/gcc_git_trunk/gcc/testsuite/gcc.target/mips/msa-fmadd.c:12:16:
> warning: call-clobbered register used for global register variable
> 
> 
> Ok for commit ?
> 
> 
> 0001-MIPS-Testsuite-specify-msa-fmadd.c-abis.patch
> 
> From 912581f71ad37b415aec06d23210109e1c778296 Mon Sep 17 00:00:00 2001
> From: Chenghua Xu 
> Date: Mon, 17 Jun 2019 14:36:37 +0800
> Subject: [PATCH] [MIPS][Testsuite] specify msa-fmadd.c abis.
> 
> gcc/testsuite/
> 
>   * gcc.target/mips/mips-fmadd.c: Rename to ...
>   * gcc.target/mips/mips-fmadd-o32.c: ... Here; add abi=32.
>   * gcc.target/mips/mips-fmadd-n64.c: New.
OK
jeff


Re: [PATCH v2 3/5] or1k: Add mrori option, fix option docs

2019-07-03 Thread Stafford Horne
On Wed, Jul 03, 2019 at 05:26:58PM -0500, Segher Boessenkool wrote:
> On Thu, Jul 04, 2019 at 06:49:17AM +0900, Stafford Horne wrote:
> > On Wed, Jul 03, 2019 at 09:49:02AM -0500, Segher Boessenkool wrote:
> > > On Wed, Jul 03, 2019 at 12:33:49PM +0900, Stafford Horne wrote:
> > > > @@ -179,11 +183,11 @@
> > > >[(set (match_operand:SI 0 "register_operand" "=r,r")
> > > > (rotatert:SI (match_operand:SI 1 "register_operand"  "r,r")
> > > >   (match_operand:SI 2 "reg_or_u6_operand" "r,n")))]
> > > > -  "TARGET_ROR"
> > > > +  "TARGET_ROR || TARGET_RORI"
> > > >"@
> > > > l.ror\t%0, %1, %2
> > > > l.rori\t%0, %1, %2"
> > > > -  [(set_attr "insn_support" "*,shftimm")])
> > > > +  [(set_attr "insn_support" "ror,rori")])
> > > 
> > > Does this work?  If you use -mno-ror -mrori?  It will then allow 
> > > generating
> > > a reg for the second operand, and ICE later on, as far as I can see?
> > 
> > It does seem to work.  Why would it produce an internal compiler error?
> > 
> > One thing I have is RegectNegative on mror and mrori, so -mno-ror will not 
> > be
> > allowed and cause an error.
> 
> But both options are off by default, and neither is enabled or disabled
> based on the setting of the other.
> 
> > Example: 
> > 
> > $ cat ./gcc/testsuite/gcc.target/or1k/ror-4.c
> > 
> > unsigned int rotate6 (unsigned int a) {
> >   return ( a >> 6 ) | ( a << ( 32 - 6 ) );
> > }
> 
> That's a fixed distance rotate.  My question is will it work if the
> distance is a variable.  The other direction should work fine, agreed.
> 
> So, does ror-[12].c work with -mrori and no -mror?  The predicates say
> this insn pattern is just fine in that case, but the constraints will
> disagree.

OK, yes I see it now.  Sorry I mis-understood what you meant by second argument.
I will fix.  It's probably going to be easiest to split this to 2 instructions.

-Stafford


Re: [range-ops] patch 01/04: types for VR_UNDEFINED and VR_VARYING

2019-07-03 Thread Jeff Law
On 7/3/19 3:46 AM, Aldy Hernandez wrote:
> 
> 
> On 7/2/19 4:17 PM, Jeff Law wrote:
>> On 7/1/19 2:52 AM, Aldy Hernandez wrote:
>>> As discussed before, this enforces types on undefined and varying, which
>>> makes everything more regular, and removes some special casing
>>> throughout range handling.
>>>
>>> The min/max fields will contain TYPE_MIN_VALUE and TYPE_MAX_VALUE, which
>>> will make it easy to get at the bounds of a range later on.  Since
>>> pointers don't have TYPE_MIN/MAX_VALUE, we are using build_zero_cst()
>>> and wide_int_to_tree(wi::max_value(precision)), for consistency.
>>> UNDEFINED is set similarly, though nobody should ever ask for anything
>>> except type() from it.  That is, no one should be asking for the bounds.
>>>
>>> There is one wrinkle, ipa-cp creates VR_VARYING ranges of floats,
>>> presumably to pass around state??  This causes value_range_base::type()
>>> and others to fail, even though I haven't actually seen a case of
>>> someone actually trying to set a VR_RANGE of a float.  For now, I've
>>> built a NOP_EXPR wrapper so type() works correctly.  The correct
>>> approach would probably be to avoid creating these varying/undefined
>>> ranges in ipa-cp, but I don't have enough ipa-cp-foo to do so.
>>> Suggestions welcome, if you don't like special casing this for ipa-cp.
>>> Or perhaps as a follow up.
>> No idea why we create ranges of floats from ipa-cp.  I presume it's
>> coming from propagate_vr_across_jump_function?  Or somewhere else?
> 
> I believe it was ipcp_vr_lattice::set_to_bottom, while changing an
> UNDEFINED to VARYING.  IMO, we shouldn't even have created UNDEFINED
> ranges of floats.  It's not like we can do anything with float ranges.
Note that propagate_vr_across_jump_function calls set_to_bottom ;-)

I zero'd in on that one because it did so when presented with something
that wasn't an INTEGRAL_TYPE_P and wasn't POINTER_TYPE_P.

I think digging into where these are coming from would be advisable.
Hell, if you've got types, abort the first time we try to create a range
for something that isn't an integer/pointer, then walk backwards.

I wouldn't be surprised if we find just a couple sites that are
responsible for these problems in ipa-cp.


>>> +/* Allocate a new range from the obstack and set it to VARYING for
>>> TYPE.  */
>>> +inline value_range *
>>> +type_range_cache::new_varying (tree type)
>>> +{
>>> +  /* Allocate memory.  */
>>> +  void *p = XOBNEW (&m_range_obj, value_range);
>>> +  /* Invoke the constructors on the memory using placement new.  */
>>> +  value_range *new_p = new (p) value_range ();
>>> +  /* Initialize it to varying.  */
>>> +  new_p->set_varying (type);
>>> +  return new_p;
>>> +}
>> So is placement new C++98/C++03 or C++11?
>>
>> If the former then we can use it, if the latter we probably can't since
>> we haven't stepped forward to C++11.
> 
> Google isn't cooperating here to narrow the specific C++ version, but
> I'm seeing some very old references to placement new, from the mid to
> the late 1990s.
> 
> FWIW, the above snippet shows no warnings when compiled with -std=c++-98
> -Wall.
Given it compiles in C++-98 mode, let's consider it a non-issue.

jeff


[PATCH] [Ada] Make clockid_t type public on GNU/kFreeBSD

2019-07-03 Thread James Clarke
From: James Clarke 

Monotonic_Clock and RT_Resolution in the recently-added s-tpopmo.adb
call clock_gettime/clock_getres with the integral constants from OSC and
thus rely on clockid_t being an integral type, so we cannot hide it on
GNU/kFreeBSD. Instead, make the definition public to match all the other
implementations.

gcc/ada
* libgnarl/s-osinte__kfreebsd-gnu.ads (clockid_t): Make type
definition public.
(CLOCK_REALTIME): Make value public.
---
 gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads 
b/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads
index 408187314..b60ffd2c0 100644
--- a/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads
+++ b/gcc/ada/libgnarl/s-osinte__kfreebsd-gnu.ads
@@ -206,9 +206,8 @@ package System.OS_Interface is
function nanosleep (rqtp, rmtp : access timespec) return int;
pragma Import (C, nanosleep, "nanosleep");

-   type clockid_t is private;
-
-   CLOCK_REALTIME : constant clockid_t;
+   type clockid_t is new int;
+   CLOCK_REALTIME : constant clockid_t := 0;

function clock_gettime
  (clock_id : clockid_t;
@@ -607,9 +606,6 @@ private
end record;
pragma Convention (C, timespec);

-   type clockid_t is new int;
-   CLOCK_REALTIME : constant clockid_t := 0;
-
type pthread_attr_t is record
   detachstate   : int;
   schedpolicy   : int;
--
2.17.1



Re: [OpenRISC] [PATCH v2 4/5] or1k: Initial support for FPU

2019-07-03 Thread Segher Boessenkool
On Thu, Jul 04, 2019 at 07:19:56AM +0900, Stafford Horne wrote:
> On Wed, Jul 03, 2019 at 09:09:51PM +0200, Richard Henderson wrote:
> > On 7/3/19 5:43 PM, Segher Boessenkool wrote:
> > >> @@ -212,6 +214,7 @@ enum reg_class
> > >>  #define REG_CLASS_CONTENTS  \
> > >>  { { 0x, 0x },   \
> > >>{ SIBCALL_REGS_MASK,   0 },   \
> > >> +  { 0x7efe, 0x },   \
> > > 
> > > Above you said r0, r30, r31 are excluded, but this is r0, r8, r30, or
> > > in GCC register numbers, 0, 8, and 31?  You probably should mention r8
> > > somewhere (it's because it is the last arg, this avoid problems, I 
> > > guess?),
> > > and the 30/31 thing is confused some way.  Maybe it is all just that one
> > > documentation line :-)
> > 
> > ... and if r8 is excluded because of arguments, I suspect that this is the
> > wrong fix, as there's nothing inherently wrong with r7:r8 or r8:r9 as a 
> > pair,
> > at least that I can see.
> > 
> > Perhaps function_arg and/or function_arg_advance is the right place for a 
> > fix?
> > The calling convention says that 64-bit arguments are not split across
> > registers+stack, so you already shouldn't have seen (r8, [sp+0]) as a pair.
> 
> I will double check, the mask may be wrong.  It should not matter about the
> function args.
> 
> I didn't see any issue that caused me to add r8.  So I may have just masked 
> thw
> rong bit thinking it's r31.  Is there something worng with what I did?
> 
> The mask is 0x7efe, and names should corresbond to this name list?
> 
> #define REGISTER_NAMES {
>   "r0",   "r1",   "r2",   "r3",   "r4",   "r5",   "r6",   "r7",  # 7e, excl r0
>   "r8",   "r9",   "r10",  "r11",  "r12",  "r13",  "r14",  "r15", # ff, excl 
> none
>   "r17",  "r19",  "r21",  "r23",  "r25",  "r27",  "r29",  "r31", # fe, excl 
> r31
>   "r16",  "r18",  "r20",  "r22",  "r24",  "r26",  "r28",  "r30", # fe, excl 
> r30
>   "?ap",  "?fp",  "?sr_f" }
> 
> Do I have it backwards?  With an endian issue?

Yes :-)  0x0001 is reg 0 (r0), 0x8000 is reg 31 (r30).


Segher


Re: [PATCH] simplify-rtx.c: Change BITSIZE to UNIT_PRECISION in simplification

2019-07-03 Thread Jeff Law
On 7/3/19 2:17 PM, Richard Sandiford wrote:
>> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
>> index 89a46a9..d74a4ba 100644
>> --- a/gcc/simplify-rtx.c
>> +++ b/gcc/simplify-rtx.c
>> @@ -1504,12 +1504,12 @@ simplify_unary_operation_1 (enum rtx_code code, 
>> machine_mode mode, rtx op)
>>&& CONST_INT_P (XEXP (op, 1))
>>&& XEXP (XEXP (op, 0), 1) == XEXP (op, 1)
>>&& (op_mode = as_a  (GET_MODE (op)),
>> -  GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1
>> +  GET_MODE_UNIT_PRECISION (op_mode) > INTVAL (XEXP (op, 1
>>  {
>>scalar_int_mode tmode;
>> -  gcc_assert (GET_MODE_BITSIZE (int_mode)
>> -  > GET_MODE_BITSIZE (op_mode));
>> -  if (int_mode_for_size (GET_MODE_BITSIZE (op_mode)
>> +  gcc_assert (GET_MODE_UNIT_PRECISION (int_mode)
>> +  > GET_MODE_UNIT_PRECISION (op_mode));
>> +  if (int_mode_for_size (GET_MODE_UNIT_PRECISION (op_mode)
>>   - INTVAL (XEXP (op, 1)), 1).exists (&tmode))
>>  {
>>rtx inner =
> 
> I think these should be GET_MODE_PRECISION rather than
> GET_MODE_UNIT_PRECISION.  They do the same thing in this context,
> but it seems odd to use GET_MODE_UNIT_... when we're specifically
> dealing with scalars.
> 
> Looks good otherwise (and small enough not to need a copyright assignment,
> in case you haven't signed one).
John, I assume you're doing this for an out of tree port (s12z?)?
Otherwise it'd also be useful if you could include a test which triggers
the assert.

If you could confirm that Richard's suggestion of using
GET_MODE_PRECISION rather than GET_MODE_UNIT_PRECISION works it'd be
appreciated.

Thanks,
jeff


Go patch committed: Set varargs lowered for imported call expressions

2019-07-03 Thread Ian Lance Taylor
This Go frontend patch by Than McIntosh fixes a Go frontend buglet:
varargs lowering happens before inlinable function bodies are written
out to export data, so set the "varargs lowered" flag on call
expressions that we import.  This fixes
https://golang.org/issue/32922.  The test case is in
https://golang.org/cl/184918.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 273009)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-ae7d7e05bce19aefaa27efe2ee797933aafbef06
+bf66d40bc7adb438dcfac85d73bfa7b17217eed9
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/expressions.cc
===
--- gcc/go/gofrontend/expressions.cc(revision 272944)
+++ gcc/go/gofrontend/expressions.cc(working copy)
@@ -18338,6 +18338,7 @@ Expression::import_expression(Import_exp
}
  imp->require_c_string(")");
  expr = Expression::make_call(expr, args, is_varargs, loc);
+  expr->call_expression()->set_varargs_are_lowered();
}
   else if (imp->match_c_string("["))
{
@@ -19389,4 +19390,3 @@ Numeric_constant::hash(unsigned int seed
 
   return (static_cast(val) + seed) * PRIME;
 }
-


Re: [PATCH v2 3/5] or1k: Add mrori option, fix option docs

2019-07-03 Thread Segher Boessenkool
On Thu, Jul 04, 2019 at 06:49:17AM +0900, Stafford Horne wrote:
> On Wed, Jul 03, 2019 at 09:49:02AM -0500, Segher Boessenkool wrote:
> > On Wed, Jul 03, 2019 at 12:33:49PM +0900, Stafford Horne wrote:
> > > @@ -179,11 +183,11 @@
> > >[(set (match_operand:SI 0 "register_operand" "=r,r")
> > >   (rotatert:SI (match_operand:SI 1 "register_operand"  "r,r")
> > > (match_operand:SI 2 "reg_or_u6_operand" "r,n")))]
> > > -  "TARGET_ROR"
> > > +  "TARGET_ROR || TARGET_RORI"
> > >"@
> > > l.ror\t%0, %1, %2
> > > l.rori\t%0, %1, %2"
> > > -  [(set_attr "insn_support" "*,shftimm")])
> > > +  [(set_attr "insn_support" "ror,rori")])
> > 
> > Does this work?  If you use -mno-ror -mrori?  It will then allow generating
> > a reg for the second operand, and ICE later on, as far as I can see?
> 
> It does seem to work.  Why would it produce an internal compiler error?
> 
> One thing I have is RegectNegative on mror and mrori, so -mno-ror will not be
> allowed and cause an error.

But both options are off by default, and neither is enabled or disabled
based on the setting of the other.

> Example: 
> 
> $ cat ./gcc/testsuite/gcc.target/or1k/ror-4.c
> 
>   unsigned int rotate6 (unsigned int a) {
> return ( a >> 6 ) | ( a << ( 32 - 6 ) );
>   }

That's a fixed distance rotate.  My question is will it work if the
distance is a variable.  The other direction should work fine, agreed.

So, does ror-[12].c work with -mrori and no -mror?  The predicates say
this insn pattern is just fine in that case, but the constraints will
disagree.


Segher


C++ PATCH for DR 1813, c++/83374 - __is_standard_layout wrong for a class with repeated bases.

2019-07-03 Thread Marek Polacek
This resolves DR 1813 which says that for a class to be a standard-layout
class, it may not have two (possibly indirect) base class subobjects of the
same type.  I was going to play DFS games but then I noticed we'd already set
CLASSTYPE_REPEATED_BASE_P, making this significantly easier.

There are 3 related DRs which I opened PRs for:

91079 - [DR 1881] Standard-layout classes and unnamed bit-fields
91080 - [DR 1672] Layout compatibility with multiple empty bases
91081 - [DR 2120] Array as first non-static data member in standard-layout class

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-07-03  Marek Polacek  

DR 1813
PR c++/83374 - __is_standard_layout wrong for a class with repeated 
bases.
* class.c (check_bases): Set CLASSTYPE_NON_STD_LAYOUT for a class if
CLASSTYPE_REPEATED_BASE_P is true.

* g++.dg/ext/is_std_layout3.C: New test.
* g++.dg/ext/is_std_layout4.C: New test.

diff --git gcc/cp/class.c gcc/cp/class.c
index 73291b341fe..f77b7f4834b 100644
--- gcc/cp/class.c
+++ gcc/cp/class.c
@@ -1715,11 +1715,15 @@ check_bases (tree t,
  && (same_type_ignoring_top_level_qualifiers_p
  (TREE_TYPE (field), basetype)))
CLASSTYPE_NON_STD_LAYOUT (t) = 1;
+ /* DR 1813:
+...has at most one base class subobject of any given type...  */
+ else if (CLASSTYPE_REPEATED_BASE_P (t))
+   CLASSTYPE_NON_STD_LAYOUT (t) = 1;
  else
/* ...either has no non-static data members in the most-derived
   class and at most one base class with non-static data
   members, or has no base classes with non-static data
-  members */
+  members.  FIXME This was reworded in DR 1813.  */
for (basefield = TYPE_FIELDS (basetype); basefield;
 basefield = DECL_CHAIN (basefield))
  if (TREE_CODE (basefield) == FIELD_DECL
diff --git gcc/testsuite/g++.dg/ext/is_std_layout3.C 
gcc/testsuite/g++.dg/ext/is_std_layout3.C
new file mode 100644
index 000..b0555c8207b
--- /dev/null
+++ gcc/testsuite/g++.dg/ext/is_std_layout3.C
@@ -0,0 +1,18 @@
+// DR 1813
+// PR c++/83374 - __is_standard_layout wrong for a class with repeated bases.
+// { dg-do compile { target c++11 } }
+
+struct B { int i; };// standard-layout class
+struct C : B { };   // standard-layout class
+struct D : C { };   // standard-layout class
+struct E : D { char : 4; }; // not a standard-layout class
+static_assert( __is_standard_layout(B), "" );
+static_assert( __is_standard_layout(C), "" );
+static_assert( __is_standard_layout(D), "" );
+static_assert( ! __is_standard_layout(E), "" );
+
+struct Q {};
+struct S : Q { };
+struct T : Q { };
+struct U : S, T { }; // not a standard-layout class
+static_assert( ! __is_standard_layout(U), "" );
diff --git gcc/testsuite/g++.dg/ext/is_std_layout4.C 
gcc/testsuite/g++.dg/ext/is_std_layout4.C
new file mode 100644
index 000..09c0098120d
--- /dev/null
+++ gcc/testsuite/g++.dg/ext/is_std_layout4.C
@@ -0,0 +1,11 @@
+// DR 1813
+// PR c++/83374 - __is_standard_layout wrong for a class with repeated bases.
+// { dg-do compile { target c++11 } }
+
+struct R { };
+struct Q { };
+struct S : R { };
+struct T : Q { };
+struct U : S, T { };
+// No repeated base class subobjects.
+static_assert(__is_standard_layout(U), "");


Call for testers: improve move2add_use_add2_insn for targets with HAVE_POST_MODIFY_DISP and/or clobber-encumbered add

2019-07-03 Thread Joern Wolfgang Rennecke


2019-07-03  Joern Rennecke  

* postreload.c (rtl-iter.h): Include.
(reg_addr_use_luid, reg_addr_use_insn): New static variables.
(reg_addr_use, move2add_last_jump_luid): Likewise.
(move2add_use_add2_insn): Try to use a POST_MODIFY before and add.
(reload_cse_move2add): Keep new static variables up to date.

(move2add_use_add2_insn): If using add fails, try
to use movstr* or xor.

Index: postreload.c
===
--- postreload.c(revision 272931)
+++ postreload.c(working copy)
@@ -40,6 +40,7 @@ Software Foundation; either version 3, o
 #include "cselib.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "rtl-iter.h"
 
 static int reload_cse_noop_set_p (rtx);
 static bool reload_cse_simplify (rtx_insn *, rtx);
@@ -1646,6 +1647,12 @@ reload_combine_note_use (rtx *xp, rtx_in
 static rtx reg_symbol_ref[FIRST_PSEUDO_REGISTER];
 static machine_mode reg_mode[FIRST_PSEUDO_REGISTER];
 
+/* Note when and where we last saw this register used as a plain
+   register-indirect address.  */
+static int reg_addr_use_luid[FIRST_PSEUDO_REGISTER];
+static rtx_insn *reg_addr_use_insn[FIRST_PSEUDO_REGISTER];
+static rtx *reg_addr_use[FIRST_PSEUDO_REGISTER];
+
 /* move2add_luid is linearly increased while scanning the instructions
from first to last.  It is used to set reg_set_luid in
reload_cse_move2add and move2add_note_store.  */
@@ -1654,6 +1661,7 @@ reload_combine_note_use (rtx *xp, rtx_in
 /* move2add_last_label_luid is set whenever a label is found.  Labels
invalidate all previously collected reg_offset data.  */
 static int move2add_last_label_luid;
+static int move2add_last_jump_luid;
 
 /* ??? We don't know how zero / sign extension is handled, hence we
can't go from a narrower to a wider mode.  */
@@ -1768,6 +1776,18 @@ move2add_use_add2_insn (scalar_int_mode
   if (INTVAL (off) == reg_offset [regno])
changed = validate_change (insn, &SET_SRC (pat), reg, 0);
 }
+  else if (HAVE_POST_MODIFY_DISP
+  && reg_addr_use_luid[regno] > reg_set_luid[regno]
+  && reg_addr_use_luid[regno] > move2add_last_jump_luid
+  && !reg_used_between_p (reg, reg_addr_use_insn[regno], insn)
+  && validate_change
+   (reg_addr_use_insn[regno], reg_addr_use[regno], 
+gen_rtx_POST_MODIFY (mode, reg,
+ gen_rtx_PLUS (mode, reg, new_src)),
+1)
+  && validate_change (insn, &SET_SRC (pat), reg, 1)
+  && apply_change_group ())
+changed = true;
   else
 {
   struct full_rtx_costs oldcst, newcst;
@@ -1779,8 +1799,9 @@ move2add_use_add2_insn (scalar_int_mode
   SET_SRC (pat) = src;
 
   if (costs_lt_p (&newcst, &oldcst, speed)
- && have_add2_insn (reg, new_src))
-   changed = validate_change (insn, &SET_SRC (pat), tem, 0);   
+ && have_add2_insn (reg, new_src)
+ && validate_change (insn, &SET_SRC (pat), tem, 0))
+   changed = true;
   else if (sym == NULL_RTX && mode != BImode)
{
  scalar_int_mode narrow_mode;
@@ -1807,6 +1828,19 @@ move2add_use_add2_insn (scalar_int_mode
}
}
}
+ /* Some processors clobber some flags for add (hence it won't match
+above), but none (that the compiler models) for xor.  */
+ if (!changed)
+   {
+ new_src = gen_int_mode (UINTVAL (off) ^ reg_offset[regno], mode);
+ tem = gen_rtx_XOR (mode, reg, new_src);
+ SET_SRC (pat) = tem;
+ get_full_set_rtx_cost (pat, &newcst);
+ SET_SRC (pat) = src;
+ if (costs_lt_p (&newcst, &oldcst, speed)
+ && validate_change (insn, &SET_SRC (pat), tem, 0))
+   changed = true;
+   }
}
 }
   move2add_record_sym_value (reg, sym, off);
@@ -1908,6 +1942,7 @@ reload_cse_move2add (rtx_insn *first)
   for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
 {
   reg_set_luid[i] = 0;
+  reg_addr_use_luid[i] = 0;
   reg_offset[i] = 0;
   reg_base_reg[i] = 0;
   reg_symbol_ref[i] = NULL_RTX;
@@ -1915,6 +1950,7 @@ reload_cse_move2add (rtx_insn *first)
 }
 
   move2add_last_label_luid = 0;
+  move2add_last_jump_luid = 0;
   move2add_luid = 2;
   for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
 {
@@ -2104,8 +2140,29 @@ reload_cse_move2add (rtx_insn *first)
}
}
}
+
+  if (HAVE_POST_MODIFY_DISP)
+   {
+ subrtx_var_iterator::array_type array;
+ FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
+   if (MEM_P (*iter))
+ {
+   rtx addr = XEXP (*iter, 0);
+   if (REG_P (addr))
+ {
+   int regno = REGNO (addr);
+   reg_addr_use_luid[regno] = move2add_luid

Re: [OpenRISC] [PATCH v2 4/5] or1k: Initial support for FPU

2019-07-03 Thread Stafford Horne
On Wed, Jul 03, 2019 at 09:09:51PM +0200, Richard Henderson wrote:
> On 7/3/19 5:43 PM, Segher Boessenkool wrote:
> >> @@ -212,6 +214,7 @@ enum reg_class
> >>  #define REG_CLASS_CONTENTS  \
> >>  { { 0x, 0x }, \
> >>{ SIBCALL_REGS_MASK,   0 }, \
> >> +  { 0x7efe, 0x }, \
> > 
> > Above you said r0, r30, r31 are excluded, but this is r0, r8, r30, or
> > in GCC register numbers, 0, 8, and 31?  You probably should mention r8
> > somewhere (it's because it is the last arg, this avoid problems, I guess?),
> > and the 30/31 thing is confused some way.  Maybe it is all just that one
> > documentation line :-)
> 
> ... and if r8 is excluded because of arguments, I suspect that this is the
> wrong fix, as there's nothing inherently wrong with r7:r8 or r8:r9 as a pair,
> at least that I can see.
> 
> Perhaps function_arg and/or function_arg_advance is the right place for a fix?
> The calling convention says that 64-bit arguments are not split across
> registers+stack, so you already shouldn't have seen (r8, [sp+0]) as a pair.

I will double check, the mask may be wrong.  It should not matter about the
function args.

I didn't see any issue that caused me to add r8.  So I may have just masked thw
rong bit thinking it's r31.  Is there something worng with what I did?

The mask is 0x7efe, and names should corresbond to this name list?

#define REGISTER_NAMES {
  "r0",   "r1",   "r2",   "r3",   "r4",   "r5",   "r6",   "r7",  # 7e, excl r0
  "r8",   "r9",   "r10",  "r11",  "r12",  "r13",  "r14",  "r15", # ff, excl none
  "r17",  "r19",  "r21",  "r23",  "r25",  "r27",  "r29",  "r31", # fe, excl r31
  "r16",  "r18",  "r20",  "r22",  "r24",  "r26",  "r28",  "r30", # fe, excl r30
  "?ap",  "?fp",  "?sr_f" }

Do I have it backwards?  With an endian issue?

-Stafford


Re: [PATCH] PowerPC Prefixed Memory, Patch #4, Add pc-relative reference support

2019-07-03 Thread Segher Boessenkool
Hi Mike,

On Fri, Jun 28, 2019 at 02:50:33PM -0400, Michael Meissner wrote:
> --- gcc/config/rs6000/rs6000-logue.c  (revision 272714)
> +++ gcc/config/rs6000/rs6000-logue.c  (working copy)
> @@ -1406,23 +1406,13 @@ uses_TOC (void)
>  }
>  #endif
>  
> +/* Create a TOC style reference for a symbol.  */
>  rtx
>  create_TOC_reference (rtx symbol, rtx largetoc_reg)

Does this really belong in this file?  It doesn't do anythin *logue, and
it isn't called from anywhere in here.

> +/* Create either a TOC reference to a locally defined item or a pc-relative
> +   reference, depending on the ABI.  */
> +rtx
> +create_data_reference (rtx symbol, rtx largetoc_reg)

Same here.

What is largetoc_reg?  The function comment should say.  It also is only
relevant for create_TOC_reference (where such a comment is also missing),
so could you factor this better please?

Probably a create_data_reference with only one argument?  Which calls
create_TOC_reference with a NULL second arg.  It looks like your
proposed create_data_reference will not do the right thing if called
with a non-null second arg if pcrel.  Perhaps that cannot happen, but
make that clear then?  Just an assert will do, bigger cleanups are
better of course.


Segher


Re: [PATCH v2 4/5] or1k: Initial support for FPU

2019-07-03 Thread Stafford Horne
On Wed, Jul 03, 2019 at 10:43:01AM -0500, Segher Boessenkool wrote:
> Hi Stafford,
> 
> On Wed, Jul 03, 2019 at 12:33:50PM +0900, Stafford Horne wrote:
> > +case 'd':
> > +  if (REG_P (x))
> > + if (GET_MODE (x) == DFmode || GET_MODE (x) == DImode)
> > +   fprintf (file, "%s,%s", reg_names[REGNO (operand)],
> > +   reg_names[REGNO (operand) + 1]);
> > + else
> > +   fprintf (file, "%s", reg_names[REGNO (operand)]);
> > +  else
> 
> The coding conventions says to use braces around nested conditionals.

Right I will fix that.  Interestingly the indentation is correct just missing
the braces.
 
> > @@ -212,6 +214,7 @@ enum reg_class
> >  #define REG_CLASS_CONTENTS  \
> >  { { 0x, 0x },  \
> >{ SIBCALL_REGS_MASK,   0 },  \
> > +  { 0x7efe, 0x },  \
> 
> Above you said r0, r30, r31 are excluded, but this is r0, r8, r30, or
> in GCC register numbers, 0, 8, and 31?  You probably should mention r8
> somewhere (it's because it is the last arg, this avoid problems, I guess?),
> and the 30/31 thing is confused some way.  Maybe it is all just that one
> documentation line :-)
>
> > +;  d - double pair base registers (excludes r0, r30 and r31 which overflow)

Hmm, maybe I messed up the mask.  It should be r0, r30 and r31.  Register pairs
can be a base register (rX) with a +1 or +2 offset second register.

Registers not allowed
  - r0, because its reserved for hardwired zero and doesn't work as a double
zero when paired with a general register.
  - r31, because it cant pair with r32 or r33 (those are overflows)
  - r30, because it cant work when paried with r32 (its an overflow), it would
work with r31, but GCC will not generate that pair anyway.

-Stafford


Re: [patch] Small improvements to coverage info (2/n)

2019-07-03 Thread Jeff Law
On 7/3/19 7:35 AM, Eric Botcazou wrote:
> Hi,
> 
> this is a series of fixes for the exception handling code, with the same goal 
> of preventing instructions from inheriting random source location information 
> in the debug info generated by the compiler.
> 
> Tested on x86_64-suse-linux, both GCC and GDB, OK for mainline?
> 
> 
> 2019-07-03  Eric Botcazou  
> 
>   * except.c (emit_to_new_bb_before): Make sure to put a location on SEQ.
>   * tree-eh.c (replace_goto_queue_1) : Propagate location.
>   (emit_eh_dispatch): Delete.
>   (lower_catch): Emit the eh_dispatch manually and set the location of
>   the first catch statement onto it.
>   (lower_eh_filter): Emit the eh_dispatch manually and set location.
>   (lower_eh_dispatch): Propagate location.
>   * tree-outof-ssa.c (set_location_for_edge): Handle EH edges specially.
>   (eliminate_build): Likewise.
> 
OK
jeff


Re: [patch] Small improvements to coverage info (1/n)

2019-07-03 Thread Jeff Law
On 7/3/19 4:46 AM, Eric Botcazou wrote:
> Hi,
> 
> we have collected a number of small improvements to coverage info generated 
> by 
> the compiler over the years.  One of the issues is when a new expression or 
> statement is built without source location information and ends up inheriting 
> the source location information of the previous instruction in the debug 
> info, 
> which can be totally unrelated.
> 
> Tested on x86_64-suse-linux, both GCC and GDB, OK for mainline?
> 
> 
> 2019-07-03  Eric Botcazou  
> 
>   * tree-cfg.c (gimple_make_forwarder_block): Propagate location info on
>   phi nodes if possible.
>   * tree-scalar-evolution.c (final_value_replacement_loop): Propagate
>   location info on the newly created statement.
>   * tree-ssa-loop-manip.c (create_iv): Propagate location info on the
>   newly created increment if needed.
> 
OK
jeff


Re: [PATCH v2 3/5] or1k: Add mrori option, fix option docs

2019-07-03 Thread Stafford Horne
On Wed, Jul 03, 2019 at 09:49:02AM -0500, Segher Boessenkool wrote:
> On Wed, Jul 03, 2019 at 12:33:49PM +0900, Stafford Horne wrote:
> > @@ -179,11 +183,11 @@
> >[(set (match_operand:SI 0 "register_operand" "=r,r")
> > (rotatert:SI (match_operand:SI 1 "register_operand"  "r,r")
> >   (match_operand:SI 2 "reg_or_u6_operand" "r,n")))]
> > -  "TARGET_ROR"
> > +  "TARGET_ROR || TARGET_RORI"
> >"@
> > l.ror\t%0, %1, %2
> > l.rori\t%0, %1, %2"
> > -  [(set_attr "insn_support" "*,shftimm")])
> > +  [(set_attr "insn_support" "ror,rori")])
> 
> Does this work?  If you use -mno-ror -mrori?  It will then allow generating
> a reg for the second operand, and ICE later on, as far as I can see?

It does seem to work.  Why would it produce an internal compiler error?

One thing I have is RegectNegative on mror and mrori, so -mno-ror will not be
allowed and cause an error.

Example: 

$ cat ./gcc/testsuite/gcc.target/or1k/ror-4.c

unsigned int rotate6 (unsigned int a) {
  return ( a >> 6 ) | ( a << ( 32 - 6 ) );
}

# With rori, direct immediate.

$ or1k-elf-gcc -O2 -c -mrori ./gcc/testsuite/gcc.target/or1k/ror-4.c 
$ or1k-elf-objdump -d ror-4.o 

ror-4.o: file format elf32-or1k

Disassembly of section .text:

 :
   0:   44 00 48 00 l.jr r9
   4:   b9 63 00 c6 l.rori r11,r3,0x6

# With ror, loading immediate to temporary register first.

$ or1k-elf-gcc -O2 -c -mror ./gcc/testsuite/gcc.target/or1k/ror-4.c 
$ or1k-elf-objdump -d ror-4.o 

ror-4.o: file format elf32-or1k

Disassembly of section .text:

 :
   0:   aa 20 00 06 l.ori r17,r0,0x6
   4:   44 00 48 00 l.jr r9
   8:   e1 63 88 c8 l.ror r11,r3,r17

-Stafford


Re: [PATCH] S/390: Improve storing asan frame_pc

2019-07-03 Thread Richard Sandiford
Segher Boessenkool  writes:
> On Tue, Jul 02, 2019 at 03:55:56PM +0200, Ilya Leoshkevich wrote:
>> > Am 02.07.2019 um 15:39 schrieb Jakub Jelinek :
>> > On Tue, Jul 02, 2019 at 03:33:28PM +0200, Ilya Leoshkevich wrote:
>> >>> Am 02.07.2019 um 15:19 schrieb Segher Boessenkool 
>> >>> :
>> >>> 
>> >>> On Tue, Jul 02, 2019 at 08:02:16AM -0500, Segher Boessenkool wrote:
>>  On Tue, Jul 02, 2019 at 10:51:54AM +0200, Ilya Leoshkevich wrote:
>> > +#undef TARGET_INSN_ALIGNMENT
>> > +#define TARGET_INSN_ALIGNMENT 16
>>  
>>  There already is FUNCTION_BOUNDARY for something similar, which fits in
>>  well with STACK_BOUNDARY, PARM_BOUNDARY, many more *_BOUNDARY.  I 
>>  realise
>>  you may prefer a hook, but as long as we aren't getting rid of all the
>>  other macros, what's the point?
>> >>> 
>> >>> And maybe LABEL_BOUNDARY is bettter for this than INSN_BOUNDARY as well?
>> >> 
>> >> Can’t we just use FUNCTION_BOUNDARY then?
>> >> I think .LASANPC is always emitted at the beginning of a function.
>> > 
>> > Isn't e.g. the hotpatch sequence emitted before it?
>> 
>> You are right, with -fpatchable-function-entry it’s moved.
>> 
>> So, I guess I should stick with the current approach.
>> I could change TARGET_INSN_ALIGNMENT hook to INSN_BOUNDARY macro if that
>> would better match the current design.  I would still call it INSN, and
>> not LABEL, because LABEL can refer to data.
>
> On some archs LABEL_BOUNDARY can be bigger than INSN_BOUNDARY (just like
> FUNCTION_BOUNDARY can be even bigger, like on 390 :-) )
>
> Either will work for your purposes afaics.
>
> LABEL in RTL is always a CODE_LABEL I think?  Maybe CODE_LABEL_BOUNDARY
> would make it clearer, it's not like a short name for this is useful
> anyway.

IIUC the new value is effectively a mandatory/guaranteed minimum value of
align_labels/LABEL_ALIGN that applies even in blocks optimized for size.
So IMO sticking with *_ALIGNMENT would be better.

Thanks,
Richard


Re: [PATCH] Fix few build warnings with LLVM toolchain

2019-07-03 Thread Richard Sandiford
Jeff Law  writes:
> On 6/28/19 12:46 PM, Richard Sandiford wrote:
>> Segher Boessenkool  writes:
>>> On Fri, Jun 28, 2019 at 08:55:00AM -0600, Martin Sebor wrote:
 Jeff reminded me in a code review the other day that GCC does
 have a guideline for defining POD structs with the keyword
 "struct" and classes with ctors/dtors using "class":

   https://gcc.gnu.org/codingconventions.html#Struct_Use

 I quickly prototyped a warning to see how closely GCC follows
 this convention.  The result shows that out of just under 800
 structs and classes defined in GCC sources some 200 use
 the keyword 'struct' despite having a ctor or dtor, or about
 25%.  So as is the case with most other conventions, without
 a tool to help remind us they exist they are unlikely to be
 followed with enough consistency to be worth putting in place
 to begin with.
>>>
>>> The goal is not to have the rules adhered to.  The goal is to have
>>> a more readable / maintainable / etc. codebase.
>> 
>> IMO it's a shame we don't follow that attitude for changelogs,
>> where apparently the number of spaces between the name and the email
>> address is of vital importance :-)  Too often a new contributor's
>> first taste of GCC is a slew of comments about things like that.
> :-)  Given that I regularly deal with the random one off contributors
> and cut-n-paste their author information from their email which is
> always one space I find the adherence to two spaces in the ChangeLog
> annoying.  I still try to fix the, but like all the whitespace stuff,
> I'd just assume leave that to commit hooks.  There just isn't a lot of
> value in having a human looking for whitespace issues.  I'd much rather
> be spending time on more substantial questions.
>
>
>> 
>> But surely it's a valid point that we're not following our own
>> conventions on the C++ usage.  I miss how consistent the codebase
>> was in the C days...
> Sure, it's a valid point and I fully applaud the effort to add warnings
> for things where we can and fix this kind of stuff.
>
>> 
>> That can be fixed by changing the conventions if we no longer think
>> they're a good idea.  But if someone's willing to change the codebase
>> to follow (one of) the coventions instead, and willing to add a warning
>> to keep things that way, then that sounds like a really good thing.
>> Especially when it's a common convention that others might want a
>> warning for too.
> Likewise -- I'm certainly open to changing conventions where we don't
> think they make sense.  But I'm not going to unilaterally do that.
>
>
>> 
>> And I don't think the current state of the struct/class tags is really
>> a result of making the codebase more maintainable.  I get the feeling
>> it just kind-of happened.  ISTM we're trying too hard to find a reason
>> not to clean this up.
> I believe the idea was to distinguish between POD and more complex
> types.  POD isn't going to do things behind your back, whereas classes
> certainly can (and it's often useful, for example RAII constructs).

Yeah.  And that sounds like a reasonable rule if enforced properly
(like Martin says).  I'm certainly not objecting to that. :-)

But what started this thread was that we currently use inconsistent
tags for the same type (struct in one file, class in another).
By definition that can't be following the current rules, since a type
is either POD or not.  And I can't think it was done for readability
or maintainability reasons either.  It looks completely accidental.

What I was objecting to mostly was the pushback against fixing that up
and making the tags self-consistent.  This has come up before, and the
response always seems to be to complain about clang rather than admit
that the current state of things isn't great.

Thanks,
Richard


Re: [PATCH] simplify-rtx.c: Change BITSIZE to UNIT_PRECISION in simplification

2019-07-03 Thread Richard Sandiford
> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
> index 89a46a9..d74a4ba 100644
> --- a/gcc/simplify-rtx.c
> +++ b/gcc/simplify-rtx.c
> @@ -1504,12 +1504,12 @@ simplify_unary_operation_1 (enum rtx_code code, 
> machine_mode mode, rtx op)
> && CONST_INT_P (XEXP (op, 1))
> && XEXP (XEXP (op, 0), 1) == XEXP (op, 1)
> && (op_mode = as_a  (GET_MODE (op)),
> -   GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1
> +   GET_MODE_UNIT_PRECISION (op_mode) > INTVAL (XEXP (op, 1
>   {
> scalar_int_mode tmode;
> -   gcc_assert (GET_MODE_BITSIZE (int_mode)
> -   > GET_MODE_BITSIZE (op_mode));
> -   if (int_mode_for_size (GET_MODE_BITSIZE (op_mode)
> +   gcc_assert (GET_MODE_UNIT_PRECISION (int_mode)
> +   > GET_MODE_UNIT_PRECISION (op_mode));
> +   if (int_mode_for_size (GET_MODE_UNIT_PRECISION (op_mode)
>- INTVAL (XEXP (op, 1)), 1).exists (&tmode))
>   {
> rtx inner =

I think these should be GET_MODE_PRECISION rather than
GET_MODE_UNIT_PRECISION.  They do the same thing in this context,
but it seems odd to use GET_MODE_UNIT_... when we're specifically
dealing with scalars.

Looks good otherwise (and small enough not to need a copyright assignment,
in case you haven't signed one).

Thanks,
Richard


Re: [PATCH][AArch64] Remove constraint strings from define_expand constructs in the back end

2019-07-03 Thread Richard Sandiford
Dennis Zhang  writes:
> Hi Richard, Thanks for the tips.
>
> The special exceptions according to TARGET_SECONDARY_RELOAD hook are 
> revised. Some related patterns still need constraints in order to work 
> in an expected way in the TARGET_SECONDARY_RELOAD function.
>
> The updated patch is tested for targets: aarch64_be-linux-gnu,
> aarch64_be-none-linux-gnu, aarch64-linux-gnu, and 
> aarch64-none-linux-gnu. It survives in testsuite regression.
>
> gcc/ChangeLog:
>
> 2019-07-03  Dennis Zhang  
>
>   * config/aarch64/aarch64.md: Remove redundant constraints from
>   define_expand but keep some patterns untouched if they are
>   specially selected by TARGET_SECONDARY_RELOAD hook.
>   * config/aarch64/aarch64-sve.md: Likewise.
>   * config/aarch64/atomics.md: Remove redundant constraints from
>   define_expand.
>   * config/aarch64/aarch64-simd.md: Likewise.

Thanks, applied as r273021.

Richard


Re: [PATCH] Prepare for prefixed instructions on PowerPC

2019-07-03 Thread Segher Boessenkool
On Wed, Jul 03, 2019 at 02:36:11PM -0400, Michael Meissner wrote:
> On Wed, Jul 03, 2019 at 12:55:41PM -0500, Segher Boessenkool wrote:
> > I don't think this is a good idea.  You can set "cost" directly, if that
> > is the only thing you need this for?
> 
> The trouble is the cost is currently a factor based on type + the cost from 
> the
> cost structure.  It really would be hard to set it to a single value in the
> insns without having to have complex means for setting the machine dependent
> costs.  If the numeric RTL attributes could set the value from a function, it
> would be simpler, but that isn't currently supported.

(set (attr "cost") (symbol_ref "any C expression you want here"))

It is supported.  The syntax is a bit weird, sure :-)


It may well be that some helper attribute can help here, but just
num_insns isn't it.


Segher


Re: [OpenRISC] [PATCH v2 4/5] or1k: Initial support for FPU

2019-07-03 Thread Richard Henderson
On 7/3/19 5:43 PM, Segher Boessenkool wrote:
>> @@ -212,6 +214,7 @@ enum reg_class
>>  #define REG_CLASS_CONTENTS  \
>>  { { 0x, 0x },   \
>>{ SIBCALL_REGS_MASK,   0 },   \
>> +  { 0x7efe, 0x },   \
> 
> Above you said r0, r30, r31 are excluded, but this is r0, r8, r30, or
> in GCC register numbers, 0, 8, and 31?  You probably should mention r8
> somewhere (it's because it is the last arg, this avoid problems, I guess?),
> and the 30/31 thing is confused some way.  Maybe it is all just that one
> documentation line :-)

... and if r8 is excluded because of arguments, I suspect that this is the
wrong fix, as there's nothing inherently wrong with r7:r8 or r8:r9 as a pair,
at least that I can see.

Perhaps function_arg and/or function_arg_advance is the right place for a fix?
The calling convention says that 64-bit arguments are not split across
registers+stack, so you already shouldn't have seen (r8, [sp+0]) as a pair.


r~


[Darwin] Revise pie,no-pie and rdynamic driver specs.

2019-07-03 Thread Iain Sandoe
Processing these in the driver self-specs and pushing the corresponding Xlinker 
lines
seemed like a good idea (nominally the Right Place to process driver specs).  
However,
it has the effect that the driver then supposes that there are linker inputs, 
and causes a
link line to be created even if it is not needed (of course, only if a linker 
option is given,
but still not desirable).

The solution is to place these drive specs into the link spec and claim them at 
the end
of that.  We can’t filter them at a lower level since RDYNAMIC, at least, is 
relevant to
kernel and kernel module code.

Tested on i686-darwin9, x86-64-darwin16, x86-64-darwin18
applied to mainline
thanks
Iain

2019-07-03  Iain Sandoe  

* config/darwin.h (DRIVER_SELF_SPECS): Remove the linker cases.
(RDYNAMIC): Rename to, DARWIN_RDYNAMIC.
(DARWIN_PIE_SPEC, DARWIN_NOPIE_SPEC): Adjust to remove the Xlinker
clauses.
(LINK_COMMAND_SPEC_A): Add DARWIN_RDYNAMIC, DARWIN_PIE_SPEC and
DARWIN_NOPIE_SPEC.

diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 72215ce..e17bc64 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -124,30 +124,25 @@ extern GTY(()) int darwin_ms_struct;
   "%{fapple-kext|mkernel:-static}",\
   "%{shared:-Zdynamiclib} %= 10.5 mmacosx-version-min= -Xlinker) \
-   %:version-compare(>= 10.5 mmacosx-version-min= -pie) }} %= 10.5 mmacosx-version-min= -pie) }} "
 
 #define DARWIN_NOPIE_SPEC \
 "%{no-pie|fno-pie|fno-PIE: \
-   %:version-compare(>= 10.7 mmacosx-version-min= -Xlinker ) \
-   %:version-compare(>= 10.7 mmacosx-version-min= -no_pie) } %= 10.7 mmacosx-version-min= -no_pie) }"
 
 #define DARWIN_CC1_SPEC
\
   "%{findirect-virtual-calls: -fapple-kext} %

Re: [PATCH] Prepare for prefixed instructions on PowerPC

2019-07-03 Thread Segher Boessenkool
On Wed, Jul 03, 2019 at 01:06:27PM -0400, Michael Meissner wrote:
> On Mon, Jul 01, 2019 at 04:27:05PM -0500, Segher Boessenkool wrote:
> > > @@ -7385,8 +7385,8 @@ (define_insn "*mov_softfloat"
> > >   *,  *, *, *")
> > >  
> > > (set_attr "length"
> > > - "4,  4, 4, 4, 4, 4,
> > > - 4,  4, 8, 4")])
> > > + "*,  *, *, *, *, *,
> > > + *,  *, 8, *")])
> > 
> > [ That last line should start with a tab as well. ]
> 
> Ok.
> 
> > The entry before the 8 is split as well.  Maybe that should be "4", to
> > stand out?  I don't know what works better; your choice.
> 
> Though the "G" constraint specifically says for SFmode it is a single
> instruction.

Sure.  But that doesn't mean much, does it?  You cannot see the actual
split insns here, and that the constraint is for
  "Constant that can be copied into GPR with two insns for DF/DD
   and one for SF/SD."
is pretty weak :-)

The default is one insn, that's what * does.  You can still use 4 though
when that is beneficial, to make things a tiny bit clearer maybe.

> > > @@ -7696,8 +7696,8 @@ (define_insn "*mov_softfloat64"
> > >   *,   *,  *")
> > >  
> > > (set_attr "length"
> > > -"4,   4,  4,  4,  4,  8,
> > > - 12,  16, 4")])
> > > +"*,   *,  *,  *,  *,  8,
> > > + 12,  16, *")])
> > 
> > Same for the last entry here.
> 
> Well technically that alternative will never fire (destination is "*h" and
> source is "0"), and a nop is emitted.

If it can never fire we should remove it.  But it *can* fire, or I cannot
prove it cannot anyway.  Can you?

This would be a lovely cleanup if we could make it :-/

> I do wish we could never ever load
> floating point into SPR registers.  I've tried in the reload days to eliminate
> it, but there was always some abort if it got eliminated.

It should never load anything into CTR, LR, or VRSAVE, period.  :-)

Maybe we should use those as fixed registers, not allocatable as they
are now.


Segher


[Darwin.comitted] Some TLC for older Darwin versions.

2019-07-03 Thread Iain Sandoe
The command lines and build options for some of the crts for the older PPC
Darwin versions had bit-rotted somewhat.  This adjusts the build criteria for 
the
crts to avoid newer ld64 versions warnings about mismatches in build and object
versions.

Added some more comments so that it's documented why the specs are as they
are.

Tested across a range of systems, although this has no (intended) changes for
the X86 port.  We can test generation of code for older Darwin on 
powerpc-darwin9
but self-hosting on <= Darwin8 needs some more work.

applied to mainline
Thanks
Iain

2019-07-03  Iain Sandoe  

gcc/

* config/darwin.h (REAL_LIBGCC_SPEC): Adjust for earlier Darwin.
(STARTFILE_SPEC): Split crt3 into a separate spec.
(DARWIN_EXTRA_SPECS): Add crt2 and crt3 spec.
(DARWIN_CRT2_SPEC): New.
(DARWIN_CRT3_SPEC): New.
(MIN_LD64_OMIT_STUBS): Revise to 62.1.
* config/rs6000/darwin.h (DARWIN_CRT2_SPEC): Revise conditions.
(DARWIN_CRT3_SPEC): New.

libgcc/

2019-07-03  Iain Sandoe  

* config.host (powerpc-*-darwin*,powerpc64-*-darwin*): Revise crt
list.
* config/rs6000/t-darwin: Build crt3_2 for older systems.  Revise
mmacosx-version-min for crts to run across all system versions.
* config/rs6000/t-darwin64 (LIB2ADD): Remove.
* config/t-darwin: Revise mmacosx-version-min for crts to run across
system versions >= 10.4.


diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index ae324f1..72215ce 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -189,8 +189,15 @@ extern GTY(()) int darwin_ms_struct;
 #define DARWIN_NOCOMPACT_UNWIND \
 " %:version-compare(>= 10.6 mmacosx-version-min= -no_compact_unwind) "
 
-/* This is mostly a clone of the standard LINK_COMMAND_SPEC, plus
-   precomp, libtool, and fat build additions.
+/* In Darwin linker specs we can put -lcrt0.o and ld will search the library
+   path for crt0.o or -lcrtx.a and it will search for for libcrtx.a.  As for
+   other ports, we can also put xxx.{o,a}%s and get the appropriate complete
+   startfile absolute directory.  This latter point is important when we want
+   to override ld's rule of .dylib being found ahead of .a and the user wants
+   the convenience library to be linked.  */
+
+/* The LINK_COMMAND spec is mostly a clone of the standard LINK_COMMAND_SPEC,
+   plus precomp, libtool, and fat build additions.
 
In general, random Darwin linker flags should go into LINK_SPEC
instead of LINK_COMMAND_SPEC.  The command spec is better for
@@ -352,43 +359,42 @@ extern GTY(()) int darwin_ms_struct;
 
 /* Support -mmacosx-version-min by supplying different (stub) libgcc_s.dylib
libraries to link against, and by not linking against libgcc_s on
-   earlier-than-10.3.9.
+   earlier-than-10.3.9.  If we need exceptions, prior to 10.3.9, then we have
+   to link the static eh lib, since there's no shared version on the system.
+
+   Note that by default, except as above, -lgcc_eh is not linked against.
+   This is because,in general, we need to unwind through system libraries that
+   are linked with the shared unwinder in libunwind (or libgcc_s for 10.4/5).
 
-   Note that by default, -lgcc_eh is not linked against!  This is
-   because in a future version of Darwin the EH frame information may
-   be in a new format, or the fallback routine might be changed; if
-   you want to explicitly link against the static version of those
-   routines, because you know you don't need to unwind through system
-   libraries, you need to explicitly say -static-libgcc.
+   The static version of the current libgcc unwinder (which differs from the
+   implementation in libunwind.dylib on systems Darwin10 [10.6]+) can be used
+   by specifying -static-libgcc.
 
-   If it is linked against, it has to be before -lgcc, because it may
+   If libgcc_eh is linked against, it has to be before -lgcc, because it might
need symbols from -lgcc.  */
+
 #undef REAL_LIBGCC_SPEC
 #define REAL_LIBGCC_SPEC  \
"%{static-libgcc|static: -lgcc_eh -lgcc;   \
-  shared-libgcc|fexceptions|fgnu-runtime: \
-   %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_s.10.4)   \
+  shared-libgcc|fexceptions|fobjc-exceptions|fgnu-runtime:\
+   %:version-compare(!> 10.3.9 mmacosx-version-min= -lgcc_eh) \
+   %:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_s.10.4) \
%:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5)   \
-   %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \
+   %:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_ext.10.4) \
%:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \
-lgcc ;\
   :%:version-compare(>< 10.3.9 10.5 mmacosx-version

Re: [PATCH] Prepare for prefixed instructions on PowerPC

2019-07-03 Thread Michael Meissner
On Wed, Jul 03, 2019 at 12:55:41PM -0500, Segher Boessenkool wrote:
> On Wed, Jul 03, 2019 at 12:50:37PM -0400, Michael Meissner wrote:
> > On Tue, Jul 02, 2019 at 07:09:20PM -0500, Segher Boessenkool wrote:
> > > We'll need to update our insn_cost for prefixed, sure, it currently does
> > >   int n = get_attr_length (insn) / 4;
> > > to figure out how many machine instructions a pattern is, and then uses
> > > "n" differently for the different types of insn.  We'll need to refine
> > > this a bit for prefixed instructions.
> > 
> > Yes, I have some plans with this regard.  In particular, I will be 
> > introducing
> > a "num_insns" RTL attribute, that if set is the number of instructions that
> > will be emitted.
> 
> I don't think this is a good idea.  You can set "cost" directly, if that
> is the only thing you need this for?

The trouble is the cost is currently a factor based on type + the cost from the
cost structure.  It really would be hard to set it to a single value in the
insns without having to have complex means for setting the machine dependent
costs.  If the numeric RTL attributes could set the value from a function, it
would be simpler, but that isn't currently supported.

Here is my current version of rs6000_insn_cost.  At the moment, I'm only
setting the "num_insns" in a few places, so the default would normally kick in.

/* How many real instructions are generated for this insn?  This is slightly
   different from the length attribute, in that the length attribute counts the
   number of bytes.  With prefixed instructions, we don't want to count a
   prefixed instruction (length 12 bytes including possible NOP) as taking 3
   instructions, but just one.  */

static int
rs6000_num_insns (rtx_insn *insn)
{
  /* If the insn provides an override, use it.  */
  int num = get_attr_num_insns (insn);

  if (!num)
{
  /* Try to figure it out based on the length and whether there are
 prefixed instructions.  While prefixed instructions are only 8 bytes,
 we have to use 12 as the size of the first prefixed instruction in
 case the instruction needs to be aligned.  Back to back prefixed
 instructions would only take 20 bytes, since it is guaranteed that one
 of the prefixed instructions does not need the alignment.  */
  int length = get_attr_length (insn);

  if (length >= 12 && TARGET_PREFIXED_ADDR
  && get_attr_prefixed (insn) == PREFIXED_YES)
{
  /* Single prefixed instruction.  */
  if (length == 12)
return 1;

  /* A normal instruction and a prefixed instruction (16) or two back
 to back prefixed instructions (20).  */
  if (length == 16 || length == 20)
return 2;

  /* Guess for larger instruction sizes.  */
  num = 2 + (length - 20) / 4;
}
  else 
num = length / 4;
}

  return num;
}

rs6000_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  if (recog_memoized (insn) < 0)
return 0;

  if (!speed)
return get_attr_length (insn);

  cost = get_attr_cost (insn);
  if (cost > 0)
return cost;

  int n = rs6000_num_insns (insn);
  enum attr_type type = get_attr_type (insn);

  switch (type)
{
case TYPE_LOAD:
case TYPE_FPLOAD:
case TYPE_VECLOAD:
  cost = COSTS_N_INSNS (n + 1);
  break;

case TYPE_MUL:
  switch (get_attr_size (insn))
{
case SIZE_8:
  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
  break;
case SIZE_16:
  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
  break;
case SIZE_32:
  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
  break;
case SIZE_64:
  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
  break;
default:
  gcc_unreachable ();
}
  break;

// ...

> > What I was talking about is I've found some insns that don't set the length,
> > and are split.  Using the insn cost mechanism will mean that these 
> > instructions
> > will be thought of being cheaper than they actually are.
> 
> Yes.  Please split RTL insns as early as possible.  It also matters for
> scheduling, and it prevents exponential explosion of the number of
> patterns you need.  Only sometimes do you need to split late, usually
> because RA can put some registers in memory and you want to handle that
> optimally, or things depend on what exact register you were allocated
> (cr0 vs. crN for example, but could be GPR vs. VSR).

Generally most of the places I've been modifying with splits need to be handled
after register allocation.

> And again, you can set cost directly; length alone is not usually enough
> for determining the cost of split patterns.  But you do need length for
> accurate costs with -Os, hrm.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 

Re: [patch, c++ openmp] Improve diagnostics for unmappable types

2019-07-03 Thread Jason Merrill

On 7/1/19 7:16 AM, Andrew Stubbs wrote:

On 28/06/2019 17:21, Jason Merrill wrote:

+  inform ((decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION),
+  "incomplete types are not mappable");


It's better to use input_location as a fallback; essentially no 
diagnostics should use UNKNOWN_LOCATION.  And let's print the type 
with %qT.



+    if (notes)
+  result = false;
+    else
+  return false;


Returning early when !notes doesn't seem worth the extra lines of code.


How is this version?


OK, thanks.

Jason



Re: C++ PATCH to detect narrowing in case values (PR c++/90805)

2019-07-03 Thread Jason Merrill

On 7/3/19 10:13 AM, Marek Polacek wrote:

On Sat, Jun 22, 2019 at 11:28:36PM -0400, Jason Merrill wrote:

On 6/13/19 5:03 PM, Marek Polacek wrote:

Case values are converted constant expressions, so narrowing conversion is not
permitted.  This patch adds detecting narrowing to case_conversion; it's a
handy spot because we have both the value and the (adjusted) type of the
condition.


Is there a reason not to use build_converted_constant_expr?


The function comment says "Note that if TYPE and VALUE are already integral
we don't really do the conversion because the language-independent
warning/optimization code will work better that way" so I avoided adding any
conversions.



What I could do is to, instead of calling check_narrowing, call
build_converted_constant_expr (type, value, tf_warning_or_error);
and not use its result, but I'm not sure what the benefits would be.


I was thinking about using it instead of the current 
perform_implicit_conversion_flags, so we get the somewhat different 
constraints on the conversion.  And then it becomes simpler to use it 
unconditionally but throw the result away in the easy case.


Jason


Re: [PATCH] Prepare for prefixed instructions on PowerPC

2019-07-03 Thread Segher Boessenkool
On Wed, Jul 03, 2019 at 12:50:37PM -0400, Michael Meissner wrote:
> On Tue, Jul 02, 2019 at 07:09:20PM -0500, Segher Boessenkool wrote:
> > We'll need to update our insn_cost for prefixed, sure, it currently does
> >   int n = get_attr_length (insn) / 4;
> > to figure out how many machine instructions a pattern is, and then uses
> > "n" differently for the different types of insn.  We'll need to refine
> > this a bit for prefixed instructions.
> 
> Yes, I have some plans with this regard.  In particular, I will be introducing
> a "num_insns" RTL attribute, that if set is the number of instructions that
> will be emitted.

I don't think this is a good idea.  You can set "cost" directly, if that
is the only thing you need this for?

> What I was talking about is I've found some insns that don't set the length,
> and are split.  Using the insn cost mechanism will mean that these 
> instructions
> will be thought of being cheaper than they actually are.

Yes.  Please split RTL insns as early as possible.  It also matters for
scheduling, and it prevents exponential explosion of the number of
patterns you need.  Only sometimes do you need to split late, usually
because RA can put some registers in memory and you want to handle that
optimally, or things depend on what exact register you were allocated
(cr0 vs. crN for example, but could be GPR vs. VSR).

And again, you can set cost directly; length alone is not usually enough
for determining the cost of split patterns.  But you do need length for
accurate costs with -Os, hrm.


Segher


Re: [PATCH] Prepare for prefixed instructions on PowerPC

2019-07-03 Thread Michael Meissner
On Mon, Jul 01, 2019 at 04:27:05PM -0500, Segher Boessenkool wrote:
> Hi Mike,
> 
> Sorry I missed this patch :-(
> 
> On Thu, Jun 27, 2019 at 04:18:00PM -0400, Michael Meissner wrote:
> > As we discussed off-line earlier, I changed all of the "4" lengths to be 
> > "*",
> > even for instruction alternatives that would not be subject to being 
> > changed to
> > be a prefixed instruction (i.e. the sign extend instruction "extsw"'s 
> > length is
> > set to "*", even though it would not be a prefixed instruction).
> 
> "*" means "use the default for this attribute", which often is nicer to
> see than "4".  For example, "8" stands out more in a sea of "*"s.
> 
> Usually "*" is the insns defined as "normal" alternatives, and "8" or
> "12" etc. are split.
> 
> > @@ -7231,7 +7231,7 @@ (define_insn "*movcc_internal1"
> >(const_string "mtjmpr")
> >(const_string "load")
> >(const_string "store")])
> > -   (set_attr "length" "4,4,12,4,4,8,4,4,4,4,4,4")])
> > +   (set_attr "length" "*,*,12,*,*,8,*,*,*,*,*,*")])
> 
> In this case, the "12" and "8" are actually defined as one insn in the
> template, with some "\;".  Luckily there aren't many of those.
> 
> > @@ -7385,8 +7385,8 @@ (define_insn "*mov_softfloat"
> >   *,  *, *, *")
> >  
> > (set_attr "length"
> > -   "4,  4, 4, 4, 4, 4,
> > - 4,  4, 8, 4")])
> > +   "*,  *, *, *, *, *,
> > + *,  *, 8, *")])
> 
> [ That last line should start with a tab as well. ]

Ok.

> The entry before the 8 is split as well.  Maybe that should be "4", to
> stand out?  I don't know what works better; your choice.

Though the "G" constraint specifically says for SFmode it is a single
instruction.

> > @@ -7696,8 +7696,8 @@ (define_insn "*mov_softfloat64"
> >   *,   *,  *")
> >  
> > (set_attr "length"
> > -"4,   4,  4,  4,  4,  8,
> > - 12,  16, 4")])
> > +"*,   *,  *,  *,  *,  8,
> > + 12,  16, *")])
> 
> Same for the last entry here.

Well technically that alternative will never fire (destination is "*h" and
source is "0"), and a nop is emitted.  I do wish we could never ever load
floating point into SPR registers.  I've tried in the reload days to eliminate
it, but there was always some abort if it got eliminated.

> 
> > @@ -8760,10 +8760,10 @@ (define_insn "*movdi_internal32"
> >vecsimple")
> > (set_attr "size" "64")
> > (set_attr "length"
> > - "8, 8, 8, 4, 4, 4,
> > -  16,4, 4, 4, 4, 4,
> > -  4, 4, 4, 4, 4, 8,
> > -  4")
> > + "8, 8, 8, *, *, *,
> > +  16,*, *, *, *, *,
> > +  *, *, *, *, *, 8,
> > +  *")
> 
> And the last here.

Well it will be split into a single VSPLTISW instruction.

> 
> > @@ -8853,11 +8853,11 @@ (define_insn "*movdi_internal64"
> >  mftgpr,mffgpr")
> > (set_attr "size" "64")
> > (set_attr "length"
> > -   "4, 4, 4, 4, 4,  20,
> > -4, 4, 4, 4, 4,  4,
> > -4, 4, 4, 4, 4,  4,
> > -4, 8, 4, 4, 4,  4,
> > -4, 4")
> > +   "*, *, *, *, *,  20,
> > +*, *, *, *, *,  *,
> > +*, *, *, *, *,  *,
> > +*, 8, *, *, *,  *,
> > +*, *")
> 
> And two of the entries here.

Though the second split becomes a single VSPLTISW instruction once again.

> > @@ -1150,9 +1150,9 @@ (define_insn "vsx_mov_64bit"
> >  store, load,  store, *, vecsimple, 
> > vecsimple,
> >  vecsimple, *, *, vecstore,  vecload")
> > (set_attr "length"
> > -   "4, 4, 4, 8, 4, 8,
> > -8, 8, 8, 8, 4, 4,
> > -4, 20,8, 4, 4")
> > +   "*, *, *, 8, *, 8,
> > +8, 8, 8, 8, *, *,
> > +*, 20,8, *, *")
> 
> No idea which ones are split here :-)  None of the * and all other would
> be nice, bu

Re: [PATCH] Prepare for prefixed instructions on PowerPC

2019-07-03 Thread Michael Meissner
On Tue, Jul 02, 2019 at 07:09:20PM -0500, Segher Boessenkool wrote:
> On Tue, Jul 02, 2019 at 07:36:21PM -0400, Michael Meissner wrote:
> > On Mon, Jul 01, 2019 at 04:27:05PM -0500, Segher Boessenkool wrote:
> > > The entry before the 8 is split as well.  Maybe that should be "4", to
> > > stand out?  I don't know what works better; your choice.
> > 
> > I'll look into it.  Note, the length is used in two places.  One at the end 
> > to
> > generate the appropriate branches, but the other is in rs6000_insn_cost 
> > inside
> > rs6000.c.
> 
> We'll need to update our insn_cost for prefixed, sure, it currently does
>   int n = get_attr_length (insn) / 4;
> to figure out how many machine instructions a pattern is, and then uses
> "n" differently for the different types of insn.  We'll need to refine
> this a bit for prefixed instructions.

Yes, I have some plans with this regard.  In particular, I will be introducing
a "num_insns" RTL attribute, that if set is the number of instructions that
will be emitted.

If "num_insns" is not set, then it will use the length and look at the
"prefixed" attribute.

> > This occurs before the final passes, so it is important that even
> > though the insn will be split, that the length is still set.  However, 
> > things
> > are rather inconsistant, in that sometimes the length field is accurate, and
> > sometimes not.
> 
> It *has* to be correct; it is allowed to be pessimistic though.  This
> is used to determine if a B-form branch can reach, for example.  You get
> ICEs if it isn't correct.  Only on very few testcases, of course :-(

To be clear.  Yes it has to be correct when the label handling is done.

What I was talking about is I've found some insns that don't set the length,
and are split.  Using the insn cost mechanism will mean that these instructions
will be thought of being cheaper than they actually are.

> > I'm finding that the rs6000_insn_cost issue really muddies things up,
> > particularly if a vector load/store insn is done in gpr registers, where it 
> > can
> > be 4 insns.
> 
> Yeah, it doesn't handle vectors specially *at all* currently, not even
> for FP in vectors.  It is pretty much just a switch over the "type", and
> for everything vector it gets to
> default:
>   cost = COSTS_N_INSNS (n);
> (with the above "n") which is a pretty coarse approximation to the truth ;-)
> 
> You could try #undef'ing TARGET_INSN_COST (in rs6000.c) for now, and hope
> that rs6000_rtx_costs does better for what you need right now.  In the end
> it will have to be fixed properly, insn_cost is quite important.
> 
> 
> Segher
> 

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797



[COMMITTED] Fix store merging tests on Arm

2019-07-03 Thread Wilco Dijkstra
Fix the failing store merging test on Arm.  Aligning variables fixes a
few cases, otherwise disable the test on Arm.  All store merging tests
now pass.  Committed as obvious.

ChangeLog:
2019-07-03  Wilco Dijkstra  

testsuite/
* gcc.dg/store_merging_27.c: Fix test for Arm.
* gcc.dg/store_merging_28.c: Likewise.
* gcc.dg/store_merging_29.c: Likewise.
* gcc.dg/tree-ssa/dump-6.c: Likewise.
--

diff --git a/gcc/testsuite/gcc.dg/store_merging_27.c 
b/gcc/testsuite/gcc.dg/store_merging_27.c
index 
a691368ad3f8bad263569378421e699a9c9e8880..d3cd117bdc63442bc0ea4377365f2f83f93af143
 100644
--- a/gcc/testsuite/gcc.dg/store_merging_27.c
+++ b/gcc/testsuite/gcc.dg/store_merging_27.c
@@ -18,7 +18,7 @@ bar (struct S *x)
 int
 main ()
 {
-  struct S s = {};
+  __attribute__((aligned(8))) struct S s = {};
   s.buf[1] = 1;
   s.buf[3] = 2;
   bar (&s);
diff --git a/gcc/testsuite/gcc.dg/store_merging_28.c 
b/gcc/testsuite/gcc.dg/store_merging_28.c
index 
95a082288d73a904d72090fd1cc22d3f027ad23c..2d6cffc46945596566aa4e529b0935df6c33b786
 100644
--- a/gcc/testsuite/gcc.dg/store_merging_28.c
+++ b/gcc/testsuite/gcc.dg/store_merging_28.c
@@ -3,7 +3,7 @@
 /* { dg-require-effective-target store_merge } */
 /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-store-merging-details" } */
 /* { dg-final { scan-tree-dump-times "New sequence of \[24] stores to replace 
old one of 16 stores" 8 "store-merging" { target { i?86-*-* x86_64-*-* } } } } 
*/
-/* { dg-final { scan-tree-dump-times "New sequence of \[24] stores to replace 
old one of 6 stores" 1 "store-merging" } } */
+/* { dg-final { scan-tree-dump-times "New sequence of \[24] stores to replace 
old one of 6 stores" 1 "store-merging" { target { ! arm*-*-* } } } } */
 
 typedef struct S { char data[16]; } S;
 void optimize_me (S);
diff --git a/gcc/testsuite/gcc.dg/store_merging_29.c 
b/gcc/testsuite/gcc.dg/store_merging_29.c
index 
777020fab5a12946e0bc68785394347ae06060ee..6b32aa9b6f973b8ce6daec34870d37f0fe92622d
 100644
--- a/gcc/testsuite/gcc.dg/store_merging_29.c
+++ b/gcc/testsuite/gcc.dg/store_merging_29.c
@@ -3,7 +3,7 @@
 /* { dg-require-effective-target store_merge } */
 /* { dg-options "-O2 -fdump-tree-store-merging-details" } */
 /* { dg-final { scan-tree-dump "New sequence of 3 stores to replace old one of 
6 stores" "store-merging" { target { le && { ! arm*-*-* } } } } } */
-/* { dg-final { scan-tree-dump "New sequence of \[34] stores to replace old 
one of 6 stores" "store-merging" { target { be || { arm*-*-* } } } } } */
+/* { dg-final { scan-tree-dump "New sequence of \[34] stores to replace old 
one of 6 stores" "store-merging" { target { be && { ! arm*-*-* } } } } } */
 
 struct T { char a[1024]; };
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dump-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/dump-6.c
index 
3e09668ddc96320e3a1b964a9e60e7c5ba593460..70659c00c0efde5548fa69d4acd234bfde94791a
 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/dump-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dump-6.c
@@ -5,7 +5,7 @@
{ dg-require-effective-target store_merge } */
 
 
-extern char a2[2];
+extern __attribute__((aligned(2))) char a2[2];
 
 void f2 (void)
 {
@@ -13,7 +13,7 @@ void f2 (void)
   a2[1] = 0;
 }
 
-extern char a4[4];
+extern __attribute__((aligned(4))) char a4[4];
 
 void f4 (void)
 {
@@ -23,7 +23,7 @@ void f4 (void)
   a4[3] = 0;
 }
 
-extern char a8[8];
+extern __attribute__((aligned(8))) char a8[8];
 
 void f8 (void)
 {

Re: [PATCH 1/2] Come up with function_decl_type and use it in tree_function_decl.

2019-07-03 Thread Richard Biener
On July 3, 2019 4:53:30 PM GMT+02:00, "Martin Liška"  wrote:
>On 7/2/19 7:15 PM, Marc Glisse wrote:
>> On Tue, 2 Jul 2019, Martin Liška wrote:
>> 
>>> After the discussion with Richi and Nathan, I made a place in
>tree_function_decl
>>> and I rebased the original Dominik's patch on top of that.
>> 
>> So, last time there were some questions about the legality of this
>transformation. Did you change the exact set of functions on which this
>is applied?
>
>Yes. I was not included in the original discussion, but I hope the
>transformation is valid.
>Btw. clang also removes the new/delete pairs and I guess it was the
>original motivation of the patch.

We also remove malloc/free pairs which the C standard does not explicitly allow 
(but also doesn't explicitly forbid). I don't think standards need to enumerate 
everything allowed and I don't know any explicit wording in the C++ standard 
that forbids this. It's only that users can override the allocation functions 
(but so can they in C) and it was suggested we need to preserve side effects 
unknown to the compiler. 

Richard. 

>Martin
>
>Or has there been a clarification in the standard saying that this is
>ok? (or were we mistaken the first time to believe that there might be
>an issue?)
>> 



Re: [PATCH v2 4/5] or1k: Initial support for FPU

2019-07-03 Thread Segher Boessenkool
Hi Stafford,

On Wed, Jul 03, 2019 at 12:33:50PM +0900, Stafford Horne wrote:
> +case 'd':
> +  if (REG_P (x))
> +   if (GET_MODE (x) == DFmode || GET_MODE (x) == DImode)
> + fprintf (file, "%s,%s", reg_names[REGNO (operand)],
> + reg_names[REGNO (operand) + 1]);
> +   else
> + fprintf (file, "%s", reg_names[REGNO (operand)]);
> +  else

The coding conventions says to use braces around nested conditionals.

> @@ -212,6 +214,7 @@ enum reg_class
>  #define REG_CLASS_CONTENTS  \
>  { { 0x, 0x },\
>{ SIBCALL_REGS_MASK,   0 },\
> +  { 0x7efe, 0x },\

Above you said r0, r30, r31 are excluded, but this is r0, r8, r30, or
in GCC register numbers, 0, 8, and 31?  You probably should mention r8
somewhere (it's because it is the last arg, this avoid problems, I guess?),
and the 30/31 thing is confused some way.  Maybe it is all just that one
documentation line :-)

> +;  d - double pair base registers (excludes r0, r30 and r31 which overflow)


Segher


Re: [PATCH][AArch64] Remove constraint strings from define_expand constructs in the back end

2019-07-03 Thread Dennis Zhang
Hi Richard, Thanks for the tips.

The special exceptions according to TARGET_SECONDARY_RELOAD hook are 
revised. Some related patterns still need constraints in order to work 
in an expected way in the TARGET_SECONDARY_RELOAD function.

The updated patch is tested for targets: aarch64_be-linux-gnu,
aarch64_be-none-linux-gnu, aarch64-linux-gnu, and 
aarch64-none-linux-gnu. It survives in testsuite regression.

gcc/ChangeLog:

2019-07-03  Dennis Zhang  

* config/aarch64/aarch64.md: Remove redundant constraints from
define_expand but keep some patterns untouched if they are
specially selected by TARGET_SECONDARY_RELOAD hook.
* config/aarch64/aarch64-sve.md: Likewise.
* config/aarch64/atomics.md: Remove redundant constraints from
define_expand.
* config/aarch64/aarch64-simd.md: Likewise.

On 7/2/19 8:05 AM, Richard Sandiford wrote:
> James Greenhalgh  writes:
>> On Mon, Jun 24, 2019 at 04:33:40PM +0100, Dennis Zhang wrote:
>>> Hi,
>>>
>>> A number of AArch64 define_expand patterns have specified constraints
>>> for their operands. But the constraint strings are ignored at expand
>>> time and are therefore redundant/useless. We now avoid specifying
>>> constraints in new define_expands, but we should clean up the existing
>>> define_expand definitions.
>>>
>>> For example, the constraint "=w" is removed in the following case:
>>> (define_expand "sqrt2"
>>> [(set (match_operand:GPF_F16 0 "register_operand" "=w")
>>> The "" marks with an empty constraint in define_expand are removed as well.
>>>
>>> The patch is tested with the build configuration of
>>> --target=aarch64-none-linux-gnu, and it passes gcc/testsuite.
>>
>> This is OK for trunk.
> 
> My fault, sorry, but... Kyrill pointed out when the corresponding arm
> patch was posted that it removes constraints from reload expanders that
> actually need them.  This patch has the same problem and so shouldn't
> go in as-is.
> 
> I'd thought at the time that Kyrill's comment applied to both patches,
> but I see now that it really was specific to arm.
> 
> Thanks,
> Richard
> 
>>
>> Thanks,
>> James
>>
>>> gcc/ChangeLog:
>>>
>>> 2019-06-21  Dennis Zhang  
>>>
>>> * config/aarch64/aarch64-simd.md: Remove redundant constraints
>>> from define_expand.
>>> * config/aarch64/aarch64-sve.md: Likewise.
>>> * config/aarch64/aarch64.md: Likewise.
>>> * config/aarch64/atomics.md: Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index df8bf1d9778..837242c7e56 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -19,8 +19,8 @@
 ;; .
 
 (define_expand "mov"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
-	(match_operand:VALL_F16 1 "general_operand" ""))]
+  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
+	(match_operand:VALL_F16 1 "general_operand"))]
   "TARGET_SIMD"
   "
   /* Force the operand into a register if it is not an
@@ -39,8 +39,8 @@
 )
 
 (define_expand "movmisalign"
-  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
-(match_operand:VALL 1 "general_operand" ""))]
+  [(set (match_operand:VALL 0 "nonimmediate_operand")
+(match_operand:VALL 1 "general_operand"))]
   "TARGET_SIMD"
 {
   /* This pattern is not permitted to fail during expansion: if both arguments
@@ -652,8 +652,8 @@
   [(set_attr "type" "neon_fp_rsqrts_")])
 
 (define_expand "rsqrt2"
-  [(set (match_operand:VALLF 0 "register_operand" "=w")
-	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+  [(set (match_operand:VALLF 0 "register_operand")
+	(unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
 		 UNSPEC_RSQRT))]
   "TARGET_SIMD"
 {
@@ -1025,9 +1025,9 @@
 )
 
 (define_expand "ashl3"
-  [(match_operand:VDQ_I 0 "register_operand" "")
-   (match_operand:VDQ_I 1 "register_operand" "")
-   (match_operand:SI  2 "general_operand" "")]
+  [(match_operand:VDQ_I 0 "register_operand")
+   (match_operand:VDQ_I 1 "register_operand")
+   (match_operand:SI  2 "general_operand")]
  "TARGET_SIMD"
 {
   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
@@ -1072,9 +1072,9 @@
 )
 
 (define_expand "lshr3"
-  [(match_operand:VDQ_I 0 "register_operand" "")
-   (match_operand:VDQ_I 1 "register_operand" "")
-   (match_operand:SI  2 "general_operand" "")]
+  [(match_operand:VDQ_I 0 "register_operand")
+   (match_operand:VDQ_I 1 "register_operand")
+   (match_operand:SI  2 "general_operand")]
  "TARGET_SIMD"
 {
   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
@@ -1119,9 +1119,9 @@
 )
 
 (define_expand "ashr3"
-  [(match_operand:VDQ_I 0 "register_operand" "")
-   (match_operand:VDQ_I 1 "register_operand" "")
-   (match_operand:SI  2 "general_operand" "")]
+  [(match_operand:VDQ_I 0 "register_operand")
+   (match_operand:VDQ_I 1 "register_operand")
+   (match_operand:SI  2 "general_operand")]
  "TARGET_SIMD"
 {
   int bit_width = GET_MODE_UNIT_SIZE

Re: [PATCH 1/2] Come up with function_decl_type and use it in tree_function_decl.

2019-07-03 Thread Martin Liška
On 7/2/19 7:15 PM, Marc Glisse wrote:
> On Tue, 2 Jul 2019, Martin Liška wrote:
> 
>> After the discussion with Richi and Nathan, I made a place in 
>> tree_function_decl
>> and I rebased the original Dominik's patch on top of that.
> 
> So, last time there were some questions about the legality of this 
> transformation. Did you change the exact set of functions on which this is 
> applied?

Yes. I was not included in the original discussion, but I hope the 
transformation is valid.
Btw. clang also removes the new/delete pairs and I guess it was the original 
motivation of the patch.

Martin

Or has there been a clarification in the standard saying that this is ok? (or 
were we mistaken the first time to believe that there might be an issue?)
> 



Re: [PATCH v2 3/5] or1k: Add mrori option, fix option docs

2019-07-03 Thread Segher Boessenkool
On Wed, Jul 03, 2019 at 12:33:49PM +0900, Stafford Horne wrote:
> @@ -179,11 +183,11 @@
>[(set (match_operand:SI 0 "register_operand" "=r,r")
>   (rotatert:SI (match_operand:SI 1 "register_operand"  "r,r")
> (match_operand:SI 2 "reg_or_u6_operand" "r,n")))]
> -  "TARGET_ROR"
> +  "TARGET_ROR || TARGET_RORI"
>"@
> l.ror\t%0, %1, %2
> l.rori\t%0, %1, %2"
> -  [(set_attr "insn_support" "*,shftimm")])
> +  [(set_attr "insn_support" "ror,rori")])

Does this work?  If you use -mno-ror -mrori?  It will then allow generating
a reg for the second operand, and ICE later on, as far as I can see?


Segher


Re: C++ PATCH to detect narrowing in case values (PR c++/90805)

2019-07-03 Thread Marek Polacek
On Sat, Jun 22, 2019 at 11:28:36PM -0400, Jason Merrill wrote:
> On 6/13/19 5:03 PM, Marek Polacek wrote:
> > Case values are converted constant expressions, so narrowing conversion is 
> > not
> > permitted.  This patch adds detecting narrowing to case_conversion; it's a
> > handy spot because we have both the value and the (adjusted) type of the
> > condition.
> 
> Is there a reason not to use build_converted_constant_expr?

The function comment says "Note that if TYPE and VALUE are already integral
we don't really do the conversion because the language-independent
warning/optimization code will work better that way" so I avoided adding any
conversions.

What I could do is to, instead of calling check_narrowing, call
build_converted_constant_expr (type, value, tf_warning_or_error);
and not use its result, but I'm not sure what the benefits would be.  I can
retest the patch with that change, if you want.

--
Marek Polacek • Red Hat, Inc. • 300 A St, Boston, MA


Re: [PATCH] Wrap 'expand_all_functions' and 'ipa_passes' around timevars

2019-07-03 Thread Giuliano Belinassi
Hi, Jeff.

On 07/02, Jeff Law wrote:
> On 1/24/19 12:51 PM, Giuliano Belinassi wrote:
> > This patch adds two variables named 'TV_CGRAPH_FUNC_EXPANSION' and
> > 'TV_CGRAPH_IPA_PASSES' that count the elapsed time of the functions
> > 'expand_all_functions' and 'ipa_passes', respectivelly.
> > 
> > The main point of this is that these functions takes a very long time
> > when compiling the 'gimple-match.c' file, and therefore may also take
> > a long time when compiling other large files.
> > 
> > I also accept suggestions about how to improve this :-)
> > 
> > ChangeLog:
> > 
> > 2019-01-24  Giuliano Belinassi 
> > 
> > * cgraph_unit.c (compile): TV_CGRAPH_FUNC_EXPANSION and
> > TV_CGRAPH_IPA_PASSES start, stop.
> > * timevar.def (TV_CGRAPH_IPA_PASSES, TV_CGRAPH_FUNC_EXPANSION): New.
> > 
> > 
> Per our discussion WRT timevar_{start,stop} vs timevar_{push_pop}, this
> is fine for the trunk if you still want to include it.

It is fine for me :)

> 
> jeff


[PATCH] simplify-rtx.c: Change BITSIZE to UNIT_PRECISION in simplification of (extend ashiftrt (ashift ..))) Otherwise the gcc_assert can catch when dealing with partial int modes.

2019-07-03 Thread John Darrington
---
 gcc/ChangeLog  | 6 ++
 gcc/simplify-rtx.c | 8 
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c206ab6..47035ca 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-07-03  John Darrington 
+
+   simplify-rtx.c: Change BITSIZE to UNIT_PRECISION in simplification of
+   (extend ashiftrt (ashift ..)))  Otherwise the gcc_assert will catch
+   when dealing with partial int modes.
+
 2019-07-02  Eric Botcazou  
 
* cfgexpand.c (pass_expand::execute): Deal specially with instructions
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 89a46a9..d74a4ba 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1504,12 +1504,12 @@ simplify_unary_operation_1 (enum rtx_code code, 
machine_mode mode, rtx op)
  && CONST_INT_P (XEXP (op, 1))
  && XEXP (XEXP (op, 0), 1) == XEXP (op, 1)
  && (op_mode = as_a  (GET_MODE (op)),
- GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1
+ GET_MODE_UNIT_PRECISION (op_mode) > INTVAL (XEXP (op, 1
{
  scalar_int_mode tmode;
- gcc_assert (GET_MODE_BITSIZE (int_mode)
- > GET_MODE_BITSIZE (op_mode));
- if (int_mode_for_size (GET_MODE_BITSIZE (op_mode)
+ gcc_assert (GET_MODE_UNIT_PRECISION (int_mode)
+ > GET_MODE_UNIT_PRECISION (op_mode));
+ if (int_mode_for_size (GET_MODE_UNIT_PRECISION (op_mode)
 - INTVAL (XEXP (op, 1)), 1).exists (&tmode))
{
  rtx inner =
-- 
1.8.3.1



[patch] Small improvements to coverage info (2/n)

2019-07-03 Thread Eric Botcazou
Hi,

this is a series of fixes for the exception handling code, with the same goal 
of preventing instructions from inheriting random source location information 
in the debug info generated by the compiler.

Tested on x86_64-suse-linux, both GCC and GDB, OK for mainline?


2019-07-03  Eric Botcazou  

* except.c (emit_to_new_bb_before): Make sure to put a location on SEQ.
* tree-eh.c (replace_goto_queue_1) : Propagate location.
(emit_eh_dispatch): Delete.
(lower_catch): Emit the eh_dispatch manually and set the location of
the first catch statement onto it.
(lower_eh_filter): Emit the eh_dispatch manually and set location.
(lower_eh_dispatch): Propagate location.
* tree-outof-ssa.c (set_location_for_edge): Handle EH edges specially.
(eliminate_build): Likewise.

-- 
Eric BotcazouIndex: except.c
===
--- except.c	(revision 272930)
+++ except.c	(working copy)
@@ -921,7 +921,7 @@ assign_filter_values (void)
 static basic_block
 emit_to_new_bb_before (rtx_insn *seq, rtx_insn *insn)
 {
-  rtx_insn *last;
+  rtx_insn *next, *last;
   basic_block bb;
   edge e;
   edge_iterator ei;
@@ -934,7 +934,16 @@ emit_to_new_bb_before (rtx_insn *seq, rt
   force_nonfallthru (e);
 else
   ei_next (&ei);
-  last = emit_insn_before (seq, insn);
+
+  /* Make sure to put the location of INSN or a subsequent instruction on SEQ
+ to avoid inheriting the location of the previous instruction.  */
+  next = insn;
+  while (next && !NONDEBUG_INSN_P (next))
+next = NEXT_INSN (next);
+  if (next)
+last = emit_insn_before_setloc (seq, insn, INSN_LOCATION (next));
+  else
+last = emit_insn_before (seq, insn);
   if (BARRIER_P (last))
 last = PREV_INSN (last);
   bb = create_basic_block (seq, last, BLOCK_FOR_INSN (insn)->prev_bb);
Index: tree-eh.c
===
--- tree-eh.c	(revision 272930)
+++ tree-eh.c	(working copy)
@@ -503,7 +503,11 @@ replace_goto_queue_1 (gimple *stmt, stru
   seq = find_goto_replacement (tf, temp);
   if (seq)
 	{
-	  gsi_insert_seq_before (gsi, gimple_seq_copy (seq), GSI_SAME_STMT);
+	  gimple_stmt_iterator i;
+	  seq = gimple_seq_copy (seq);
+	  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
+	gimple_set_location (gsi_stmt (i), gimple_location (stmt));
+	  gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
 	  gsi_remove (gsi, false);
 	  return;
 	}
@@ -811,15 +815,6 @@ emit_resx (gimple_seq *seq, eh_region re
 record_stmt_eh_region (region->outer, x);
 }
 
-/* Emit an EH_DISPATCH statement into SEQ for REGION.  */
-
-static void
-emit_eh_dispatch (gimple_seq *seq, eh_region region)
-{
-  geh_dispatch *x = gimple_build_eh_dispatch (region->index);
-  gimple_seq_add_stmt (seq, x);
-}
-
 /* Note that the current EH region may contain a throw, or a
call to a function which itself may contain a throw.  */
 
@@ -1762,7 +1757,9 @@ lower_catch (struct leh_state *state, gt
   tree out_label;
   gimple_seq new_seq, cleanup;
   gimple *x;
+  geh_dispatch *eh_dispatch;
   location_t try_catch_loc = gimple_location (tp);
+  location_t catch_loc = UNKNOWN_LOCATION;
 
   if (flag_exceptions)
 {
@@ -1776,7 +1773,8 @@ lower_catch (struct leh_state *state, gt
 return gimple_try_eval (tp);
 
   new_seq = NULL;
-  emit_eh_dispatch (&new_seq, try_region);
+  eh_dispatch = gimple_build_eh_dispatch (try_region->index);
+  gimple_seq_add_stmt (&new_seq, eh_dispatch);
   emit_resx (&new_seq, try_region);
 
   this_state.cur_region = state->cur_region;
@@ -1799,6 +1797,8 @@ lower_catch (struct leh_state *state, gt
   gimple_seq handler;
 
   catch_stmt = as_a  (gsi_stmt (gsi));
+  if (catch_loc == UNKNOWN_LOCATION)
+	catch_loc = gimple_location (catch_stmt);
   c = gen_eh_region_catch (try_region, gimple_catch_types (catch_stmt));
 
   handler = gimple_catch_handler (catch_stmt);
@@ -1822,6 +1822,10 @@ lower_catch (struct leh_state *state, gt
 	break;
 }
 
+  /* Try to set a location on the dispatching construct to avoid inheriting
+ the location of the previous statement.  */
+  gimple_set_location (eh_dispatch, catch_loc);
+
   gimple_try_set_cleanup (tp, new_seq);
 
   gimple_seq new_eh_seq = eh_seq;
@@ -1857,11 +1861,13 @@ lower_eh_filter (struct leh_state *state
   if (!eh_region_may_contain_throw (this_region))
 return gimple_try_eval (tp);
 
-  new_seq = NULL;
   this_state.cur_region = state->cur_region;
   this_state.ehp_region = this_region;
 
-  emit_eh_dispatch (&new_seq, this_region);
+  new_seq = NULL;
+  x = gimple_build_eh_dispatch (this_region->index);
+  gimple_set_location (x, gimple_location (tp));
+  gimple_seq_add_stmt (&new_seq, x);
   emit_resx (&new_seq, this_region);
 
   this_region->u.allowed.label = create_artificial_label (UNKNOWN_LOCATION);
@@ -3752,6 +3758,7 @@ lower_eh_dispatch (basic_block src, geh_
 	filter = cre

Go patch committed: Include transitive imports in type descriptor list

2019-07-03 Thread Ian Lance Taylor
This patch to the Go frontend by Cherry Zhang includes transitive
imports in the type descriptor list.

In https://golang.org/cl/179598, we were using Gogo::packages_, when
compiling the main package, as the list of packages of which we need
to register the type descriptors.  This is not complete.  It only
includes main's direct import and one-level indirect imports.  It does
not include all the packages transitively imported.

To fix that, we need to track all the transitive imports.  We have
almost already done that, for init functions.  However, there may be
packages that don't need init functions but do need to register type
descriptors.  For them, we add a dummy init function to its export
data.  So when we compile the main package we will see all the
transitive imports.  The dummy init functions are not real functions
and are not called.

This fixes https://golang.org/issue/32901.

Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 272955)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-aebd2d6303e4bb970b088e84f6c66279095dfea6
+ae7d7e05bce19aefaa27efe2ee797933aafbef06
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/export.cc
===
--- gcc/go/gofrontend/export.cc (revision 272955)
+++ gcc/go/gofrontend/export.cc (working copy)
@@ -909,6 +909,8 @@ Export::populate_init_graph(Init_graph*
++p)
 {
   const Import_init* ii = *p;
+  if (ii->is_dummy())
+continue;
   std::map::const_iterator srcit =
   init_idx.find(ii->init_name());
   go_assert(srcit != init_idx.end());
@@ -1007,7 +1009,7 @@ Export::write_imported_init_fns(const st
 
   // Now add edges from the local init function to each of the
   // imported fcns.
-  if (!import_init_fn.empty())
+  if (!import_init_fn.empty() && import_init_fn[0] != '~')
 {
   unsigned src = 0;
   go_assert(init_idx[import_init_fn] == 0);
@@ -1016,6 +1018,8 @@ Export::write_imported_init_fns(const st
++p)
{
   const Import_init* ii = *p;
+  if (ii->is_dummy())
+continue;
  unsigned sink = init_idx[ii->init_name()];
  add_init_graph_edge(&init_graph, src, sink);
}
Index: gcc/go/gofrontend/gogo.cc
===
--- gcc/go/gofrontend/gogo.cc   (revision 272608)
+++ gcc/go/gofrontend/gogo.cc   (working copy)
@@ -724,6 +724,9 @@ Gogo::init_imports(std::vectorimported_init_fns_.end();
++p)
 {
+  // Don't include dummy inits. They are not real functions.
+  if ((*p)->is_dummy())
+continue;
   if ((*p)->priority() < 0)
go_error_at(Linemap::unknown_location(),
"internal error: failed to set init priority for %s",
@@ -941,7 +944,7 @@ Gogo::build_type_descriptor_list()
   Btype* bat = list_type->field(1)->type()->get_backend(this);
 
   // Create the variable
-  std::string name = this->type_descriptor_list_symbol(this->package_);
+  std::string name = this->type_descriptor_list_symbol(this->pkgpath_symbol());
   Bvariable* bv = this->backend()->implicit_variable(name, name, bt,
  false, true, false,
  0);
@@ -986,20 +989,29 @@ Gogo::register_type_descriptors(std::vec
   Struct_type* list_type = type_descriptor_list_type(1);
   Btype* bt = list_type->get_backend(this);
 
+  // Collect type lists from transitive imports.
+  std::vector list_names;
+  for (Import_init_set::iterator it = this->imported_init_fns_.begin();
+   it != this->imported_init_fns_.end();
+   ++it)
+{
+  std::string pkgpath =
+this->pkgpath_from_init_fn_name((*it)->init_name());
+  list_names.push_back(this->type_descriptor_list_symbol(pkgpath));
+}
+  // Add the main package itself.
+  list_names.push_back(this->type_descriptor_list_symbol("main"));
+
   // Build a list of lists.
   std::vector indexes;
   std::vector vals;
   unsigned long i = 0;
-  for (Packages::iterator it = this->packages_.begin();
-   it != this->packages_.end();
-   ++it)
+  for (std::vector::iterator p = list_names.begin();
+   p != list_names.end();
+   ++p)
 {
-  if (it->second->pkgpath() == "unsafe")
-continue;
-
-  std::string name = this->type_descriptor_list_symbol(it->second);
   Bvariable* bv =
-this->backend()->implicit_variable_reference(name, name, bt);
+this->backend()->implicit_variable_reference(*p, *p, bt);
   Bexpression* bexpr = this->backend()->var_expression(bv, builtin_loc);
   bexpr = this->backend()->address_expression(bexpr, builtin_loc

Re: [Mingw-w64-public] Fwd: [patch] Reimplement GNU threads library on native Windows

2019-07-03 Thread Liu Hao
在 2019/7/2 下午8:27, Jonathan Wakely 写道:
> 
> What do you mean by "unclosed thread"? If I read it correctly, the MSDN
> page
> refers to closing a handle (which makes sense), not closing a thread.
> 

Yes, it meant a thread which has terminated but not deleted due to some
handles left open.


>> This could also mean that there is no effect way to denote a thread
>> uniquely. As a consequence libstdc++ may have to its own bookkeeping
>> mechanism.
> 
> As I said in my last mail, libstdc++ does not need a way to denote a
> thread uniquely.
> 

At my last glance at the `__gthread_` interfaces, libstdc++ requires
thread IDs to be LessThanComparable, which would require retrieval of
thread IDs by handle, as in `__gthread_equal()`.

More than that, if my previous vision was correct (a terminated thread
has no ID associated) then `GetThreadId()` on a thread that has
terminated would not return a valid thread ID. Fortunately, this seems
not the case:

```c
#include 
#include 

DWORD __stdcall ThreadProc(void* pParam)
  {
printf("thread %lu running\n", GetCurrentThreadId());
return 0;
  }

int main(void)
  {
HANDLE hThread = CreateThread(0, 0, ThreadProc, 0, CREATE_SUSPENDED, 0);
printf("thread %lu created\n", GetThreadId(hThread));

ResumeThread(hThread);
WaitForSingleObject(hThread, INFINITE);
printf("thread %lu terminated\n", GetThreadId(hThread));

CloseHandle(hThread);
// `hThread` is now invalid; DO NOT PLAY WITH THIS AT HOME!
printf("thread %lu closed\n", GetThreadId(hThread));
  }
```

This program outputs

```text
E:\Desktop>gcc test.c -Wall -Wextra -Wpedantic && a.exe
test.c: In function 'ThreadProc':
test.c:4:34: warning: unused parameter 'pParam' [-Wunused-parameter]
4 | DWORD __stdcall ThreadProc(void* pParam)
  |~~^~
thread 9172 created
thread 9172 running
thread 9172 terminated
thread 0 closed

E:\Desktop>
```

Despite Microsoft's documentation, the identifier of a thread seems
uncollected as long as there are still handles to the thread. So it
might be safe to assume that the identifier of an `std::thread` *cannot*
be reused before it is `join()`'d or `detach()`'d which closes the
handle stored in the `std::thread` object.


-- 
Best regards,
LH_Mouse



signature.asc
Description: OpenPGP digital signature


Re: [PATCH][ARM] Add support for "noinit" attribute

2019-07-03 Thread Christophe Lyon
On Wed, 3 Jul 2019 at 11:51, Richard Earnshaw  wrote:
>
>
>
> On 02/07/2019 15:49, Christophe Lyon wrote:
> > On Tue, 2 Jul 2019 at 12:30, Richard Earnshaw  
> > wrote:
> >>
> >>
> >>
> >> On 02/07/2019 11:13, Richard Earnshaw wrote:
> >>>
> >>>
> >>> On 02/07/2019 09:39, Richard Earnshaw wrote:
> 
> 
>  On 01/07/2019 16:58, Kyrill Tkachov wrote:
> > Hi Christophe,
> >
> > On 6/13/19 4:13 PM, Christophe Lyon wrote:
> >> Hi,
> >>
> >> Similar to what already exists for TI msp430 or in TI compilers for
> >> arm, this patch adds support for "noinit" attribute for arm. It's very
> >> similar to the corresponding code in GCC for msp430.
> >>
> >> It is useful for embedded targets where the user wants to keep the
> >> value of some data when the program is restarted: such variables are
> >> not zero-initialized.It is mostly a helper/shortcut to placing
> >> variables in a dedicated section.
> >>
> >> It's probably desirable to add the following chunk to the GNU linker:
> >> diff --git a/ld/emulparams/armelf.sh b/ld/emulparams/armelf.sh
> >> index 272a8bc..9555cec 100644
> >> --- a/ld/emulparams/armelf.sh
> >> +++ b/ld/emulparams/armelf.sh
> >> @@ -10,7 +10,19 @@ OTHER_TEXT_SECTIONS='*(.glue_7t) *(.glue_7)
> >> *(.vfp11_veneer) *(.v4_bx)'
> >>   OTHER_BSS_SYMBOLS="${CREATE_SHLIB+PROVIDE (}__bss_start__ =
> >> .${CREATE_SHLIB+)};"
> >>   OTHER_BSS_END_SYMBOLS="${CREATE_SHLIB+PROVIDE (}_bss_end__ =
> >> .${CREATE_SHLIB+)}; ${CREATE_SHLIB+PROVIDE (}__bss_end__ =
> >> .${CREATE_SHLIB+)};"
> >>   OTHER_END_SYMBOLS="${CREATE_SHLIB+PROVIDE (}__end__ =
> >> .${CREATE_SHLIB+)};"
> >> -OTHER_SECTIONS='.note.gnu.arm.ident 0 : { KEEP
> >> (*(.note.gnu.arm.ident)) }'
> >> +OTHER_SECTIONS='
> >> +.note.gnu.arm.ident 0 : { KEEP (*(.note.gnu.arm.ident)) }
> >> +  /* This section contains data that is not initialised during load
> >> + *or* application reset.  */
> >> +   .noinit (NOLOAD) :
> >> +   {
> >> + . = ALIGN(2);
> >> + PROVIDE (__noinit_start = .);
> >> + *(.noinit)
> >> + . = ALIGN(2);
> >> + PROVIDE (__noinit_end = .);
> >> +   }
> >> +'
> >>
> >> so that the noinit section has the "NOLOAD" flag.
> >>
> >> I'll submit that part separately to the binutils project if OK.
> >>
> >> OK?
> >>
> >
> > This is mostly ok by me, with a few code comments inline.
> >
> > I wonder whether this is something we could implement for all targets
> > in the midend, but this would require linker script support for the
> > target to be effective...
> 
>  Can't this be done using named sections?  If the sections were of the
>  form .bss. then it would be easy to make linker scripts do
>  something sane by default and users could filter them out to special
>  noinit sections if desired.
> 
> >>>
> >>> To answer my own question, it would appear to be yes.  You can write 
> >>> today:
> >>>
> >>> int xxx __attribute__ ((section (".bss.noinit")));
> >>>
> >>> int main ()
> >>> {
> >>> return xxx;
> >>> }
> >>>
> >>> And the compiler will generate
> >>>   .section.bss.noinit,"aw",@nobits
> >>>   .align 4
> >>>   .typexxx, @object
> >>>   .sizexxx, 4
> >>> xxx:
> >>>   .zero4
> >>>
> >>> So at this point, all you need is a linker script to filter .bss.noinit
> >>> into your special part of the final image.
> >>>
> >>> This will most likely work today on any GCC target that supports named
> >>> sections, which is pretty much all of them these days.
> >>>
> >>
> >> Alternatively, we already have:
> >>
> >> https://gcc.gnu.org/ml/gcc-patches/2014-09/msg01302.html
> >>
> >> R.
> >>
> >
> > Hi Richard,
> >
> > Indeed this can already be achieved with the "section" attribute as you 
> > propose.
> >
> > The motivation for this patch came from user requests: this feature is
> > already available in some proprietary ARM toolchains (TI, IAR, ...)
> > and it's more convenient for the end-user than having to update linker
> > scripts in addition to adding an attribute to the variable.
> >
>
> ? Your patch has an update to the linker scripts...

Right, but that becomes a feature of the toolchain, rather than having
to edit/create linker scripts for every application.

> > I guess it's a balance between user-friendliness/laziness and GCC
> > developers ability to educate users :-)
>
> Well in that case, this should be done generically, not in just the arm
> backend, or any other backend for that matter.
>

I thought it would be less controversial to mimic msp430, it seems I
was wrong  :)

I'm going to have a look at making this generic, then.

Christophe

> R.
>
> >
> > Christophe
> >
> >
> >>> R.
> >>>
>  R.
> 
> >
> > Thanks,
> >
> > Kyrill
> >
> >> Thanks,
> >>
> >> Christophe
> >
> > diff

Re: [PATCH] Add dbgcnt for gimple_match and generic_match.

2019-07-03 Thread Richard Biener
On Wed, Jul 3, 2019 at 2:39 PM Martin Liška  wrote:
>
> On 7/3/19 1:48 PM, Richard Biener wrote:
> > On Wed, Jul 3, 2019 at 12:39 PM Martin Liška  wrote:
> >>
> >> Hi.
> >>
> >> The patch is about dbgcnt support for match in GIMPLE and GENERIC.
> >> It's handy for isolation of a problem.
> >>
> >> Ready to be installed after it survives tests?
> >
> > Hmm, I think we only want this on (simplify...), not (match...), no?
> > Thus simply move the code into the preceeding if() body.
>
> I've done that with s->kind == simplify::SIMPLIFY. Would it be fine?
>
> >
> > I'd also merge gimple and generic_match counters since passes
> > happen to use both.
>
> Works for me.

OK.

Thanks,
Richard.

> Martin
>
> >
> > OK with that changes.
> > Richard.
> >
> >> Thanks,
> >> Martin
> >>
> >> gcc/ChangeLog:
> >>
> >> 2019-07-03  Martin Liska  
> >>
> >> * dbgcnt.def (DEBUG_COUNTER): Add gimple_match and
> >> generic_match.
> >> * genmatch.c (dt_simplify::gen_1): Generate dbgcnt
> >> condition.
> >> * generic-match-head.c: Include dbgcnt.h.
> >> * gimple-match-head.c: Likewise.
> >> ---
> >>  gcc/dbgcnt.def   | 2 ++
> >>  gcc/generic-match-head.c | 2 +-
> >>  gcc/genmatch.c   | 5 +
> >>  gcc/gimple-match-head.c  | 2 +-
> >>  4 files changed, 9 insertions(+), 2 deletions(-)
> >>
> >>
>


Re: [PATCH 2/2] Rename SINGE_VALUE to TOPN_VALUES counters.

2019-07-03 Thread Martin Liška
On 7/3/19 11:09 AM, Jan Hubicka wrote:
> OK,
> I would rename the __gcov_topn_values_profiler to _v2 since we had this
> function before.

It's bit tricky, but we hadn't because I named that *_values_*. We used to
have *_value_* :)

Martin


Re: Enable nonoverallping_component_refs even after the base pointers are equivalent

2019-07-03 Thread Richard Biener
On Tue, 2 Jul 2019, Jan Hubicka wrote:

> Hi,
> this patch adds the shortcut for must aliases discussed earlier and enables
> access path even if bases are proved to be equivalent - it could still do
> useful job for arrays etc.
> 
> tramp3d stats go from:
> 
> Alias oracle query stats:
>   refs_may_alias_p: 4421560 disambiguations, 4781790 queries
>   ref_maybe_used_by_call_p: 6790 disambiguations, 4447962 queries
>   call_may_clobber_ref_p: 883 disambiguations, 883 queries
>   nonoverlapping_component_refs_p: 0 disambiguations, 9272 queries
>   nonoverlapping_component_refs_since_match_p: 31 disambiguations, 39391 
> queries
>   aliasing_component_refs_p: 918 disambiguations, 30889 queries
>   TBAA oracle: 1924468 disambiguations 3851145 queries
>774336 are in alias set 0
>714019 queries asked about the same object
>0 queries asked about the same alias set
>0 access volatile
>282546 are dependent in the DAG
>155776 are aritificially in conflict with void *
> 
> to
> 
> Alias oracle query stats:
>   refs_may_alias_p: 4421611 disambiguations, 4781828 queries
>   ref_maybe_used_by_call_p: 6790 disambiguations, 4448013 queries
>   call_may_clobber_ref_p: 883 disambiguations, 883 queries
>   nonoverlapping_component_refs_p: 0 disambiguations, 8964 queries
>   nonoverlapping_component_refs_since_match_p: 66 disambiguations, 18470 
> queries
>   aliasing_component_refs_p: 918 disambiguations, 30371 queries
>   TBAA oracle: 1924492 disambiguations 3849967 queries
>774336 are in alias set 0
>714095 queries asked about the same object
>0 queries asked about the same alias set
>0 access volatile
>281268 are dependent in the DAG
>155776 are aritificially in conflict with void *
> 
> PTA query stats:
>   pt_solution_includes: 906632 disambiguations, 1214744 queries
>   pt_solutions_intersect: 121330 disambiguations, 553172 queries
> 
> So twice as many nonoverlapping_component_refs_since_match_p disambiguations,
> half of querries.
> 
> We can miss some of disambiguations where addresses are same but types
> are different, but I think those are not useful - this is the case where
> memory type was dynamically changed.  If we walk with TBAA enabled and
> see the prevoius use, we got kind of lost anyway and propagating even
> older values seems to have no use.
> 
> Note that i tried to implement ranges_must_overlap_p predicate which could
> save us from more tests but got lost in polyints and I think it is not worth
> the effort since these cases are quite borderline.
> 
> Similar test would make sense in aliasing_component_refs_p but I think
> it may make more sense to reorder the tests there. Right now we do:
> 
>   get_ref_base_and_extent (match2, &offadj, &sztmp, &msztmp, &reverse);
>   offset2 -= offadj;
>   get_ref_base_and_extent (match1, &offadj, &sztmp, &msztmp, &reverse);
>   offset1 -= offadj;
>   if (!ranges_maybe_overlap_p (offset1, max_size1, offset2, max_size2))
> 
> First I think that one get_ref_base_and_extent is always redundant since 
> either
> match1 or match2 is base so we could pass it down.
> 
> However it seems to me that perhaps doing
> nonoverlapping_component_refs_since_match_p would be cheaper and one can do 
> the
> range check only after this one returns -1 saving quite many
> get_ref_base_and_extent calls.

Yeah, that sounds worthwhile (this function is the worst offender
compile-time wise)

> Bootstrapped/regtested x86_64-linux, OK?
> 
> Honza
> 
> Index: testsuite/gcc.dg/tree-ssa/alias-access-path-3.c
> ===
> --- testsuite/gcc.dg/tree-ssa/alias-access-path-3.c   (nonexistent)
> +++ testsuite/gcc.dg/tree-ssa/alias-access-path-3.c   (working copy)
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-fre1" } */
> +struct a {int v1;
> +   int v2;};
> +struct b {struct a a[0];};
> +
> +int
> +test (struct b *bptr1, struct b *bptr2, int i, int j)
> +{
> +  bptr1->a[i].v1=123;
> +  bptr2->a[j].v2=1;
> +  return bptr1->a[i].v1;
> +}
> +int
> +test2 (struct b *bptr1, struct b *bptr2, int i, int j)
> +{
> +  bptr1->a[i].v1=123;
> +  bptr2->a[j].v1=1;
> +  return bptr1->a[i].v1;
> +}
> +/* test should be optimized, while test2 should not.  */
> +/* { dg-final { scan-tree-dump-times "return 123" 1 "fre1"} } */
> Index: testsuite/gcc.dg/tree-ssa/alias-access-path-8.c
> ===
> --- testsuite/gcc.dg/tree-ssa/alias-access-path-8.c   (nonexistent)
> +++ testsuite/gcc.dg/tree-ssa/alias-access-path-8.c   (working copy)
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-fre3" } */
> +struct a {
> +  int val;
> +};
> +struct b {
> +  struct a a[10],a2[10];
> +};
> +struct c {
> +  struct b b[10];
> +} *cptr,*cptr2;
> +
> +
> +int

Re: [PATCH] Add dbgcnt for gimple_match and generic_match.

2019-07-03 Thread Martin Liška
On 7/3/19 1:48 PM, Richard Biener wrote:
> On Wed, Jul 3, 2019 at 12:39 PM Martin Liška  wrote:
>>
>> Hi.
>>
>> The patch is about dbgcnt support for match in GIMPLE and GENERIC.
>> It's handy for isolation of a problem.
>>
>> Ready to be installed after it survives tests?
> 
> Hmm, I think we only want this on (simplify...), not (match...), no?
> Thus simply move the code into the preceeding if() body.

I've done that with s->kind == simplify::SIMPLIFY. Would it be fine?

> 
> I'd also merge gimple and generic_match counters since passes
> happen to use both.

Works for me.

Martin

> 
> OK with that changes.
> Richard.
> 
>> Thanks,
>> Martin
>>
>> gcc/ChangeLog:
>>
>> 2019-07-03  Martin Liska  
>>
>> * dbgcnt.def (DEBUG_COUNTER): Add gimple_match and
>> generic_match.
>> * genmatch.c (dt_simplify::gen_1): Generate dbgcnt
>> condition.
>> * generic-match-head.c: Include dbgcnt.h.
>> * gimple-match-head.c: Likewise.
>> ---
>>  gcc/dbgcnt.def   | 2 ++
>>  gcc/generic-match-head.c | 2 +-
>>  gcc/genmatch.c   | 5 +
>>  gcc/gimple-match-head.c  | 2 +-
>>  4 files changed, 9 insertions(+), 2 deletions(-)
>>
>>

>From 035a918ba10ad002ef1707e308143040311f707b Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Wed, 3 Jul 2019 12:37:06 +0200
Subject: [PATCH] Add dbgcnt for gimple_match and generic_match.

gcc/ChangeLog:

2019-07-03  Martin Liska  

	* dbgcnt.def (DEBUG_COUNTER): Add match debug counter.
	* genmatch.c (dt_simplify::gen_1): Generate dbgcnt
	condition.
	* generic-match-head.c: Include dbgcnt.h.
	* gimple-match-head.c: Likewise.
---
 gcc/dbgcnt.def   | 1 +
 gcc/generic-match-head.c | 2 +-
 gcc/genmatch.c   | 4 
 gcc/gimple-match-head.c  | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index dd874c519bb..230072f7bb5 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -195,3 +195,4 @@ DEBUG_COUNTER (tree_sra)
 DEBUG_COUNTER (vect_loop)
 DEBUG_COUNTER (vect_slp)
 DEBUG_COUNTER (dom_unreachable_edges)
+DEBUG_COUNTER (match)
diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 76fc9993481..b54e03552ba 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "case-cfn-macros.h"
 #include "gimplify.h"
 #include "optabs-tree.h"
-
+#include "dbgcnt.h"
 
 /* Routine to determine if the types T1 and T2 are effectively
the same for GENERIC.  If T1 or T2 is not a type, the test
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 7b9b09c7d8b..109bd9e6f2d 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -3310,6 +3310,10 @@ dt_simplify::gen_1 (FILE *f, int indent, bool gimple, operand *result)
 	}
 }
 
+  if (s->kind == simplify::SIMPLIFY)
+fprintf_indent (f, indent, "if (__builtin_expect (!dbg_cnt (match), 0)) return %s;\n",
+		gimple ? "false" : "NULL_TREE");
+
   fprintf_indent (f, indent, "if (__builtin_expect (dump_file && (dump_flags & TDF_FOLDING), 0)) "
 	   "fprintf (dump_file, \"%s ",
 	   s->kind == simplify::SIMPLIFY
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index f83f2256178..df9f0c50590 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -42,7 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "optabs-tree.h"
 #include "tree-eh.h"
-
+#include "dbgcnt.h"
 
 /* Forward declarations of the private auto-generated matchers.
They expect valueized operands in canonical order and do not
-- 
2.22.0



Re: [PATCH,RFC,V3 0/5] Support for CTF in GCC

2019-07-03 Thread Richard Biener
On Wed, Jul 3, 2019 at 5:18 AM Jeff Law  wrote:
>
> On 7/2/19 11:54 AM, Indu Bhagat wrote:
> > Ping.
> > Can someone please review these patches ? We would like to get the
> > support for CTF integrated soon.
> I'm not sure there's really even consensus that we want CTF support in
> GCC.  Though I think that the changes you've made in the last several
> weeks do make it somewhat more palatable.  But ultimately the first step
> is to get that consensus.
>
> I'd hazard a guess that Jakub in particular isn't on board as he's been
> pushing to some degree for post-processing or perhaps doing it via a
> plug in.
>
> Richi has been guiding you a bit through how to make the changes easier
> to integrate, but I haven't seen him state one way or the other his
> preference on whether or not CTF support is something we want.

I'm mostly worried about the lack of a specification and the appearant
restriction on a subset of C (the patches have gcc_unreachable ()
in paths that can be reached by VECTOR_TYPE or COMPLEX_TYPE
not to mention FIXED_POINT_TYPE, etc...).

While CTF might be easy and fast to parse and small I fear it will
go the STABS way of being not extensible and bitrotten.

Given it appears to generate only debug info for symbols and no locations
or whatnot it should be sufficient to introspect the compilation to generate
the CTF info on the side and then merge it in at link-time.  Which makes
me wonder if this shouldn't be a plugin for now until it is more complete
and can be evaluated better (comments in the patches indicate even the
on-disk format is in flux?).  Adding plugin hook invocations to the three
places the CTF info generation hooks off should be easy.

That said, the patch series isn't ready for integration since it will
crash left and right -- did you bootstrap and run the testsuite
with -gt?

Richard.

> I'm hesitant to add CTF support in GCC, but can understand how it might
> be useful given the kernel's aversion to everything dwarf.  But if the
> kernel is the primary consumer than I'd lean towards post-processing.
>
> Jeff
>


Re: [PATCH v2] S/390: Improve storing asan frame_pc

2019-07-03 Thread Segher Boessenkool
Hi Ilya,

This looks great, thanks!  You'll an okay from a global maintainer though,
or of all affected maintainers separately.


Segher


On Tue, Jul 02, 2019 at 05:34:07PM +0200, Ilya Leoshkevich wrote:
> Bootstrap and regtest running on x86_64-redhat-linux, s390x-redhat-linux
> and ppc64le-redhat-linux.
> 
> Currently s390 emits the following sequence to store a frame_pc:
> 
>   a:
>   .LASANPC0:
> 
>   lg  %r1,.L5-.L4(%r13)
>   la  %r1,0(%r1,%r12)
>   stg %r1,176(%r11)
> 
>   .L5:
>   .quad   .LASANPC0@GOTOFF
> 
> The reason GOT indirection is used instead of larl is that gcc does not
> know that .LASANPC0, being a code label, is aligned on a 2-byte
> boundary, and larl can load only even addresses.
> 
> This patch provides such an alignment hint.  Since targets don't provide
> their instruction alignments yet, the new macro is introduced for that
> purpose.  It returns 1-byte alignment by default, so this change is a
> no-op for targets other than s390.
> 
> As a result, we get the desired:
> 
>   larl%r1,.LASANPC0
>   stg %r1,176(%r11)
> 
> gcc/ChangeLog:
> 
> 2019-06-28  Ilya Leoshkevich  
> 
>   * asan.c (asan_emit_stack_protection): Provide an alignment
>   hint.
>   * config/s390/s390.h (CODE_LABEL_BOUNDARY): Specify that s390
>   requires code labels to be aligned on a 2-byte boundary.
>   * defaults.h (CODE_LABEL_BOUNDARY): New macro.
>   * doc/tm.texi: Document CODE_LABEL_BOUNDARY.
>   * doc/tm.texi.in: Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-06-28  Ilya Leoshkevich  
> 
>   * gcc.target/s390/asan-no-gotoff.c: New test.
> ---
>  gcc/asan.c |  1 +
>  gcc/config/s390/s390.h |  3 +++
>  gcc/defaults.h |  5 +
>  gcc/doc/tm.texi|  4 
>  gcc/doc/tm.texi.in |  4 
>  gcc/testsuite/gcc.target/s390/asan-no-gotoff.c | 15 +++
>  6 files changed, 32 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/s390/asan-no-gotoff.c
> 
> diff --git a/gcc/asan.c b/gcc/asan.c
> index 605d04f87f7..2db69f476bc 100644
> --- a/gcc/asan.c
> +++ b/gcc/asan.c
> @@ -1523,6 +1523,7 @@ asan_emit_stack_protection (rtx base, rtx pbase, 
> unsigned int alignb,
>DECL_INITIAL (decl) = decl;
>TREE_ASM_WRITTEN (decl) = 1;
>TREE_ASM_WRITTEN (id) = 1;
> +  SET_DECL_ALIGN (decl, CODE_LABEL_BOUNDARY);
>emit_move_insn (mem, expand_normal (build_fold_addr_expr (decl)));
>shadow_base = expand_binop (Pmode, lshr_optab, base,
> gen_int_shift_amount (Pmode, ASAN_SHADOW_SHIFT),
> diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
> index 969f58a2ba0..3d0266c9dff 100644
> --- a/gcc/config/s390/s390.h
> +++ b/gcc/config/s390/s390.h
> @@ -334,6 +334,9 @@ extern const char *s390_host_detect_local_cpu (int argc, 
> const char **argv);
>  /* Allocation boundary (in *bits*) for the code of a function.  */
>  #define FUNCTION_BOUNDARY 64
>  
> +/* Alignment required for a code label, in bits.  */
> +#define CODE_LABEL_BOUNDARY 16
> +
>  /* There is no point aligning anything to a rounder boundary than this.  */
>  #define BIGGEST_ALIGNMENT 64
>  
> diff --git a/gcc/defaults.h b/gcc/defaults.h
> index af7ea185f1e..97c4c17537d 100644
> --- a/gcc/defaults.h
> +++ b/gcc/defaults.h
> @@ -1459,4 +1459,9 @@ see the files COPYING3 and COPYING.RUNTIME 
> respectively.  If not, see
>  #define DWARF_GNAT_ENCODINGS_DEFAULT DWARF_GNAT_ENCODINGS_GDB
>  #endif
>  
> +/* Alignment required for a code label, in bits.  */
> +#ifndef CODE_LABEL_BOUNDARY
> +#define CODE_LABEL_BOUNDARY BITS_PER_UNIT
> +#endif
> +
>  #endif  /* ! GCC_DEFAULTS_H */
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 14c1ea6a323..3b50fc0c0a7 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -1019,6 +1019,10 @@ to a value equal to or larger than 
> @code{STACK_BOUNDARY}.
>  Alignment required for a function entry point, in bits.
>  @end defmac
>  
> +@defmac CODE_LABEL_BOUNDARY
> +Alignment required for a code label, in bits.
> +@end defmac
> +
>  @defmac BIGGEST_ALIGNMENT
>  Biggest alignment that any data type can require on this machine, in
>  bits.  Note that this is not the biggest alignment that is supported,
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index b4d57b86e2f..ab038b7462c 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -969,6 +969,10 @@ to a value equal to or larger than @code{STACK_BOUNDARY}.
>  Alignment required for a function entry point, in bits.
>  @end defmac
>  
> +@defmac CODE_LABEL_BOUNDARY
> +Alignment required for a code label, in bits.
> +@end defmac
> +
>  @defmac BIGGEST_ALIGNMENT
>  Biggest alignment that any data type can require on this machine, in
>  bits.  Note that this is not the biggest alignment that is supported,
> diff --git a

Re: [PATCH V3] PR88497 - Extend reassoc for vector bit_field_ref

2019-07-03 Thread Richard Biener
On Wed, 3 Jul 2019, Kewen.Lin wrote:

> Hi Richard,
> 
> Thanks very much for reviewing my patch.  I'll update it as your comments.
> Before sending the next version, I've several questions embedded for further 
> check.
> 
> on 2019/7/2 下午8:43, Richard Biener wrote:
> > On Wed, 20 Mar 2019, Kewen.Lin wrote:
> > 
> >> +/* { dg-require-effective-target vect_double } */
> >> +/* { dg-require-effective-target powerpc_vsx_ok { target { powerpc*-*-* } 
> >> } } */
> >> +/* { dg-options "-O2 -ffast-math" } */
> >> +/* { dg-options "-O2 -ffast-math -mvsx -fdump-tree-reassoc1" { target { 
> >> powerpc*-*-* } } } */
> > 
> > Use
> > 
> > /* { dg-additional-options "-mvsx" { target { powerpc...
> > 
> > that saves duplicate typing.  I guess that x86_64/i?86 also works
> > if you enable SSE2 - can you check that and do adjustments accordingly?
> > 
> 
> OK, I'll learn SSE2 and update it. 

I think most testcases should just pass with SSE2.

> >> +/* Hold the information of one specific VECTOR_TYPE SSA_NAME.
> >> +- offsets: for different BIT_FIELD_REF offsets accessing same VECTOR.
> >> +- ops_indexes: the index of vec ops* for each relavant BIT_FIELD_REF. 
> >>  */
> >> +struct v_info
> >> +{
> >> +  auto_vec offsets;
> > 
> > with SVE this probably needs to be poly_int64 or so
> > 
> 
> OK, will extend it to poly_int64 (can it be negative? or poly_uint64 better?)
> 
> >> +  auto_vec ops_indexes;
> >> +};
> > 
> > To have less allocations you could use a
> > 
> >  auto_vec, 32> elements;
> > 
> > or even
> > 
> >  hash_map > >
> > 
> > where you use .release() in the cleanup and .create (TYPE_VECTOR_SUBPARTS 
> > (vector_type)) during collecting and then can use quick_push ()
> > (ah, no - duplicates...).
> > 
> 
> Good idea!
> 
> >> +
> >> +typedef struct v_info *v_info_ptr;
> >> +
> >> +/* Comparison function for qsort on unsigned BIT_FIELD_REF offsets.  */
> >> +static int
> >> +unsigned_cmp (const void *p_i, const void *p_j)
> >> +{
> >> +  if (*(const unsigned HOST_WIDE_INT *) p_i
> >> +  >= *(const unsigned HOST_WIDE_INT *) p_j)
> >> +return 1;
> >> +  else
> >> +return -1;
> > 
> > That's an issue with some qsort implementations comparing
> > an element against itself.
> > 
> > I think so you should add the equality case.
> > 
> 
> The equality case seems already involved in ">=".
> Do you mean that I need to make it equality case explicitly? 
> Or return zero for "=="? like:
> 
>
>const unsigned HOST_WIDE_INT val_i = *(const unsigned HOST_WIDE_INT *) p_i;
>const unsigned HOST_WIDE_INT val_j = *(const unsigned HOST_WIDE_INT *) p_j;
>if (val_i != val_j)
>  return val_i > val_j? 1: -1;
>else
>  return 0;

Yes.  It needs to return zero, otherwise some qsort will endlessly
swap two same elements.

> >> +
> >> +   TODO:
> >> +1) The current implementation restrict all candidate VECTORs should 
> >> have
> >> +   the same VECTOR type, but it can be extended into different groups 
> >> by
> >> +   VECTOR types in future if any profitable cases found.
> >> +2) The current implementation requires the whole VECTORs should be 
> >> fully
> >> +   covered, but it can be extended to support partial, checking 
> >> adjacent
> >> +   but not fill the whole, it may need some cost model to define the
> >> +   boundary to do or not.
> >> +
> 
> >> +  tree elem_type = TREE_TYPE (vec_type);
> >> +  unsigned HOST_WIDE_INT size = TREE_INT_CST_LOW (TYPE_SIZE 
> >> (elem_type));
> >> +  if (size != TREE_INT_CST_LOW (op1))
> > 
> >   if (!tree_int_cst_equal (TYPE_SIZE (elem_type), op1))
> > 
> > avoids some of the TREE_INT_CST_LOW we like to avoid.
> > 
> > You miss a check for op2 % op1 being zero.  Given you store a 
> > HOST_WIDE_INT offset you also want to check for INTEGER_CST op1/op2
> > (beware of SVE...).
> 
> OK, thanks!  For op2 % op1 == zero, I thought it's a must for granted, I'll 
> fix it.
> I think it can be constructed in IR artificially, but since I have almost 
> none knowledge 
> on other targets vector support, I'm curious that does it exist in real world 
> codes?

BIT_FIELD_REF is quite unconstrained, you could build a testcase
for the GIMPLE frontend quite easily.  Note that the first reassoc
runs before vector lowering so vector code written via vector
extensions does not necessarily have target support but will be lowered
later.

> btw, BIT_FIELD_REF in tree.def says the op1/op2 is constant, it looks need to 
> be updated
> due to SVE?

A POLY_CONST_INT is also "constant" in some sense ;)
 
> > 
> > There's also a poly-int friendly multiple_p predicate so you could
> > have the initial checks SVE friendly but bail out on non-INTEGER_CST
> > offset later.
> > 
> 
> Got it, thanks!
> 
> > 
> > Since you are using a hashtable keyed off SSA name pointers code
> > generation will depend on host addresses here if you consider
> > there's one mismatching vector type that might become ref_vec
> > dependent on the order of elem

Re: [range-ops] patch 01/04: types for VR_UNDEFINED and VR_VARYING

2019-07-03 Thread Aldy Hernandez

On 7/3/19 7:08 AM, Richard Biener wrote:

On Wed, Jul 3, 2019 at 11:19 AM Aldy Hernandez  wrote:




On 7/3/19 4:28 AM, Richard Biener wrote:

On Mon, Jul 1, 2019 at 10:52 AM Aldy Hernandez  wrote:


As discussed before, this enforces types on undefined and varying, which
makes everything more regular, and removes some special casing
throughout range handling.


I don't like it too much given you need to introduce that "cache".

Why do VARYING or UNDEFINED need a type?  Nobody should ever need
to ask for the type of a range anyhow - the type should be always that from
the context we're looking at (the SSA name the range is associated with,
the operation we are performing on one or two ranges, etc.).

Thinking again about this it looks fundamentally wrong to associate
a type with the VARYING or UNDEFINED lattice states.


We discussed this 2 weeks ago, and it was my understanding that we had
an reached an agreement on the general approach.  Types on varying and
undefined was the *first* thing I brought up.  Explanation quoted below.


Yes, and I asked how you handled that const static node for VARYING
which you answered, well - we could do some caching per type and I
replied I didn't like that very much.

So - I see why it might be "convenient" but I still see no actual
technical requirement to have a type for them.  I see you have
canonicalization for symbolic ranges to integer ranges so
you can have one for varying/undefined to integer ranges as well;
just make that canonicalization take a type argument.


By the way, the type for varying/undefined requires no space in the
value_range_base structure, as it is kept in the min/max fields which we
already use for VR_RANGE/VR_ANTI_RANGE.


You can as well populate those with actual canonical integer range values
then.  [MIN, MAX] and [MAX, MIN] (or whatever we'd consider canonical for
the empty range).

But as said, point me to the place where you need the type of VARYING.
It should already exist since the current code does away without.

I refuse to uglify the current VRP with a not needed type-indexed cache
for VARYING nodes just to make ranger intergation more happy.   Just
ignore that extra 'type' argument in the ranger API then?


Ok, I see.  Your main beef is with the type cache.

How about we keep VARYING and UNDEFINED typeless until right before we 
call into the ranger.  At which point, we have can populate min/max 
because we have the tree_code and the type handy.  So right before we 
call into the ranger do:


if (varying_p ())
  foo->set_varying(TYPE);

This would avoid the type cache, and keep the ranger happy.

Another option, as you've hinted, would be to normalize VARYING into 
[MIN, MAX] before calling into the ranger.  The problem with this 
approach is that we would then need to change varying_p() to something like:


value_range_base::varying_p()
{
  return (m_kind == VR_VARYING ||
  (vrp_val_is_min (m_min) && vrp_val_is_max (m_max));
}

Thus slowing everyone down (remember both range-ops and tree-vrp will 
share the implementation for varying_p).  Plus, I'd prefer to keep one 
representation for VARYING, that is m_kind == VR_VARYING.


Perhaps this last alternative would be more consistent-- never allowing 
types for VARYING, at the expense of the calls to vrp_val_is*.


Thoughts?

Aldy


Re: [PATCH] Add dbgcnt for gimple_match and generic_match.

2019-07-03 Thread Richard Biener
On Wed, Jul 3, 2019 at 12:39 PM Martin Liška  wrote:
>
> Hi.
>
> The patch is about dbgcnt support for match in GIMPLE and GENERIC.
> It's handy for isolation of a problem.
>
> Ready to be installed after it survives tests?

Hmm, I think we only want this on (simplify...), not (match...), no?
Thus simply move the code into the preceeding if() body.

I'd also merge gimple and generic_match counters since passes
happen to use both.

OK with that changes.
Richard.

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> 2019-07-03  Martin Liska  
>
> * dbgcnt.def (DEBUG_COUNTER): Add gimple_match and
> generic_match.
> * genmatch.c (dt_simplify::gen_1): Generate dbgcnt
> condition.
> * generic-match-head.c: Include dbgcnt.h.
> * gimple-match-head.c: Likewise.
> ---
>  gcc/dbgcnt.def   | 2 ++
>  gcc/generic-match-head.c | 2 +-
>  gcc/genmatch.c   | 5 +
>  gcc/gimple-match-head.c  | 2 +-
>  4 files changed, 9 insertions(+), 2 deletions(-)
>
>


Re: [patch] Fix debug info for discriminated record types

2019-07-03 Thread Richard Biener
On Wed, Jul 3, 2019 at 12:12 PM Eric Botcazou  wrote:
>
> Hi,
>
> this is a regression present on the mainline and 9 branch: since
>   https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01066.html
> the debug info emitted in Ada with -fgnat-encodings=minimal for discriminated
> record types containing an array component whose bound is a discriminant is
> incomplete, i.e. the DW_AT_*_bound attribute of the range subtype is missing.
>
> Tested on x86_64-suse-linux, OK for mainline and 9 branch?

OK.

Richard.

>
> 2019-07-03  Eric Botcazou  
>
> * dwarf2out.c (add_scalar_info): Add back refererence to existing DIE
> if it has the DW_AT_data_member_location attribute.
>
>
> 2019-07-03  Eric Botcazou  
>
> * gnat.dg/specs/debug1.ads: New test.
>
> --
> Eric Botcazou


Re: [SVE] [fwprop] PR88833 - Redundant moves for WHILELO-based loops

2019-07-03 Thread Richard Sandiford
Prathamesh Kulkarni  writes:
> On Tue, 2 Jul 2019 at 18:22, Richard Sandiford
>  wrote:
>>
>> Prathamesh Kulkarni  writes:
>> > On Tue, 2 Jul 2019 at 16:59, Richard Sandiford
>> >  wrote:
>> >>
>> >> Thanks for fixing this.
>> >>
>> >> Prathamesh Kulkarni  writes:
>> >> > diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
>> >> > index 89a46a933fa..79bd0cfbd28 100644
>> >> > --- a/gcc/simplify-rtx.c
>> >> > +++ b/gcc/simplify-rtx.c
>> >> > @@ -6697,6 +6697,19 @@ simplify_subreg (machine_mode outermode, rtx op,
>> >> >   }
>> >> >  }
>> >> >
>> >> > +  /* If op is a vector comparison operator, rewrite it in a new mode.
>> >> > + This probably won't match, but may allow further simplifications.
>> >> > + Also check if number of elements and size of each element
>> >> > + match for outermode and innermode.  */
>> >> > +
>> >>
>> >> Excess blank line after the comment.  IMO the second part of the comment
>> >> reads too much like an afterthought.  How about:
>> >>
>> >>   /* If OP is a vector comparison and the subreg is not changing the
>> >>  number of elements or the size of the elements, change the result
>> >>  of the comparison to the new mode.  */
>> >>
>> >> > +  if (COMPARISON_P (op)
>> >> > +  && VECTOR_MODE_P (outermode)
>> >> > +  && VECTOR_MODE_P (innermode)
>> >> > +  && (known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS 
>> >> > (innermode)))
>> >> > +  && (known_eq (GET_MODE_UNIT_SIZE (outermode),
>> >> > + GET_MODE_UNIT_SIZE (innermode
>> >>
>> >> Redundant brackets around the known_eq calls.
>> >>
>> >> > +return gen_rtx_fmt_ee (GET_CODE (op), outermode, XEXP (op, 0), 
>> >> > XEXP (op, 1));
>> >>
>> >> This should use simplify_gen_relational, so that we try to simplify
>> >> the new expression.
>> > Does the attached version look OK ?
>>
>> OK with a suitable changelog (remember to post those!) if it passes testing.
> The change to simplify_subreg regressed avx2-pr54700-1.C -;)
>
> For following test-case:
> __attribute__((noipa)) __v8sf
> f7 (__v8si a, __v8sf b, __v8sf c)
> {
>   return a < 0 ? b : c;
> }
>
> Before patch, combine shows:
> Trying 8, 9 -> 10:
> 8: r87:V8SI=const_vector
> 9: r89:V8SI=r87:V8SI>r90:V8SI
>   REG_DEAD r90:V8SI
>   REG_DEAD r87:V8SI
>10: r86:V8SF=unspec[r92:V8SF,r91:V8SF,r89:V8SI#0] 104
>   REG_DEAD r92:V8SF
>   REG_DEAD r89:V8SI
>   REG_DEAD r91:V8SF
> Successfully matched this instruction:
> (set (reg:V8SF 86)
> (unspec:V8SF [
> (reg:V8SF 92)
> (reg:V8SF 91)
> (subreg:V8SF (lt:V8SI (reg:V8SI 90)
> (const_vector:V8SI [
> (const_int 0 [0]) repeated x8
> ])) 0)
> ] UNSPEC_BLENDV))
> allowing combination of insns 8, 9 and 10
>
> After applying patch, combine shows:
>
> Trying 8, 9 -> 10:
> 8: r87:V8SI=const_vector
> 9: r89:V8SI=r87:V8SI>r90:V8SI
>   REG_DEAD r90:V8SI
>   REG_DEAD r87:V8SI
>10: r86:V8SF=unspec[r92:V8SF,r91:V8SF,r89:V8SI#0] 104
>   REG_DEAD r92:V8SF
>   REG_DEAD r89:V8SI
>   REG_DEAD r91:V8SF
> Failed to match this instruction:
> (set (reg:V8SF 86)
> (unspec:V8SF [
> (reg:V8SF 92)
> (reg:V8SF 91)
> (lt:V8SF (reg:V8SI 90)
> (const_vector:V8SI [
> (const_int 0 [0]) repeated x8
> ]))
> ] UNSPEC_BLENDV))

Bah.  If the port isn't self-consistent about whether a subreg should
be used, it's tempting to say that a subreg should be used and fix the
places that don't.  At least that way we'd avoid the abomination -
ABOMINATION! - of using NaNs to represent truth.

But I agree it looks like this is the only pattern affected.

> Adjusting UNSPEC_BLENDV pattern to match the simplified subreg seems to work.
> Does it look OK ?
>
> Testing the attached patch in progress.
> (A quick comparison for i386.exp now shows no difference before/after patch).
>
> Thanks,
> Prathamesh
>>
>> Thanks,
>> Richard
>
> 2019-07-03  Prathamesh Kulkarni  
>
>   * fwprop.c (reg_single_def_p): New function.
>   (propagate_rtx_1): Add unconditional else inside RTX_EXTRA case.
>   (forward_propagate_into): New parameter reg_prop_only
>   with default value false.
>   Propagate def's src into loop only if SET_SRC and SET_DEST
>   of def_set have single definitions.
>   Likewise if reg_prop_only is set to true.
>   (fwprop): New param fwprop_addr_p.
>   Integrate fwprop_addr into fwprop.
>   (fwprop_addr): Remove.
>   (pass_rtl_fwprop_addr::execute): Call fwprop with arg set
>   to true.
>   (pass_rtl_fwprop::execute): Call fwprop with arg set to false.
>   * simplify-rtx.c (simplify_subreg): Add case for vector comparison.
>   * i386/config/sse.md (UNSPEC_BLENDV): Adjust pattern.

typo: config/i386/sse.md

>
> testsuite/
>   * gfortran.dg/pr88833.f90: New test. 
>
> d

Re: [range-ops] patch 01/04: types for VR_UNDEFINED and VR_VARYING

2019-07-03 Thread Richard Biener
On Wed, Jul 3, 2019 at 11:19 AM Aldy Hernandez  wrote:
>
>
>
> On 7/3/19 4:28 AM, Richard Biener wrote:
> > On Mon, Jul 1, 2019 at 10:52 AM Aldy Hernandez  wrote:
> >>
> >> As discussed before, this enforces types on undefined and varying, which
> >> makes everything more regular, and removes some special casing
> >> throughout range handling.
> >
> > I don't like it too much given you need to introduce that "cache".
> >
> > Why do VARYING or UNDEFINED need a type?  Nobody should ever need
> > to ask for the type of a range anyhow - the type should be always that from
> > the context we're looking at (the SSA name the range is associated with,
> > the operation we are performing on one or two ranges, etc.).
> >
> > Thinking again about this it looks fundamentally wrong to associate
> > a type with the VARYING or UNDEFINED lattice states.
>
> We discussed this 2 weeks ago, and it was my understanding that we had
> an reached an agreement on the general approach.  Types on varying and
> undefined was the *first* thing I brought up.  Explanation quoted below.

Yes, and I asked how you handled that const static node for VARYING
which you answered, well - we could do some caching per type and I
replied I didn't like that very much.

So - I see why it might be "convenient" but I still see no actual
technical requirement to have a type for them.  I see you have
canonicalization for symbolic ranges to integer ranges so
you can have one for varying/undefined to integer ranges as well;
just make that canonicalization take a type argument.

> By the way, the type for varying/undefined requires no space in the
> value_range_base structure, as it is kept in the min/max fields which we
> already use for VR_RANGE/VR_ANTI_RANGE.

You can as well populate those with actual canonical integer range values
then.  [MIN, MAX] and [MAX, MIN] (or whatever we'd consider canonical for
the empty range).

But as said, point me to the place where you need the type of VARYING.
It should already exist since the current code does away without.

I refuse to uglify the current VRP with a not needed type-indexed cache
for VARYING nodes just to make ranger intergation more happy.   Just
ignore that extra 'type' argument in the ranger API then?

Richard.

> Aldy
>
> https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01292.html
>
> In order to unify the APIs for value_range and irange, we'd like to make
> some minor changes to value_range.  We believe most of these changes
> could go in now, and would prefer so, to get broader testing and
> minimize the plethora of changes we drag around on our branch.
>
> First, introduce a type for VR_VARYING and VR_UNDEFINED.
> 
>
> irange utilizes 0 or more sub-ranges to represent a range, and VARYING
> is simply one subrange [MIN, MAX].value_range represents this with
> VR_VARYING, and since there is no type associated with it, we cannot
> calculate the lower and upper bounds for the range.  There is also a
> lack of canonicalness in value range in that VR_VARYING and [MIN, MAX]
> are two different representations of the same value.
>
> We tried to adjust irange to not associate a type with the empty range
> [] (representing undefined), but found we were unable to perform all
> operations properly.  In particular, we cannot invert an empty range.
> i.e. invert ( [] ) should produce [MIN, MAX].  Again, we need to have a
> type associated with this empty range.
>
> We'd like to tweak value_range so that set_varying() and set_undefined()
> both take a type, and then always set the min/max fields based on that
> type.  This takes no additional memory in the structure, and is
> virtually transparent to all the existing uses of value_range.
>
> This allows:
>  1)  invert to be implemented properly for both VARYING and UNDEFINED
> by simply changing one to the other.
>  2)  the type() method to always work without any special casing by
> simply returning TREE_TYPE(min)
>  3)  the new incoming bounds() routines to work trivially for these
> cases as well (lbound/ubound, num_pairs(), etc).


Re: [patch][aarch64]: fix frame pointer setup before tlsdesc call

2019-07-03 Thread Richard Earnshaw


On 25/06/2019 17:51, Sylvia Taylor wrote:
> Greetings,
> 
> This patch fixes a bug with TLS in which the frame pointer is not
> established until after the tlsdesc call, thus not conforming to
> the aarch64 procedure call standard.
> 
> Changed the tlsdesc instruction patterns to set a dependency on the
> x29 frame pointer. This helps the instruction scheduler to arrange
> the tlsdesc call after the frame pointer is set.
> 
> Example of frame pointer (x29) set incorrectly after tlsdesc call:
> 
> stp   x29, x30, [sp, -16]!
> adrp  x0, :tlsdesc:.LANCHOR0
> ldr   x2, [x0, #:tlsdesc_lo12:.LANCHOR0]
> add   x0, x0, :tlsdesc_lo12:.LANCHOR0
> .tlsdesccall  .LANCHOR0
> blr   x2
> ...
> mov   x29, sp
> ...
> 
> After introducing dependency on x29, the scheduler does the frame
> pointer setup before tlsdesc:
> 
> stp   x29, x30, [sp, -16]!
> mov   x29, sp
> adrp  x0, :tlsdesc:.LANCHOR0
> ldr   x2, [x0, #:tlsdesc_lo12:.LANCHOR0]
> add   x0, x0, :tlsdesc_lo12:.LANCHOR0
> .tlsdesccall  .LANCHOR0
> blr   x2
> ...
> 
> Testcase used with -O2 -fpic:
> 
> void foo()
> {
>static __thread int x = 0;
>bar (&x);
> }
> 
> I am not sure what would classify as an effective check for this
> testcase. The only idea I received so far would be to write a regexp
> inside a scan-assembler-not that would potentially look for this pattern:
> 
> 
> .tlsdesccall 
> blr 
> 
> [mov x29, sp] OR [add x29, sp, 0]
> 
> 
> (similar to what was attempted in gcc/testsuite/gcc.target/arm/pr85434.c)
> 
> I would like maintainers' input on whether such a testcase should be added
> and if there are better ways of checking for the instruction order.
> 
> Bootstrapped and tested on aarch64-none-linux-gnu.
> 
> Ok for trunk? If yes, I don't have any commit rights, so can someone please
> commit it on my behalf.
> 
> Cheers,
> Syl
> 
> gcc/ChangeLog:
> 
> 2019-06-25  Sylvia Taylor  
> 
>   * config/aarch64/aarch64.md
>   (tlsdesc_small_advsimd_): Update.
>   (tlsdesc_small_sve_): Likewise.
>   (FP_REGNUM): New.
> 

Thanks, I've put this in.  When writing ChangeLogs, you need a little 
more detail than this.  "New" and "Update" don't really explain the 
change.  I modified the ChangeLog entry as follows:

* config/aarch64/aarch64.md (FP_REGNUM): New constant.
(tlsdesc_small_advsimd_): Add use of FP_REGNUM.
(tlsdesc_small_sve_): Likewise.

R.


[PATCH] Fix BIT_INSERT_EXPR dumping

2019-07-03 Thread Richard Biener


It currently shows weirdly, fixed as follows.

Committed to trunk.

Richard.

2019-07-03  Richard Biener  

* gimple-pretty-print.c (dump_ternary_rhs): Fix BIT_INSERT_EXPR
dumping.

Index: gcc/gimple-pretty-print.c
===
--- gcc/gimple-pretty-print.c   (revision 272958)
+++ gcc/gimple-pretty-print.c   (working copy)
@@ -604,10 +604,14 @@ dump_ternary_rhs (pretty_printer *buffer
  pp_string (buffer, ", ");
  dump_generic_node (buffer, gimple_assign_rhs3 (gs),
 spc, flags, false);
- pp_string (buffer, " (");
  if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs2 (gs
-   pp_decimal_int (buffer, TYPE_PRECISION
- (TREE_TYPE (gimple_assign_rhs2 (gs;
+   {
+ pp_string (buffer, " (");
+ pp_decimal_int (buffer, TYPE_PRECISION
+ (TREE_TYPE (gimple_assign_rhs2 (gs;
+ pp_string (buffer, " bits)");
+   }
+ pp_greater (buffer);
}
   break;
 


Re: [PATCH] Try fix PR90911

2019-07-03 Thread Richard Biener
On Tue, 25 Jun 2019, Richard Biener wrote:

> 
> PR90911 reports a slowdown of 456.hmmer with the recent introduction
> of vectorizer versioning of outer loops, more specifically the case
> of re-using if-conversion created versions.
> 
> The patch below fixes things up to adjust the edge probability
> and scale the loop bodies in two steps, delaying scalar_loop
> scaling until all peeling is done.  This restores profile-mismatches
> to the same state as it was on the GCC 9 branch and seems to
> fix the observed slowdown of 456.hmmer.
> 
> Boostrap & regtest running on x86_64-unknown-linux-gnu.
> 
> Honza, does this look OK?

Ping.

> Thanks,
> Richard.
> 
> 2019-06-25  Richard Biener  
> 
>   * tree-vectorizer.h (_loop_vec_info::scalar_loop_scaling): New field.
>   (LOOP_VINFO_SCALAR_LOOP_SCALING): new.
>   * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
>   scalar_loop_scaling.
>   (vect_transform_loop): Scale scalar loop profile if needed.
>   * tree-vect-loop-manip.c (vect_loop_versioning): When re-using
>   the loop copy from if-conversion adjust edge probabilities
>   and scale the vectorized loop body profile, queue the scalar
>   profile for updating after peeling.
> 
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h (revision 272636)
> +++ gcc/tree-vectorizer.h (working copy)
> @@ -548,6 +548,9 @@ typedef struct _loop_vec_info : public v
>/* Mark loops having masked stores.  */
>bool has_mask_store;
>  
> +  /* Queued scaling factor for the scalar loop.  */
> +  profile_probability scalar_loop_scaling;
> +
>/* If if-conversion versioned this loop before conversion, this is the
>   loop version without if-conversion.  */
>struct loop *scalar_loop;
> @@ -603,6 +606,7 @@ typedef struct _loop_vec_info : public v
>  #define LOOP_VINFO_PEELING_FOR_NITER(L)(L)->peeling_for_niter
>  #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
>  #define LOOP_VINFO_SCALAR_LOOP(L)   (L)->scalar_loop
> +#define LOOP_VINFO_SCALAR_LOOP_SCALING(L)  (L)->scalar_loop_scaling
>  #define LOOP_VINFO_HAS_MASK_STORE(L)   (L)->has_mask_store
>  #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
>  #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) 
> (L)->single_scalar_iteration_cost
> Index: gcc/tree-vect-loop.c
> ===
> --- gcc/tree-vect-loop.c  (revision 272636)
> +++ gcc/tree-vect-loop.c  (working copy)
> @@ -835,6 +835,7 @@ _loop_vec_info::_loop_vec_info (struct l
>  operands_swapped (false),
>  no_data_dependencies (false),
>  has_mask_store (false),
> +scalar_loop_scaling (profile_probability::uninitialized ()),
>  scalar_loop (NULL),
>  orig_loop_info (NULL)
>  {
> @@ -8562,6 +8563,10 @@ vect_transform_loop (loop_vec_info loop_
>epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
> &step_vector, &niters_vector_mult_vf, th,
> check_profitability, niters_no_overflow);
> +  if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)
> +  && LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo).initialized_p ())
> +scale_loop_frequencies (LOOP_VINFO_SCALAR_LOOP (loop_vinfo),
> + LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo));
>  
>if (niters_vector == NULL_TREE)
>  {
> Index: gcc/tree-vect-loop-manip.c
> ===
> --- gcc/tree-vect-loop-manip.c(revision 272636)
> +++ gcc/tree-vect-loop-manip.c(working copy)
> @@ -3114,8 +3114,17 @@ vect_loop_versioning (loop_vec_info loop
>GSI_SAME_STMT);
>   }
>  
> -  /* ???  if-conversion uses profile_probability::always () but
> - prob below is profile_probability::likely ().  */
> +  /* if-conversion uses profile_probability::always () for both paths,
> +  reset the paths probabilities appropriately.  */
> +  edge te, fe;
> +  extract_true_false_edges_from_block (condition_bb, &te, &fe);
> +  te->probability = prob;
> +  fe->probability = prob.invert ();
> +  /* We can scale loops counts immediately but have to postpone
> + scaling the scalar loop because we re-use it during peeling.  */
> +  scale_loop_frequencies (loop_to_version, prob);
> +  LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = prob.invert ();
> +
>nloop = scalar_loop;
>if (dump_enabled_p ())
>   dump_printf_loc (MSG_NOTE, vect_location,
> 

-- 
Richard Biener 
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

[PATCH] Fix PR91062

2019-07-03 Thread Richard Biener


The following avoids GC collecting during pass execution when a pass
calls cgraph::get_body.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

2019-07-03  Richard Biener  

PR ipa/91062
* tree-pass.h (execute_all_ipa_transforms): Add a flag
parameter whether to disable GC collection.
* passes.c (execute_one_ipa_transform_pass): Likewise, and
honor it.
(execute_all_ipa_transforms): Likewise and pass it down.
* cgraph.c (cgraph_node::get_body): Do not invoke garbage
collection from applying IPA transforms.
* cgraphunit.c (cgraph_node::expand): Allow garbage collection
from applying IPA transforms.

Index: gcc/tree-pass.h
===
--- gcc/tree-pass.h (revision 272958)
+++ gcc/tree-pass.h (working copy)
@@ -632,7 +632,7 @@ extern bool execute_one_pass (opt_pass *
 extern void execute_pass_list (function *, opt_pass *);
 extern void execute_ipa_pass_list (opt_pass *);
 extern void execute_ipa_summary_passes (ipa_opt_pass_d *);
-extern void execute_all_ipa_transforms (void);
+extern void execute_all_ipa_transforms (bool);
 extern void execute_all_ipa_stmt_fixups (struct cgraph_node *, gimple **);
 extern bool pass_init_dump_file (opt_pass *);
 extern void pass_fini_dump_file (opt_pass *);
Index: gcc/passes.c
===
--- gcc/passes.c(revision 272958)
+++ gcc/passes.c(working copy)
@@ -2182,7 +2182,7 @@ execute_ipa_summary_passes (ipa_opt_pass
 
 static void
 execute_one_ipa_transform_pass (struct cgraph_node *node,
-   ipa_opt_pass_d *ipa_pass)
+   ipa_opt_pass_d *ipa_pass, bool do_not_collect)
 {
   opt_pass *pass = ipa_pass;
   unsigned int todo_after = 0;
@@ -2228,14 +2228,14 @@ execute_one_ipa_transform_pass (struct c
   redirect_edge_var_map_empty ();
 
   /* Signal this is a suitable GC collection point.  */
-  if (!(todo_after & TODO_do_not_ggc_collect))
+  if (!do_not_collect && !(todo_after & TODO_do_not_ggc_collect))
 ggc_collect ();
 }
 
 /* For the current function, execute all ipa transforms. */
 
 void
-execute_all_ipa_transforms (void)
+execute_all_ipa_transforms (bool do_not_collect)
 {
   struct cgraph_node *node;
   if (!cfun)
@@ -2247,7 +2247,8 @@ execute_all_ipa_transforms (void)
   unsigned int i;
 
   for (i = 0; i < node->ipa_transforms_to_apply.length (); i++)
-   execute_one_ipa_transform_pass (node, node->ipa_transforms_to_apply[i]);
+   execute_one_ipa_transform_pass (node, node->ipa_transforms_to_apply[i],
+   do_not_collect);
   node->ipa_transforms_to_apply.release ();
 }
 }
Index: gcc/cgraph.c
===
--- gcc/cgraph.c(revision 272958)
+++ gcc/cgraph.c(working copy)
@@ -3618,7 +3618,7 @@ cgraph_node::get_body (void)
   set_dump_file (NULL);
 
   push_cfun (DECL_STRUCT_FUNCTION (decl));
-  execute_all_ipa_transforms ();
+  execute_all_ipa_transforms (true);
   cgraph_edge::rebuild_edges ();
   free_dominance_info (CDI_DOMINATORS);
   free_dominance_info (CDI_POST_DOMINATORS);
Index: gcc/cgraphunit.c
===
--- gcc/cgraphunit.c(revision 272958)
+++ gcc/cgraphunit.c(working copy)
@@ -2184,7 +2184,7 @@ cgraph_node::expand (void)
 
   bitmap_obstack_initialize (®_obstack); /* FIXME, only at RTL generation*/
 
-  execute_all_ipa_transforms ();
+  execute_all_ipa_transforms (false);
 
   /* Perform all tree transforms and optimizations.  */
 


[PATCH] Fix PR91069

2019-07-03 Thread Richard Biener


Late FRE uncovered a bug in vec-perm folding.

Boostrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-07-03  Richard Biener  

PR middle-end/91069
* match.pd (vec_perm -> bit_insert): Fix element read from
first vector.

* gcc.dg/pr91069.c: New testcase.

Index: gcc/match.pd
===
--- gcc/match.pd(revision 272958)
+++ gcc/match.pd(working copy)
@@ -5520,7 +5520,7 @@ (define_operator_list COND_TERNARY
   first vector we only can insert the first elt from
   the first vector.  */
at = 0;
-   if ((ins = fold_read_from_vector (cop0, 0)))
+   if ((ins = fold_read_from_vector (cop0, sel[0])))
  op0 = op1;
  }
else
Index: gcc/testsuite/gcc.dg/pr91069.c
===
--- gcc/testsuite/gcc.dg/pr91069.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/pr91069.c  (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+
+typedef double v2df __attribute__((vector_size(16)));
+typedef long v2di __attribute__((vector_size(16)));
+
+void foo (v2df *res, v2df *src)
+{
+  v2df x = *src;
+  *res = __builtin_shuffle ((v2df) { 1.0, 0.0 }, x, (v2di) { 1, 3 });
+}
+
+int main()
+{
+  v2df x = (v2df) { 0.0, 2.0 };
+  foo (&x, &x);
+  if (x[0] != 0.0 || x[1] != 2.0)
+__builtin_abort ();
+  return 0;
+}


[patch] Small improvements to coverage info (1/n)

2019-07-03 Thread Eric Botcazou
Hi,

we have collected a number of small improvements to coverage info generated by 
the compiler over the years.  One of the issues is when a new expression or 
statement is built without source location information and ends up inheriting 
the source location information of the previous instruction in the debug info, 
which can be totally unrelated.

Tested on x86_64-suse-linux, both GCC and GDB, OK for mainline?


2019-07-03  Eric Botcazou  

* tree-cfg.c (gimple_make_forwarder_block): Propagate location info on
phi nodes if possible.
* tree-scalar-evolution.c (final_value_replacement_loop): Propagate
location info on the newly created statement.
* tree-ssa-loop-manip.c (create_iv): Propagate location info on the
newly created increment if needed.

-- 
Eric BotcazouIndex: tree-cfg.c
===
--- tree-cfg.c	(revision 272930)
+++ tree-cfg.c	(working copy)
@@ -5756,6 +5756,7 @@ gimple_make_forwarder_block (edge fallth
   basic_block dummy, bb;
   tree var;
   gphi_iterator gsi;
+  bool forward_location_p;
 
   dummy = fallthru->src;
   bb = fallthru->dest;
@@ -5763,6 +5764,9 @@ gimple_make_forwarder_block (edge fallth
   if (single_pred_p (bb))
 return;
 
+  /* We can forward location info if we have only one predecessor.  */
+  forward_location_p = single_pred_p (dummy);
+
   /* If we redirected a branch we must create new PHI nodes at the
  start of BB.  */
   for (gsi = gsi_start_phis (dummy); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -5774,7 +5778,8 @@ gimple_make_forwarder_block (edge fallth
   new_phi = create_phi_node (var, bb);
   gimple_phi_set_result (phi, copy_ssa_name (var, phi));
   add_phi_arg (new_phi, gimple_phi_result (phi), fallthru,
-		   UNKNOWN_LOCATION);
+		   forward_location_p
+		   ? gimple_phi_arg_location (phi, 0) : UNKNOWN_LOCATION);
 }
 
   /* Add the arguments we have stored on edges.  */
Index: tree-scalar-evolution.c
===
--- tree-scalar-evolution.c	(revision 272930)
+++ tree-scalar-evolution.c	(working copy)
@@ -3680,6 +3680,8 @@ final_value_replacement_loop (struct loo
 	true, GSI_SAME_STMT);
 
   gassign *ass = gimple_build_assign (rslt, def);
+  gimple_set_location (ass,
+			   gimple_phi_arg_location (phi, exit->dest_idx));
   gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
   if (dump_file)
 	{
Index: tree-ssa-loop-manip.c
===
--- tree-ssa-loop-manip.c	(revision 272930)
+++ tree-ssa-loop-manip.c	(working copy)
@@ -126,10 +126,22 @@ create_iv (tree base, tree step, tree va
 gsi_insert_seq_on_edge_immediate (pe, stmts);
 
   stmt = gimple_build_assign (va, incr_op, vb, step);
+  /* Prevent the increment from inheriting a bogus location if it is not put
+ immediately after a statement whose location is known.  */
   if (after)
-gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
+{
+  if (gsi_end_p (*incr_pos))
+	{
+	  edge e = single_succ_edge (gsi_bb (*incr_pos));
+	  gimple_set_location (stmt, e->goto_locus);
+	}
+  gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
+}
   else
-gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);
+{
+  gimple_set_location (stmt, gimple_location (gsi_stmt (*incr_pos)));
+  gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);
+}
 
   initial = force_gimple_operand (base, &stmts, true, var);
   if (stmts)


Re: [SVE] [fwprop] PR88833 - Redundant moves for WHILELO-based loops

2019-07-03 Thread Prathamesh Kulkarni
On Tue, 2 Jul 2019 at 18:22, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> > On Tue, 2 Jul 2019 at 16:59, Richard Sandiford
> >  wrote:
> >>
> >> Thanks for fixing this.
> >>
> >> Prathamesh Kulkarni  writes:
> >> > diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
> >> > index 89a46a933fa..79bd0cfbd28 100644
> >> > --- a/gcc/simplify-rtx.c
> >> > +++ b/gcc/simplify-rtx.c
> >> > @@ -6697,6 +6697,19 @@ simplify_subreg (machine_mode outermode, rtx op,
> >> >   }
> >> >  }
> >> >
> >> > +  /* If op is a vector comparison operator, rewrite it in a new mode.
> >> > + This probably won't match, but may allow further simplifications.
> >> > + Also check if number of elements and size of each element
> >> > + match for outermode and innermode.  */
> >> > +
> >>
> >> Excess blank line after the comment.  IMO the second part of the comment
> >> reads too much like an afterthought.  How about:
> >>
> >>   /* If OP is a vector comparison and the subreg is not changing the
> >>  number of elements or the size of the elements, change the result
> >>  of the comparison to the new mode.  */
> >>
> >> > +  if (COMPARISON_P (op)
> >> > +  && VECTOR_MODE_P (outermode)
> >> > +  && VECTOR_MODE_P (innermode)
> >> > +  && (known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS 
> >> > (innermode)))
> >> > +  && (known_eq (GET_MODE_UNIT_SIZE (outermode),
> >> > + GET_MODE_UNIT_SIZE (innermode
> >>
> >> Redundant brackets around the known_eq calls.
> >>
> >> > +return gen_rtx_fmt_ee (GET_CODE (op), outermode, XEXP (op, 0), XEXP 
> >> > (op, 1));
> >>
> >> This should use simplify_gen_relational, so that we try to simplify
> >> the new expression.
> > Does the attached version look OK ?
>
> OK with a suitable changelog (remember to post those!) if it passes testing.
The change to simplify_subreg regressed avx2-pr54700-1.C -;)

For following test-case:
__attribute__((noipa)) __v8sf
f7 (__v8si a, __v8sf b, __v8sf c)
{
  return a < 0 ? b : c;
}

Before patch, combine shows:
Trying 8, 9 -> 10:
8: r87:V8SI=const_vector
9: r89:V8SI=r87:V8SI>r90:V8SI
  REG_DEAD r90:V8SI
  REG_DEAD r87:V8SI
   10: r86:V8SF=unspec[r92:V8SF,r91:V8SF,r89:V8SI#0] 104
  REG_DEAD r92:V8SF
  REG_DEAD r89:V8SI
  REG_DEAD r91:V8SF
Successfully matched this instruction:
(set (reg:V8SF 86)
(unspec:V8SF [
(reg:V8SF 92)
(reg:V8SF 91)
(subreg:V8SF (lt:V8SI (reg:V8SI 90)
(const_vector:V8SI [
(const_int 0 [0]) repeated x8
])) 0)
] UNSPEC_BLENDV))
allowing combination of insns 8, 9 and 10

After applying patch, combine shows:

Trying 8, 9 -> 10:
8: r87:V8SI=const_vector
9: r89:V8SI=r87:V8SI>r90:V8SI
  REG_DEAD r90:V8SI
  REG_DEAD r87:V8SI
   10: r86:V8SF=unspec[r92:V8SF,r91:V8SF,r89:V8SI#0] 104
  REG_DEAD r92:V8SF
  REG_DEAD r89:V8SI
  REG_DEAD r91:V8SF
Failed to match this instruction:
(set (reg:V8SF 86)
(unspec:V8SF [
(reg:V8SF 92)
(reg:V8SF 91)
(lt:V8SF (reg:V8SI 90)
(const_vector:V8SI [
(const_int 0 [0]) repeated x8
]))
] UNSPEC_BLENDV))

Adjusting UNSPEC_BLENDV pattern to match the simplified subreg seems to work.
Does it look OK ?

Testing the attached patch in progress.
(A quick comparison for i386.exp now shows no difference before/after patch).

Thanks,
Prathamesh
>
> Thanks,
> Richard
2019-07-03  Prathamesh Kulkarni  

* fwprop.c (reg_single_def_p): New function.
(propagate_rtx_1): Add unconditional else inside RTX_EXTRA case.
(forward_propagate_into): New parameter reg_prop_only
with default value false.
Propagate def's src into loop only if SET_SRC and SET_DEST
of def_set have single definitions.
Likewise if reg_prop_only is set to true.
(fwprop): New param fwprop_addr_p.
Integrate fwprop_addr into fwprop.
(fwprop_addr): Remove.
(pass_rtl_fwprop_addr::execute): Call fwprop with arg set
to true.
(pass_rtl_fwprop::execute): Call fwprop with arg set to false.
* simplify-rtx.c (simplify_subreg): Add case for vector comparison.
* i386/config/sse.md (UNSPEC_BLENDV): Adjust pattern.

testsuite/
* gfortran.dg/pr88833.f90: New test. 

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d7d542524fb..5384fe218f9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16623,10 +16623,9 @@
(unspec:VF_128_256
  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
-  (subreg:VF_128_256
-(lt:
+(lt:VF_128_256
   (match_operand: 3 "register_operand" "Yz,Yz,x")
-  (match_operand: 4 "const0_operand" "C,C,C")) 0)]
+

[PATCH] Add dbgcnt for gimple_match and generic_match.

2019-07-03 Thread Martin Liška
Hi.

The patch is about dbgcnt support for match in GIMPLE and GENERIC.
It's handy for isolation of a problem.

Ready to be installed after it survives tests?
Thanks,
Martin

gcc/ChangeLog:

2019-07-03  Martin Liska  

* dbgcnt.def (DEBUG_COUNTER): Add gimple_match and
generic_match.
* genmatch.c (dt_simplify::gen_1): Generate dbgcnt
condition.
* generic-match-head.c: Include dbgcnt.h.
* gimple-match-head.c: Likewise.
---
 gcc/dbgcnt.def   | 2 ++
 gcc/generic-match-head.c | 2 +-
 gcc/genmatch.c   | 5 +
 gcc/gimple-match-head.c  | 2 +-
 4 files changed, 9 insertions(+), 2 deletions(-)


diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index dd874c519bb..7ab32816a05 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -195,3 +195,5 @@ DEBUG_COUNTER (tree_sra)
 DEBUG_COUNTER (vect_loop)
 DEBUG_COUNTER (vect_slp)
 DEBUG_COUNTER (dom_unreachable_edges)
+DEBUG_COUNTER (gimple_match)
+DEBUG_COUNTER (generic_match)
diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 76fc9993481..b54e03552ba 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "case-cfn-macros.h"
 #include "gimplify.h"
 #include "optabs-tree.h"
-
+#include "dbgcnt.h"
 
 /* Routine to determine if the types T1 and T2 are effectively
the same for GENERIC.  If T1 or T2 is not a type, the test
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 7b9b09c7d8b..8ec73f82482 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -3310,6 +3310,11 @@ dt_simplify::gen_1 (FILE *f, int indent, bool gimple, operand *result)
 	}
 }
 
+  if (gimple)
+fprintf_indent (f, indent, "if (__builtin_expect (!dbg_cnt (gimple_match), 0)) return false;\n");
+  else
+fprintf_indent (f, indent, "if (__builtin_expect (!dbg_cnt (generic_match), 0)) return NULL_TREE;\n");
+
   fprintf_indent (f, indent, "if (__builtin_expect (dump_file && (dump_flags & TDF_FOLDING), 0)) "
 	   "fprintf (dump_file, \"%s ",
 	   s->kind == simplify::SIMPLIFY
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index f83f2256178..df9f0c50590 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -42,7 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "optabs-tree.h"
 #include "tree-eh.h"
-
+#include "dbgcnt.h"
 
 /* Forward declarations of the private auto-generated matchers.
They expect valueized operands in canonical order and do not



[PATCH] PR libstdc++/91067 fix missing exports for filesystem iterators

2019-07-03 Thread Jonathan Wakely

The copy assignment operator for recursive_directory_iterator was not
exported despite being needed. The __shared_ptr default constructors are
not needed when compiling with GCC but Clang requires them for -O1.

PR libstdc++/91067
* acinclude.m4 (libtool_VERSION): Bump to 6:27:0.
* configure: Regenerate.
* config/abi/pre/gnu.ver (GLIBCXX_3.4.27): Add new version. Export
missing symbols.
* testsuite/27_io/filesystem/iterators/91067.cc: New test.
* testsuite/util/testsuite_abi.cc: Add new symbol version.

Tested x86_64-linux, and sanity tested with Clang.

I plan to commit this to trunk and gcc-9-branch today.


commit 586263dbb0a93d67c970ee0b08b382cf2bc5211b
Author: Jonathan Wakely 
Date:   Wed Jul 3 10:18:52 2019 +0100

PR libstdc++/91067 fix missing exports for filesystem iterators

The copy assignment operator for recursive_directory_iterator was not
exported despite being needed. The __shared_ptr default constructors are
not needed when compiling with GCC but Clang requires them for -O1.

PR libstdc++/91067
* acinclude.m4 (libtool_VERSION): Bump to 6:27:0.
* configure: Regenerate.
* config/abi/pre/gnu.ver (GLIBCXX_3.4.27): Add new version. Export
missing symbols.
* testsuite/27_io/filesystem/iterators/91067.cc: New test.
* testsuite/util/testsuite_abi.cc: Add new symbol version.

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index fad390ba322..24145fdf1ce 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -3832,7 +3832,7 @@ changequote([,])dnl
 fi
 
 # For libtool versioning info, format is CURRENT:REVISION:AGE
-libtool_VERSION=6:26:0
+libtool_VERSION=6:27:0
 
 # Everything parsed; figure out what files and settings to use.
 case $enable_symvers in
diff --git a/libstdc++-v3/config/abi/pre/gnu.ver 
b/libstdc++-v3/config/abi/pre/gnu.ver
index 58d3e900bbf..ff4b74cb971 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -2266,6 +2266,22 @@ GLIBCXX_3.4.26 {
 
 } GLIBCXX_3.4.25;
 
+GLIBCXX_3.4.27 {
+
+# __shared_ptr<_Dir>::__shared_ptr()
+
_ZNSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE[012]EEC2Ev;
+
_ZNSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE[012]EEC2Ev;
+
+# __shared_ptr::__shared_ptr()
+
_ZNSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE[012]EEC2Ev;
+
_ZNSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE[012]EEC2Ev;
+
+# recursive_directory_iterator::operator=(const 
recursive_directory_iterator&)
+_ZNSt10filesystem28recursive_directory_iteratoraSERKS0_;
+_ZNSt10filesystem7__cxx1128recursive_directory_iteratoraSERKS1_;
+
+} GLIBCXX_3.4.26;
+
 # Symbols in the support library (libsupc++) have their own tag.
 CXXABI_1.3 {
 
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc
new file mode 100644
index 000..54172d9f20b
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc
@@ -0,0 +1,45 @@
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++17" }
+// { dg-do link { target c++17 } }
+// { dg-require-filesystem-ts "" }
+
+#include 
+
+void
+test01()
+{
+  std::filesystem::directory_iterator d;
+  d = d;
+  d = std::move(d);
+}
+
+void
+test02()
+{
+  std::filesystem::recursive_directory_iterator d;
+  d = d;
+  d = std::move(d);
+}
+
+int
+main()
+{
+  test01();
+  test02();
+}
diff --git a/libstdc++-v3/testsuite/util/testsuite_abi.cc 
b/libstdc++-v3/testsuite/util/testsuite_abi.cc
index a2d2cab3653..1277972049f 100644
--- a/libstdc++-v3/testsuite/util/testsuite_abi.cc
+++ b/libstdc++-v3/testsuite/util/testsuite_abi.cc
@@ -207,6 +207,7 @@ check_version(symbol& test, bool added)
   known_versions.push_back("GLIBCXX_3.4.24");
   known_versions.push_back("GLIBCXX_3.4.25");
   known_versions.push_back("GLIBCXX_3.4.26");
+  known_versions.push_back("GLIBCXX_3.4.27");
   known_versions.push_back("CXXA

[patch] Fix debug info for discriminated record types

2019-07-03 Thread Eric Botcazou
Hi,

this is a regression present on the mainline and 9 branch: since
  https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01066.html
the debug info emitted in Ada with -fgnat-encodings=minimal for discriminated 
record types containing an array component whose bound is a discriminant is 
incomplete, i.e. the DW_AT_*_bound attribute of the range subtype is missing.

Tested on x86_64-suse-linux, OK for mainline and 9 branch?


2019-07-03  Eric Botcazou  

* dwarf2out.c (add_scalar_info): Add back refererence to existing DIE
if it has the DW_AT_data_member_location attribute.


2019-07-03  Eric Botcazou  

* gnat.dg/specs/debug1.ads: New test.

-- 
Eric BotcazouIndex: dwarf2out.c
===
--- dwarf2out.c	(revision 272930)
+++ dwarf2out.c	(working copy)
@@ -20845,6 +20845,7 @@ add_scalar_info (dw_die_ref die, enum dw
 	  if (decl_die != NULL)
 	{
 	  if (get_AT (decl_die, DW_AT_location)
+		  || get_AT (decl_die, DW_AT_data_member_location)
 		  || get_AT (decl_die, DW_AT_const_value))
 		{
 		  add_AT_die_ref (die, attr, decl_die);
-- { dg-do compile }
-- { dg-options "-cargs -g -dA -fgnat-encodings=minimal -margs" }

package Debug1 is

   type Index_T is new Positive range 1 .. 128;

   type Array_Type is array (Index_T range <>) of Integer;

   type Record_Type (N : Index_T := 16) is record
  A : Array_Type (1 .. N);
   end record;

   R : Record_Type;

end Debug1;

--  { dg-final { scan-assembler-times "DW_AT_upper_bound" 4 } }


Re: [PATCH][ARM] Add support for "noinit" attribute

2019-07-03 Thread Richard Earnshaw




On 02/07/2019 15:49, Christophe Lyon wrote:

On Tue, 2 Jul 2019 at 12:30, Richard Earnshaw  wrote:




On 02/07/2019 11:13, Richard Earnshaw wrote:



On 02/07/2019 09:39, Richard Earnshaw wrote:



On 01/07/2019 16:58, Kyrill Tkachov wrote:

Hi Christophe,

On 6/13/19 4:13 PM, Christophe Lyon wrote:

Hi,

Similar to what already exists for TI msp430 or in TI compilers for
arm, this patch adds support for "noinit" attribute for arm. It's very
similar to the corresponding code in GCC for msp430.

It is useful for embedded targets where the user wants to keep the
value of some data when the program is restarted: such variables are
not zero-initialized.It is mostly a helper/shortcut to placing
variables in a dedicated section.

It's probably desirable to add the following chunk to the GNU linker:
diff --git a/ld/emulparams/armelf.sh b/ld/emulparams/armelf.sh
index 272a8bc..9555cec 100644
--- a/ld/emulparams/armelf.sh
+++ b/ld/emulparams/armelf.sh
@@ -10,7 +10,19 @@ OTHER_TEXT_SECTIONS='*(.glue_7t) *(.glue_7)
*(.vfp11_veneer) *(.v4_bx)'
  OTHER_BSS_SYMBOLS="${CREATE_SHLIB+PROVIDE (}__bss_start__ =
.${CREATE_SHLIB+)};"
  OTHER_BSS_END_SYMBOLS="${CREATE_SHLIB+PROVIDE (}_bss_end__ =
.${CREATE_SHLIB+)}; ${CREATE_SHLIB+PROVIDE (}__bss_end__ =
.${CREATE_SHLIB+)};"
  OTHER_END_SYMBOLS="${CREATE_SHLIB+PROVIDE (}__end__ =
.${CREATE_SHLIB+)};"
-OTHER_SECTIONS='.note.gnu.arm.ident 0 : { KEEP
(*(.note.gnu.arm.ident)) }'
+OTHER_SECTIONS='
+.note.gnu.arm.ident 0 : { KEEP (*(.note.gnu.arm.ident)) }
+  /* This section contains data that is not initialised during load
+ *or* application reset.  */
+   .noinit (NOLOAD) :
+   {
+ . = ALIGN(2);
+ PROVIDE (__noinit_start = .);
+ *(.noinit)
+ . = ALIGN(2);
+ PROVIDE (__noinit_end = .);
+   }
+'

so that the noinit section has the "NOLOAD" flag.

I'll submit that part separately to the binutils project if OK.

OK?



This is mostly ok by me, with a few code comments inline.

I wonder whether this is something we could implement for all targets
in the midend, but this would require linker script support for the
target to be effective...


Can't this be done using named sections?  If the sections were of the
form .bss. then it would be easy to make linker scripts do
something sane by default and users could filter them out to special
noinit sections if desired.



To answer my own question, it would appear to be yes.  You can write today:

int xxx __attribute__ ((section (".bss.noinit")));

int main ()
{
return xxx;
}

And the compiler will generate
  .section.bss.noinit,"aw",@nobits
  .align 4
  .typexxx, @object
  .sizexxx, 4
xxx:
  .zero4

So at this point, all you need is a linker script to filter .bss.noinit
into your special part of the final image.

This will most likely work today on any GCC target that supports named
sections, which is pretty much all of them these days.



Alternatively, we already have:

https://gcc.gnu.org/ml/gcc-patches/2014-09/msg01302.html

R.



Hi Richard,

Indeed this can already be achieved with the "section" attribute as you propose.

The motivation for this patch came from user requests: this feature is
already available in some proprietary ARM toolchains (TI, IAR, ...)
and it's more convenient for the end-user than having to update linker
scripts in addition to adding an attribute to the variable.



? Your patch has an update to the linker scripts...




I guess it's a balance between user-friendliness/laziness and GCC
developers ability to educate users :-)


Well in that case, this should be done generically, not in just the arm 
backend, or any other backend for that matter.


R.



Christophe



R.


R.



Thanks,

Kyrill


Thanks,

Christophe


diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e3e71ea..332c41b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -151,6 +151,7 @@ static tree arm_handle_notshared_attribute (tree
*, tree, tree, int, bool *);
   #endif
   static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree,
int, bool *);
   static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree,
int, bool *);
+static tree arm_data_attr (tree *, tree, tree, int, bool *);
   static void arm_output_function_epilogue (FILE *);
   static void arm_output_function_prologue (FILE *);
   static int arm_comp_type_attributes (const_tree, const_tree);
@@ -375,7 +376,8 @@ static const struct attribute_spec
arm_attribute_table[] =
   arm_handle_cmse_nonsecure_entry, NULL },
 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
   arm_handle_cmse_nonsecure_call, NULL },
-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+  { "noinit", 0, 0, true, false, false, false, arm_data_attr, NULL },
+  { NULL, 0, 0, false, false, false, false, NULL, NULL },
   };

   /* Initialize the GCC target structure.  */
@@ -808,6 +810,10 @@ static const struct attribute_spec
arm_attribute_table[] =

   #undef TARGET_CONSTANT_ALIGNMENT
   #define 

Re: [range-ops] patch 01/04: types for VR_UNDEFINED and VR_VARYING

2019-07-03 Thread Aldy Hernandez




On 7/2/19 4:17 PM, Jeff Law wrote:

On 7/1/19 2:52 AM, Aldy Hernandez wrote:

As discussed before, this enforces types on undefined and varying, which
makes everything more regular, and removes some special casing
throughout range handling.

The min/max fields will contain TYPE_MIN_VALUE and TYPE_MAX_VALUE, which
will make it easy to get at the bounds of a range later on.  Since
pointers don't have TYPE_MIN/MAX_VALUE, we are using build_zero_cst()
and wide_int_to_tree(wi::max_value(precision)), for consistency.
UNDEFINED is set similarly, though nobody should ever ask for anything
except type() from it.  That is, no one should be asking for the bounds.

There is one wrinkle, ipa-cp creates VR_VARYING ranges of floats,
presumably to pass around state??  This causes value_range_base::type()
and others to fail, even though I haven't actually seen a case of
someone actually trying to set a VR_RANGE of a float.  For now, I've
built a NOP_EXPR wrapper so type() works correctly.  The correct
approach would probably be to avoid creating these varying/undefined
ranges in ipa-cp, but I don't have enough ipa-cp-foo to do so.
Suggestions welcome, if you don't like special casing this for ipa-cp.
Or perhaps as a follow up.

No idea why we create ranges of floats from ipa-cp.  I presume it's
coming from propagate_vr_across_jump_function?  Or somewhere else?


I believe it was ipcp_vr_lattice::set_to_bottom, while changing an 
UNDEFINED to VARYING.  IMO, we shouldn't even have created UNDEFINED 
ranges of floats.  It's not like we can do anything with float ranges.






In this patch I start introducing a couple small API changes that will
be needed for range-ops.  Since they're needed/useful for this patch, I
started adding them on a need-to-use basis.  They are:

value_range_base (tree type)

 This is our constructor for building a varying:
 value_range_base foo (some_type);

value_range_base::supports_type_p(tree type)

 We use this instead of having to test everywhere for
 INTEGRAL_TYPE_P and POINTER_TYPE_P which VRP uses throughout.
 I have not ported every use of the INTEGRAL || POINTER in the
 compiler to this function.  But that could be a follow up
 cleanup if someone (else) is interested :).

Cleanups of this nature are pre-approved once this patch is ACK'd and
installed.




value_range_base_normalize_symbolics():

 This normalizes symbolics into constants.  In VRP we usually
 normalize necessary symbolics into [MIN, MAX].  This patch does
 slightly better.  Now we transform:

   // [SYM, SYM] -> VARYING
   // [SYM, NUM] -> [-MIN, NUM]
   // [NUM, SYM] -> [NUM, +MAX]
   // ~[SYM, NUM] -> [NUM + 1, +MAX]
   // ~[NUM, SYM] -> [-MIN, NUM - 1]

 TBH, this bit and its use in *multiplicative_op probably fits
 better with the canonicalization patch, but as I said.  They're
 a bit intertwined.  Ughh.

I think it does fit better there, but we ought to be able to manage.




Finally, as you mentioned before, we need a way of caching varyings in
the allocation pool.  The type_range_cache class in the attached patch
is Andrew's doing, but I'll be happy to take the blame and address
anything that needs doing.

Tested on x86-64 Linux with --enable-languages=all.

Aldy

range-ops-type.patch

commit 24c3a6a2cb7424a9c022930cada3a5f3c84a388a
Author: Aldy Hernandez 
Date:   Fri Jun 28 11:00:10 2019 +0200

 VR_UNDEFINED and VR_VARYING now have a type.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 01fb97cedb2..a5247735694 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,72 @@
+2019-07-01  Aldy Hernandez  
+
+   * gimple-ssa-evrp-analyze.c (record_ranges_from_phis): Skip PHIs
+   who's result are not valid for a value_range.
+   Set type for varying value_range.
+   * ipa-cp.c (ipcp_vr_lattice::init): Set type for varying/undef
+   value_range.
+   (ipcp_vr_lattice::set_to_bottom): Same.
+   (initialize_node_lattices): Same.
+   * tree-ssa-threadedge.c (record_temporary_equivalences_from_phis):
+   Same.
+   * tree-ssanames.c (get_range_info): Same.
+   * tree-vrp.c (value_range::set_equiv): Do not set equiv on
+   undef/varying.
+   (value_range_base::value_range_base): New constructor.
+   (value_range_base::check): Remove assert for empty min/max.
+   (value_range_base::equal_p): Allow comparison of typeless undefs.
+   (value_range_base::set_undefined): Add type.
+   (value_range::set_undefined): Same.
+   (value_range_base::set_varying): Same.
+   (value_range::set_varying): Same.
+   (value_range_base::type): Remove assert.
+   (value_range_base::dump): Display type for varying/undef.
+   (value_range_base::dump): Add argument-less overload.
+   (value_range::dump): Same.
+   (vrp_val_max): Add handle_pointers argument.
+   (vrp_val_min): Same.
+   (vrp_val_is_max): Same.
+   (vrp_val_is_min): Same.
+   (value_range_bas

Re: [range-ops] patch 02/04: enforce canonicalization in value_range

2019-07-03 Thread Aldy Hernandez

On 7/2/19 5:36 PM, Jeff Law wrote:


I don't see anything inherently concerning here.  I do wonder if there's
any value in having a debugging function in the class that would iterate
over the ranges and check them for proper canonicalization, verify that
VR_{VARYING,UNDEFINED} objects do not have equivalences, etc.  Thoughts?


In patch 01 we have:


diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 594ee9adc17..97046c22ed1 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -86,6 +86,8 @@ value_range_base::set (enum value_range_kind kind, tree min, t
ree max)
 void
 value_range::set_equiv (bitmap equiv)
 {
+  if (undefined_p () || varying_p ())
+equiv = NULL;


So it shouldn't be possible to attach an equivalence to a VARYING / 
UNDEFINED range.  Plus, we already have a sanity checker:



void
value_range::check ()
{
  value_range_base::check ();
  switch (m_kind)
{
case VR_UNDEFINED:
case VR_VARYING:
  gcc_assert (!m_equiv || bitmap_empty_p (m_equiv));
default:;
}
}


We have no methods for altering a range, except for changing 
equivalences.  So it shouldn't be possible to create a non-canonicalized 
range.


Aldy


[patch2/2][arm]: remove builtin expand for sha1

2019-07-03 Thread Sylvia Taylor
Greetings,

This patch removes the builtin expand handling for sha1h/c/m/p and
replaces it with expand patterns. This should make it more consistent
with how we handle intrinsic implementations and cleans up the custom
sha1 code in the arm_expand builtins for unop and ternop.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Cheers,
Syl

gcc/ChangeLog:

2019-07-03  Sylvia Taylor  

* config/arm/arm-builtins.c
(arm_expand_ternop_builtin): Remove builtin_sha1cpm.
(arm_expand_unop_builtin): Remove builtin_sha1h.
* config/arm/crypto.md
(crypto_sha1h): New expand pattern.
(crypto_sha1c): Likewise.
(crypto_sha1m): Likewise.
(crypto_sha1p): Likewise.
(crypto_sha1h_lb): Modify.
(crypto_sha1c_lb): Likewise.
(crypto_sha1m_lb): Likewise.
(crypto_sha1p_lb): Likewise.
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 
f646ab537fcdac54a3eaf0f1fa403698e29ef005..4702a4078d1f9fd766a5efccbfdc58e2b927133c
 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -1993,25 +1993,12 @@ arm_expand_ternop_builtin (enum insn_code icode,
   rtx op0 = expand_normal (arg0);
   rtx op1 = expand_normal (arg1);
   rtx op2 = expand_normal (arg2);
-  rtx op3 = NULL_RTX;
 
-  /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
- lane operand depending on endianness.  */
-  bool builtin_sha1cpm_p = false;
-
-  if (insn_data[icode].n_operands == 5)
-{
-  gcc_assert (icode == CODE_FOR_crypto_sha1c
-  || icode == CODE_FOR_crypto_sha1p
-  || icode == CODE_FOR_crypto_sha1m);
-  builtin_sha1cpm_p = true;
-}
   machine_mode tmode = insn_data[icode].operand[0].mode;
   machine_mode mode0 = insn_data[icode].operand[1].mode;
   machine_mode mode1 = insn_data[icode].operand[2].mode;
   machine_mode mode2 = insn_data[icode].operand[3].mode;
 
-
   if (VECTOR_MODE_P (mode0))
 op0 = safe_vector_operand (op0, mode0);
   if (VECTOR_MODE_P (mode1))
@@ -2034,13 +2021,8 @@ arm_expand_ternop_builtin (enum insn_code icode,
 op1 = copy_to_mode_reg (mode1, op1);
   if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
 op2 = copy_to_mode_reg (mode2, op2);
-  if (builtin_sha1cpm_p)
-op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
 
-  if (builtin_sha1cpm_p)
-pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
-  else
-pat = GEN_FCN (icode) (target, op0, op1, op2);
+  pat = GEN_FCN (icode) (target, op0, op1, op2);
   if (! pat)
 return 0;
   emit_insn (pat);
@@ -2096,16 +2078,8 @@ arm_expand_unop_builtin (enum insn_code icode,
   rtx pat;
   tree arg0 = CALL_EXPR_ARG (exp, 0);
   rtx op0 = expand_normal (arg0);
-  rtx op1 = NULL_RTX;
   machine_mode tmode = insn_data[icode].operand[0].mode;
   machine_mode mode0 = insn_data[icode].operand[1].mode;
-  bool builtin_sha1h_p = false;
-
-  if (insn_data[icode].n_operands == 3)
-{
-  gcc_assert (icode == CODE_FOR_crypto_sha1h);
-  builtin_sha1h_p = true;
-}
 
   if (! target
   || GET_MODE (target) != tmode
@@ -2121,13 +2095,9 @@ arm_expand_unop_builtin (enum insn_code icode,
   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
 }
-  if (builtin_sha1h_p)
-op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
 
-  if (builtin_sha1h_p)
-pat = GEN_FCN (icode) (target, op0, op1);
-  else
-pat = GEN_FCN (icode) (target, op0);
+  pat = GEN_FCN (icode) (target, op0);
+
   if (! pat)
 return 0;
   emit_insn (pat);
diff --git a/gcc/config/arm/crypto.md b/gcc/config/arm/crypto.md
index 
d1ae76800d94a5a9e06e109dc8dc0328166dcfdc..fc43a7862f9886f4249235f5836006c51fce7340
 100644
--- a/gcc/config/arm/crypto.md
+++ b/gcc/config/arm/crypto.md
@@ -66,13 +66,23 @@
of the V4SI, adjusted for endianness. Required due to neon_vget_lane and
neon_set_lane that change the element ordering in memory for big-endian.  */
 
-(define_insn "crypto_sha1h"
+(define_expand "crypto_sha1h"
+  [(set (match_operand:V4SI 0 "register_operand")
+   (match_operand:V4SI 1 "register_operand"))]
+  "TARGET_CRYPTO"
+{
+  rtx op2 = GEN_INT (NEON_ENDIAN_LANE_N (V2SImode, 0));
+  emit_insn (gen_crypto_sha1h_lb (operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_insn "crypto_sha1h_lb"
   [(set (match_operand:V4SI 0 "register_operand" "=w")
-  (unspec:V4SI
- [(vec_select:SI
-   (match_operand:V4SI 1 "register_operand" "w")
-   (parallel [(match_operand:SI 2 "immediate_operand" "i")]))]
-  UNSPEC_SHA1H))]
+   (unspec:V4SI
+ [(vec_select:SI
+  (match_operand:V4SI 1 "register_operand" "w")
+  (parallel [(match_operand:SI 2 "immediate_operand" "i")]))]
+   UNSPEC_SHA1H))]
   "TARGET_CRYPTO && INTVAL (operands[2]) == NEON_ENDIAN_LANE_N (V2SImode, 0)"
   "sha1h.32\\t%q0, %q1"
   [(set_attr "type" "crypto_sha1_fast")]
@@ -90,7 +100,22 @@
 

[PATCH, committed] Add myself to MAINTAINERS

2019-07-03 Thread Andrea Corallo
Hi all,
I have committed the attached patch adding myself to the Write After
Approval section of the MAINTAINERS file.

Bests
  Andrea

ChangeLog:

2019-07-02  Andrea Corallo  

* MAINTAINERS (Write After Approval): Add myself.
diff --git a/MAINTAINERS b/MAINTAINERS
index b8d703c..7c1eebc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -353,6 +353,7 @@ William Cohen	
 Michael Collison
 Josh Conner	
 R. Kelley Cook	
+Andrea Corallo	
 Christian Cornelssen
 Ludovic Courtès	
 Lawrence Crowl	


[patch1/2][arm][PR90317]: fix sha1 patterns

2019-07-03 Thread Sylvia Taylor
Greetings,

This patch fixes:

1) Ice message thrown when using the crypto_sha1h intrinsic due to
incompatible mode used for zero_extend. Removed zero extend as it is
not a good choice for vector modes and using an equivalent single
mode like TI (128bits) instead of V4SI produces extra instructions
making it inefficient.

This affects gcc version 8 and above.

2) Incorrect combine optimizations made due to vec_select usage
in the sha1 patterns on arm. The patterns should only combine
a vec select within a sha1h instruction when the lane is 0.

This affects gcc version 5 and above.

- Fixed by explicitly declaring the valid const int for such
optimizations. For cases when the lane is not 0, the vector
lane selection now occurs in a e.g. vmov instruction prior 
to sha1h.

- Updated the sha1h testcases on arm to check for additional
cases with custom vector lane selection.

The intrinsic functions for the sha1 patterns have also been
simplified which seems to eliminate extra vmovs like:
- vmov.i32 q8, #0.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Cheers,
Syl

gcc/ChangeLog:

2019-07-03  Sylvia Taylor  

PR target/90317
* config/arm/arm_neon.h
(vsha1h_u32): Refactor.
(vsha1cq_u32): Likewise.
(vsha1pq_u32): Likewise.
(vsha1mq_u32): Likewise.
* config/arm/crypto.md:
(crypto_sha1h): Remove zero extend, correct vec select.
(crypto_sha1c): Correct vec select.
(crypto_sha1m): Likewise.
(crypto_sha1p): Likewise.

gcc/testsuite/ChangeLog:

2019-07-03  Sylvia Taylor  

PR target/90317
* gcc.target/arm/crypto-vsha1cq_u32.c (foo): Change.
(GET_LANE, TEST_SHA1C_VEC_SELECT): New.
* gcc.target/arm/crypto-vsha1h_u32.c (foo): Change.
(GET_LANE, TEST_SHA1H_VEC_SELECT): New.
* gcc.target/arm/crypto-vsha1mq_u32.c (foo): Change.
(GET_LANE, TEST_SHA1M_VEC_SELECT): New.
* gcc.target/arm/crypto-vsha1pq_u32.c (foo): Change.
(GET_LANE, TEST_SHA1P_VEC_SELECT): New.
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 
6b982392ece69bb245ffd3bdc34d09c6f01745eb..1f200d491d1de3993bc3a682d586da137958ff6b
 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -16938,37 +16938,32 @@ __extension__ extern __inline uint32_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1h_u32 (uint32_t __hash_e)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  __t = __builtin_arm_crypto_sha1h (__t);
-  return vgetq_lane_u32 (__t, 0);
+  return vgetq_lane_u32 (__builtin_arm_crypto_sha1h (vdupq_n_u32 (__hash_e)),
+0);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk);
+  return __builtin_arm_crypto_sha1c (__hash_abcd, vdupq_n_u32 (__hash_e),
+__wk);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk);
+  return __builtin_arm_crypto_sha1p (__hash_abcd, vdupq_n_u32 (__hash_e),
+__wk);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk);
+  return __builtin_arm_crypto_sha1m (__hash_abcd,  vdupq_n_u32 (__hash_e),
+__wk);
 }
 
 __extension__ extern __inline uint32x4_t
diff --git a/gcc/config/arm/crypto.md b/gcc/config/arm/crypto.md
index 
63d9d9ffa424fa51b05ebee5138b2c7c0f304745..30ab1dbeb1205129c532a1a7f1763cf140440595
 100644
--- a/gcc/config/arm/crypto.md
+++ b/gcc/config/arm/crypto.md
@@ -62,14 +62,18 @@
   [(set_attr "type" "")]
 )
 
+/* The vec_select operation always selects index 0 from the lower V2SI subreg
+   of the V4SI, adjusted for endianness. Required due to neon_vget_lane and
+   neon_set_lane that change the element ordering in memory for big-endian.  */
+
 (define_insn "crypto_sha1h"
   [(set (match_operand:V4SI 0 "register_operand" "=w")
-(zero_extend:V4SI
-  (unspec:SI [(vec_select:SI
-(match_operand:V4SI 1 "register_operand" "w")
-(parallel [(match_operand:SI 2 "immediate_operand" 
"i")]))]
-   UNSPEC_SHA1H)))]
-  "TARGET_CRYPTO"
+  

Re: [range-ops] patch 01/04: types for VR_UNDEFINED and VR_VARYING

2019-07-03 Thread Aldy Hernandez




On 7/3/19 4:28 AM, Richard Biener wrote:

On Mon, Jul 1, 2019 at 10:52 AM Aldy Hernandez  wrote:


As discussed before, this enforces types on undefined and varying, which
makes everything more regular, and removes some special casing
throughout range handling.


I don't like it too much given you need to introduce that "cache".

Why do VARYING or UNDEFINED need a type?  Nobody should ever need
to ask for the type of a range anyhow - the type should be always that from
the context we're looking at (the SSA name the range is associated with,
the operation we are performing on one or two ranges, etc.).

Thinking again about this it looks fundamentally wrong to associate
a type with the VARYING or UNDEFINED lattice states.


We discussed this 2 weeks ago, and it was my understanding that we had 
an reached an agreement on the general approach.  Types on varying and 
undefined was the *first* thing I brought up.  Explanation quoted below.


By the way, the type for varying/undefined requires no space in the 
value_range_base structure, as it is kept in the min/max fields which we 
already use for VR_RANGE/VR_ANTI_RANGE.


Aldy

https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01292.html

In order to unify the APIs for value_range and irange, we'd like to make
some minor changes to value_range.  We believe most of these changes
could go in now, and would prefer so, to get broader testing and
minimize the plethora of changes we drag around on our branch.

First, introduce a type for VR_VARYING and VR_UNDEFINED.


irange utilizes 0 or more sub-ranges to represent a range, and VARYING
is simply one subrange [MIN, MAX].value_range represents this with
VR_VARYING, and since there is no type associated with it, we cannot
calculate the lower and upper bounds for the range.  There is also a
lack of canonicalness in value range in that VR_VARYING and [MIN, MAX]
are two different representations of the same value.

We tried to adjust irange to not associate a type with the empty range
[] (representing undefined), but found we were unable to perform all
operations properly.  In particular, we cannot invert an empty range.
i.e. invert ( [] ) should produce [MIN, MAX].  Again, we need to have a
type associated with this empty range.

We'd like to tweak value_range so that set_varying() and set_undefined()
both take a type, and then always set the min/max fields based on that
type.  This takes no additional memory in the structure, and is
virtually transparent to all the existing uses of value_range.

This allows:
1)  invert to be implemented properly for both VARYING and UNDEFINED
by simply changing one to the other.
2)  the type() method to always work without any special casing by
simply returning TREE_TYPE(min)
3)  the new incoming bounds() routines to work trivially for these
cases as well (lbound/ubound, num_pairs(), etc).


Re: [PATCH 2/2] Rename SINGE_VALUE to TOPN_VALUES counters.

2019-07-03 Thread Jan Hubicka
> And the second part is rename so that it reflect reality
> that single value can actually track multiple values.
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Thanks,
> Martin

> From cc9e93d43941176e92b5821e5a8134a5319a10b4 Mon Sep 17 00:00:00 2001
> From: Martin Liska 
> Date: Thu, 20 Jun 2019 14:50:23 +0200
> Subject: [PATCH 2/2] Rename SINGE_VALUE to TOPN_VALUES counters.
> 
> gcc/ChangeLog:
> 
> 2019-06-20  Martin Liska  
> 
>   * gcov-counter.def (GCOV_COUNTER_V_SINGLE): Remove.
>   (GCOV_COUNTER_V_TOPN): New.
>   (GCOV_COUNTER_V_INDIR): Use _topn.
>   * gcov-io.h (GCOV_DISK_SINGLE_VALUES): Remove.
>   (GCOV_TOPN_VALUES): New.
>   (GCOV_SINGLE_VALUE_COUNTERS): Remove.
>   (GCOV_TOPN_VALUES_COUNTERS): New.
>   * profile.c (instrument_values): Use HIST_TYPE_TOPN_VALUES.
>   * tree-profile.c:
>   (gimple_init_gcov_profiler): Rename variables from one_value
>   to topn_values.
>   (gimple_gen_one_value_profiler): Remove.
>   (gimple_gen_topn_values_profiler): New function.
>   * value-prof.c (dump_histogram_value): Use TOPN_VALUES
>   names instead of SINGLE_VALUE.
>   (stream_out_histogram_value): Likewise.
>   (stream_in_histogram_value): Likewise.
>   (get_most_common_single_value): Likewise.
>   (gimple_divmod_fixed_value_transform): Likewise.
>   (gimple_stringops_transform): Likewise.
>   (gimple_divmod_values_to_profile): Likewise.
>   (gimple_stringops_values_to_profile): Likewise.
>   (gimple_find_values_to_profile): Likewise.
>   * value-prof.h (enum hist_type): Rename to TOPN.
>   (gimple_gen_one_value_profiler): Remove.
>   (gimple_gen_topn_values_profiler): New.
> 
> libgcc/ChangeLog:
> 
> 2019-06-20  Martin Liska  
> 
>   * Makefile.in: Use topn_values instead of one_value names.
>   * libgcov-merge.c (__gcov_merge_single): Move to ...
>   (__gcov_merge_topn): ... this.
>   (merge_single_value_set): Move to ...
>   (merge_topn_values_set): ... this.
>   * libgcov-profiler.c (__gcov_one_value_profiler_body): Move to
>   ...
>   (__gcov_topn_values_profiler_body): ... this.
>   (__gcov_one_value_profiler_v2): Move to ...
>   (__gcov_topn_values_profiler): ... this.
>   (__gcov_one_value_profiler_v2_atomic): Move to ...
>   (__gcov_topn_values_profiler_atomic): ... this.
>   (__gcov_indirect_call_profiler_v4): Remove.
>   * libgcov-util.c (__gcov_single_counter_op): Move to ...
>   (__gcov_topn_counter_op): ... this.
>   * libgcov.h (L_gcov_merge_single): Remove.
>   (L_gcov_merge_topn): New.
>   (__gcov_merge_single): Remove.
>   (__gcov_merge_topn): New.
>   (__gcov_one_value_profiler_v2): Move to ..
>   (__gcov_topn_values_profiler): ... this.
>   (__gcov_one_value_profiler_v2_atomic): Move to ...
>   (__gcov_topn_values_profiler_atomic): ... this.

OK,
I would rename the __gcov_topn_values_profiler to _v2 since we had this
function before.

Honza


Re: [PATCH] [RFC, PGO+LTO] Missed function specialization + partial devirtualization

2019-07-03 Thread Jan Hubicka
> Hi.
> 
> So the first part is about support of N tracked values to be supported.
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Thanks,
> Martin

> From f3e361fb6d799acf538bc76a91bfcc8e265b7cbe Mon Sep 17 00:00:00 2001
> From: Martin Liska 
> Date: Wed, 19 Jun 2019 14:15:14 +0200
> Subject: [PATCH 1/2] Support N values in libgcov for single value counter
>  type.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-06-20  Martin Liska  
> 
>   * gcc.dg/tree-prof/val-prof-2.c: Update scanned pattern
>   as we do now better.
> 
> libgcc/ChangeLog:
> 
> 2019-06-20  Martin Liska  
> 
>   * libgcov-merge.c (merge_single_value_set): Support N values.
>   * libgcov-profiler.c (__gcov_one_value_profiler_body): Likewise.

OK,
Thanks.
Honza


Re: [GCC][middle-end] Add rules to strip away unneeded type casts in expressions (2nd patch)

2019-07-03 Thread Richard Biener
On Tue, 2 Jul 2019, Tamar Christina wrote:

> 
> Hi All,
> 
> Here's an updated patch with the changes processed from the previous review.
> 
> I've bootstrapped and regtested on aarch64-none-linux-gnu and 
> x86_64-pc-linux-gnu and no issues.
> 
> Ok for trunk?

+ (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@1)))
+   (op @1 (convert @2))
+   (with { tree utype = unsigned_type_for (TREE_TYPE (@1)); }
+   (convert (op (convert:utype @1)

indenting is off, one space more for (convert (it's inside the with)

+   (convert:utype @2)
+  (with { tree arg0 = strip_float_extensions (@1);

indenting is off here, one space less for the (with please.

you'll run into strip_float_extensions for integer types as well so
please move the FLOAT_TYPE_P (type) check before the with like

 (if (FLOAT_TYPE_P (type)
  && DECIMAL_FLOAT_TYPE_P (TREE_TPE (@0)) == DECIMAL_FLOAT_TYPE_P 
(type))
  (with { tree arg0 = strip_float_extensions (@1);

+ (if ((newtype == dfloat32_type_node
+   || newtype == dfloat64_type_node
+   || newtype == dfloat128_type_node)
+ && newtype == type)
+   (convert:newtype (op (convert:newtype @1) (convert:newtype 
@2)))

I think you want to elide the outermost convert:newtype here and use

(op (convert:newtype @1) (convert:newtype @2))

newtype == type check you also want to write types_match_p (newtype, type)

+ (if (TYPE_PRECISION (newtype) < TYPE_PRECISION (itype)
+  && (flag_unsafe_math_optimizations
+  || (TYPE_PRECISION (newtype) == TYPE_PRECISION 
(type)
+  && real_can_shorten_arithmetic (TYPE_MODE 
(itype),
+  TYPE_MODE 
(type))
+  && !excess_precision_type (newtype
+(convert:newtype (op (convert:newtype @1)
+ (convert:newtype @2)))

here the outermost convert looks bogus - you need to build an
expression of type 'type' thus

   (convert:type (op (convert:newtype @1) (convert:newtype @2)))

I think you also want to avoid endless recursion by adding a

 && !types_match_p (itype, newtype)

since in that case you've re-created the original expression.

OK with those changes.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> The 07/02/2019 11:20, Richard Biener wrote:
> > On Tue, 2 Jul 2019, Tamar Christina wrote:
> > 
> > > Hi Richi,
> > > 
> > > The 06/25/2019 10:02, Richard Biener wrote:
> > > > 
> > > > This looks like a literal 1:1 translation plus merging with the
> > > > existing pattern around integers.  You changed
> > > > (op:s@0 (convert@3 @1) (convert?@4 @2)) to
> > > > (op:s@0 (convert1?@3 @1) (convert2?@4 @2)) where this now also
> > > > matches if there are no inner conversions at all - was that a
> > > > desired change or did you merely want to catch when the first
> > > > operand is not a conversion but the second is, possibly only
> > > > for the RDIV_EXPR case?
> > > >
> > > 
> > > Yes, the ? ? is for the RDIV case, I really only want (c a) `op` (c b),
> > > a `op` (c b) and (c a) `op` b.  But didn't find a way to do this.
> > 
> > One way would be to do
> > 
> >  (simplify
> >   (convert (op:sc@0 (convert @1) (convert? @2)))
> > 
> > but that doesn't work for RDIV.  Using :C is tempting but you
> > do not get to know the original operand order which you of
> > course need.  So I guess the way you do it is fine - you
> > could guard all of the code with a few types_match () checks
> > but I'm not sure it is worth the trouble.
> > 
> > Richard.
> > 
> > > The only thing I can think of that gets this is without overmatching is
> > > to either duplicate the code or move this back to a C helper function and
> > > call that from match.pd.  But I was hoping to have it all in match.pd
> > > instead of hiding it away in a C call.
> > > 
> > > Do you have a better way of doing it or a preference to an approach?
> > >  
> > > > +(for op (plus minus mult rdiv)
> > > > + (simplify
> > > > +   (convert (op:s@0 (convert1?@3 @1) (convert2?@4 @2)))
> > > > +   (with { tree arg0 = strip_float_extensions (@1);
> > > > +  tree arg1 = strip_float_extensions (@2);
> > > > +  tree itype = TREE_TYPE (@0);
> > > > 
> > > > you now unconditionally call strip_float_extensions on each operand
> > > > even for the integer case, please sink stuff only used in one
> > > > case arm.  I guess keeping the integer case first via
> > > > 
> > > 
> > > Done, Initially didn't think it would be an issue since I don't use the 
> > > value it
> > > creates in the integer case. But I re-ordered it.
> > >  
> > > > should work (with the 'with' being in the ifs else position).
> > > > 
> > > > +  (if (code == REAL_TYPE)
> > > > +   /* Ignore the conversion if we don't need to store intermediate
> > > > +  results and neither type is a decimal float.  */
> > > > + (if

Re: [PATCH, Modula-2 (C/C++/D/F/Go/Jit)] (Register spec fn) (v2)

2019-07-03 Thread Rainer Orth
Hi Gaius,

> here is version two of the patches which introduce Modula-2 into the
> GCC trunk.  The patches include:
>
>   (*)  a patch to allow all front ends to register a lang spec function.
>(included are patches for all front ends to provide an empty
> callback function).
>   (*)  patch diffs to allow the Modula-2 front end driver to be
>built using GCC Makefile and friends.
>
> The compressed tarball includes:
>
>   (*)  gcc/m2  (compiler driver and lang-spec stuff for Modula-2).
>Including the need for registering lang spec functions.
>   (*)  gcc/testsuite/gm2  (a Modula-2 dejagnu test to ensure that
>the gm2 driver is built and can understands --version).
>
> These patches have been re-written after taking on board the comments
> found in this thread:
>
>https://gcc.gnu.org/ml/gcc-patches/2013-11/msg02620.html
>
> it is a revised patch set from:
>
>https://gcc.gnu.org/ml/gcc-patches/2019-06/msg00220.html
>
> I've run make bootstrap and run the regression tests on trunk and no
> extra failures occur for all languages touched in the ChangeLog.
>
> I'm currently tracking gcc trunk and gcc-9 with gm2 (which works well
> with amd64/arm64/i386) - these patches are currently simply for the
> driver to minimise the patch size.  There are also > 1800 tests in a
> dejagnu testsuite for gm2 which can be included at some future time.

I meant to give a build with gm2 included a try on Solaris, but ended up
pretty much confused:

* I've started with the gm2 repo on savannah.  Running the combine-trees
  script on master tried to combine gm2 with gcc 4.7.4.  Trying again
  with configure --with-gcc=none (no branch, for trunk?) didn't work
  either (don't remember the details off-hand).

* Next, I discovered and tried the gcc_trunk branch there.  While it
  matches the patch set you sent here, it lacks most of the compiler
  proper, which only lives on master!?  In addition, the patches in
  there lack support for building libgm2.  Those are present on the
  master branch (which has both trunk and trunc in
  gcc-versionno/gcc/gm2/patches/gcc).  I tried to merge the trees and
  apply the patches manually, but failed again later.

At this point, I gave up.  Am I missing something fundamental here?

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[Ada] Forced elaboration order in Elaboration order v4.0

2019-07-03 Thread Pierre-Marie de Rodat
This patch refactors the forced elaboration order functionality,
reintegrates it in Binde, and impelements it in Bindo.


-- Source --


--  server.ads

package Server is
end Server;

--  client.ads

with Server;

package Client is
end Client;

--  main.adb

with Client;

procedure Main is begin null; end Main;

--  duplicate_1.txt

server (spec)
client (spec)
server (spec)

--  error_unit_1.txt

no such unit
client (spec)

--  error_unit_2.txt

no such unit
client (spec)

--  error_unit_3.txt

no such unit --  comment
client (spec)

--  error_unit_4.txt

 no such unit --  comment

client (spec)

--  error_unit_5.txt

no such unit (body)
client (spec)

--  error_unit_6.txt

no such unit (body)
client (spec)

--  error_unit_7.txt

no such unit (body)--  comment
client (spec)

--  error_unit_8.txt

no such unit (body)--  comment
client (spec)

--  error_unit_9.txt

no such unit--  comment
client (spec)

--  no_unit_1.txt

--  no_unit_2.txt

--  no_unit_3.txt

  --  comment

--  no_unit_4.txt

--  no_unit_5.txt

--  no_unit_6.txt

   --  comment

--  no_unit_7.txt

--  no_unit_8.txt

--  comment
--  comment

--  ok_unit_1.txt

server (spec)
client (spec)

--  ok_unit_2.txt

server (spec)
client (spec)

--  ok_unit_3.txt

server (spec)
client (spec)

--  ok_unit_4.txt

server (spec)  --  comment
client (spec)

--  ok_unit_5.txt

server (spec)
client (spec)

--  ok_unit_6.txt

server (spec)
client (spec)--  comment

--  ok_unit_7.txt

server (spec)
client (spec)--  comment

--  ok_unit_8.txt

--  comment
--  comment
server (spec)

   --  comment
--  comment

client (spec)--  comment

--  ok_unit_9.txt

server (spec)--  comment
client (spec)


-- Compilation and output --

$ gnatmake -q main.adb
$ gnatbind -fno_unit_1.txt main.ali
$ gnatbind -fno_unit_2.txt main.ali
$ gnatbind -fno_unit_3.txt main.ali
$ gnatbind -fno_unit_4.txt main.ali
$ gnatbind -fno_unit_5.txt main.ali
$ gnatbind -fno_unit_6.txt main.ali
$ gnatbind -fno_unit_7.txt main.ali
$ gnatbind -fno_unit_8.txt main.ali
$ gnatbind -ferror_unit_1.txt main.ali
$ gnatbind -ferror_unit_2.txt main.ali
$ gnatbind -ferror_unit_3.txt main.ali
$ gnatbind -ferror_unit_4.txt main.ali
$ gnatbind -ferror_unit_5.txt main.ali
$ gnatbind -ferror_unit_6.txt main.ali
$ gnatbind -ferror_unit_7.txt main.ali
$ gnatbind -ferror_unit_8.txt main.ali
$ gnatbind -ferror_unit_9.txt main.ali
$ gnatbind -fduplicate_1.txt main.ali
$ gnatbind -fok_unit_1.txt main.ali
$ gnatbind -fok_unit_2.txt main.ali
$ gnatbind -fok_unit_3.txt main.ali
$ gnatbind -fok_unit_4.txt main.ali
$ gnatbind -fok_unit_5.txt main.ali
$ gnatbind -fok_unit_6.txt main.ali
$ gnatbind -fok_unit_7.txt main.ali
$ gnatbind -fok_unit_8.txt main.ali
$ gnatbind -fok_unit_9.txt main.ali
"no such unit": not present; ignored
"no such unit": not present; ignored
"no such unit": not present; ignored
"no such unit": not present; ignored
"no such unit%b": not present; ignored
"no such unit%b": not present; ignored
"no such unit%b": not present; ignored
"no such unit%b": not present; ignored
"no such unit": not present; ignored
server (spec) <-- client (spec)
error: duplicate_1.txt:3: duplicate unit name "server (spec)" from line 1
server (spec) <-- client (spec)
server (spec) <-- client (spec)
server (spec) <-- client (spec)
server (spec) <-- client (spec)
server (spec) <-- client (spec)
server (spec) <-- client (spec)
server (spec) <-- client (spec)
server (spec) <-- client (spec)
server (spec) <-- client (spec)

Tested on x86_64-pc-linux-gnu, committed on trunk

2019-07-03  Hristian Kirtchev  

gcc/ada/

* binde.adb: Remove with clause for System.OS_Lib.
(Force_Elab_Order): Refactor the majority of the code in Butil.
Use the new forced units iterator to obtain unit names.
* bindo-builders.adb: Add with and use clauses for Binderr,
Butil, Opt, Output, Types, GNAT, and GNAT.Dynamic_HTables.  Add
a hash table which maps units to line number in the forced
elaboration order file.
(Add_Unit): New routine.
(Build_Library_Graph): Create forced edges between pairs of
units listed in the forced elaboration order file.
(Create_Forced_Edge, Create_Forced_Edges, Destroy_Line_Number,
Duplicate_Unit_Error, Hash_Unit, Internal_Unit_Info,
Is_Duplicate_Unit, Missing_Unit_Info): New routines.
* bindo-graphs.adb (Is_Internal_Unit, Is_Predefined_Unit):
Refactor some of the behavior to Bindo-Units.
* bindo-graphs.ads: Enable the enumeration literal for forced
edges.
* bindo-units.adb, bindo-units.ads (Is_Internal_Unit,
Is_Predefined_Unit): New routines.
* butil.adb: Add with and use clauses for Opt, GNAT, and
System.OS_Lib.  Add with clause for Unchecked_Deallocation.
(Has_Next, Iterate_Forced_Units, Next, Parse_N

[Ada] Spurious visibility error in inlined function

2019-07-03 Thread Pierre-Marie de Rodat
This patch corrects the use of tree replication when inlining a function
that returns an unconstrained result, and its sole statement is an
extended return statement. The use of New_Copy_Tree ensires that global
references saved in a generic template are properly carried over when
the function is instantiated and inlined.

Tested on x86_64-pc-linux-gnu, committed on trunk

2019-07-03  Hristian Kirtchev  

gcc/ada/

* inline.adb (Build_Return_Object_Formal): New routine.
(Can_Split_Unconstrained_Function): Code clean up.
(Copy_Formals,Copy_Return_Object): New routines.
(Split_Unconstrained_Function): Code clean up and refactoring.

gcc/testsuite/

* gnat.dg/inline15.adb, gnat.dg/inline15_gen.adb,
gnat.dg/inline15_gen.ads, gnat.dg/inline15_types.ads: New
testcase.--- gcc/ada/inline.adb
+++ gcc/ada/inline.adb
@@ -1706,11 +1706,29 @@ package body Inline is
   --  Use generic machinery to build an unexpanded body for the subprogram.
   --  This body is subsequently used for inline expansions at call sites.
 
+  procedure Build_Return_Object_Formal
+(Loc  : Source_Ptr;
+ Obj_Decl : Node_Id;
+ Formals  : List_Id);
+  --  Create a formal parameter for return object declaration Obj_Decl of
+  --  an extended return statement and add it to list Formals.
+
   function Can_Split_Unconstrained_Function (N : Node_Id) return Boolean;
   --  Return true if we generate code for the function body N, the function
   --  body N has no local declarations and its unique statement is a single
   --  extended return statement with a handled statements sequence.
 
+  procedure Copy_Formals
+(Loc : Source_Ptr;
+ Subp_Id : Entity_Id;
+ Formals : List_Id);
+  --  Create new formal parameters from the formal parameters of subprogram
+  --  Subp_Id and add them to list Formals.
+
+  function Copy_Return_Object (Obj_Decl : Node_Id) return Node_Id;
+  --  Create a copy of return object declaration Obj_Decl of an extended
+  --  return statement.
+
   procedure Split_Unconstrained_Function
 (N   : Node_Id;
  Spec_Id : Entity_Id);
@@ -1757,6 +1775,9 @@ package body Inline is
Body_To_Inline :=
  Copy_Generic_Node (N, Empty, Instantiating => True);
 else
+   --  ??? Shouldn't this use New_Copy_Tree? What about global
+   --  references captured in the body to inline?
+
Body_To_Inline := Copy_Separate_Tree (N);
 end if;
 
@@ -1845,30 +1866,70 @@ package body Inline is
  Set_Ekind (Defining_Entity (Original_Body), Ekind (Spec_Id));
   end Build_Body_To_Inline;
 
+  
+  -- Build_Return_Object_Formal --
+  
+
+  procedure Build_Return_Object_Formal
+(Loc  : Source_Ptr;
+ Obj_Decl : Node_Id;
+ Formals  : List_Id)
+  is
+ Obj_Def : constant Node_Id   := Object_Definition (Obj_Decl);
+ Obj_Id  : constant Entity_Id := Defining_Entity   (Obj_Decl);
+ Typ_Def : Node_Id;
+
+  begin
+ --  Build the type definition of the formal parameter. The use of
+ --  New_Copy_Tree ensures that global references preserved in the
+ --  case of generics.
+
+ if Is_Entity_Name (Obj_Def) then
+Typ_Def := New_Copy_Tree (Obj_Def);
+ else
+Typ_Def := New_Copy_Tree (Subtype_Mark (Obj_Def));
+ end if;
+
+ --  Generate:
+ --
+ --Obj_Id : [out] Typ_Def
+
+ --  Mode OUT should not be used when the return object is declared as
+ --  a constant. Check the definition of the object declaration because
+ --  the object has not been analyzed yet.
+
+ Append_To (Formals,
+   Make_Parameter_Specification (Loc,
+ Defining_Identifier=>
+   Make_Defining_Identifier (Loc, Chars (Obj_Id)),
+ In_Present => False,
+ Out_Present=> not Constant_Present (Obj_Decl),
+ Null_Exclusion_Present => False,
+ Parameter_Type => Typ_Def));
+  end Build_Return_Object_Formal;
+
   --
   -- Can_Split_Unconstrained_Function --
   --
 
   function Can_Split_Unconstrained_Function (N : Node_Id) return Boolean is
- Ret_Node : constant Node_Id :=
-  First (Statements (Handled_Statement_Sequence (N)));
- D : Node_Id;
+ Stmt : constant Node_Id :=
+  First (Statements (Handled_Statement_Sequence (N)));
+ Decl : Node_Id;
 
   begin
  --  No user defined declarations allowed in the function except inside
  --  the unique return statement; implicit labels are the 

[Ada] Incorrect expansion on renamings of formal parameters

2019-07-03 Thread Pierre-Marie de Rodat
This patch fixes an issue whereby a renaming of an unconstrained formal
parameter leads to spurious runtime errors; manifesting either as a
storage or constraint error due to incorrect bounds being assumed.

This issue also occurs when the renamings are implicit such as through
generic instantiations.

Tested on x86_64-pc-linux-gnu, committed on trunk

2019-07-03  Justin Squirek  

gcc/ada/

* sem_ch8.adb (Analyze_Object_Renaming): Add call to search for
the appropriate actual subtype of the object renaming being
analyzed.
(Check_Constrained_Object): Minor cleanup.

gcc/testsuite/

* gnat.dg/renaming13.adb, gnat.dg/renaming14.adb: New testcases.--- gcc/ada/sem_ch8.adb
+++ gcc/ada/sem_ch8.adb
@@ -784,9 +784,9 @@ package body Sem_Ch8 is
 
   begin
  if Nkind_In (Nam, N_Function_Call, N_Explicit_Dereference)
-   and then Is_Composite_Type (Etype (Nam))
-   and then not Is_Constrained (Etype (Nam))
-   and then not Has_Unknown_Discriminants (Etype (Nam))
+   and then Is_Composite_Type (Typ)
+   and then not Is_Constrained (Typ)
+   and then not Has_Unknown_Discriminants (Typ)
and then Expander_Active
  then
 --  If Actual_Subtype is already set, nothing to do
@@ -1122,7 +1122,11 @@ package body Sem_Ch8 is
  Wrong_Type (Nam, T);
   end if;
 
-  T2 := Etype (Nam);
+  --  We must search for an actual subtype here so that the bounds of
+  --  objects of unconstrained types don't get dropped on the floor - such
+  --  as with renamings of formal parameters.
+
+  T2 := Get_Actual_Subtype_If_Available (Nam);
 
   --  Ada 2005 (AI-326): Handle wrong use of incomplete type
 

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/renaming13.adb
@@ -0,0 +1,21 @@
+--  { dg-do run }
+
+procedure Renaming13 is
+   type Stack_Type_Base is array (Natural range <>) of Integer;
+
+   procedure Foo (Buf : in out Stack_Type_Base) is
+  S : Stack_Type_Base renames Buf;
+
+  procedure Init is
+  begin
+ S := (others => 0);
+  end;
+
+   begin
+  Init;
+   end;
+
+   Temp : Stack_Type_Base (1 .. 100);
+begin
+   Foo (Temp);
+end;

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/renaming14.adb
@@ -0,0 +1,32 @@
+--  { dg-do run }
+
+procedure Renaming14 is
+   type Rec_Typ is record
+  XX : Integer;
+   end record;
+
+   type Stack_Type_Base is array (Natural range <>) of Rec_Typ;
+
+   generic
+  S : in out Stack_Type_Base;
+   package Stack is
+  procedure Init;
+   end;
+
+   package body Stack is
+  procedure Init is
+  begin
+ S := (others => (XX => 0));
+  end;
+   end;
+
+   procedure Foo (Buf : in out Stack_Type_Base) is
+  package Stack_Inst is new Stack (Buf);
+   begin
+  Stack_Inst.Init;
+   end;
+
+   Temp : Stack_Type_Base (1 .. 100);
+begin
+   Foo (Temp);
+end;



[Ada] Crash on anonymous access-to-class-wide with tasks

2019-07-03 Thread Pierre-Marie de Rodat
This patch fixes a bug in which if an object declaration is of an
anonymous access type whose designated type is a limited class-wide type
(but not an interface), and the object is initialized with an allocator,
and the designated type of the allocator contains tasks, the compiler
would crash.

Tested on x86_64-pc-linux-gnu, committed on trunk

2019-07-03  Bob Duff  

gcc/ada/

* sem_ch3.adb (Access_Definition): The code was creating a
master in the case where the designated type is a class-wide
interface type. Create a master in the noninterface case as
well. That is, create a master for all limited class-wide types.

gcc/testsuite/

* gnat.dg/task2.adb, gnat.dg/task2_pkg.adb,
gnat.dg/task2_pkg.ads: New testcase.--- gcc/ada/sem_ch3.adb
+++ gcc/ada/sem_ch3.adb
@@ -924,15 +924,16 @@ package body Sem_Ch3 is
  Set_Has_Delayed_Freeze (Current_Scope);
   end if;
 
-  --  Ada 2005: If the designated type is an interface that may contain
-  --  tasks, create a Master entity for the declaration. This must be done
-  --  before expansion of the full declaration, because the declaration may
-  --  include an expression that is an allocator, whose expansion needs the
-  --  proper Master for the created tasks.
+  --  If the designated type is limited and class-wide, the object might
+  --  contain tasks, so we create a Master entity for the declaration. This
+  --  must be done before expansion of the full declaration, because the
+  --  declaration may include an expression that is an allocator, whose
+  --  expansion needs the proper Master for the created tasks.
 
   if Nkind (Related_Nod) = N_Object_Declaration and then Expander_Active
   then
- if Is_Interface (Desig_Type) and then Is_Limited_Record (Desig_Type)
+ if Is_Limited_Record (Desig_Type)
+   and then Is_Class_Wide_Type (Desig_Type)
  then
 Build_Class_Wide_Master (Anon_Type);
 

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/task2.adb
@@ -0,0 +1,9 @@
+--  { dg-do run }
+
+with Task2_Pkg; use Task2_Pkg;
+
+procedure Task2 is
+   X : access T2'Class := new T2;
+begin
+   null;
+end Task2;

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/task2_pkg.adb
@@ -0,0 +1,6 @@
+package body Task2_Pkg is
+   task body T2 is
+   begin
+  null;
+   end T2;
+end Task2_Pkg;

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/task2_pkg.ads
@@ -0,0 +1,4 @@
+package Task2_Pkg is
+   type T is task Interface;
+   task type T2 is new T with end;
+end Task2_pkg;



[Ada] Missing consistency check for constant modifier

2019-07-03 Thread Pierre-Marie de Rodat
This patch fixes an issue whereby instantiations of generic packages
were incorrectly allowed despite formal and actual subprograms not
having matching declarations with anonymous constant access type
parameters.


-- Source --


-- gen1.ads

package Gen1 is
   generic
  with procedure View (IA : not null access constant Integer);
   procedure Dispatch (IA : access Integer);
end;

-- gen2.adb

package body Gen1 is
   procedure Dispatch (IA : access Integer) is
   begin
  View (IA);
   end;
end;

-- bad1.ads

with Gen1;
package Bad1 is
   procedure Bad_View (IA : not null access Integer);
   procedure Bad_Dispatch is new Gen1.Dispatch (Bad_View);
end;

-- bad1.adb

package body Bad1 is
   procedure Bad_View (IA : not null access Integer) is
   begin
  IA.all := IA.all + 1;
   end;
end;

-- gen2.ads

package Gen2 is
   generic
  with procedure View (IA : access constant Integer);
   procedure Dispatch (IA : access Integer);
end;

-- gen2.adb

package body Gen2 is
   procedure Dispatch (IA : access Integer) is
   begin
  View (IA);
   end;
end;

-- bad2.ads

with Gen2;
package Bad2 is
   procedure Bad_View (IA : access Integer);
   procedure Bad_Dispatch is new Gen2.Dispatch (Bad_View);
end;

-- bad2.adb

package body Bad2 is
   procedure Bad_View (IA : access Integer) is
   begin
  IA.all := IA.all + 1;
   end;
end;

-
-- Compilation --
-

$ gnatmake -q bad1.adb
$ bad1.ads:4:04: instantiation error at gen1.ads:3
$ bad1.ads:4:04: not mode conformant with declaration at line 3
$ bad1.ads:4:04: constant modifier does not match
$ gnatmake: "bad1.adb" compilation error
$ gnatmake -q bad2.adb
$ bad2.ads:4:04: instantiation error at gen2.ads:3
$ bad2.ads:4:04: not mode conformant with declaration at line 3
$ bad2.ads:4:04: constant modifier does not match
$ gnatmake: "bad2.adb" compilation error

Tested on x86_64-pc-linux-gnu, committed on trunk

2019-07-03  Justin Squirek  

gcc/ada/

* sem_ch6.adb (Check_Conformance): Add expression checking for
constant modifiers in anonymous access types (in addition to
"non-null" types) so that they are considered "matching" for
subsequent conformance tests.--- gcc/ada/sem_ch6.adb
+++ gcc/ada/sem_ch6.adb
@@ -5444,10 +5444,14 @@ package body Sem_Ch6 is
and then Directly_Designated_Type (Old_Formal_Base) =
 Directly_Designated_Type (New_Formal_Base)
and then ((Is_Itype (Old_Formal_Base)
-   and then Can_Never_Be_Null (Old_Formal_Base))
+   and then (Can_Never_Be_Null (Old_Formal_Base)
+  or else Is_Access_Constant
+(Old_Formal_Base)))
  or else
   (Is_Itype (New_Formal_Base)
-and then Can_Never_Be_Null (New_Formal_Base)));
+and then (Can_Never_Be_Null (New_Formal_Base)
+   or else Is_Access_Constant
+ (New_Formal_Base;
 
  --  Types must always match. In the visible part of an instance,
  --  usual overloading rules for dispatching operations apply, and



[Ada] Crash on front-end inlining of subp. with aspect specifications

2019-07-03 Thread Pierre-Marie de Rodat
This patch fixes a gap in the handling of formals when inlining a call
to a subprogram marked Inline_Always. For the inlining, the formals are
replaced by the actuals in the block constructed for inlining, The
traversal that performs this replacement does not apply to aspect
specifications that may appear in the original body, because these
aspects are only indirectly reachable from the nodes to which they
apply: a separate traversal is required to perform the replacement in
the expressions for any aspect specification present in the source.

Tested on x86_64-pc-linux-gnu, committed on trunk

2019-07-03  Ed Schonberg  

gcc/ada/

* inline.adb (Process_Formals_In_Aspects): New procedure within
Expand_Inlined_Call, to perform a replacement of references to
formals that appear in aspect specifications within the body
being inlined.

gcc/testsuite/

* gnat.dg/inline16.adb, gnat.dg/inline16_gen.adb,
gnat.dg/inline16_gen.ads, gnat.dg/inline16_types.ads: New
testcase.--- gcc/ada/inline.adb
+++ gcc/ada/inline.adb
@@ -2481,6 +2481,13 @@ package body Inline is
   --  thunk generated for it. Replace a return statement with an assignment
   --  to the target of the call, with appropriate conversions if needed.
 
+  function Process_Formals_In_Aspects (N : Node_Id)
+return Traverse_Result;
+  --  Because aspects are linked indirectly to the rest of the tree,
+  --  replacement of formals appearing in aspect specifications must
+  --  be performed in a separate pass, using an instantiation of the
+  --  previous subprogram over aspect specifications reachable from N.
+
   function Process_Sloc (Nod : Node_Id) return Traverse_Result;
   --  If the call being expanded is that of an internal subprogram, set the
   --  sloc of the generated block to that of the call itself, so that the
@@ -2821,6 +2828,29 @@ package body Inline is
 
   procedure Replace_Formals is new Traverse_Proc (Process_Formals);
 
+  
+  -- Process_Formals_In_Aspects --
+  
+
+  function Process_Formals_In_Aspects (N : Node_Id)
+return Traverse_Result
+  is
+ A : Node_Id;
+  begin
+ if Has_Aspects (N) then
+A := First (Aspect_Specifications (N));
+while Present (A) loop
+   Replace_Formals (Expression (A));
+
+   Next (A);
+end loop;
+ end if;
+ return OK;
+  end Process_Formals_In_Aspects;
+
+  procedure Replace_Formals_In_Aspects is
+ new Traverse_Proc (Process_Formals_In_Aspects);
+
   --
   -- Process_Sloc --
   --
@@ -3633,6 +3663,7 @@ package body Inline is
   --  Attach block to tree before analysis and rewriting.
 
   Replace_Formals (Blk);
+  Replace_Formals_In_Aspects (Blk);
   Set_Parent (Blk, N);
 
   if GNATprove_Mode then

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/inline16.adb
@@ -0,0 +1,26 @@
+--  { dg-do compile }
+--  { dg-options "-gnatN" }
+
+with Inline16_Types; use Inline16_Types;
+with Inline16_Gen;
+
+procedure Inline16 is
+   type TYPE1 is record
+  f1 : NvU32;
+  f2 : NvU32;
+  f3 : NvU32;
+   end record
+  with Size => 96, Object_Size => 96;
+
+   package Gfw_Image_Read_Pkg1 is new Inline16_Gen (Payload_Type => TYPE1);
+   use Gfw_Image_Read_Pkg1;
+   procedure Get_Boot_Block_Info(Status : out Integer)
+   is
+  Ifr_Fixed_Min   : TYPE1;
+   begin
+  Gfw_Image_Read(Ifr_Fixed_Min);
+  Status := 13;
+   end Get_Boot_Block_Info;
+begin
+   null;
+end Inline16;

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/inline16_gen.adb
@@ -0,0 +1,16 @@
+with Inline16_Types; use Inline16_Types;
+
+package body Inline16_Gen
+with SPARK_Mode => On
+is
+   procedure Gfw_Image_Read (Data: out Payload_Type)
+  with SPARK_Mode => Off
+   is
+  Array_Len : constant NvU32 := Data'Size / NvU8'Size;
+  Array_Max_Idx : constant NvU32 := Array_Len - 1;
+  type Payload_As_Arr_Type is new Arr_NvU8_Idx32(0 .. Array_Max_Idx);
+  Data_As_Array : Payload_As_Arr_Type with Address => Data'Address;
+   begin
+  null;
+   end Gfw_Image_Read;
+end Inline16_Gen;

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/inline16_gen.ads
@@ -0,0 +1,9 @@
+generic
+   type Payload_Type is private;
+package Inline16_Gen
+with SPARK_Mode => On
+is
+   procedure Gfw_Image_Read(Data   : out Payload_Type)
+ with Inline_Always;
+
+end Inline16_Gen;

--- /dev/null
new file mode 100644
+++ gcc/testsuite/gnat.dg/inline16_types.ads
@@ -0,0 +1,7 @@
+package Inline16_Types with SPARK_Mode is
+
+   type  NvU8 is mod 2 ** 8  with Size => 8;
+   type NvU32 is mod 2 ** 32 with Size => 32;
+
+   type Arr_NvU8_Idx32 is array (NvU32 range <>) of NvU8;
+end Inline16_Types;



  1   2   >